diff options
Diffstat (limited to 'include/linux')
504 files changed, 20103 insertions, 11618 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 974d497a897d..6274758648e3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -133,6 +133,7 @@ union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; struct acpi_prmt_module_header prmt; + struct acpi_cedt_header cedt; }; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); @@ -140,6 +141,9 @@ typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, const unsigned long end); +typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, + void *arg, const unsigned long end); + /* Debugger support */ struct acpi_debugger_ops { @@ -216,6 +220,8 @@ static inline int acpi_debugger_notify_command_complete(void) struct acpi_subtable_proc { int id; acpi_tbl_entry_handler handler; + acpi_tbl_entry_handler_arg handler_arg; + void *arg; int count; }; @@ -232,17 +238,31 @@ int acpi_locate_initial_tables (void); void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); + +#ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + int acpi_table_parse(char *id, acpi_tbl_table_handler handler); -int __init acpi_table_parse_entries(char *id, unsigned long table_size, - int entry_id, - acpi_tbl_entry_handler handler, - unsigned int max_entries); -int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, - struct acpi_subtable_proc *proc, int proc_num, - unsigned int max_entries); +int __init_or_acpilib acpi_table_parse_entries(char *id, + unsigned long table_size, int entry_id, + acpi_tbl_entry_handler handler, unsigned int max_entries); +int 
__init_or_acpilib acpi_table_parse_entries_array(char *id, + unsigned long table_size, struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); +int __init_or_acpilib +acpi_table_parse_cedt(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, void *arg); + int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); @@ -506,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -void __init acpi_no_s4_hw_signature(void); +void __init acpi_check_s4_hw_signature(int check); #endif #ifdef CONFIG_PM_SLEEP @@ -577,7 +597,6 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_MSI_SUPPORT 0x00000010 #define OSC_PCI_EDR_SUPPORT 0x00000080 #define OSC_PCI_HPX_TYPE_3_SUPPORT 0x00000100 -#define OSC_PCI_SUPPORT_MASKS 0x0000019f /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 0x00000001 @@ -587,7 +606,6 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 -#define OSC_PCI_CONTROL_MASKS 0x000000bf #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 @@ -976,6 +994,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) return -ENODEV; } +static inline int acpi_register_wakeup_handler(int wake_irq, + bool (*wakeup)(void *context), void *context) +{ + return -ENXIO; +} + +static inline void acpi_unregister_wakeup_handler( + bool (*wakeup)(void *context), void *context) { } + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC @@ -1016,6 +1043,7 @@ int acpi_subsys_runtime_suspend(struct device *dev); int 
acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); bool acpi_storage_d3(struct device *dev); +bool acpi_dev_state_d0(struct device *dev); #else static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } @@ -1027,6 +1055,10 @@ static inline bool acpi_storage_d3(struct device *dev) { return false; } +static inline bool acpi_dev_state_d0(struct device *dev) +{ + return true; +} #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) @@ -1170,7 +1202,6 @@ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); -struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, @@ -1276,12 +1307,6 @@ acpi_get_next_subnode(const struct fwnode_handle *fwnode, } static inline struct fwnode_handle * -acpi_node_get_parent(const struct fwnode_handle *fwnode) -{ - return NULL; -} - -static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { @@ -1353,6 +1378,7 @@ static inline int lpit_read_residency_count_address(u64 *address) #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); +int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); int find_acpi_cpu_cache_topology(unsigned int cpu, int level); @@ -1365,6 +1391,10 @@ static inline int find_acpi_cpu_topology(unsigned int cpu, int level) { return -EINVAL; } +static inline int find_acpi_cpu_topology_cluster(unsigned int cpu) +{ + return -EINVAL; +} static inline int 
find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; @@ -1379,6 +1409,12 @@ static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) } #endif +#ifdef CONFIG_ACPI_PCC +void acpi_init_pcc(void); +#else +static inline void acpi_init_pcc(void) { } +#endif + #ifdef CONFIG_ACPI extern void acpi_device_notify(struct device *dev); extern void acpi_device_notify_remove(struct device *dev); diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index c68d87b87283..6c7f47846971 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -90,14 +90,8 @@ enum amba_vendor { AMBA_VENDOR_ST = 0x80, AMBA_VENDOR_QCOM = 0x51, AMBA_VENDOR_LSI = 0xb6, - AMBA_VENDOR_LINUX = 0xfe, /* This value is not official */ }; -/* This is used to generate pseudo-ID for AMBA device */ -#define AMBA_LINUX_ID(conf, rev, part) \ - (((conf) & 0xff) << 24 | ((rev) & 0xf) << 20 | \ - AMBA_VENDOR_LINUX << 12 | ((part) & 0xfff)) - extern struct bus_type amba_bustype; #define to_amba_device(d) container_of(d, struct amba_device, dev) @@ -122,24 +116,6 @@ struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t); void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); int amba_device_register(struct amba_device *, struct resource *); -struct amba_device *amba_apb_device_add(struct device *parent, const char *name, - resource_size_t base, size_t size, - int irq1, int irq2, void *pdata, - unsigned int periphid); -struct amba_device *amba_ahb_device_add(struct device *parent, const char *name, - resource_size_t base, size_t size, - int irq1, int irq2, void *pdata, - unsigned int periphid); -struct amba_device * -amba_apb_device_add_res(struct device *parent, const char *name, - resource_size_t base, size_t size, int irq1, - int irq2, void *pdata, unsigned int periphid, - struct resource *resbase); -struct amba_device * -amba_ahb_device_add_res(struct device *parent, const char *name, - 
resource_size_t base, size_t size, int irq1, - int irq2, void *pdata, unsigned int periphid, - struct resource *resbase); void amba_device_unregister(struct amba_device *); struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); int amba_request_regions(struct amba_device *, const char *); diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 71881a2b6f78..5deaddbd7927 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -15,6 +15,10 @@ struct inode; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); +struct file *anon_inode_getfile_secure(const char *name, + const struct file_operations *fops, + void *priv, int flags, + const struct inode *context_inode); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); int anon_inode_getfd_secure(const char *name, diff --git a/include/linux/apple-mailbox.h b/include/linux/apple-mailbox.h new file mode 100644 index 000000000000..720fbb70294a --- /dev/null +++ b/include/linux/apple-mailbox.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Apple mailbox message format + * + * Copyright (C) 2021 The Asahi Linux Contributors + */ + +#ifndef _LINUX_APPLE_MAILBOX_H_ +#define _LINUX_APPLE_MAILBOX_H_ + +#include <linux/types.h> + +/* encodes a single 96bit message sent over the single channel */ +struct apple_mbox_msg { + u64 msg0; + u32 msg1; +}; + +#endif diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index f180240dc95f..cce6136b300a 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -56,16 +56,18 @@ static inline unsigned long topology_get_thermal_pressure(int cpu) return per_cpu(thermal_pressure, cpu); } -void topology_set_thermal_pressure(const struct cpumask *cpus, - unsigned long th_pressure); +void topology_update_thermal_pressure(const struct 
cpumask *cpus, + unsigned long capped_freq); struct cpu_topology { int thread_id; int core_id; + int cluster_id; int package_id; int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; + cpumask_t cluster_sibling; cpumask_t llc_sibling; }; @@ -73,13 +75,16 @@ struct cpu_topology { extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) +#define topology_cluster_id(cpu) (cpu_topology[cpu].cluster_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_cluster_cpumask(cpu) (&cpu_topology[cpu].cluster_sibling) #define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +const struct cpumask *cpu_clustergroup_mask(int cpu); void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 505c679b6a9b..85651e41ded8 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -262,6 +262,8 @@ struct ffa_dev_ops { int (*memory_reclaim)(u64 g_handle, u32 flags); int (*memory_share)(struct ffa_device *dev, struct ffa_mem_ops_args *args); + int (*memory_lend)(struct ffa_device *dev, + struct ffa_mem_ops_args *args); }; #endif /* _LINUX_ARM_FFA_H */ diff --git a/include/linux/ata.h b/include/linux/ata.h index 1b44f40c7700..199e47e97d64 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -329,6 +329,7 @@ enum { ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, + ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, diff --git a/include/linux/atalk.h 
b/include/linux/atalk.h index f6034ba774be..a55bfc6567d0 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -113,7 +113,7 @@ extern int aarp_proto_init(void); /* Inter module exports */ /* Give a device find its atif control structure */ -#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) +#if IS_ENABLED(CONFIG_ATALK) static inline struct atalk_iface *atalk_find_dev(struct net_device *dev) { return dev->atalk_ptr; diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index a0f654370da3..5d69b143c28e 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -45,6 +45,7 @@ atomic_set(atomic_t *v, int i) static __always_inline void atomic_set_release(atomic_t *v, int i) { + kcsan_release(); instrument_atomic_write(v, sizeof(*v)); arch_atomic_set_release(v, i); } @@ -59,6 +60,7 @@ atomic_add(int i, atomic_t *v) static __always_inline int atomic_add_return(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return(i, v); } @@ -73,6 +75,7 @@ atomic_add_return_acquire(int i, atomic_t *v) static __always_inline int atomic_add_return_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return_release(i, v); } @@ -87,6 +90,7 @@ atomic_add_return_relaxed(int i, atomic_t *v) static __always_inline int atomic_fetch_add(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add(i, v); } @@ -101,6 +105,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_add_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_release(i, v); } @@ -122,6 +127,7 @@ atomic_sub(int i, atomic_t *v) static __always_inline int atomic_sub_return(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, 
sizeof(*v)); return arch_atomic_sub_return(i, v); } @@ -136,6 +142,7 @@ atomic_sub_return_acquire(int i, atomic_t *v) static __always_inline int atomic_sub_return_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return_release(i, v); } @@ -150,6 +157,7 @@ atomic_sub_return_relaxed(int i, atomic_t *v) static __always_inline int atomic_fetch_sub(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub(i, v); } @@ -164,6 +172,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_sub_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub_release(i, v); } @@ -185,6 +194,7 @@ atomic_inc(atomic_t *v) static __always_inline int atomic_inc_return(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return(v); } @@ -199,6 +209,7 @@ atomic_inc_return_acquire(atomic_t *v) static __always_inline int atomic_inc_return_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return_release(v); } @@ -213,6 +224,7 @@ atomic_inc_return_relaxed(atomic_t *v) static __always_inline int atomic_fetch_inc(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc(v); } @@ -227,6 +239,7 @@ atomic_fetch_inc_acquire(atomic_t *v) static __always_inline int atomic_fetch_inc_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc_release(v); } @@ -248,6 +261,7 @@ atomic_dec(atomic_t *v) static __always_inline int atomic_dec_return(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return(v); } @@ -262,6 +276,7 @@ atomic_dec_return_acquire(atomic_t *v) static __always_inline int atomic_dec_return_release(atomic_t *v) { + 
kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return_release(v); } @@ -276,6 +291,7 @@ atomic_dec_return_relaxed(atomic_t *v) static __always_inline int atomic_fetch_dec(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec(v); } @@ -290,6 +306,7 @@ atomic_fetch_dec_acquire(atomic_t *v) static __always_inline int atomic_fetch_dec_release(atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec_release(v); } @@ -311,6 +328,7 @@ atomic_and(int i, atomic_t *v) static __always_inline int atomic_fetch_and(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and(i, v); } @@ -325,6 +343,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_and_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and_release(i, v); } @@ -346,6 +365,7 @@ atomic_andnot(int i, atomic_t *v) static __always_inline int atomic_fetch_andnot(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot(i, v); } @@ -360,6 +380,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_andnot_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot_release(i, v); } @@ -381,6 +402,7 @@ atomic_or(int i, atomic_t *v) static __always_inline int atomic_fetch_or(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or(i, v); } @@ -395,6 +417,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_or_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or_release(i, v); } @@ -416,6 +439,7 @@ atomic_xor(int i, atomic_t 
*v) static __always_inline int atomic_fetch_xor(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor(i, v); } @@ -430,6 +454,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v) static __always_inline int atomic_fetch_xor_release(int i, atomic_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor_release(i, v); } @@ -444,6 +469,7 @@ atomic_fetch_xor_relaxed(int i, atomic_t *v) static __always_inline int atomic_xchg(atomic_t *v, int i) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg(v, i); } @@ -458,6 +484,7 @@ atomic_xchg_acquire(atomic_t *v, int i) static __always_inline int atomic_xchg_release(atomic_t *v, int i) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg_release(v, i); } @@ -472,6 +499,7 @@ atomic_xchg_relaxed(atomic_t *v, int i) static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg(v, old, new); } @@ -486,6 +514,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new) static __always_inline int atomic_cmpxchg_release(atomic_t *v, int old, int new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg_release(v, old, new); } @@ -500,6 +529,7 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg(v, old, new); @@ -516,6 +546,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) static __always_inline bool atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return 
arch_atomic_try_cmpxchg_release(v, old, new); @@ -532,6 +563,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_and_test(i, v); } @@ -539,6 +571,7 @@ atomic_sub_and_test(int i, atomic_t *v) static __always_inline bool atomic_dec_and_test(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_and_test(v); } @@ -546,6 +579,7 @@ atomic_dec_and_test(atomic_t *v) static __always_inline bool atomic_inc_and_test(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_and_test(v); } @@ -553,6 +587,7 @@ atomic_inc_and_test(atomic_t *v) static __always_inline bool atomic_add_negative(int i, atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_negative(i, v); } @@ -560,6 +595,7 @@ atomic_add_negative(int i, atomic_t *v) static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_unless(v, a, u); } @@ -567,6 +603,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) static __always_inline bool atomic_add_unless(atomic_t *v, int a, int u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_unless(v, a, u); } @@ -574,6 +611,7 @@ atomic_add_unless(atomic_t *v, int a, int u) static __always_inline bool atomic_inc_not_zero(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_not_zero(v); } @@ -581,6 +619,7 @@ atomic_inc_not_zero(atomic_t *v) static __always_inline bool atomic_inc_unless_negative(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_unless_negative(v); } @@ -588,6 +627,7 @@ atomic_inc_unless_negative(atomic_t *v) static __always_inline bool 
atomic_dec_unless_positive(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_unless_positive(v); } @@ -595,6 +635,7 @@ atomic_dec_unless_positive(atomic_t *v) static __always_inline int atomic_dec_if_positive(atomic_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_if_positive(v); } @@ -623,6 +664,7 @@ atomic64_set(atomic64_t *v, s64 i) static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) { + kcsan_release(); instrument_atomic_write(v, sizeof(*v)); arch_atomic64_set_release(v, i); } @@ -637,6 +679,7 @@ atomic64_add(s64 i, atomic64_t *v) static __always_inline s64 atomic64_add_return(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return(i, v); } @@ -651,6 +694,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return_release(i, v); } @@ -665,6 +709,7 @@ atomic64_add_return_relaxed(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add(i, v); } @@ -679,6 +724,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_add_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_release(i, v); } @@ -700,6 +746,7 @@ atomic64_sub(s64 i, atomic64_t *v) static __always_inline s64 atomic64_sub_return(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return(i, v); } @@ -714,6 +761,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v) { + kcsan_release(); 
instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return_release(i, v); } @@ -728,6 +776,7 @@ atomic64_sub_return_relaxed(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_sub(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub(i, v); } @@ -742,6 +791,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_sub_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub_release(i, v); } @@ -763,6 +813,7 @@ atomic64_inc(atomic64_t *v) static __always_inline s64 atomic64_inc_return(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return(v); } @@ -777,6 +828,7 @@ atomic64_inc_return_acquire(atomic64_t *v) static __always_inline s64 atomic64_inc_return_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return_release(v); } @@ -791,6 +843,7 @@ atomic64_inc_return_relaxed(atomic64_t *v) static __always_inline s64 atomic64_fetch_inc(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc(v); } @@ -805,6 +858,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v) static __always_inline s64 atomic64_fetch_inc_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc_release(v); } @@ -826,6 +880,7 @@ atomic64_dec(atomic64_t *v) static __always_inline s64 atomic64_dec_return(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return(v); } @@ -840,6 +895,7 @@ atomic64_dec_return_acquire(atomic64_t *v) static __always_inline s64 atomic64_dec_return_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return_release(v); } @@ -854,6 +910,7 @@ 
atomic64_dec_return_relaxed(atomic64_t *v) static __always_inline s64 atomic64_fetch_dec(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec(v); } @@ -868,6 +925,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v) static __always_inline s64 atomic64_fetch_dec_release(atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec_release(v); } @@ -889,6 +947,7 @@ atomic64_and(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_and(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and(i, v); } @@ -903,6 +962,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_and_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and_release(i, v); } @@ -924,6 +984,7 @@ atomic64_andnot(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_andnot(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot(i, v); } @@ -938,6 +999,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot_release(i, v); } @@ -959,6 +1021,7 @@ atomic64_or(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_or(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or(i, v); } @@ -973,6 +1036,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_or_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or_release(i, v); } @@ -994,6 +1058,7 @@ atomic64_xor(s64 i, atomic64_t *v) static __always_inline s64 
atomic64_fetch_xor(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor(i, v); } @@ -1008,6 +1073,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_xor_release(s64 i, atomic64_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor_release(i, v); } @@ -1022,6 +1088,7 @@ atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v) static __always_inline s64 atomic64_xchg(atomic64_t *v, s64 i) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg(v, i); } @@ -1036,6 +1103,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i) static __always_inline s64 atomic64_xchg_release(atomic64_t *v, s64 i) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg_release(v, i); } @@ -1050,6 +1118,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 i) static __always_inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg(v, old, new); } @@ -1064,6 +1133,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) static __always_inline s64 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg_release(v, old, new); } @@ -1078,6 +1148,7 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg(v, old, new); @@ -1094,6 +1165,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) static __always_inline bool atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); 
instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg_release(v, old, new); @@ -1110,6 +1182,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_and_test(i, v); } @@ -1117,6 +1190,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v) static __always_inline bool atomic64_dec_and_test(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_and_test(v); } @@ -1124,6 +1198,7 @@ atomic64_dec_and_test(atomic64_t *v) static __always_inline bool atomic64_inc_and_test(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_and_test(v); } @@ -1131,6 +1206,7 @@ atomic64_inc_and_test(atomic64_t *v) static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_negative(i, v); } @@ -1138,6 +1214,7 @@ atomic64_add_negative(s64 i, atomic64_t *v) static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_unless(v, a, u); } @@ -1145,6 +1222,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_unless(v, a, u); } @@ -1152,6 +1230,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_not_zero(v); } @@ -1159,6 +1238,7 @@ atomic64_inc_not_zero(atomic64_t *v) static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, 
sizeof(*v)); return arch_atomic64_inc_unless_negative(v); } @@ -1166,6 +1246,7 @@ atomic64_inc_unless_negative(atomic64_t *v) static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_unless_positive(v); } @@ -1173,6 +1254,7 @@ atomic64_dec_unless_positive(atomic64_t *v) static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_if_positive(v); } @@ -1201,6 +1283,7 @@ atomic_long_set(atomic_long_t *v, long i) static __always_inline void atomic_long_set_release(atomic_long_t *v, long i) { + kcsan_release(); instrument_atomic_write(v, sizeof(*v)); arch_atomic_long_set_release(v, i); } @@ -1215,6 +1298,7 @@ atomic_long_add(long i, atomic_long_t *v) static __always_inline long atomic_long_add_return(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return(i, v); } @@ -1229,6 +1313,7 @@ atomic_long_add_return_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_add_return_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return_release(i, v); } @@ -1243,6 +1328,7 @@ atomic_long_add_return_relaxed(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_add(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add(i, v); } @@ -1257,6 +1343,7 @@ atomic_long_fetch_add_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_add_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_release(i, v); } @@ -1278,6 +1365,7 @@ atomic_long_sub(long i, atomic_long_t *v) static __always_inline long atomic_long_sub_return(long i, atomic_long_t *v) { + 
kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return(i, v); } @@ -1292,6 +1380,7 @@ atomic_long_sub_return_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_sub_return_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return_release(i, v); } @@ -1306,6 +1395,7 @@ atomic_long_sub_return_relaxed(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_sub(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub(i, v); } @@ -1320,6 +1410,7 @@ atomic_long_fetch_sub_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_sub_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub_release(i, v); } @@ -1341,6 +1432,7 @@ atomic_long_inc(atomic_long_t *v) static __always_inline long atomic_long_inc_return(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return(v); } @@ -1355,6 +1447,7 @@ atomic_long_inc_return_acquire(atomic_long_t *v) static __always_inline long atomic_long_inc_return_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return_release(v); } @@ -1369,6 +1462,7 @@ atomic_long_inc_return_relaxed(atomic_long_t *v) static __always_inline long atomic_long_fetch_inc(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc(v); } @@ -1383,6 +1477,7 @@ atomic_long_fetch_inc_acquire(atomic_long_t *v) static __always_inline long atomic_long_fetch_inc_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc_release(v); } @@ -1404,6 +1499,7 @@ atomic_long_dec(atomic_long_t *v) static __always_inline long 
atomic_long_dec_return(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return(v); } @@ -1418,6 +1514,7 @@ atomic_long_dec_return_acquire(atomic_long_t *v) static __always_inline long atomic_long_dec_return_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return_release(v); } @@ -1432,6 +1529,7 @@ atomic_long_dec_return_relaxed(atomic_long_t *v) static __always_inline long atomic_long_fetch_dec(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec(v); } @@ -1446,6 +1544,7 @@ atomic_long_fetch_dec_acquire(atomic_long_t *v) static __always_inline long atomic_long_fetch_dec_release(atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec_release(v); } @@ -1467,6 +1566,7 @@ atomic_long_and(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_and(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and(i, v); } @@ -1481,6 +1581,7 @@ atomic_long_fetch_and_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_and_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and_release(i, v); } @@ -1502,6 +1603,7 @@ atomic_long_andnot(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_andnot(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot(i, v); } @@ -1516,6 +1618,7 @@ atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_andnot_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot_release(i, v); } @@ -1537,6 +1640,7 @@ 
atomic_long_or(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_or(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or(i, v); } @@ -1551,6 +1655,7 @@ atomic_long_fetch_or_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_or_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or_release(i, v); } @@ -1572,6 +1677,7 @@ atomic_long_xor(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_xor(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor(i, v); } @@ -1586,6 +1692,7 @@ atomic_long_fetch_xor_acquire(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_xor_release(long i, atomic_long_t *v) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor_release(i, v); } @@ -1600,6 +1707,7 @@ atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v) static __always_inline long atomic_long_xchg(atomic_long_t *v, long i) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg(v, i); } @@ -1614,6 +1722,7 @@ atomic_long_xchg_acquire(atomic_long_t *v, long i) static __always_inline long atomic_long_xchg_release(atomic_long_t *v, long i) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg_release(v, i); } @@ -1628,6 +1737,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long i) static __always_inline long atomic_long_cmpxchg(atomic_long_t *v, long old, long new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg(v, old, new); } @@ -1642,6 +1752,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) static __always_inline long atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) { + 
kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg_release(v, old, new); } @@ -1656,6 +1767,7 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) static __always_inline bool atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg(v, old, new); @@ -1672,6 +1784,7 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) static __always_inline bool atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { + kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg_release(v, old, new); @@ -1688,6 +1801,7 @@ atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) static __always_inline bool atomic_long_sub_and_test(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_and_test(i, v); } @@ -1695,6 +1809,7 @@ atomic_long_sub_and_test(long i, atomic_long_t *v) static __always_inline bool atomic_long_dec_and_test(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_and_test(v); } @@ -1702,6 +1817,7 @@ atomic_long_dec_and_test(atomic_long_t *v) static __always_inline bool atomic_long_inc_and_test(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_and_test(v); } @@ -1709,6 +1825,7 @@ atomic_long_inc_and_test(atomic_long_t *v) static __always_inline bool atomic_long_add_negative(long i, atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_negative(i, v); } @@ -1716,6 +1833,7 @@ atomic_long_add_negative(long i, atomic_long_t *v) static __always_inline long atomic_long_fetch_add_unless(atomic_long_t *v, long a, 
long u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_unless(v, a, u); } @@ -1723,6 +1841,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) static __always_inline bool atomic_long_add_unless(atomic_long_t *v, long a, long u) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_unless(v, a, u); } @@ -1730,6 +1849,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u) static __always_inline bool atomic_long_inc_not_zero(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_not_zero(v); } @@ -1737,6 +1857,7 @@ atomic_long_inc_not_zero(atomic_long_t *v) static __always_inline bool atomic_long_inc_unless_negative(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_unless_negative(v); } @@ -1744,6 +1865,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v) static __always_inline bool atomic_long_dec_unless_positive(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_unless_positive(v); } @@ -1751,6 +1873,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v) static __always_inline long atomic_long_dec_if_positive(atomic_long_t *v) { + kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_if_positive(v); } @@ -1758,6 +1881,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define xchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg(__ai_ptr, __VA_ARGS__); \ }) @@ -1772,6 +1896,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define xchg_release(ptr, ...) 
\ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_release(__ai_ptr, __VA_ARGS__); \ }) @@ -1786,6 +1911,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) @@ -1800,6 +1926,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \ }) @@ -1814,6 +1941,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg64(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \ }) @@ -1828,6 +1956,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg64_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \ }) @@ -1843,6 +1972,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ @@ -1861,6 +1991,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ + kcsan_release(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ @@ -1892,6 +2023,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define sync_cmpxchg(ptr, ...) 
\ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) @@ -1899,6 +2031,7 @@ atomic_long_dec_if_positive(atomic_long_t *v) #define cmpxchg_double(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \ arch_cmpxchg_double(__ai_ptr, __VA_ARGS__); \ }) @@ -1912,4 +2045,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 2a9553f0a9d5619f19151092df5cabbbf16ce835 +// 87c974b93032afd42143613434d1a7788fa598f9 diff --git a/include/linux/audit.h b/include/linux/audit.h index 82b7c1116a85..d06134ac6245 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/ptrace.h> +#include <linux/audit_arch.h> #include <uapi/linux/audit.h> #include <uapi/linux/netfilter/nf_tables.h> @@ -286,7 +287,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t) /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); +extern int audit_alloc_kernel(struct task_struct *task); extern void __audit_free(struct task_struct *task); +extern void __audit_uring_entry(u8 op); +extern void __audit_uring_exit(int success, long code); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); @@ -323,6 +327,21 @@ static inline void audit_free(struct task_struct *task) if (unlikely(task->audit_context)) __audit_free(task); } +static inline void audit_uring_entry(u8 op) +{ + /* + * We intentionally check audit_context() before audit_enabled as most + * Linux systems (as of ~2021) rely on systemd which forces audit to + * be enabled regardless of the user's audit configuration. 
+ */ + if (unlikely(audit_context() && audit_enabled)) + __audit_uring_entry(op); +} +static inline void audit_uring_exit(int success, long code) +{ + if (unlikely(!audit_dummy_context())) + __audit_uring_exit(success, code); +} static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) @@ -398,6 +417,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *old); extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); +extern void __audit_openat2_how(struct open_how *how); extern void __audit_log_kern_module(char *name); extern void __audit_fanotify(unsigned int response); extern void __audit_tk_injoffset(struct timespec64 offset); @@ -494,6 +514,12 @@ static inline void audit_mmap_fd(int fd, int flags) __audit_mmap_fd(fd, flags); } +static inline void audit_openat2_how(struct open_how *how) +{ + if (unlikely(!audit_dummy_context())) + __audit_openat2_how(how); +} + static inline void audit_log_kern_module(char *name) { if (!audit_dummy_context()) @@ -554,8 +580,16 @@ static inline int audit_alloc(struct task_struct *task) { return 0; } +static inline int audit_alloc_kernel(struct task_struct *task) +{ + return 0; +} static inline void audit_free(struct task_struct *task) { } +static inline void audit_uring_entry(u8 op) +{ } +static inline void audit_uring_exit(int success, long code) +{ } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) @@ -645,6 +679,9 @@ static inline void audit_log_capset(const struct cred *new, static inline void audit_mmap_fd(int fd, int flags) { } +static inline void audit_openat2_how(struct open_how *how) +{ } + static inline void audit_log_kern_module(char *name) { } diff --git a/include/linux/audit_arch.h b/include/linux/audit_arch.h new file mode 100644 index 000000000000..8fdb1afe251a --- /dev/null +++ 
b/include/linux/audit_arch.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* audit_arch.h -- Arch layer specific support for audit + * + * Copyright 2021 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * Author: Richard Guy Briggs <[email protected]> + */ +#ifndef _LINUX_AUDIT_ARCH_H_ +#define _LINUX_AUDIT_ARCH_H_ + +enum auditsc_class_t { + AUDITSC_NATIVE = 0, + AUDITSC_COMPAT, + AUDITSC_OPEN, + AUDITSC_OPENAT, + AUDITSC_SOCKETCALL, + AUDITSC_EXECVE, + AUDITSC_OPENAT2, + + AUDITSC_NVALS /* count */ +}; + +#endif diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index fc51d45f106b..de21d9d24a95 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -11,12 +11,172 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> +/** + * DOC: DEVICE_LIFESPAN + * + * The registering driver is the entity that allocates memory for the + * auxiliary_device and registers it on the auxiliary bus. It is important to + * note that, as opposed to the platform bus, the registering driver is wholly + * responsible for the management of the memory used for the device object. + * + * To be clear the memory for the auxiliary_device is freed in the release() + * callback defined by the registering driver. The registering driver should + * only call auxiliary_device_delete() and then auxiliary_device_uninit() when + * it is done with the device. The release() function is then automatically + * called if and when other code releases their reference to the devices. + * + * A parent object, defined in the shared header file, contains the + * auxiliary_device. It also contains a pointer to the shared object(s), which + * also is defined in the shared header. Both the parent object and the shared + * object(s) are allocated by the registering driver. 
This layout allows the + * auxiliary_driver's registering module to perform a container_of() call to go + * from the pointer to the auxiliary_device, that is passed during the call to + * the auxiliary_driver's probe function, up to the parent object, and then + * have access to the shared object(s). + * + * The memory for the shared object(s) must have a lifespan equal to, or + * greater than, the lifespan of the memory for the auxiliary_device. The + * auxiliary_driver should only consider that the shared object is valid as + * long as the auxiliary_device is still registered on the auxiliary bus. It + * is up to the registering driver to manage (e.g. free or keep available) the + * memory for the shared object beyond the life of the auxiliary_device. + * + * The registering driver must unregister all auxiliary devices before its own + * driver.remove() is completed. An easy way to ensure this is to use the + * devm_add_action_or_reset() call to register a function against the parent + * device which unregisters the auxiliary device object(s). + * + * Finally, any operations which operate on the auxiliary devices must continue + * to function (if only to return an error) after the registering driver + * unregisters the auxiliary device. + */ + +/** + * struct auxiliary_device - auxiliary device object. + * @dev: Device, + * The release and parent fields of the device structure must be filled + * in + * @name: Match name found by the auxiliary device driver, + * @id: unique identifier if multiple devices of the same name are exported, + * + * An auxiliary_device represents a part of its parent device's functionality. + * It is given a name that, combined with the registering driver's + * KBUILD_MODNAME, creates a match_name that is used for driver binding, and an + * id that, combined with the match_name, provides a unique name to register with + * the bus subsystem. 
For example, a driver registering an auxiliary device is + * named 'foo_mod.ko' and the subdevice is named 'foo_dev'. The match name is + * therefore 'foo_mod.foo_dev'. + * + * Registering an auxiliary_device is a three-step process. + * + * First, a 'struct auxiliary_device' needs to be defined or allocated for each + * sub-device desired. The name, id, dev.release, and dev.parent fields of + * this structure must be filled in as follows. + * + * The 'name' field is to be given a name that is recognized by the auxiliary + * driver. If two auxiliary_devices with the same match_name, e.g. + * "foo_mod.foo_dev", are registered onto the bus, they must have unique id + * values (e.g. "x" and "y") so that the registered device names are + * "foo_mod.foo_dev.x" and "foo_mod.foo_dev.y". If match_name + id are not + * unique, then the device_add fails and generates an error message. + * + * The auxiliary_device.dev.type.release or auxiliary_device.dev.release must + * be populated with a non-NULL pointer to successfully register the + * auxiliary_device. This release call is where resources associated with the + * auxiliary device must be freed, because once the device is placed on the + * bus the parent driver cannot tell what other code may have a reference to + * this data. + * + * The auxiliary_device.dev.parent should be set, typically to the registering + * driver's device. + * + * Second, call auxiliary_device_init(), which checks several aspects of the + * auxiliary_device struct and performs a device_initialize(). After this step + * completes, any error state must have a call to auxiliary_device_uninit() in + * its resolution path. + * + * The third and final step in registering an auxiliary_device is to perform a + * call to auxiliary_device_add(), which sets the name of the device and adds + * the device to the bus. + * + * .. code-block:: c + * + * #define MY_DEVICE_NAME "foo_dev" + * + * ... 
+ * + * struct auxiliary_device *my_aux_dev = my_aux_dev_alloc(xxx); + * + * // Step 1: + * my_aux_dev->name = MY_DEVICE_NAME; + * my_aux_dev->id = my_unique_id_alloc(xxx); + * my_aux_dev->dev.release = my_aux_dev_release; + * my_aux_dev->dev.parent = my_dev; + * + * // Step 2: + * if (auxiliary_device_init(my_aux_dev)) + * goto fail; + * + * // Step 3: + * if (auxiliary_device_add(my_aux_dev)) { + * auxiliary_device_uninit(my_aux_dev); + * goto fail; + * } + * + * ... + * + * + * Unregistering an auxiliary_device is a two-step process to mirror the + * register process. First call auxiliary_device_delete(), then call + * auxiliary_device_uninit(). + * + * .. code-block:: c + * + * auxiliary_device_delete(my_dev->my_aux_dev); + * auxiliary_device_uninit(my_dev->my_aux_dev); + */ struct auxiliary_device { struct device dev; const char *name; u32 id; }; +/** + * struct auxiliary_driver - Definition of an auxiliary bus driver + * @probe: Called when a matching device is added to the bus. + * @remove: Called when device is removed from the bus. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called to put the device to sleep mode. Usually to a power state. + * @resume: Called to bring a device from sleep mode. + * @name: Driver name. + * @driver: Core driver structure. + * @id_table: Table of devices this driver should match on the bus. + * + * Auxiliary drivers follow the standard driver model convention, where + * discovery/enumeration is handled by the core, and drivers provide probe() + * and remove() methods. They support power management and shutdown + * notifications using the standard conventions. + * + * Auxiliary drivers register themselves with the bus by calling + * auxiliary_driver_register(). The id_table contains the match_names of + * auxiliary devices that a driver can bind with. + * + * .. 
code-block:: c + * + * static const struct auxiliary_device_id my_auxiliary_id_table[] = { + * { .name = "foo_mod.foo_dev" }, + * {}, + * }; + * + * MODULE_DEVICE_TABLE(auxiliary, my_auxiliary_id_table); + * + * struct auxiliary_driver my_drv = { + * .name = "myauxiliarydrv", + * .id_table = my_auxiliary_id_table, + * .probe = my_drv_probe, + * .remove = my_drv_remove + * }; + */ struct auxiliary_driver { int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); void (*remove)(struct auxiliary_device *auxdev); @@ -28,6 +188,16 @@ struct auxiliary_driver { const struct auxiliary_device_id *id_table; }; +static inline void *auxiliary_get_drvdata(struct auxiliary_device *auxdev) +{ + return dev_get_drvdata(&auxdev->dev); +} + +static inline void auxiliary_set_drvdata(struct auxiliary_device *auxdev, void *data) +{ + dev_set_drvdata(&auxdev->dev, data); +} + static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) { return container_of(dev, struct auxiliary_device, dev); @@ -66,6 +236,10 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); * Helper macro for auxiliary drivers which do not do anything special in * module init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() + * + * .. 
code-block:: c + * + * module_auxiliary_driver(my_drv); */ #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index db0e099c2399..2ce27e8e4f19 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -141,6 +141,13 @@ enum virtchnl_ops { VIRTCHNL_OP_DEL_RSS_CFG = 46, VIRTCHNL_OP_ADD_FDIR_FILTER = 47, VIRTCHNL_OP_DEL_FDIR_FILTER = 48, + VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS = 51, + VIRTCHNL_OP_ADD_VLAN_V2 = 52, + VIRTCHNL_OP_DEL_VLAN_V2 = 53, + VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 = 54, + VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, + VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, + VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, VIRTCHNL_OP_MAX, }; @@ -238,27 +245,27 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including * TX/RX Checksum offloading and TSO for non-tunnelled packets. 
*/ -#define VIRTCHNL_VF_OFFLOAD_L2 0x00000001 -#define VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002 -#define VIRTCHNL_VF_OFFLOAD_RSVD 0x00000004 -#define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 -#define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 -#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 -#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES 0x00000040 -#define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 -#define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 -#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 -#define VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 -#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 -#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 -#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 -#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 -#define VIRTCHNL_VF_OFFLOAD_USO 0X02000000 -#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF 0X08000000 -#define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000 - -/* Define below the capability flags that are not offloads */ -#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 +#define VIRTCHNL_VF_OFFLOAD_L2 BIT(0) +#define VIRTCHNL_VF_OFFLOAD_IWARP BIT(1) +#define VIRTCHNL_VF_OFFLOAD_RSS_AQ BIT(3) +#define VIRTCHNL_VF_OFFLOAD_RSS_REG BIT(4) +#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR BIT(5) +#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) +/* used to negotiate communicating link speeds in Mbps */ +#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) +#define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) +#define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) +#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 BIT(18) +#define VIRTCHNL_VF_OFFLOAD_RSS_PF BIT(19) +#define VIRTCHNL_VF_OFFLOAD_ENCAP BIT(20) +#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM BIT(21) +#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM BIT(22) +#define VIRTCHNL_VF_OFFLOAD_ADQ BIT(23) +#define VIRTCHNL_VF_OFFLOAD_USO BIT(25) +#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) +#define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) + #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ 
VIRTCHNL_VF_OFFLOAD_RSS_PF) @@ -476,6 +483,351 @@ struct virtchnl_vlan_filter_list { VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list); +/* This enum is used for all of the VIRTCHNL_VF_OFFLOAD_VLAN_V2_CAPS related + * structures and opcodes. + * + * VIRTCHNL_VLAN_UNSUPPORTED - This field is not supported and if a VF driver + * populates it the PF should return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED. + * + * VIRTCHNL_VLAN_ETHERTYPE_8100 - This field supports 0x8100 ethertype. + * VIRTCHNL_VLAN_ETHERTYPE_88A8 - This field supports 0x88A8 ethertype. + * VIRTCHNL_VLAN_ETHERTYPE_9100 - This field supports 0x9100 ethertype. + * + * VIRTCHNL_VLAN_ETHERTYPE_AND - Used when multiple ethertypes can be supported + * by the PF concurrently. For example, if the PF can support + * VIRTCHNL_VLAN_ETHERTYPE_8100 AND VIRTCHNL_VLAN_ETHERTYPE_88A8 filters it + * would OR the following bits: + * + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_AND; + * + * The VF would interpret this as VLAN filtering can be supported on both 0x8100 + * and 0x88A8 VLAN ethertypes. + * + * VIRTCHNL_VLAN_ETHERTYPE_XOR - Used when only a single ethertype can be supported + * by the PF concurrently. For example if the PF can support + * VIRTCHNL_VLAN_ETHERTYPE_8100 XOR VIRTCHNL_VLAN_ETHERTYPE_88A8 stripping + * offload it would OR the following bits: + * + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_XOR; + * + * The VF would interpret this as VLAN stripping can be supported on either + * 0x8100 or 0x88a8 VLAN ethertypes. So when requesting VLAN stripping via + * VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 the specified ethertype will override + * the previously set value. + * + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1 - Used to tell the VF to insert and/or + * strip the VLAN tag using the L2TAG1 field of the Tx/Rx descriptors. 
+ * + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 - Used to tell the VF to insert hardware + * offloaded VLAN tags using the L2TAG2 field of the Tx descriptor. + * + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 - Used to tell the VF to strip hardware + * offloaded VLAN tags using the L2TAG2_2 field of the Rx descriptor. + * + * VIRTCHNL_VLAN_PRIO - This field supports VLAN priority bits. This is used for + * VLAN filtering if the underlying PF supports it. + * + * VIRTCHNL_VLAN_TOGGLE - This field is used to say whether a + * certain VLAN capability can be toggled. For example if the underlying PF/CP + * allows the VF to toggle VLAN filtering, stripping, and/or insertion it should + * set this bit along with the supported ethertypes. + */ +enum virtchnl_vlan_support { + VIRTCHNL_VLAN_UNSUPPORTED = 0, + VIRTCHNL_VLAN_ETHERTYPE_8100 = BIT(0), + VIRTCHNL_VLAN_ETHERTYPE_88A8 = BIT(1), + VIRTCHNL_VLAN_ETHERTYPE_9100 = BIT(2), + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1 = BIT(8), + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2 = BIT(9), + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 = BIT(10), + VIRTCHNL_VLAN_PRIO = BIT(24), + VIRTCHNL_VLAN_FILTER_MASK = BIT(28), + VIRTCHNL_VLAN_ETHERTYPE_AND = BIT(29), + VIRTCHNL_VLAN_ETHERTYPE_XOR = BIT(30), + VIRTCHNL_VLAN_TOGGLE = BIT(31), +}; + +/* This structure is used as part of the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS + * for filtering, insertion, and stripping capabilities. + * + * If only outer capabilities are supported (for filtering, insertion, and/or + * stripping) then this refers to the outer most or single VLAN from the VF's + * perspective. + * + * If only inner capabilities are supported (for filtering, insertion, and/or + * stripping) then this refers to the outer most or single VLAN from the VF's + * perspective. Functionally this is the same as if only outer capabilities are + * supported. The VF driver is just forced to use the inner fields when + * adding/deleting filters and enabling/disabling offloads (if supported). 
+ * + * If both outer and inner capabilities are supported (for filtering, insertion, + * and/or stripping) then outer refers to the outer most or single VLAN and + * inner refers to the second VLAN, if it exists, in the packet. + * + * There is no support for tunneled VLAN offloads, so outer or inner are never + * referring to a tunneled packet from the VF's perspective. + */ +struct virtchnl_vlan_supported_caps { + u32 outer; + u32 inner; +}; + +/* The PF populates these fields based on the supported VLAN filtering. If a + * field is VIRTCHNL_VLAN_UNSUPPORTED then it's not supported and the PF will + * reject any VIRTCHNL_OP_ADD_VLAN_V2 or VIRTCHNL_OP_DEL_VLAN_V2 messages using + * the unsupported fields. + * + * Also, a VF is only allowed to toggle its VLAN filtering setting if the + * VIRTCHNL_VLAN_TOGGLE bit is set. + * + * The ethertype(s) specified in the ethertype_init field are the ethertypes + * enabled for VLAN filtering. VLAN filtering in this case refers to the outer + * most VLAN from the VF's perspective. If both inner and outer filtering are + * allowed then ethertype_init only refers to the outer most VLAN as the only + * VLAN ethertype supported for inner VLAN filtering is + * VIRTCHNL_VLAN_ETHERTYPE_8100. By default, inner VLAN filtering is disabled + * when both inner and outer filtering are allowed. + * + * The max_filters field tells the VF how many VLAN filters it's allowed to have + * at any one time. If it exceeds this amount and tries to add another filter, + * then the request will be rejected by the PF. To prevent failures, the VF + * should keep track of how many VLAN filters it has added and not attempt to + * add more than max_filters. 
+ */ +struct virtchnl_vlan_filtering_caps { + struct virtchnl_vlan_supported_caps filtering_support; + u32 ethertype_init; + u16 max_filters; + u8 pad[2]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vlan_filtering_caps); + +/* This enum is used for the virtchnl_vlan_offload_caps structure to specify + * if the PF supports a different ethertype for stripping and insertion. + * + * VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION - The ethertype(s) specified + * for stripping affect the ethertype(s) specified for insertion and vice versa + * as well. If the VF tries to configure VLAN stripping via + * VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 with VIRTCHNL_VLAN_ETHERTYPE_8100 then + * that will be the ethertype for both stripping and insertion. + * + * VIRTCHNL_ETHERTYPE_MATCH_NOT_REQUIRED - The ethertype(s) specified for + * stripping do not affect the ethertype(s) specified for insertion and vice + * versa. + */ +enum virtchnl_vlan_ethertype_match { + VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION = 0, + VIRTCHNL_ETHERTYPE_MATCH_NOT_REQUIRED = 1, +}; + +/* The PF populates these fields based on the supported VLAN offloads. If a + * field is VIRTCHNL_VLAN_UNSUPPORTED then it's not supported and the PF will + * reject any VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 or + * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 messages using the unsupported fields. + * + * Also, a VF is only allowed to toggle its VLAN offload setting if the + * VIRTCHNL_VLAN_TOGGLE bit is set. + * + * The VF driver needs to be aware of how the tags are stripped by hardware and + * inserted by the VF driver based on the level of offload support. The PF will + * populate these fields based on where the VLAN tags are expected to be + * offloaded via the VIRTCHNL_VLAN_TAG_LOCATION_* bits. The VF will need to + * interpret these fields. See the definition of the + * VIRTCHNL_VLAN_TAG_LOCATION_* bits above the virtchnl_vlan_support + * enumeration.
+ */ +struct virtchnl_vlan_offload_caps { + struct virtchnl_vlan_supported_caps stripping_support; + struct virtchnl_vlan_supported_caps insertion_support; + u32 ethertype_init; + u8 ethertype_match; + u8 pad[3]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_vlan_offload_caps); + +/* VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS + * VF sends this message to determine its VLAN capabilities. + * + * PF will mark which capabilities it supports based on hardware support and + * current configuration. For example, if a port VLAN is configured the PF will + * not allow outer VLAN filtering, stripping, or insertion to be configured so + * it will block these features from the VF. + * + * The VF will need to cross reference its capabilities with the PFs + * capabilities in the response message from the PF to determine the VLAN + * support. + */ +struct virtchnl_vlan_caps { + struct virtchnl_vlan_filtering_caps filtering; + struct virtchnl_vlan_offload_caps offloads; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_caps); + +struct virtchnl_vlan { + u16 tci; /* tci[15:13] = PCP and tci[11:0] = VID */ + u16 tci_mask; /* only valid if VIRTCHNL_VLAN_FILTER_MASK set in + * filtering caps + */ + u16 tpid; /* 0x8100, 0x88a8, etc. and only type(s) set in + * filtering caps. Note that tpid here does not refer to + * VIRTCHNL_VLAN_ETHERTYPE_*, but it refers to the + * actual 2-byte VLAN TPID + */ + u8 pad[2]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vlan); + +struct virtchnl_vlan_filter { + struct virtchnl_vlan inner; + struct virtchnl_vlan outer; + u8 pad[16]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(32, virtchnl_vlan_filter); + +/* VIRTCHNL_OP_ADD_VLAN_V2 + * VIRTCHNL_OP_DEL_VLAN_V2 + * + * VF sends these messages to add/del one or more VLAN tag filters for Rx + * traffic. + * + * The PF attempts to add the filters and returns status. + * + * The VF should only ever attempt to add/del virtchnl_vlan_filter(s) using the + * supported fields negotiated via VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS. 
+ */ +struct virtchnl_vlan_filter_list_v2 { + u16 vport_id; + u16 num_elements; + u8 pad[4]; + struct virtchnl_vlan_filter filters[1]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_filter_list_v2); + +/* VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 + * VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 + * VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 + * VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 + * + * VF sends this message to enable or disable VLAN stripping or insertion. It + * also needs to specify an ethertype. The VF knows which VLAN ethertypes are + * allowed and whether or not it's allowed to enable/disable the specific + * offload via the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS message. The VF needs to + * parse the virtchnl_vlan_caps.offloads fields to determine which offload + * messages are allowed. + * + * For example, if the PF populates the virtchnl_vlan_caps.offloads in the + * following manner the VF will be allowed to enable and/or disable 0x8100 inner + * VLAN insertion and/or stripping via the opcodes listed above. Inner in this + * case means the outer most or single VLAN from the VF's perspective. This is + * because no outer offloads are supported. See the comments above the + * virtchnl_vlan_supported_caps structure for more details. + * + * virtchnl_vlan_caps.offloads.stripping_support.inner = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100; + * + * virtchnl_vlan_caps.offloads.insertion_support.inner = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100; + * + * In order to enable inner (again note that in this case inner is the outer + * most or single VLAN from the VF's perspective) VLAN stripping for 0x8100 + * VLANs, the VF would populate the virtchnl_vlan_setting structure in the + * following manner and send the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 message. + * + * virtchnl_vlan_setting.inner_ethertype_setting = + * VIRTCHNL_VLAN_ETHERTYPE_8100; + * + * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on + * initialization. 
+ * + * The reason that VLAN TPID(s) are not being used for the + * outer_ethertype_setting and inner_ethertype_setting fields is because it's + * possible a device could support VLAN insertion and/or stripping offload on + * multiple ethertypes concurrently, so this method allows a VF to request + * multiple ethertypes in one message using the virtchnl_vlan_support + * enumeration. + * + * For example, if the PF populates the virtchnl_vlan_caps.offloads in the + * following manner the VF will be allowed to enable 0x8100 and 0x88a8 outer + * VLAN insertion and stripping simultaneously. The + * virtchnl_vlan_caps.offloads.ethertype_match field will also have to be + * populated based on what the PF can support. + * + * virtchnl_vlan_caps.offloads.stripping_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_AND; + * + * virtchnl_vlan_caps.offloads.insertion_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_AND; + * + * In order to enable outer VLAN stripping for 0x8100 and 0x88a8 VLANs, the VF + * would populate the virtchnl_vlan_setting structure in the following manner + * and send the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 message. + * + * virtchnl_vlan_setting.outer_ethertype_setting = + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8; + * + * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on + * initialization. + * + * There is also the case where a PF and the underlying hardware can support + * VLAN offloads on multiple ethertypes, but not concurrently. For example, if + * the PF populates the virtchnl_vlan_caps.offloads in the following manner the + * VF will be allowed to enable and/or disable 0x8100 XOR 0x88a8 outer VLAN + * offloads. The ethertypes must match for stripping and insertion.
+ * + * virtchnl_vlan_caps.offloads.stripping_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_XOR; + * + * virtchnl_vlan_caps.offloads.insertion_support.outer = + * VIRTCHNL_VLAN_TOGGLE | + * VIRTCHNL_VLAN_ETHERTYPE_8100 | + * VIRTCHNL_VLAN_ETHERTYPE_88A8 | + * VIRTCHNL_VLAN_ETHERTYPE_XOR; + * + * virtchnl_vlan_caps.offloads.ethertype_match = + * VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION; + * + * In order to enable outer VLAN stripping for 0x88a8 VLANs, the VF would + * populate the virtchnl_vlan_setting structure in the following manner and send + * the VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2. Also, this will change the + * ethertype for VLAN insertion if it's enabled. So, for completeness, a + * VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 with the same ethertype should be sent. + * + * virtchnl_vlan_setting.outer_ethertype_setting = VIRTCHNL_VLAN_ETHERTYPE_88A8; + * + * virtchnl_vlan_setting.vport_id = vport_id or vsi_id assigned to the VF on + * initialization. + */ +struct virtchnl_vlan_setting { + u32 outer_ethertype_setting; + u32 inner_ethertype_setting; + u16 vport_id; + u8 pad[6]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vlan_setting); + /* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE * VF sends VSI id and flags. * PF returns status code in retval.
@@ -1157,6 +1509,30 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_FDIR_FILTER: valid_len = sizeof(struct virtchnl_fdir_del); break; + case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: + break; + case VIRTCHNL_OP_ADD_VLAN_V2: + case VIRTCHNL_OP_DEL_VLAN_V2: + valid_len = sizeof(struct virtchnl_vlan_filter_list_v2); + if (msglen >= valid_len) { + struct virtchnl_vlan_filter_list_v2 *vfl = + (struct virtchnl_vlan_filter_list_v2 *)msg; + + valid_len += (vfl->num_elements - 1) * + sizeof(struct virtchnl_vlan_filter); + + if (vfl->num_elements == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2: + case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2: + case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2: + case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: + valid_len = sizeof(struct virtchnl_vlan_setting); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 33207004cfde..993c5628a726 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -103,6 +103,9 @@ struct wb_completion { * change as blkcg is disabled and enabled higher up in the hierarchy, a wb * is tested for blkcg after lookup and removed from index on mismatch so * that a new wb for the combination can be created. + * + * Each bdi_writeback that is not embedded into the backing_dev_info must hold + * a reference to the parent backing_dev_info. See cgwb_create() for details. 
*/ struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ac7f231b8825..483979c1b9f4 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -12,13 +12,13 @@ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/sched.h> -#include <linux/blkdev.h> #include <linux/device.h> #include <linux/writeback.h> -#include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> +struct blkcg; + static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); @@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) return atomic_long_read(&bdi->tot_write_bandwidth); } -static inline void __add_wb_stat(struct bdi_writeback *wb, +static inline void wb_stat_mod(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); @@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, 1); + wb_stat_mod(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, -1); + wb_stat_mod(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) @@ -133,20 +133,7 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb) return test_bit(WB_writeback_running, &wb->state); } -static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) -{ - struct super_block *sb; - - if (!inode) - return &noop_backing_dev_info; - - sb = inode->i_sb; -#ifdef CONFIG_BLOCK - if (sb_is_blkdev_sb(sb)) - return I_BDEV(inode)->bd_disk->bdi; -#endif - return sb->s_bdi; -} +struct backing_dev_info *inode_to_bdi(struct inode *inode); static inline int wb_congested(struct bdi_writeback *wb, int 
cong_bits) { @@ -154,7 +141,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) } long congestion_wait(int sync, long timeout); -long wait_iff_congested(int sync, long timeout); static inline bool mapping_can_writeback(struct address_space *mapping) { diff --git a/include/linux/bio.h b/include/linux/bio.h index 00952e92eae1..117d7f248ac9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -6,19 +6,10 @@ #define __LINUX_BIO_H #include <linux/mempool.h> -#include <linux/ioprio.h> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> #include <linux/uio.h> -#define BIO_DEBUG - -#ifdef BIO_DEBUG -#define BIO_BUG_ON BUG_ON -#else -#define BIO_BUG_ON -#endif - #define BIO_MAX_VECS 256U static inline unsigned int bio_max_segs(unsigned int nr_segs) @@ -78,22 +69,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio) bio_op(bio) == REQ_OP_WRITE_ZEROES; } -static inline bool bio_mergeable(struct bio *bio) -{ - if (bio->bi_opf & REQ_NOMERGE_FLAGS) - return false; - - return true; -} - -static inline unsigned int bio_cur_bytes(struct bio *bio) -{ - if (bio_has_data(bio)) - return bio_iovec(bio).bv_len; - else /* dataless requests such as discard */ - return bio->bi_iter.bi_size; -} - static inline void *bio_data(struct bio *bio) { if (bio_has_data(bio)) @@ -102,25 +77,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -/** - * bio_full - check if the bio is full - * @bio: bio to check - * @len: length of one segment to be added - * - * Return true if @bio is full and one segment with @len bytes can't be - * added to the bio, otherwise return false - */ -static inline bool bio_full(struct bio *bio, unsigned len) -{ - if (bio->bi_vcnt >= bio->bi_max_vecs) - return true; - - if (bio->bi_iter.bi_size > UINT_MAX - len) - return true; - - return false; -} - static inline bool bio_next_segment(const struct bio *bio, struct bvec_iter_all *iter) { @@ -163,6 +119,28 @@ static inline 
void bio_advance_iter_single(const struct bio *bio, bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); } +void __bio_advance(struct bio *, unsigned bytes); + +/** + * bio_advance - increment/complete a bio by some number of bytes + * @bio: bio to advance + * @nbytes: number of bytes to complete + * + * This updates bi_sector, bi_size and bi_idx; if the number of bytes to + * complete doesn't align with a bvec boundary, then bv_len and bv_offset will + * be updated on the last bvec as well. + * + * @bio will then represent the remaining, uncompleted portion of the io. + */ +static inline void bio_advance(struct bio *bio, unsigned int nbytes) +{ + if (nbytes == bio->bi_iter.bi_size) { + bio->bi_iter.bi_size = 0; + return; + } + __bio_advance(bio, nbytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ @@ -188,7 +166,7 @@ static inline void bio_advance_iter_single(const struct bio *bio, */ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ - i < (bio)->bi_vcnt; i++, bvl++) \ + i < (bio)->bi_vcnt; i++, bvl++) #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) @@ -265,37 +243,6 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) bio->bi_flags &= ~(1U << bit); } -static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) -{ - *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); -} - -static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) -{ - struct bvec_iter iter = bio->bi_iter; - int idx; - - bio_get_first_bvec(bio, bv); - if (bv->bv_len == bio->bi_iter.bi_size) - return; /* this bio only has a single bvec */ - - bio_advance_iter(bio, &iter, iter.bi_size); - - if (!iter.bi_bvec_done) - idx = iter.bi_idx - 1; - else /* in the middle of bvec */ - idx = iter.bi_idx; - - *bv = bio->bi_io_vec[idx]; - - /* - * iter.bi_bvec_done records actual length of the last bvec - * if this bio ends in the middle of 
one io vector - */ - if (iter.bi_bvec_done) - bv->bv_len = iter.bi_bvec_done; -} - static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); @@ -313,6 +260,57 @@ static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) return &bio->bi_io_vec[bio->bi_vcnt - 1]; } +/** + * struct folio_iter - State for iterating all folios in a bio. + * @folio: The current folio we're iterating. NULL after the last folio. + * @offset: The byte offset within the current folio. + * @length: The number of bytes in this iteration (will not cross folio + * boundary). + */ +struct folio_iter { + struct folio *folio; + size_t offset; + size_t length; + /* private: for use by the iterator */ + size_t _seg_count; + int _i; +}; + +static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, + int i) +{ + struct bio_vec *bvec = bio_first_bvec_all(bio) + i; + + fi->folio = page_folio(bvec->bv_page); + fi->offset = bvec->bv_offset + + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + fi->_seg_count = bvec->bv_len; + fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); + fi->_i = i; +} + +static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) +{ + fi->_seg_count -= fi->length; + if (fi->_seg_count) { + fi->folio = folio_next(fi->folio); + fi->offset = 0; + fi->length = min(folio_size(fi->folio), fi->_seg_count); + } else if (fi->_i + 1 < bio->bi_vcnt) { + bio_first_folio(fi, bio, fi->_i + 1); + } else { + fi->folio = NULL; + } +} + +/** + * bio_for_each_folio_all - Iterate over each folio in a bio. + * @fi: struct folio_iter which is updated for each folio. + * @bio: struct bio to iterate over. 
+ */ +#define bio_for_each_folio_all(fi, bio) \ + for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio)) + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ @@ -385,7 +383,7 @@ extern struct bio *bio_split(struct bio *bio, int sectors, * @gfp: gfp mask * @bs: bio set to allocate from * - * Returns a bio representing the next @sectors of @bio - if the bio is smaller + * Return: a bio representing the next @sectors of @bio - if the bio is smaller * than @sectors, returns the original bio unchanged. */ static inline struct bio *bio_next_split(struct bio *bio, int sectors, @@ -424,7 +422,7 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); } -extern blk_qc_t submit_bio(struct bio *); +void submit_bio(struct bio *bio); extern void bio_endio(struct bio *); @@ -456,25 +454,23 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; extern int submit_bio_wait(struct bio *bio); -extern void bio_advance(struct bio *, unsigned); - extern void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs); extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); +int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); +bool bio_add_folio(struct bio *, struct folio *, size_t len, size_t off); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); int bio_add_zone_append_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off, bool *same_page); void __bio_add_page(struct bio *bio, struct 
page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -void bio_release_pages(struct bio *bio, bool mark_dirty); +void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); +void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); @@ -482,27 +478,16 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); -void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); void zero_fill_bio(struct bio *bio); -extern const char *bio_devname(struct bio *bio, char *buffer); +static inline void bio_release_pages(struct bio *bio, bool mark_dirty) +{ + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + __bio_release_pages(bio, mark_dirty); +} -#define bio_set_dev(bio, bdev) \ -do { \ - bio_clear_flag(bio, BIO_REMAPPED); \ - if ((bio)->bi_bdev != (bdev)) \ - bio_clear_flag(bio, BIO_THROTTLED); \ - (bio)->bi_bdev = (bdev); \ - bio_associate_blkg(bio); \ -} while (0) - -#define bio_copy_dev(dst, src) \ -do { \ - bio_clear_flag(dst, BIO_REMAPPED); \ - (dst)->bi_bdev = (src)->bi_bdev; \ - bio_clone_blkg_association(dst, src); \ -} while (0) +extern const char *bio_devname(struct bio *bio, char *buffer); #define bio_dev(bio) \ disk_devt((bio)->bi_bdev->bd_disk) @@ -521,6 +506,22 @@ static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ +static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) +{ + bio_clear_flag(bio, BIO_REMAPPED); + if (bio->bi_bdev != bdev) + bio_clear_flag(bio, BIO_THROTTLED); + bio->bi_bdev = bdev; + bio_associate_blkg(bio); +} + +static inline void bio_copy_dev(struct bio *dst, struct bio *src) +{ + bio_clear_flag(dst, 
BIO_REMAPPED); + dst->bi_bdev = src->bi_bdev; + bio_clone_blkg_association(dst, src); +} + /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * @@ -784,7 +785,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, */ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { - bio->bi_opf |= REQ_HIPRI; + bio->bi_opf |= REQ_POLLED; if (!is_sync_kiocb(kiocb)) bio->bi_opf |= REQ_NOWAIT; } diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4e035aca6f7e..6093fa6db260 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -41,6 +41,22 @@ #define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ @@ -49,7 +65,8 @@ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? 
\ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ - BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 37f36dad18bd..a241dcf50f39 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -123,6 +123,8 @@ struct device; */ unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); +unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); /* Managed variants of the above. */ diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h new file mode 100644 index 000000000000..bbab65bd5428 --- /dev/null +++ b/include/linux/blk-crypto-profile.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_PROFILE_H +#define __LINUX_BLK_CRYPTO_PROFILE_H + +#include <linux/bio.h> +#include <linux/blk-crypto.h> + +struct blk_crypto_profile; + +/** + * struct blk_crypto_ll_ops - functions to control inline encryption hardware + * + * Low-level operations for controlling inline encryption hardware. This + * interface must be implemented by storage drivers that support inline + * encryption. All functions may sleep, are serialized by profile->lock, and + * are never called while profile->dev (if set) is runtime-suspended. + */ +struct blk_crypto_ll_ops { + + /** + * @keyslot_program: Program a key into the inline encryption hardware. 
+ * + * Program @key into the specified @slot in the inline encryption + * hardware, overwriting any key that the keyslot may already contain. + * The keyslot is guaranteed to not be in-use by any I/O. + * + * This is required if the device has keyslots. Otherwise (i.e. if the + * device is a layered device, or if the device is real hardware that + * simply doesn't have the concept of keyslots) it is never called. + * + * Must return 0 on success, or -errno on failure. + */ + int (*keyslot_program)(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot); + + /** + * @keyslot_evict: Evict a key from the inline encryption hardware. + * + * If the device has keyslots, this function must evict the key from the + * specified @slot. The slot will contain @key, but there should be no + * need for the @key argument to be used as @slot should be sufficient. + * The keyslot is guaranteed to not be in-use by any I/O. + * + * If the device doesn't have keyslots itself, this function must evict + * @key from any underlying devices. @slot won't be valid in this case. + * + * If there are no keyslots and no underlying devices, this function + * isn't required. + * + * Must return 0 on success, or -errno on failure. + */ + int (*keyslot_evict)(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot); +}; + +/** + * struct blk_crypto_profile - inline encryption profile for a device + * + * This struct contains a storage device's inline encryption capabilities (e.g. + * the supported crypto algorithms), driver-provided functions to control the + * inline encryption hardware (e.g. programming and evicting keys), and optional + * device-independent keyslot management data. + */ +struct blk_crypto_profile { + + /* public: Drivers must initialize the following fields. */ + + /** + * @ll_ops: Driver-provided functions to control the inline encryption + * hardware, e.g. program and evict keys. 
+ */ + struct blk_crypto_ll_ops ll_ops; + + /** + * @max_dun_bytes_supported: The maximum number of bytes supported for + * specifying the data unit number (DUN). Specifically, the range of + * supported DUNs is 0 through (1 << (8 * max_dun_bytes_supported)) - 1. + */ + unsigned int max_dun_bytes_supported; + + /** + * @modes_supported: Array of bitmasks that specifies whether each + * combination of crypto mode and data unit size is supported. + * Specifically, the i'th bit of modes_supported[crypto_mode] is set if + * crypto_mode can be used with a data unit size of (1 << i). Note that + * only data unit sizes that are powers of 2 can be supported. + */ + unsigned int modes_supported[BLK_ENCRYPTION_MODE_MAX]; + + /** + * @dev: An optional device for runtime power management. If the driver + * provides this device, it will be runtime-resumed before any function + * in @ll_ops is called and will remain resumed during the call. + */ + struct device *dev; + + /* private: The following fields shouldn't be accessed by drivers. */ + + /* Number of keyslots, or 0 if not applicable */ + unsigned int num_slots; + + /* + * Serializes all calls to functions in @ll_ops as well as all changes + * to @slot_hashtable. This can also be taken in read mode to look up + * keyslots while ensuring that they can't be changed concurrently. + */ + struct rw_semaphore lock; + + /* List of idle slots, with least recently used slot at front */ + wait_queue_head_t idle_slots_wait_queue; + struct list_head idle_slots; + spinlock_t idle_slots_lock; + + /* + * Hash table which maps struct *blk_crypto_key to keyslots, so that we + * can find a key's keyslot in O(1) time rather than O(num_slots). + * Protected by 'lock'. 
+ */ + struct hlist_head *slot_hashtable; + unsigned int log_slot_ht_size; + + /* Per-keyslot data */ + struct blk_crypto_keyslot *slots; +}; + +int blk_crypto_profile_init(struct blk_crypto_profile *profile, + unsigned int num_slots); + +int devm_blk_crypto_profile_init(struct device *dev, + struct blk_crypto_profile *profile, + unsigned int num_slots); + +unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot); + +blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + struct blk_crypto_keyslot **slot_ptr); + +void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); + +bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, + const struct blk_crypto_config *cfg); + +int __blk_crypto_evict_key(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key); + +void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); + +void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); + +void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, + const struct blk_crypto_profile *child); + +bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target, + const struct blk_crypto_profile *reference); + +void blk_crypto_update_capabilities(struct blk_crypto_profile *dst, + const struct blk_crypto_profile *src); + +#endif /* __LINUX_BLK_CRYPTO_PROFILE_H */ diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h new file mode 100644 index 000000000000..8a038ea0717e --- /dev/null +++ b/include/linux/blk-integrity.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BLK_INTEGRITY_H +#define _LINUX_BLK_INTEGRITY_H + +#include <linux/blk-mq.h> + +struct request; + +enum blk_integrity_flags { + BLK_INTEGRITY_VERIFY = 1 << 0, + BLK_INTEGRITY_GENERATE = 1 << 1, + BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, + BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, +}; + +struct blk_integrity_iter { + void *prot_buf; + void 
*data_buf; + sector_t seed; + unsigned int data_size; + unsigned short interval; + const char *disk_name; +}; + +typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); +typedef void (integrity_prepare_fn) (struct request *); +typedef void (integrity_complete_fn) (struct request *, unsigned int); + +struct blk_integrity_profile { + integrity_processing_fn *generate_fn; + integrity_processing_fn *verify_fn; + integrity_prepare_fn *prepare_fn; + integrity_complete_fn *complete_fn; + const char *name; +}; + +#ifdef CONFIG_BLK_DEV_INTEGRITY +void blk_integrity_register(struct gendisk *, struct blk_integrity *); +void blk_integrity_unregister(struct gendisk *); +int blk_integrity_compare(struct gendisk *, struct gendisk *); +int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); + +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + struct blk_integrity *bi = &disk->queue->integrity; + + if (!bi->profile) + return NULL; + + return bi; +} + +static inline struct blk_integrity * +bdev_get_integrity(struct block_device *bdev) +{ + return blk_get_integrity(bdev->bd_disk); +} + +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return q->integrity.profile; +} + +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(const struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + +/** + * bio_integrity_intervals - Return number of integrity intervals for a bio + * @bi: blk_integrity profile for device + * @sectors: Size of the bio in 512-byte sectors + * + * Description: The block layer calculates everything in 512 byte + * sectors but integrity metadata is done in terms of the data integrity + * interval size of 
the storage device. Convert the block layer sectors + * to the appropriate number of integrity intervals. + */ +static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, + unsigned int sectors) +{ + return sectors >> (bi->interval_exp - 9); +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return bio_integrity_intervals(bi, sectors) * bi->tuple_size; +} + +static inline bool blk_integrity_rq(struct request *rq) +{ + return rq->cmd_flags & REQ_INTEGRITY; +} + +/* + * Return the first bvec that contains integrity data. Only drivers that are + * limited to a single integrity segment should use this helper. + */ +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) + return NULL; + return rq->bio->bi_integrity->bip_vec; +} +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline int blk_rq_count_integrity_sg(struct request_queue *q, + struct bio *b) +{ + return 0; +} +static inline int blk_rq_map_integrity_sg(struct request_queue *q, + struct bio *b, + struct scatterlist *s) +{ + return 0; +} +static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) +{ + return NULL; +} +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + return NULL; +} +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return false; +} +static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) +{ + return 0; +} +static inline void blk_integrity_register(struct gendisk *d, + struct blk_integrity *b) +{ +} +static inline void blk_integrity_unregister(struct gendisk *d) +{ +} +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ +} +static inline unsigned short +queue_max_integrity_segments(const struct request_queue *q) +{ + return 0; +} + +static inline unsigned int bio_integrity_intervals(struct 
blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} + +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return 0; +} +static inline int blk_integrity_rq(struct request *rq) +{ + return 0; +} + +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + return NULL; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#endif /* _LINUX_BLK_INTEGRITY_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 13ba1861e688..d319ffa59354 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -4,12 +4,276 @@ #include <linux/blkdev.h> #include <linux/sbitmap.h> -#include <linux/srcu.h> #include <linux/lockdep.h> +#include <linux/scatterlist.h> +#include <linux/prefetch.h> struct blk_mq_tags; struct blk_flush_queue; +#define BLKDEV_MIN_RQ 4 +#define BLKDEV_DEFAULT_RQ 128 + +typedef void (rq_end_io_fn)(struct request *, blk_status_t); + +/* + * request flags */ +typedef __u32 __bitwise req_flags_t; + +/* drive already may have started this one */ +#define RQF_STARTED ((__force req_flags_t)(1 << 1)) +/* may not be passed by ioscheduler */ +#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) +/* request for flush sequence */ +#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) +/* merge of different types, fail separately */ +#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) +/* track inflight for MQ */ +#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) +/* don't call prep for this one */ +#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) +/* vaguely specified driver internal error. 
Ignored by the block layer */ +#define RQF_FAILED ((__force req_flags_t)(1 << 10)) +/* don't warn about errors */ +#define RQF_QUIET ((__force req_flags_t)(1 << 11)) +/* elevator private data attached */ +#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) +/* account into disk and partition IO statistics */ +#define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) +/* runtime pm request */ +#define RQF_PM ((__force req_flags_t)(1 << 15)) +/* on IO scheduler merge hash */ +#define RQF_HASHED ((__force req_flags_t)(1 << 16)) +/* track IO completion time */ +#define RQF_STATS ((__force req_flags_t)(1 << 17)) +/* Look at ->special_vec for the actual data payload instead of the + bio chain. */ +#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) +/* The per-zone write lock is held for this request */ +#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) +/* already slept for hybrid poll */ +#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) +/* ->timeout has been called, don't expire again */ +#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) +/* queue has elevator attached */ +#define RQF_ELV ((__force req_flags_t)(1 << 22)) + +/* flags that prevent us from merging requests: */ +#define RQF_NOMERGE_FLAGS \ + (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) + +enum mq_rq_state { + MQ_RQ_IDLE = 0, + MQ_RQ_IN_FLIGHT = 1, + MQ_RQ_COMPLETE = 2, +}; + +/* + * Try to put the fields that are referenced together in the same cacheline. + * + * If you modify this structure, make sure to update blk_rq_init() and + * especially blk_mq_rq_ctx_init() to take care of the added fields. 
+ */ +struct request { + struct request_queue *q; + struct blk_mq_ctx *mq_ctx; + struct blk_mq_hw_ctx *mq_hctx; + + unsigned int cmd_flags; /* op and common flags */ + req_flags_t rq_flags; + + int tag; + int internal_tag; + + unsigned int timeout; + + /* the following two fields are internal, NEVER access directly */ + unsigned int __data_len; /* total data len */ + sector_t __sector; /* sector cursor */ + + struct bio *bio; + struct bio *biotail; + + union { + struct list_head queuelist; + struct request *rq_next; + }; + + struct block_device *part; +#ifdef CONFIG_BLK_RQ_ALLOC_TIME + /* Time that the first bio started allocating this request. */ + u64 alloc_time_ns; +#endif + /* Time that this request was allocated for this IO. */ + u64 start_time_ns; + /* Time that I/O was submitted to the device. */ + u64 io_start_time_ns; + +#ifdef CONFIG_BLK_WBT + unsigned short wbt_flags; +#endif + /* + * rq sectors used for blk stats. It has the same value + * with blk_rq_sectors(rq), except that it never be zeroed + * by completion. + */ + unsigned short stats_sectors; + + /* + * Number of scatter-gather DMA addr+len pairs after + * physical address coalescing is performed. + */ + unsigned short nr_phys_segments; + +#ifdef CONFIG_BLK_DEV_INTEGRITY + unsigned short nr_integrity_segments; +#endif + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *crypt_ctx; + struct blk_crypto_keyslot *crypt_keyslot; +#endif + + unsigned short write_hint; + unsigned short ioprio; + + enum mq_rq_state state; + atomic_t ref; + + unsigned long deadline; + + /* + * The hash is used inside the scheduler, and killed once the + * request reaches the dispatch list. The ipi_list is only used + * to queue the request for softirq completion, which is long + * after the request has been unhashed (and even removed from + * the dispatch list). 
+ */ + union { + struct hlist_node hash; /* merge hash */ + struct llist_node ipi_list; + }; + + /* + * The rb_node is only used inside the io scheduler, requests + * are pruned when moved to the dispatch queue. So let the + * completion_data share space with the rb_node. + */ + union { + struct rb_node rb_node; /* sort/lookup */ + struct bio_vec special_vec; + void *completion_data; + int error_count; /* for legacy drivers, don't use */ + }; + + + /* + * Three pointers are available for the IO schedulers, if they need + * more they have to dynamically allocate it. Flush requests are + * never put on the IO scheduler. So let the flush fields share + * space with the elevator data. + */ + union { + struct { + struct io_cq *icq; + void *priv[2]; + } elv; + + struct { + unsigned int seq; + struct list_head list; + rq_end_io_fn *saved_end_io; + } flush; + }; + + union { + struct __call_single_data csd; + u64 fifo_time; + }; + + /* + * completion callback. + */ + rq_end_io_fn *end_io; + void *end_io_data; +}; + +#define req_op(req) \ + ((req)->cmd_flags & REQ_OP_MASK) + +static inline bool blk_rq_is_passthrough(struct request *rq) +{ + return blk_op_is_passthrough(req_op(rq)); +} + +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + +#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) + +#define rq_dma_dir(rq) \ + (op_is_write(req_op(rq)) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE) + +#define rq_list_add(listptr, rq) do { \ + (rq)->rq_next = *(listptr); \ + *(listptr) = rq; \ +} while (0) + +#define rq_list_pop(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) { \ + __req = *(listptr); \ + *(listptr) = __req->rq_next; \ + } \ + __req; \ +}) + +#define rq_list_peek(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) \ + __req = *(listptr); \ + __req; \ +}) + +#define rq_list_for_each(listptr, pos) \ + for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) + +#define rq_list_for_each_safe(listptr, pos, nxt) \ + for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \ + pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL) + +#define rq_list_next(rq) (rq)->rq_next +#define rq_list_empty(list) ((list) == (struct request *) NULL) + +/** + * rq_list_move() - move a struct request from one list to another + * @src: The source list @rq is currently in + * @dst: The destination list that @rq will be appended to + * @rq: The request to move + * @prev: The request preceding @rq in @src (NULL if @rq is the head) + */ +static inline void rq_list_move(struct request **src, struct request **dst, + struct request *rq, struct request *prev) +{ + if (prev) + prev->rq_next = rq->rq_next; + else + *src = rq->rq_next; + rq_list_add(dst, rq); +} + +enum blk_eh_timer_return { + BLK_EH_DONE, /* drivers has completed the command */ + BLK_EH_RESET_TIMER, /* reset timer and try again */ +}; + +#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ +#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ + /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device @@ -126,9 +390,6 @@ struct blk_mq_hw_ctx { unsigned long queued; /** @run: Number of dispatched requests. */ unsigned long run; -#define BLK_MQ_MAX_DISPATCH_ORDER 7 - /** @dispatched: Number of dispatch requests by queue. 
*/ - unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; @@ -148,13 +409,6 @@ struct blk_mq_hw_ctx { /** @kobj: Kernel object for sysfs. */ struct kobject kobj; - /** @poll_considered: Count times blk_poll() was called. */ - unsigned long poll_considered; - /** @poll_invoked: Count how many requests blk_poll() polled. */ - unsigned long poll_invoked; - /** @poll_success: Count how many polled requests were completed. */ - unsigned long poll_success; - #ifdef CONFIG_BLK_DEBUG_FS /** * @debugfs_dir: debugfs directory for this hardware queue. Named @@ -170,13 +424,6 @@ struct blk_mq_hw_ctx { * q->unused_hctx_list. */ struct list_head hctx_list; - - /** - * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is - * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also - * blk_mq_hw_ctx_size(). - */ - struct srcu_struct srcu[]; }; /** @@ -232,13 +479,11 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. - * @active_queues_shared_sbitmap: - * number of active request queues per tag set. - * @__bitmap_tags: A shared tags sbitmap, used over all hctx's - * @__breserved_tags: - * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. + * @shared_tags: + * Shared set of tags. Has @nr_hw_queues elements. If set, + * shared by all @tags. * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. 
@@ -255,12 +500,11 @@ struct blk_mq_tag_set { unsigned int timeout; unsigned int flags; void *driver_data; - atomic_t active_queues_shared_sbitmap; - struct sbitmap_queue __bitmap_tags; - struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; + struct blk_mq_tags *shared_tags; + struct mutex tag_list_lock; struct list_head tag_list; }; @@ -276,8 +520,6 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, - bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** @@ -301,6 +543,14 @@ struct blk_mq_ops { void (*commit_rqs)(struct blk_mq_hw_ctx *); /** + * @queue_rqs: Queue a list of new requests. Driver is guaranteed + * that each request belongs to the same queue. If the driver doesn't + * empty the @rqlist completely, then the rest will be queued + * individually by the block layer upon return. + */ + void (*queue_rqs)(struct request **rqlist); + + /** * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case @@ -330,7 +580,7 @@ struct blk_mq_ops { /** * @poll: Called to poll for completion of a specific tag. */ - int (*poll)(struct blk_mq_hw_ctx *); + int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); /** * @complete: Mark the request as complete. @@ -364,11 +614,6 @@ struct blk_mq_ops { unsigned int); /** - * @initialize_rq_fn: Called from inside blk_get_request(). - */ - void (*initialize_rq_fn)(struct request *rq); - - /** * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. 
*/ @@ -432,6 +677,8 @@ enum { ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) +#define BLK_MQ_NO_HCTX_IDX (-1U) + struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, struct lock_class_key *lkclass); #define blk_mq_alloc_disk(set, queuedata) \ @@ -451,8 +698,6 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, unsigned int set_flags); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); -void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); - void blk_mq_free_request(struct request *rq); bool blk_mq_queue_inflight(struct request_queue *q); @@ -471,7 +716,40 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, struct request *blk_mq_alloc_request_hctx(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx); -struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); + +/* + * Tag address space map. + */ +struct blk_mq_tags { + unsigned int nr_tags; + unsigned int nr_reserved_tags; + + atomic_t active_queues; + + struct sbitmap_queue bitmap_tags; + struct sbitmap_queue breserved_tags; + + struct request **rqs; + struct request **static_rqs; + struct list_head page_list; + + /* + * used to clear request reference in rqs[] before freeing one + * request pool + */ + spinlock_t lock; +}; + +static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, + unsigned int tag) +{ + if (tag < tags->nr_tags) { + prefetch(tags->rqs[tag]); + return tags->rqs[tag]; + } + + return NULL; +} enum { BLK_MQ_UNIQUE_TAG_BITS = 16, @@ -521,9 +799,49 @@ static inline void blk_mq_set_request_complete(struct request *rq) WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); } +/* + * Complete the request directly instead of deferring it to softirq or + * completing it another CPU. Useful in preemptible instead of an interrupt. 
+ */ +static inline void blk_mq_complete_request_direct(struct request *rq, + void (*complete)(struct request *rq)) +{ + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + complete(rq); +} + void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); +void blk_mq_end_request_batch(struct io_comp_batch *ib); + +/* + * Only need start/end time stamping if we have iostat or + * blk stats enabled, or using an IO scheduler. + */ +static inline bool blk_mq_need_time_stamp(struct request *rq) +{ + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); +} + +/* + * Batched completions only work when there is no I/O error and no special + * ->end_io handler. + */ +static inline bool blk_mq_add_to_batch(struct request *req, + struct io_comp_batch *iob, int ioerror, + void (*complete)(struct io_comp_batch *)) +{ + if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) + return false; + if (!iob->complete) + iob->complete = complete; + else if (iob->complete != complete) + return false; + iob->need_ts |= blk_mq_need_time_stamp(req); + rq_list_add(&iob->req_list, req); + return true; +} void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); @@ -538,6 +856,7 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); +void blk_mq_wait_quiesce_done(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); @@ -605,16 +924,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ 
ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) -static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag != -1) - return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); - - return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | - BLK_QC_T_INTERNAL; -} - static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) @@ -628,13 +937,253 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; rq->ioprio = bio_prio(bio); - - if (bio->bi_bdev) - rq->rq_disk = bio->bi_bdev->bd_disk; } -blk_qc_t blk_mq_submit_bio(struct bio *bio); void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); -#endif +static inline bool rq_is_sync(struct request *rq) +{ + return op_is_sync(rq->cmd_flags); +} + +void blk_rq_init(struct request_queue *q, struct request *rq); +int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); +void blk_rq_unprep_clone(struct request *rq); +blk_status_t blk_insert_cloned_request(struct request_queue *q, + struct request *rq); + +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; + unsigned long offset; + int null_mapped; + int from_user; +}; + +int blk_rq_map_user(struct request_queue *, struct request *, + struct rq_map_data *, void __user *, unsigned long, gfp_t); +int blk_rq_map_user_iov(struct request_queue *, struct request *, + struct rq_map_data *, const struct iov_iter *, gfp_t); +int blk_rq_unmap_user(struct bio *); +int blk_rq_map_kern(struct request_queue *, struct request *, void *, + unsigned int, gfp_t); +int blk_rq_append_bio(struct request *rq, struct bio *bio); +void blk_execute_rq_nowait(struct request *rq, bool at_head, + rq_end_io_fn *end_io); +blk_status_t blk_execute_rq(struct request *rq, bool 
at_head); + +struct req_iterator { + struct bvec_iter iter; + struct bio *bio; +}; + +#define __rq_for_each_bio(_bio, rq) \ + if ((rq->bio)) \ + for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) + +#define rq_for_each_segment(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_segment(bvl, _iter.bio, _iter.iter) + +#define rq_for_each_bvec(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_bvec(bvl, _iter.bio, _iter.iter) + +#define rq_iter_last(bvec, _iter) \ + (_iter.bio->bi_next == NULL && \ + bio_iter_last(bvec, _iter.iter)) + +/* + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_stats_sectors() : sectors of the entire request used for stats + */ +static inline sector_t blk_rq_pos(const struct request *rq) +{ + return rq->__sector; +} + +static inline unsigned int blk_rq_bytes(const struct request *rq) +{ + return rq->__data_len; +} + +static inline int blk_rq_cur_bytes(const struct request *rq) +{ + if (!rq->bio) + return 0; + if (!bio_has_data(rq->bio)) /* dataless requests such as discard */ + return rq->bio->bi_iter.bi_size; + return bio_iovec(rq->bio).bv_len; +} + +static inline unsigned int blk_rq_sectors(const struct request *rq) +{ + return blk_rq_bytes(rq) >> SECTOR_SHIFT; +} + +static inline unsigned int blk_rq_cur_sectors(const struct request *rq) +{ + return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; +} + +static inline unsigned int blk_rq_stats_sectors(const struct request *rq) +{ + return rq->stats_sectors; +} + +/* + * Some commands like WRITE SAME have a payload or data transfer size which + * is different from the size of the request. Any driver that supports such + * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to + * calculate the data transfer size. 
+ */ +static inline unsigned int blk_rq_payload_bytes(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec.bv_len; + return blk_rq_bytes(rq); +} + +/* + * Return the first full biovec in the request. The caller needs to check that + * there are any bvecs before calling this helper. + */ +static inline struct bio_vec req_bvec(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec; + return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); +} + +static inline unsigned int blk_rq_count_bios(struct request *rq) +{ + unsigned int nr_bios = 0; + struct bio *bio; + + __rq_for_each_bio(bio, rq) + nr_bios++; + + return nr_bios; +} + +void blk_steal_bios(struct bio_list *list, struct request *rq); + +/* + * Request completion related functions. + * + * blk_update_request() completes given number of bytes and updates + * the request without completing it. + */ +bool blk_update_request(struct request *rq, blk_status_t error, + unsigned int nr_bytes); +void blk_abort_request(struct request *); + +/* + * Number of physical segments as sent to the device. + * + * Normally this is the number of discontiguous data segments sent by the + * submitter. But for data-less command like discard we might have no + * actual data segments submitted, but the driver might have to add it's + * own special payload. In that case we still return 1 here so that this + * special payload will be mapped. + */ +static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return 1; + return rq->nr_phys_segments; +} + +/* + * Number of discard segments (or ranges) the driver needs to fill in. + * Each discard bio merged into a request is counted as one segment. 
+ */ +static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) +{ + return max_t(unsigned short, rq->nr_phys_segments, 1); +} + +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist, struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist) +{ + struct scatterlist *last_sg = NULL; + + return __blk_rq_map_sg(q, rq, sglist, &last_sg); +} +void blk_dump_rq_flags(struct request *, char *); + +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int blk_rq_zone_no(struct request *rq) +{ + return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); +} + +static inline unsigned int blk_rq_zone_is_seq(struct request *rq) +{ + return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); +} + +bool blk_req_needs_zone_write_lock(struct request *rq); +bool blk_req_zone_write_trylock(struct request *rq); +void __blk_req_zone_write_lock(struct request *rq); +void __blk_req_zone_write_unlock(struct request *rq); + +static inline void blk_req_zone_write_lock(struct request *rq) +{ + if (blk_req_needs_zone_write_lock(rq)) + __blk_req_zone_write_lock(rq); +} + +static inline void blk_req_zone_write_unlock(struct request *rq) +{ + if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) + __blk_req_zone_write_unlock(rq); +} + +static inline bool blk_req_zone_is_write_locked(struct request *rq) +{ + return rq->q->seq_zones_wlock && + test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); +} + +static inline bool blk_req_can_dispatch_to_zone(struct request *rq) +{ + if (!blk_req_needs_zone_write_lock(rq)) + return true; + return !blk_req_zone_is_write_locked(rq); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline bool blk_req_needs_zone_write_lock(struct request *rq) +{ + return false; +} + +static inline void blk_req_zone_write_lock(struct request *rq) +{ +} + +static inline void blk_req_zone_write_unlock(struct request *rq) +{ +} +static inline bool 
blk_req_zone_is_write_locked(struct request *rq) +{ + return false; +} + +static inline bool blk_req_can_dispatch_to_zone(struct request *rq) +{ + return true; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +#endif /* BLK_MQ_H */ diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index b80c65aba249..2580e05a8ab6 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); -extern void blk_post_runtime_resume(struct request_queue *q, int err); +extern void blk_post_runtime_resume(struct request_queue *q); extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index be622b5a21ed..fe065c394fff 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,8 +20,26 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +/* + * The basic unit of block I/O is a sector. It is used in a number of contexts + * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 + * bytes. Variables of type sector_t represent an offset or size that is a + * multiple of 512 bytes. Hence these two constants. 
+ */ +#ifndef SECTOR_SHIFT +#define SECTOR_SHIFT 9 +#endif +#ifndef SECTOR_SIZE +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#endif + +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +#define SECTOR_MASK (PAGE_SECTORS - 1) + struct block_device { sector_t bd_start_sect; + sector_t bd_nr_sectors; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; bool bd_read_only; /* read-only policy */ @@ -38,6 +56,7 @@ struct block_device { u8 bd_partno; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; + struct request_queue * bd_queue; /* The counter of freeze processes */ int bd_fsfreeze_count; @@ -208,6 +227,9 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -227,8 +249,8 @@ struct bio { struct bvec_iter bi_iter; + blk_qc_t bi_cookie; bio_end_io_t *bi_end_io; - void *bi_private; #ifdef CONFIG_BLK_CGROUP /* @@ -384,7 +406,7 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ - __REQ_HIPRI, + __REQ_POLLED, /* caller polls for completion using bio_poll */ /* for driver use */ __REQ_DRV, @@ -409,7 +431,7 @@ enum req_flag_bits { #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_HIPRI (1ULL << __REQ_HIPRI) +#define REQ_POLLED (1ULL << __REQ_POLLED) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) @@ -431,8 +453,6 @@ enum stat_group { #define bio_op(bio) \ ((bio)->bi_opf & REQ_OP_MASK) -#define req_op(req) \ - ((req)->cmd_flags & REQ_OP_MASK) /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, @@ -497,31 +517,6 @@ static inline int op_stat_group(unsigned int op) 
return op_is_write(op); } -typedef unsigned int blk_qc_t; -#define BLK_QC_T_NONE -1U -#define BLK_QC_T_SHIFT 16 -#define BLK_QC_T_INTERNAL (1U << 31) - -static inline bool blk_qc_t_valid(blk_qc_t cookie) -{ - return cookie != BLK_QC_T_NONE; -} - -static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) -{ - return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; -} - -static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) -{ - return cookie & ((1u << BLK_QC_T_SHIFT) - 1); -} - -static inline bool blk_qc_t_is_internal(blk_qc_t cookie) -{ - return (cookie & BLK_QC_T_INTERNAL) != 0; -} - struct blk_rq_stat { u64 mean; u64 min; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12b9dbcc980e..9c95df26fc26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -3,8 +3,6 @@ #define _LINUX_BLKDEV_H #include <linux/sched.h> -#include <linux/sched/clock.h> -#include <linux/major.h> #include <linux/genhd.h> #include <linux/list.h> #include <linux/llist.h> @@ -12,18 +10,13 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/wait.h> -#include <linux/mempool.h> -#include <linux/pfn.h> #include <linux/bio.h> -#include <linux/stringify.h> #include <linux/gfp.h> -#include <linux/smp.h> #include <linux/rcupdate.h> #include <linux/percpu-refcount.h> -#include <linux/scatterlist.h> #include <linux/blkzoned.h> -#include <linux/pm.h> #include <linux/sbitmap.h> +#include <linux/srcu.h> struct module; struct request_queue; @@ -33,14 +26,12 @@ struct request; struct sg_io_hdr; struct blkcg_gq; struct blk_flush_queue; +struct kiocb; struct pr_ops; struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; -struct blk_keyslot_manager; - -#define BLKDEV_MIN_RQ 4 -#define BLKDEV_MAX_RQ 128 /* Default maximum */ +struct blk_crypto_profile; /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -54,186 +45,13 @@ struct blk_keyslot_manager; */ #define BLKCG_MAX_POLS 6 -typedef void 
(rq_end_io_fn)(struct request *, blk_status_t); - -/* - * request flags */ -typedef __u32 __bitwise req_flags_t; - -/* drive already may have started this one */ -#define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* may not be passed by ioscheduler */ -#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) -/* request for flush sequence */ -#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) -/* merge of different types, fail separately */ -#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) -/* don't call prep for this one */ -#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* vaguely specified driver internal error. Ignored by the block layer */ -#define RQF_FAILED ((__force req_flags_t)(1 << 10)) -/* don't warn about errors */ -#define RQF_QUIET ((__force req_flags_t)(1 << 11)) -/* elevator private data attached */ -#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) -/* account into disk and partition IO statistics */ -#define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) -/* runtime pm request */ -#define RQF_PM ((__force req_flags_t)(1 << 15)) -/* on IO scheduler merge hash */ -#define RQF_HASHED ((__force req_flags_t)(1 << 16)) -/* track IO completion time */ -#define RQF_STATS ((__force req_flags_t)(1 << 17)) -/* Look at ->special_vec for the actual data payload instead of the - bio chain. 
*/ -#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) -/* The per-zone write lock is held for this request */ -#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) -/* already slept for hybrid poll */ -#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) -/* ->timeout has been called, don't expire again */ -#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) - -/* flags that prevent us from merging requests: */ -#define RQF_NOMERGE_FLAGS \ - (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) - -/* - * Request state for blk-mq. - */ -enum mq_rq_state { - MQ_RQ_IDLE = 0, - MQ_RQ_IN_FLIGHT = 1, - MQ_RQ_COMPLETE = 2, -}; - -/* - * Try to put the fields that are referenced together in the same cacheline. - * - * If you modify this structure, make sure to update blk_rq_init() and - * especially blk_mq_rq_ctx_init() to take care of the added fields. - */ -struct request { - struct request_queue *q; - struct blk_mq_ctx *mq_ctx; - struct blk_mq_hw_ctx *mq_hctx; - - unsigned int cmd_flags; /* op and common flags */ - req_flags_t rq_flags; - - int tag; - int internal_tag; - - /* the following two fields are internal, NEVER access directly */ - unsigned int __data_len; /* total data len */ - sector_t __sector; /* sector cursor */ - - struct bio *bio; - struct bio *biotail; - - struct list_head queuelist; - - /* - * The hash is used inside the scheduler, and killed once the - * request reaches the dispatch list. The ipi_list is only used - * to queue the request for softirq completion, which is long - * after the request has been unhashed (and even removed from - * the dispatch list). - */ - union { - struct hlist_node hash; /* merge hash */ - struct llist_node ipi_list; - }; - - /* - * The rb_node is only used inside the io scheduler, requests - * are pruned when moved to the dispatch queue. So let the - * completion_data share space with the rb_node. 
- */ - union { - struct rb_node rb_node; /* sort/lookup */ - struct bio_vec special_vec; - void *completion_data; - int error_count; /* for legacy drivers, don't use */ - }; - - /* - * Three pointers are available for the IO schedulers, if they need - * more they have to dynamically allocate it. Flush requests are - * never put on the IO scheduler. So let the flush fields share - * space with the elevator data. - */ - union { - struct { - struct io_cq *icq; - void *priv[2]; - } elv; - - struct { - unsigned int seq; - struct list_head list; - rq_end_io_fn *saved_end_io; - } flush; - }; - - struct gendisk *rq_disk; - struct block_device *part; -#ifdef CONFIG_BLK_RQ_ALLOC_TIME - /* Time that the first bio started allocating this request. */ - u64 alloc_time_ns; -#endif - /* Time that this request was allocated for this IO. */ - u64 start_time_ns; - /* Time that I/O was submitted to the device. */ - u64 io_start_time_ns; - -#ifdef CONFIG_BLK_WBT - unsigned short wbt_flags; -#endif - /* - * rq sectors used for blk stats. It has the same value - * with blk_rq_sectors(rq), except that it never be zeroed - * by completion. - */ - unsigned short stats_sectors; - - /* - * Number of scatter-gather DMA addr+len pairs after - * physical address coalescing is performed. - */ - unsigned short nr_phys_segments; - -#if defined(CONFIG_BLK_DEV_INTEGRITY) - unsigned short nr_integrity_segments; -#endif - -#ifdef CONFIG_BLK_INLINE_ENCRYPTION - struct bio_crypt_ctx *crypt_ctx; - struct blk_ksm_keyslot *crypt_keyslot; -#endif - - unsigned short write_hint; - unsigned short ioprio; - - enum mq_rq_state state; - refcount_t ref; - - unsigned int timeout; - unsigned long deadline; - - union { - struct __call_single_data csd; - u64 fifo_time; - }; +static inline int blk_validate_block_size(unsigned long bsize) +{ + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; - /* - * completion callback. 
- */ - rq_end_io_fn *end_io; - void *end_io_data; -}; + return 0; +} static inline bool blk_op_is_passthrough(unsigned int op) { @@ -241,35 +59,6 @@ static inline bool blk_op_is_passthrough(unsigned int op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } -static inline bool blk_rq_is_passthrough(struct request *rq) -{ - return blk_op_is_passthrough(req_op(rq)); -} - -static inline unsigned short req_get_ioprio(struct request *req) -{ - return req->ioprio; -} - -#include <linux/elevator.h> - -struct blk_queue_ctx; - -struct bio_vec; - -enum blk_eh_timer_return { - BLK_EH_DONE, /* drivers has completed the command */ - BLK_EH_RESET_TIMER, /* reset timer and try again */ -}; - -enum blk_queue_state { - Queue_down, - Queue_up, -}; - -#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ -#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ - /* * Zoned block device models (zoned limit). * @@ -370,6 +159,34 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, #endif /* CONFIG_BLK_DEV_ZONED */ +/* + * Independent access ranges: struct blk_independent_access_range describes + * a range of contiguous sectors that can be accessed using device command + * execution resources that are independent from the resources used for + * other access ranges. This is typically found with single-LUN multi-actuator + * HDDs where each access range is served by a different set of heads. + * The set of independent ranges supported by the device is defined using + * struct blk_independent_access_ranges. The independent ranges must not overlap + * and must include all sectors within the disk capacity (no sector holes + * allowed). + * For a device with multiple ranges, requests targeting sectors in different + * ranges can be executed in parallel. A request can straddle an access range + * boundary. 
+ */ +struct blk_independent_access_range { + struct kobject kobj; + struct request_queue *queue; + sector_t sector; + sector_t nr_sectors; +}; + +struct blk_independent_access_ranges { + struct kobject kobj; + bool sysfs_registered; + unsigned int nr_ia_ranges; + struct blk_independent_access_range ia_range[]; +}; + struct request_queue { struct request *last_merge; struct elevator_queue *elevator; @@ -444,23 +261,21 @@ struct request_queue { unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION - /* Inline crypto capabilities */ - struct blk_keyslot_manager *ksm; + struct blk_crypto_profile *crypto_profile; #endif unsigned int rq_timeout; int poll_nsec; struct blk_stat_callback *poll_cb; - struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS]; + struct blk_rq_stat *poll_stat; struct timer_list timeout; struct work_struct timeout_work; - atomic_t nr_active_requests_shared_sbitmap; + atomic_t nr_active_requests_shared_tags; - struct sbitmap_queue sched_bitmap_tags; - struct sbitmap_queue sched_breserved_tags; + struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -536,6 +351,8 @@ struct request_queue { */ struct mutex mq_freeze_lock; + int quiesce_depth; + struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; struct bio_set bio_split; @@ -549,15 +366,26 @@ struct request_queue { bool mq_sysfs_init_done; - size_t cmd_size; - #define BLK_MAX_WRITE_HINTS 5 u64 write_hints[BLK_MAX_WRITE_HINTS]; + + /* + * Independent sector access ranges. This is always NULL for + * devices that do not have multiple independent access ranges. + */ + struct blk_independent_access_ranges *ia_ranges; + + /** + * @srcu: Sleepable RCU. Use as lock when type of the request queue + * is blocking (BLK_MQ_F_BLOCKING). 
Must be the last member + */ + struct srcu_struct srcu[]; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ +#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ @@ -577,9 +405,7 @@ struct request_queue { #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ -#define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ -#define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ @@ -597,6 +423,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) +#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) @@ -613,8 +440,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_secure_erase(q) \ (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -#define blk_queue_scsi_passthrough(q) \ - 
test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) #ifdef CONFIG_BLK_RQ_ALLOC_TIME @@ -638,11 +463,6 @@ extern void blk_clear_pm_only(struct request_queue *q); #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) - -#define rq_dma_dir(rq) \ - (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) - #define dma_map_bvec(dev, bv, dir, attrs) \ dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) @@ -758,42 +578,6 @@ static inline unsigned int queue_max_active_zones(const struct request_queue *q) } #endif /* CONFIG_BLK_DEV_ZONED */ -static inline bool rq_is_sync(struct request *rq) -{ - return op_is_sync(rq->cmd_flags); -} - -static inline bool rq_mergeable(struct request *rq) -{ - if (blk_rq_is_passthrough(rq)) - return false; - - if (req_op(rq) == REQ_OP_FLUSH) - return false; - - if (req_op(rq) == REQ_OP_WRITE_ZEROES) - return false; - - if (req_op(rq) == REQ_OP_ZONE_APPEND) - return false; - - if (rq->cmd_flags & REQ_NOMERGE_FLAGS) - return false; - if (rq->rq_flags & RQF_NOMERGE_FLAGS) - return false; - - return true; -} - -static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) -{ - if (bio_page(a) == bio_page(b) && - bio_offset(a) == bio_offset(b)) - return true; - - return false; -} - static inline unsigned int blk_queue_depth(struct request_queue *q) { if (q->queue_depth) @@ -808,83 +592,20 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) #define BLK_MIN_SG_TIMEOUT (7 * HZ) -struct rq_map_data { - struct page **pages; - int page_order; - int nr_entries; - unsigned long offset; - int null_mapped; - int from_user; -}; - -struct req_iterator { - struct bvec_iter iter; - struct bio *bio; -}; - /* This should not be used directly - use rq_for_each_segment */ #define 
for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) -#define __rq_for_each_bio(_bio, rq) \ - if ((rq->bio)) \ - for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) - -#define rq_for_each_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bio_for_each_segment(bvl, _iter.bio, _iter.iter) - -#define rq_for_each_bvec(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bio_for_each_bvec(bvl, _iter.bio, _iter.iter) -#define rq_iter_last(bvec, _iter) \ - (_iter.bio->bi_next == NULL && \ - bio_iter_last(bvec, _iter.iter)) - -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" -#endif -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -extern void rq_flush_dcache_pages(struct request *rq); -#else -static inline void rq_flush_dcache_pages(struct request *rq) -{ -} -#endif extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -blk_qc_t submit_bio_noacct(struct bio *bio); -extern void blk_rq_init(struct request_queue *q, struct request *rq); -extern void blk_put_request(struct request *); -extern struct request *blk_get_request(struct request_queue *, unsigned int op, - blk_mq_req_flags_t flags); +void submit_bio_noacct(struct bio *bio); + extern int blk_lld_busy(struct request_queue *q); -extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data); -extern void blk_rq_unprep_clone(struct request *rq); -extern blk_status_t blk_insert_cloned_request(struct request_queue *q, - struct request *rq); -int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_queue_split(struct bio **); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -extern int blk_rq_map_user(struct 
request_queue *, struct request *, - struct rq_map_data *, void __user *, unsigned long, - gfp_t); -extern int blk_rq_unmap_user(struct bio *); -extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); -extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct rq_map_data *, const struct iov_iter *, - gfp_t); -extern void blk_execute_rq_nowait(struct gendisk *, - struct request *, int, rq_end_io_fn *); - -blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, - int at_head); /* Helper to convert REQ_OP_XXX to its string format XXX */ extern const char *blk_op_str(unsigned int op); @@ -892,69 +613,17 @@ extern const char *blk_op_str(unsigned int op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); +/* only poll the hardware once, don't continue until a completion was found */ +#define BLK_POLL_ONESHOT (1 << 0) +/* do not sleep to wait for the expected completion time */ +#define BLK_POLL_NOSLEEP (1 << 1) +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; /* this is never NULL */ -} - -/* - * The basic unit of block I/O is a sector. It is used in a number of contexts - * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 - * bytes. Variables of type sector_t represent an offset or size that is a - * multiple of 512 bytes. Hence these two constants. 
- */ -#ifndef SECTOR_SHIFT -#define SECTOR_SHIFT 9 -#endif -#ifndef SECTOR_SIZE -#define SECTOR_SIZE (1 << SECTOR_SHIFT) -#endif - -#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) -#define SECTOR_MASK (PAGE_SECTORS - 1) - -/* - * blk_rq_pos() : the current sector - * blk_rq_bytes() : bytes left in the entire request - * blk_rq_cur_bytes() : bytes left in the current segment - * blk_rq_err_bytes() : bytes left till the next error boundary - * blk_rq_sectors() : sectors left in the entire request - * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_stats_sectors() : sectors of the entire request used for stats - */ -static inline sector_t blk_rq_pos(const struct request *rq) -{ - return rq->__sector; -} - -static inline unsigned int blk_rq_bytes(const struct request *rq) -{ - return rq->__data_len; -} - -static inline int blk_rq_cur_bytes(const struct request *rq) -{ - return rq->bio ? bio_cur_bytes(rq->bio) : 0; -} - -extern unsigned int blk_rq_err_bytes(const struct request *rq); - -static inline unsigned int blk_rq_sectors(const struct request *rq) -{ - return blk_rq_bytes(rq) >> SECTOR_SHIFT; -} - -static inline unsigned int blk_rq_cur_sectors(const struct request *rq) -{ - return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; -} - -static inline unsigned int blk_rq_stats_sectors(const struct request *rq) -{ - return rq->stats_sectors; + return bdev->bd_queue; /* this is never NULL */ } #ifdef CONFIG_BLK_DEV_ZONED @@ -973,42 +642,8 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), bio->bi_iter.bi_sector); } - -static inline unsigned int blk_rq_zone_no(struct request *rq) -{ - return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); -} - -static inline unsigned int blk_rq_zone_is_seq(struct request *rq) -{ - return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); -} #endif /* CONFIG_BLK_DEV_ZONED */ -/* - * Some commands like WRITE SAME 
have a payload or data transfer size which - * is different from the size of the request. Any driver that supports such - * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to - * calculate the data transfer size. - */ -static inline unsigned int blk_rq_payload_bytes(struct request *rq) -{ - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - return rq->special_vec.bv_len; - return blk_rq_bytes(rq); -} - -/* - * Return the first full biovec in the request. The caller needs to check that - * there are any bvecs before calling this helper. - */ -static inline struct bio_vec req_bvec(struct request *rq) -{ - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - return rq->special_vec; - return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); -} - static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { @@ -1048,47 +683,6 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, return min(q->limits.max_sectors, chunk_sectors); } -static inline unsigned int blk_rq_get_max_sectors(struct request *rq, - sector_t offset) -{ - struct request_queue *q = rq->q; - - if (blk_rq_is_passthrough(rq)) - return q->limits.max_hw_sectors; - - if (!q->limits.chunk_sectors || - req_op(rq) == REQ_OP_DISCARD || - req_op(rq) == REQ_OP_SECURE_ERASE) - return blk_queue_get_max_sectors(q, req_op(rq)); - - return min(blk_max_size_offset(q, offset, 0), - blk_queue_get_max_sectors(q, req_op(rq))); -} - -static inline unsigned int blk_rq_count_bios(struct request *rq) -{ - unsigned int nr_bios = 0; - struct bio *bio; - - __rq_for_each_bio(bio, rq) - nr_bios++; - - return nr_bios; -} - -void blk_steal_bios(struct bio_list *list, struct request *rq); - -/* - * Request completion related functions. - * - * blk_update_request() completes given number of bytes and updates - * the request without completing it. 
- */ -extern bool blk_update_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); - -extern void blk_abort_request(struct request *); - /* * Access functions for manipulating queue properties */ @@ -1133,46 +727,24 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -extern void blk_queue_required_elevator_features(struct request_queue *q, - unsigned int features); -extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, - struct device *dev); -/* - * Number of physical segments as sent to the device. - * - * Normally this is the number of discontiguous data segments sent by the - * submitter. But for data-less command like discard we might have no - * actual data segments submitted, but the driver might have to add it's - * own special payload. In that case we still return 1 here so that this - * special payload will be mapped. - */ -static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) -{ - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - return 1; - return rq->nr_phys_segments; -} +struct blk_independent_access_ranges * +disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); +void disk_set_independent_access_ranges(struct gendisk *disk, + struct blk_independent_access_ranges *iars); /* - * Number of discard segments (or ranges) the driver needs to fill in. - * Each discard bio merged into a request is counted as one segment. 
+ * Elevator features for blk_queue_required_elevator_features: */ -static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) -{ - return max_t(unsigned short, rq->nr_phys_segments, 1); -} - -int __blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist, struct scatterlist **last_sg); -static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist) -{ - struct scatterlist *last_sg = NULL; +/* Supports zoned block devices sequential write constraint */ +#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) +/* Supports scheduling on multiple hardware queues */ +#define ELEVATOR_F_MQ_AWARE (1U << 1) - return __blk_rq_map_sg(q, rq, sglist, &last_sg); -} -extern void blk_dump_rq_flags(struct request *, char *); +extern void blk_queue_required_elevator_features(struct request_queue *q, + unsigned int features); +extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, + struct device *dev); bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); @@ -1187,19 +759,24 @@ extern void blk_set_queue_dying(struct request_queue *); * as the lock contention for request_queue lock is reduced. * * It is ok not to disable preemption when adding the request to the plug list - * or when attempting a merge, because blk_schedule_flush_list() will only flush - * the plug list when the task sleeps by itself. For details, please see - * schedule() where blk_schedule_flush_plug() is called. + * or when attempting a merge. For details, please see schedule() where + * blk_flush_plug() is called. 
*/ struct blk_plug { - struct list_head mq_list; /* blk-mq requests */ - struct list_head cb_list; /* md requires an unplug callback */ + struct request *mq_list; /* blk-mq requests */ + + /* if ios_left is > 1, we can batch tag/rq allocations */ + struct request *cached_rq; + unsigned short nr_ios; + unsigned short rq_count; + bool multiple_queues; + bool has_elevator; bool nowait; + + struct list_head cb_list; /* md requires an unplug callback */ }; -#define BLK_MAX_REQUEST_COUNT 16 -#define BLK_PLUG_FLUSH_SIZE (128 * 1024) struct blk_plug_cb; typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); @@ -1211,32 +788,17 @@ struct blk_plug_cb { extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, int size); extern void blk_start_plug(struct blk_plug *); +extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *, bool); - -static inline void blk_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - if (plug) - blk_flush_plug_list(plug, false); -} - -static inline void blk_schedule_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - if (plug) - blk_flush_plug_list(plug, true); -} +void blk_flush_plug(struct blk_plug *plug, bool from_schedule); static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; return plug && - (!list_empty(&plug->mq_list) || - !list_empty(&plug->cb_list)); + (plug->mq_list || !list_empty(&plug->cb_list)); } int blkdev_issue_flush(struct block_device *bdev); @@ -1245,23 +807,23 @@ long nr_blockdev_pages(void); struct blk_plug { }; -static inline void blk_start_plug(struct blk_plug *plug) +static inline void blk_start_plug_nr_ios(struct blk_plug *plug, + unsigned short nr_ios) { } -static inline void blk_finish_plug(struct blk_plug *plug) +static inline void blk_start_plug(struct blk_plug *plug) { } -static 
inline void blk_flush_plug(struct task_struct *task) +static inline void blk_finish_plug(struct blk_plug *plug) { } -static inline void blk_schedule_flush_plug(struct task_struct *task) +static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } - static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; @@ -1499,22 +1061,6 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector return offset << SECTOR_SHIFT; } -/* - * Two cases of handling DISCARD merge: - * If max_discard_segments > 1, the driver takes every bio - * as a range and send them to controller together. The ranges - * needn't to be contiguous. - * Otherwise, the bios/requests will be handled as same as - * others which should be contiguous. - */ -static inline bool blk_discard_mergable(struct request *req) -{ - if (req_op(req) == REQ_OP_DISCARD && - queue_max_discard_segments(req->q) > 1) - return true; - return false; -} - static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1628,210 +1174,32 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -enum blk_integrity_flags { - BLK_INTEGRITY_VERIFY = 1 << 0, - BLK_INTEGRITY_GENERATE = 1 << 1, - BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, - BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, -}; - -struct blk_integrity_iter { - void *prot_buf; - void *data_buf; - sector_t seed; - unsigned int data_size; - unsigned short interval; - const char *disk_name; -}; - -typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); -typedef void (integrity_prepare_fn) (struct request *); -typedef void (integrity_complete_fn) (struct request *, unsigned int); - -struct blk_integrity_profile { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - 
integrity_prepare_fn *prepare_fn; - integrity_complete_fn *complete_fn; - const char *name; -}; - -extern void blk_integrity_register(struct gendisk *, struct blk_integrity *); -extern void blk_integrity_unregister(struct gendisk *); -extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, - struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); - -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - if (!bi->profile) - return NULL; - - return bi; -} - -static inline -struct blk_integrity *bdev_get_integrity(struct block_device *bdev) -{ - return blk_get_integrity(bdev->bd_disk); -} - -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return q->integrity.profile; -} - -static inline bool blk_integrity_rq(struct request *rq) -{ - return rq->cmd_flags & REQ_INTEGRITY; -} - -static inline void blk_queue_max_integrity_segments(struct request_queue *q, - unsigned int segs) -{ - q->limits.max_integrity_segments = segs; -} - -static inline unsigned short -queue_max_integrity_segments(const struct request_queue *q) -{ - return q->limits.max_integrity_segments; -} - -/** - * bio_integrity_intervals - Return number of integrity intervals for a bio - * @bi: blk_integrity profile for device - * @sectors: Size of the bio in 512-byte sectors - * - * Description: The block layer calculates everything in 512 byte - * sectors but integrity metadata is done in terms of the data integrity - * interval size of the storage device. Convert the block layer sectors - * to the appropriate number of integrity intervals. 
- */ -static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, - unsigned int sectors) -{ - return sectors >> (bi->interval_exp - 9); -} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return bio_integrity_intervals(bi, sectors) * bi->tuple_size; -} - -/* - * Return the first bvec that contains integrity data. Only drivers that are - * limited to a single integrity segment should use this helper. - */ -static inline struct bio_vec *rq_integrity_vec(struct request *rq) -{ - if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) - return NULL; - return rq->bio->bi_integrity->bip_vec; -} - -#else /* CONFIG_BLK_DEV_INTEGRITY */ - -struct bio; -struct block_device; -struct gendisk; -struct blk_integrity; - -static inline int blk_integrity_rq(struct request *rq) -{ - return 0; -} -static inline int blk_rq_count_integrity_sg(struct request_queue *q, - struct bio *b) -{ - return 0; -} -static inline int blk_rq_map_integrity_sg(struct request_queue *q, - struct bio *b, - struct scatterlist *s) -{ - return 0; -} -static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) -{ - return NULL; -} -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) -{ - return NULL; -} -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return false; -} -static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) -{ - return 0; -} -static inline void blk_integrity_register(struct gendisk *d, - struct blk_integrity *b) -{ -} -static inline void blk_integrity_unregister(struct gendisk *d) -{ -} -static inline void blk_queue_max_integrity_segments(struct request_queue *q, - unsigned int segs) -{ -} -static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) -{ - return 0; -} - -static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, - unsigned int sectors) -{ - return 0; 
-} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return 0; -} - -static inline struct bio_vec *rq_integrity_vec(struct request *rq) -{ - return NULL; -} - -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #ifdef CONFIG_BLK_INLINE_ENCRYPTION -bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q); - -void blk_ksm_unregister(struct request_queue *q); +bool blk_crypto_register(struct blk_crypto_profile *profile, + struct request_queue *q); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ -static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm, - struct request_queue *q) +static inline bool blk_crypto_register(struct blk_crypto_profile *profile, + struct request_queue *q) { return true; } -static inline void blk_ksm_unregister(struct request_queue *q) { } - #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ +enum blk_unique_id { + /* these match the Designator Types specified in SPC */ + BLK_UID_T10 = 1, + BLK_UID_EUI64 = 2, + BLK_UID_NAA = 3, +}; + +#define NFL4_UFLG_MASK 0x0000003F struct block_device_operations { - blk_qc_t (*submit_bio) (struct bio *bio); + void (*submit_bio)(struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); @@ -1847,6 +1215,9 @@ struct block_device_operations { int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); char *(*devnode)(struct gendisk *disk, umode_t *mode); + /* returns the length of the identifier or a negative errno: */ + int (*get_unique_id)(struct gendisk *disk, u8 id[16], + enum blk_unique_id id_type); struct module *owner; const struct pr_ops *pr_ops; @@ -1869,60 +1240,6 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -#ifdef 
CONFIG_BLK_DEV_ZONED -bool blk_req_needs_zone_write_lock(struct request *rq); -bool blk_req_zone_write_trylock(struct request *rq); -void __blk_req_zone_write_lock(struct request *rq); -void __blk_req_zone_write_unlock(struct request *rq); - -static inline void blk_req_zone_write_lock(struct request *rq) -{ - if (blk_req_needs_zone_write_lock(rq)) - __blk_req_zone_write_lock(rq); -} - -static inline void blk_req_zone_write_unlock(struct request *rq) -{ - if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) - __blk_req_zone_write_unlock(rq); -} - -static inline bool blk_req_zone_is_write_locked(struct request *rq) -{ - return rq->q->seq_zones_wlock && - test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); -} - -static inline bool blk_req_can_dispatch_to_zone(struct request *rq) -{ - if (!blk_req_needs_zone_write_lock(rq)) - return true; - return !blk_req_zone_is_write_locked(rq); -} -#else -static inline bool blk_req_needs_zone_write_lock(struct request *rq) -{ - return false; -} - -static inline void blk_req_zone_write_lock(struct request *rq) -{ -} - -static inline void blk_req_zone_write_unlock(struct request *rq) -{ -} -static inline bool blk_req_zone_is_write_locked(struct request *rq) -{ - return false; -} - -static inline bool blk_req_can_dispatch_to_zone(struct request *rq) -{ - return true; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline void blk_wake_io_task(struct task_struct *waiter) { /* @@ -1991,6 +1308,8 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); +int sync_blockdev_nowait(struct block_device *bdev); +void sync_bdevs(bool wait); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1999,10 +1318,25 @@ static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline int sync_blockdev_nowait(struct block_device *bdev) +{ + return 0; +} +static inline void 
sync_bdevs(bool wait) +{ +} #endif int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); int thaw_bdev(struct block_device *bdev); +struct io_comp_batch { + struct request *req_list; + bool need_ts; + void (*complete)(struct io_comp_batch *); +}; + +#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } + #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index a083e15df608..22501a293fa5 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -2,7 +2,7 @@ #ifndef BLKTRACE_H #define BLKTRACE_H -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/relay.h> #include <linux/compat.h> #include <uapi/linux/blktrace_api.h> diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 537e1b991f11..a4665c7ab07c 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -7,8 +7,18 @@ * Author: Masami Hiramatsu <[email protected]> */ +#ifdef __KERNEL__ #include <linux/kernel.h> #include <linux/types.h> +#else /* !__KERNEL__ */ +/* + * NOTE: This is only for tools/bootconfig, because tools/bootconfig will + * run the parser sanity test. + * This does NOT mean linux/bootconfig.h is available in the user space. + * However, if you change this file, please make sure the tools/bootconfig + * has no issue on building and running. + */ +#endif #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 @@ -25,10 +35,10 @@ * The checksum will be used with the BOOTCONFIG_MAGIC and the size for * embedding the bootconfig in the initrd image. 
*/ -static inline __init u32 xbc_calc_checksum(void *data, u32 size) +static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size) { unsigned char *p = data; - u32 ret = 0; + uint32_t ret = 0; while (size--) ret += *p++; @@ -38,10 +48,10 @@ static inline __init u32 xbc_calc_checksum(void *data, u32 size) /* XBC tree node */ struct xbc_node { - u16 next; - u16 child; - u16 parent; - u16 data; + uint16_t next; + uint16_t child; + uint16_t parent; + uint16_t data; } __attribute__ ((__packed__)); #define XBC_KEY 0 @@ -271,13 +281,12 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node, } /* XBC node initializer */ -int __init xbc_init(char *buf, const char **emsg, int *epos); +int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); +/* XBC node and size information */ +int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_destroy_all(void); - -/* Debug dump functions */ -void __init xbc_debug_dump(void); +void __init xbc_exit(void); #endif diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index 2bc8b1f69c93..cc35d010fa94 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page); */ static inline void free_bootmem_page(struct page *page) { - unsigned long magic = (unsigned long)page->freelist; + unsigned long magic = page->index; /* * The reserve_bootmem_region sets the reserved flag on bootmem diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index eed86eb0a1de..fc53e0ad56d9 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -2,6 +2,7 @@ #ifndef _LINUX_BH_H #define _LINUX_BH_H +#include <linux/instruction_pointer.h> #include <linux/preempt.h> #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h new file mode 100644 index 
000000000000..695d1224a71b --- /dev/null +++ b/include/linux/bpf-cgroup-defs.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_CGROUP_DEFS_H +#define _BPF_CGROUP_DEFS_H + +#ifdef CONFIG_CGROUP_BPF + +#include <linux/list.h> +#include <linux/percpu-refcount.h> +#include <linux/workqueue.h> + +struct bpf_prog_array; + +enum cgroup_bpf_attach_type { + CGROUP_BPF_ATTACH_TYPE_INVALID = -1, + CGROUP_INET_INGRESS = 0, + CGROUP_INET_EGRESS, + CGROUP_INET_SOCK_CREATE, + CGROUP_SOCK_OPS, + CGROUP_DEVICE, + CGROUP_INET4_BIND, + CGROUP_INET6_BIND, + CGROUP_INET4_CONNECT, + CGROUP_INET6_CONNECT, + CGROUP_INET4_POST_BIND, + CGROUP_INET6_POST_BIND, + CGROUP_UDP4_SENDMSG, + CGROUP_UDP6_SENDMSG, + CGROUP_SYSCTL, + CGROUP_UDP4_RECVMSG, + CGROUP_UDP6_RECVMSG, + CGROUP_GETSOCKOPT, + CGROUP_SETSOCKOPT, + CGROUP_INET4_GETPEERNAME, + CGROUP_INET6_GETPEERNAME, + CGROUP_INET4_GETSOCKNAME, + CGROUP_INET6_GETSOCKNAME, + CGROUP_INET_SOCK_RELEASE, + MAX_CGROUP_BPF_ATTACH_TYPE +}; + +struct cgroup_bpf { + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS + */ + struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; + u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; + + /* list of cgroup shared storages */ + struct list_head storages; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array *inactive; + + /* reference counter used to detach bpf programs after cgroup removal */ + struct percpu_ref refcnt; + + /* cgroup_bpf is released using a work queue */ + struct work_struct release_work; +}; + +#else /* CONFIG_CGROUP_BPF */ +struct cgroup_bpf {}; +#endif /* CONFIG_CGROUP_BPF */ + +#endif diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h 
index 2746fd804216..b525d8cdc25b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -3,10 +3,10 @@ #define _BPF_CGROUP_H #include <linux/bpf.h> +#include <linux/bpf-cgroup-defs.h> #include <linux/errno.h> #include <linux/jump_label.h> #include <linux/percpu.h> -#include <linux/percpu-refcount.h> #include <linux/rbtree.h> #include <uapi/linux/bpf.h> @@ -23,33 +23,6 @@ struct ctl_table_header; struct task_struct; #ifdef CONFIG_CGROUP_BPF -enum cgroup_bpf_attach_type { - CGROUP_BPF_ATTACH_TYPE_INVALID = -1, - CGROUP_INET_INGRESS = 0, - CGROUP_INET_EGRESS, - CGROUP_INET_SOCK_CREATE, - CGROUP_SOCK_OPS, - CGROUP_DEVICE, - CGROUP_INET4_BIND, - CGROUP_INET6_BIND, - CGROUP_INET4_CONNECT, - CGROUP_INET6_CONNECT, - CGROUP_INET4_POST_BIND, - CGROUP_INET6_POST_BIND, - CGROUP_UDP4_SENDMSG, - CGROUP_UDP6_SENDMSG, - CGROUP_SYSCTL, - CGROUP_UDP4_RECVMSG, - CGROUP_UDP6_RECVMSG, - CGROUP_GETSOCKOPT, - CGROUP_SETSOCKOPT, - CGROUP_INET4_GETPEERNAME, - CGROUP_INET6_GETPEERNAME, - CGROUP_INET4_GETSOCKNAME, - CGROUP_INET6_GETSOCKNAME, - CGROUP_INET_SOCK_RELEASE, - MAX_CGROUP_BPF_ATTACH_TYPE -}; #define CGROUP_ATYPE(type) \ case BPF_##type: return type @@ -127,56 +100,9 @@ struct bpf_prog_list { struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; -struct bpf_prog_array; - -struct cgroup_bpf { - /* array of effective progs in this cgroup */ - struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE]; - - /* attached progs to this cgroup and attach flags - * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will - * have either zero or one element - * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS - */ - struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; - u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; - - /* list of cgroup shared storages */ - struct list_head storages; - - /* temp storage for effective prog array used by prog_attach/detach */ - struct bpf_prog_array *inactive; - - /* reference counter used to detach bpf 
programs after cgroup removal */ - struct percpu_ref refcnt; - - /* cgroup_bpf is released using a work queue */ - struct work_struct release_work; -}; - int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); -int __cgroup_bpf_attach(struct cgroup *cgrp, - struct bpf_prog *prog, struct bpf_prog *replace_prog, - struct bpf_cgroup_link *link, - enum bpf_attach_type type, u32 flags); -int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_cgroup_link *link, - enum bpf_attach_type type); -int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr); - -/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ -int cgroup_bpf_attach(struct cgroup *cgrp, - struct bpf_prog *prog, struct bpf_prog *replace_prog, - struct bpf_cgroup_link *link, enum bpf_attach_type type, - u32 flags); -int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type); -int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr); - int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum cgroup_bpf_attach_type atype); @@ -471,7 +397,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else -struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -517,6 +442,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define cgroup_bpf_enabled(atype) (0) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 722f799c1a2e..413cfa5e4b07 100644 
--- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -3,15 +3,9 @@ #define _BPF_NETNS_H #include <linux/mutex.h> +#include <net/netns/bpf.h> #include <uapi/linux/bpf.h> -enum netns_bpf_attach_type { - NETNS_BPF_INVALID = -1, - NETNS_BPF_FLOW_DISSECTOR = 0, - NETNS_BPF_SK_LOOKUP, - MAX_NETNS_BPF_ATTACH_TYPE -}; - static inline enum netns_bpf_attach_type to_netns_bpf_attach_type(enum bpf_attach_type attach_type) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3db6f6c95489..6e947cd91152 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -48,6 +48,7 @@ extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; +typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); @@ -142,7 +143,8 @@ struct bpf_map_ops { int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); - int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + int (*map_for_each_callback)(struct bpf_map *map, + bpf_callback_t callback_fn, void *callback_ctx, u64 flags); /* BTF name and id of struct allocated by map_alloc */ @@ -166,6 +168,7 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ int timer_off; /* >=0 valid offset, <0 error */ @@ -173,15 +176,15 @@ struct bpf_map { int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; + u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif char name[BPF_OBJ_NAME_LEN]; - u32 btf_vmlinux_value_type_id; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 22 bytes hole */ + /* 14 bytes hole */ /* The 3rd and 4th cacheline with misc 
members to avoid false sharing * particularly with refcounting. @@ -190,7 +193,7 @@ struct bpf_map { atomic64_t usercnt; struct work_struct work; struct mutex freeze_mutex; - u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ + atomic64_t writecnt; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -294,6 +297,34 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, extern const struct bpf_map_ops bpf_map_offload_ops; +/* bpf_type_flag contains a set of flags that are applicable to the values of + * arg_type, ret_type and reg_type. For example, a pointer value may be null, + * or a memory is read-only. We classify types into two categories: base types + * and extended types. Extended types are base types combined with a type flag. + * + * Currently there are no more than 32 base types in arg_type, ret_type and + * reg_types. + */ +#define BPF_BASE_TYPE_BITS 8 + +enum bpf_type_flag { + /* PTR may be NULL. */ + PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), + + /* MEM is read-only. When applied on bpf_arg, it indicates the arg is + * compatible with both mutable and immutable memory. + */ + MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_RDONLY, +}; + +/* Max number of base types. */ +#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) + +/* Max number of all types. 
*/ +#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1)) + /* function argument constraints */ enum bpf_arg_type { ARG_DONTCARE = 0, /* unused argument in helper function */ @@ -305,13 +336,11 @@ enum bpf_arg_type { ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ - ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ - ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, * helper function must fill all bytes or clear * them in error case. @@ -321,42 +350,65 @@ enum bpf_arg_type { ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ - ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ - ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ - ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ ARG_PTR_TO_FUNC, /* pointer to a bpf program 
function */ - ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ + ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ __BPF_ARG_TYPE_MAX, + + /* Extended arg_types. */ + ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE, + ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, + ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, + ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, + ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, + ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, + + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ + __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT, }; +static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* type of values returned from helper functions */ enum bpf_return_type { RET_INTEGER, /* function returns integer */ RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ - RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ - RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ - RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ - RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ - RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ - RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ - RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ + RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ + RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ + RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ + RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated 
memory */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ + __BPF_RET_TYPE_MAX, + + /* Extended ret_types. */ + RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE, + RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, + RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, + RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, + RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, + + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ + __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT, }; +static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL @@ -418,18 +470,15 @@ enum bpf_reg_type { PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ - PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ PTR_TO_STACK, /* reg == frame_pointer + offset */ PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ - PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ - PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ - PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TP_BUFFER, /* reg points to a writable raw 
tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ /* PTR_TO_BTF_ID points to a kernel struct that does not need @@ -447,18 +496,25 @@ enum bpf_reg_type { * been checked for null. Used primarily to inform the verifier * an explicit null check is required for this struct. */ - PTR_TO_BTF_ID_OR_NULL, PTR_TO_MEM, /* reg points to valid memory region */ - PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ - PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ - PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ - PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ - PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ - PTR_TO_MAP_KEY, /* reg points to a map element key */ __BPF_REG_TYPE_MAX, + + /* Extended reg_types. */ + PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE, + PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, + PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, + PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, + PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, + + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ + __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT, }; +static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); /* The information passed from prog-specific *_is_valid_access * back to the verifier. 
@@ -481,6 +537,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static inline bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -511,7 +573,7 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id); + bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; struct bpf_prog_offload_ops { @@ -723,6 +785,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); +int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -767,6 +830,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); int bpf_jit_charge_modmem(u32 pages); void bpf_jit_uncharge_modmem(u32 pages); +bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@ -795,6 +859,10 @@ static inline bool is_bpf_image_address(unsigned long address) { return false; } +static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) +{ + return false; +} #endif struct bpf_func_info_aux { @@ -875,6 +943,7 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. 
arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; + struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; @@ -884,6 +953,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ + u32 verified_insns; struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY @@ -998,6 +1068,10 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, + struct bpf_prog *prog, + const struct btf_func_model *model, + void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1012,6 +1086,22 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } + +#ifdef CONFIG_NET +/* Define it here to avoid the use of forward declaration */ +struct bpf_dummy_ops_state { + int val; +}; + +struct bpf_dummy_ops { + int (*test_1)(struct bpf_dummy_ops_state *cb); + int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, + char a3, unsigned long a4); +}; + +int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +#endif #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1050,7 +1140,7 @@ struct bpf_array { }; #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 
1M insns */ -#define MAX_TAIL_CALL_CNT 32 +#define MAX_TAIL_CALL_CNT 33 #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ @@ -1092,6 +1182,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); @@ -1320,28 +1411,16 @@ extern struct mutex bpf_stats_enabled_mutex; * kprobes, tracepoints) to prevent deadlocks on map operations as any of * these events can happen inside a region which holds a map bucket lock * and can deadlock on it. - * - * Use the preemption safe inc/dec variants on RT because migrate disable - * is preemptible on RT and preemption in the middle of the RMW operation - * might lead to inconsistent state. Use the raw variants for non RT - * kernels as migrate_disable() maps to preempt_disable() so the slightly - * more expensive save operation can be avoided. 
*/ static inline void bpf_disable_instrumentation(void) { migrate_disable(); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_inc(bpf_prog_active); - else - __this_cpu_inc(bpf_prog_active); + this_cpu_inc(bpf_prog_active); } static inline void bpf_enable_instrumentation(void) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_dec(bpf_prog_active); - else - __this_cpu_dec(bpf_prog_active); + this_cpu_dec(bpf_prog_active); migrate_enable(); } @@ -1387,6 +1466,7 @@ void bpf_map_put(struct bpf_map *map); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); +bool bpf_map_write_active(const struct bpf_map *map); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, @@ -1589,17 +1669,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); struct btf *bpf_get_btf_vmlinux(void); /* Map specifics */ -struct xdp_buff; +struct xdp_frame; struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; void __dev_flush(void); -int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx); -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); @@ -1608,7 +1688,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress); void __cpu_map_flush(void); -int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, +int 
cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, struct sk_buff *skb); @@ -1639,10 +1719,33 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); + +static inline bool bpf_tracing_ctx_access(int off, int size, + enum bpf_access_type type) +{ + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; +} + +static inline bool bpf_tracing_btf_ctx_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (!bpf_tracing_ctx_access(off, size, type)) + return false; + return btf_ctx_access(off, size, type, prog, info); +} + int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, @@ -1677,6 +1780,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); +struct bpf_core_ctx { + struct bpf_verifier_log *log; + const struct btf *btf; +}; + +int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1755,26 +1866,26 @@ static inline void __dev_flush(void) { } -struct xdp_buff; +struct xdp_frame; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; static inline -int 
dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, +int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline -int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; } static inline -int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, +int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { return 0; @@ -1802,7 +1913,7 @@ static inline void __cpu_map_flush(void) } static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, - struct xdp_buff *xdp, + struct xdp_frame *xdpf, struct net_device *dev_rx) { return 0; @@ -1860,7 +1971,8 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, + struct module *owner) { return false; } @@ -2091,6 +2203,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; @@ -2105,6 +2218,10 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; +extern const struct 
bpf_func_proto bpf_find_vma_proto; +extern const struct bpf_func_proto bpf_loop_proto; +extern const struct bpf_func_proto bpf_strncmp_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2220,6 +2337,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); +#define MAX_BPRINTF_VARARGS 12 + int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 24496bc28e7b..37b3906af8b1 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -8,6 +8,7 @@ #define _BPF_LOCAL_STORAGE_H #include <linux/bpf.h> +#include <linux/filter.h> #include <linux/rculist.h> #include <linux/list.h> #include <linux/hash.h> @@ -16,6 +17,9 @@ #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 +#define bpf_rcu_lock_held() \ + (rcu_read_lock_held() || rcu_read_lock_trace_held() || \ + rcu_read_lock_bh_held()) struct bpf_local_storage_map_bucket { struct hlist_head list; raw_spinlock_t lock; @@ -161,4 +165,6 @@ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, void *value, u64 map_flags); +void bpf_local_storage_free_rcu(struct rcu_head *rcu); + #endif /* _BPF_LOCAL_STORAGE_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index bbe1eefa4c8a..48a91c51c015 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff 
--git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5424124dbe36..143401d4c9d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -18,6 +18,8 @@ * that converting umax_value to int cannot overflow. */ #define BPF_MAX_VAR_SIZ (1 << 29) +/* size of type_str_buf in bpf_verifier. */ +#define TYPE_STR_BUF_LEN 64 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -388,6 +390,8 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) #define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) #define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ +#define BPF_LOG_MIN_ALIGNMENT 8U +#define BPF_LOG_ALIGNMENT 40U static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { @@ -396,6 +400,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) log->level == BPF_LOG_KERNEL); } +static inline bool +bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +{ + return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && + log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); +} + #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_info { @@ -467,6 +478,16 @@ struct bpf_verifier_env { /* longest register parentage chain walked for liveness marking */ u32 longest_mark_read_walk; bpfptr_t fd_array; + + /* bit mask to keep track of whether a register has been accessed + * since the last time the function state was printed + */ + u32 scratched_regs; + /* Same as scratched_regs but for stack slots */ + u64 scratched_stack_slots; + u32 prev_log_len, prev_insn_print_len; + /* buffer used in reg_type_str() to generate reg_type string */ + char type_str_buf[TYPE_STR_BUF_LEN]; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, @@ -527,5 +548,20 @@ int 
bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *tgt_prog, u32 btf_id, struct bpf_attach_target_info *tgt_info); +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); + +#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) + +/* extract base type from bpf_{arg, return, reg}_type. */ +static inline u32 base_type(u32 type) +{ + return type & BPF_BASE_TYPE_MASK; +} + +/* extract flags from an extended type. See bpf_type_flag in bpf.h. */ +static inline u32 type_flag(u32 type) +{ + return type & ~BPF_BASE_TYPE_MASK; +} #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 546e27fc6d46..46e1757d06a3 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -3,6 +3,7 @@ #ifndef _LINUX_BPFPTR_H #define _LINUX_BPFPTR_H +#include <linux/mm.h> #include <linux/sockptr.h> typedef sockptr_t bpfptr_t; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index c2c2147dfeb8..747fad264033 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -32,6 +32,7 @@ #define PHY_ID_BCM72113 0x35905310 #define PHY_ID_BCM72116 0x35905350 +#define PHY_ID_BCM72165 0x35905340 #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7255 0xae025120 #define PHY_ID_BCM7260 0xae025190 @@ -49,6 +50,7 @@ #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 +#define PHY_ID_BCM7712 0x35905330 #define PHY_ID_BCM_CYGNUS 0xae025200 #define PHY_ID_BCM_OMEGA 0xae025100 @@ -66,6 +68,7 @@ #define PHY_BRCM_CLEAR_RGMII_MODE 0x00000004 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00000008 #define PHY_BRCM_EN_MASTER_MODE 0x00000010 +#define PHY_BRCM_IDDQ_SUSPEND 0x00000020 /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) @@ -83,6 +86,7 @@ #define MII_BCM54XX_EXP_DATA 0x15 /* Expansion register data */ #define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */ +#define MII_BCM54XX_EXP_SEL_TOP 0x0d00 /* 
TOP_MISC expansion register select */ #define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */ #define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */ #define MII_BCM54XX_EXP_SEL_ETC 0x0d00 /* Expansion register spare + 2k mem */ @@ -233,6 +237,7 @@ #define MII_BCM54XX_EXP_EXP08 0x0F08 #define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 #define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 +#define MII_BCM54XX_EXP_EXP08_FORCE_DAC_WAKE 0x0100 #define MII_BCM54XX_EXP_EXP75 0x0f75 #define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c #define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 @@ -241,6 +246,12 @@ #define MII_BCM54XX_EXP_EXP97 0x0f97 #define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c +/* Top-MISC expansion registers */ +#define BCM54XX_TOP_MISC_IDDQ_CTRL (MII_BCM54XX_EXP_SEL_TOP + 0x06) +#define BCM54XX_TOP_MISC_IDDQ_LP (1 << 0) +#define BCM54XX_TOP_MISC_IDDQ_SD (1 << 2) +#define BCM54XX_TOP_MISC_IDDQ_SR (1 << 3) + /* * BCM5482: Secondary SerDes registers */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 214fde93214b..0c74348cbc9d 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -5,6 +5,7 @@ #define _LINUX_BTF_H 1 #include <linux/types.h> +#include <linux/bpfptr.h> #include <uapi/linux/btf.h> #include <uapi/linux/bpf.h> @@ -143,6 +144,53 @@ static inline bool btf_type_is_enum(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; } +static inline bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +static inline u16 btf_kind(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info); +} + +static inline bool btf_is_enum(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM; +} + +static inline bool btf_is_composite(const struct btf_type *t) +{ + u16 kind = btf_kind(t); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + +static inline bool btf_is_array(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ARRAY; +} + +static inline bool btf_is_int(const struct btf_type *t) 
+{ + return btf_kind(t) == BTF_KIND_INT; +} + +static inline bool btf_is_ptr(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_PTR; +} + +static inline u8 btf_int_offset(const struct btf_type *t) +{ + return BTF_INT_OFFSET(*(u32 *)(t + 1)); +} + +static inline u8 btf_int_encoding(const struct btf_type *t) +{ + return BTF_INT_ENCODING(*(u32 *)(t + 1)); +} + static inline bool btf_type_is_scalar(const struct btf_type *t) { return btf_type_is_int(t) || btf_type_is_enum(t); @@ -183,6 +231,11 @@ static inline u16 btf_type_vlen(const struct btf_type *t) return BTF_INFO_VLEN(t->info); } +static inline u16 btf_vlen(const struct btf_type *t) +{ + return btf_type_vlen(t); +} + static inline u16 btf_func_linkage(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -193,25 +246,54 @@ static inline bool btf_type_kflag(const struct btf_type *t) return BTF_INFO_KFLAG(t->info); } -static inline u32 btf_member_bit_offset(const struct btf_type *struct_type, - const struct btf_member *member) +static inline u32 __btf_member_bit_offset(const struct btf_type *struct_type, + const struct btf_member *member) { return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset) : member->offset; } -static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type, - const struct btf_member *member) +static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type, + const struct btf_member *member) { return btf_type_kflag(struct_type) ? 
BTF_MEMBER_BITFIELD_SIZE(member->offset) : 0; } +static inline struct btf_member *btf_members(const struct btf_type *t) +{ + return (struct btf_member *)(t + 1); +} + +static inline u32 btf_member_bit_offset(const struct btf_type *t, u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + + return __btf_member_bit_offset(t, m); +} + +static inline u32 btf_member_bitfield_size(const struct btf_type *t, u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + + return __btf_member_bitfield_size(t, m); +} + static inline const struct btf_member *btf_type_member(const struct btf_type *t) { return (const struct btf_member *)(t + 1); } +static inline struct btf_array *btf_array(const struct btf_type *t) +{ + return (struct btf_array *)(t + 1); +} + +static inline struct btf_enum *btf_enum(const struct btf_type *t) +{ + return (struct btf_enum *)(t + 1); +} + static inline const struct btf_var_secinfo *btf_type_var_secinfo( const struct btf_type *t) { @@ -238,4 +320,48 @@ static inline const char *btf_name_by_offset(const struct btf *btf, } #endif +struct kfunc_btf_id_set { + struct list_head list; + struct btf_id_set *set; + struct module *owner; +}; + +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s); +void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s); +bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, + struct module *owner); + +extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; +extern struct kfunc_btf_id_list prog_test_kfunc_list; +#else +static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ +} +static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ +} +static inline bool 
bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, + u32 kfunc_id, struct module *owner) +{ + return false; +} + +static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; +static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; +#endif + +#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ + struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ + THIS_MODULE } + #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 47d9abfbdb55..919c0fde1c51 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -73,7 +73,7 @@ asm( \ __BTF_ID_LIST(name, local) \ extern u32 name[]; -#define BTF_ID_LIST_GLOBAL(name) \ +#define BTF_ID_LIST_GLOBAL(name, n) \ __BTF_ID_LIST(name, globl) /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with @@ -83,7 +83,7 @@ __BTF_ID_LIST(name, globl) BTF_ID_LIST(name) \ BTF_ID(prefix, typename) #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \ - BTF_ID_LIST_GLOBAL(name) \ + BTF_ID_LIST_GLOBAL(name, 1) \ BTF_ID(prefix, typename) /* @@ -149,7 +149,7 @@ extern struct btf_id_set name; #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; @@ -189,6 +189,18 @@ MAX_BTF_SOCK_TYPE, extern u32 btf_sock_ids[]; #endif -extern u32 btf_task_struct_ids[]; +#define BTF_TRACING_TYPE_xxx \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct) + +enum { +#define BTF_TRACING_TYPE(name, type) name, +BTF_TRACING_TYPE_xxx +#undef BTF_TRACING_TYPE +MAX_BTF_TRACING_TYPE, +}; + +extern u32 btf_tracing_ids[]; #endif diff --git 
a/include/linux/bvec.h b/include/linux/bvec.h index 0e9bdd42dafb..35c25dff651a 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -44,7 +44,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -}; +} __packed; struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 4b13e0a3e15b..c9a4c96c9943 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -190,7 +190,7 @@ static inline void be64_add_cpu(__be64 *var, u64 val) static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) { - int i; + size_t i; for (i = 0; i < len; i++) dst[i] = cpu_to_be32(src[i]); @@ -198,7 +198,7 @@ static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) { - int i; + size_t i; for (i = 0; i < len; i++) dst[i] = be32_to_cpu(src[i]); diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h new file mode 100644 index 000000000000..fef8b607f97e --- /dev/null +++ b/include/linux/cacheflush.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CACHEFLUSH_H +#define _LINUX_CACHEFLUSH_H + +#include <asm/cacheflush.h> + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio); +#endif +#else +static inline void flush_dcache_folio(struct folio *folio) +{ +} +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0 +#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */ + +#endif /* _LINUX_CACHEFLUSH_H */ diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2f909ed084c6..4ff37cb763ae 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,6 @@ #define _LINUX_CACHEINFO_H #include <linux/bitops.h> -#include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/smp.h> diff --git 
a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 9de6e9053e34..a81652d1c6f3 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -12,12 +12,8 @@ #define CAN_SYNC_SEG 1 -/* Kilobits and Megabits per second */ -#define CAN_KBPS 1000UL -#define CAN_MBPS 1000000UL - -/* Megahertz */ -#define CAN_MHZ 1000000UL +#define CAN_CTRLMODE_TDC_MASK \ + (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) /* * struct can_tdc - CAN FD Transmission Delay Compensation parameters @@ -28,34 +24,54 @@ * * To solve this issue, ISO 11898-1 introduces in section 11.3.3 * "Transmitter delay compensation" a SSP (Secondary Sample Point) - * equal to the distance, in time quanta, from the start of the bit - * time on the TX pin to the actual measurement on the RX pin. + * equal to the distance from the start of the bit time on the TX pin + * to the actual measurement on the RX pin. * * This structure contains the parameters to calculate that SSP. * - * @tdcv: Transmitter Delay Compensation Value. Distance, in time - * quanta, from when the bit is sent on the TX pin to when it is - * received on the RX pin of the transmitter. Possible options: + * -+----------- one bit ----------+-- TX pin + * |<--- Sample Point --->| * - * 0: automatic mode. The controller dynamically measures @tdcv - * for each transmitted CAN FD frame. + * --+----------- one bit ----------+-- RX pin + * |<-------- TDCV -------->| + * |<------- TDCO ------->| + * |<----------- Secondary Sample Point ---------->| * - * Other values: manual mode. Use the fixed provided value. + * To increase precision, contrary to the other bittiming parameters + * which are measured in time quanta, the TDC parameters are measured + * in clock periods (also referred as "minimum time quantum" in ISO + * 11898-1). * - * @tdco: Transmitter Delay Compensation Offset. 
Offset value, in time - * quanta, defining the distance between the start of the bit - * reception on the RX pin of the transceiver and the SSP - * position such that SSP = @tdcv + @tdco. + * @tdcv: Transmitter Delay Compensation Value. The time needed for + * the signal to propagate, i.e. the distance, in clock periods, + * from the start of the bit on the TX pin to when it is received + * on the RX pin. @tdcv depends on the controller modes: + * + * CAN_CTRLMODE_TDC_AUTO is set: The transceiver dynamically + * measures @tdcv for each transmitted CAN FD frame and the + * value provided here should be ignored. + * + * CAN_CTRLMODE_TDC_MANUAL is set: use the fixed provided @tdcv + * value. * - * If @tdco is zero, then TDC is disabled and both @tdcv and - * @tdcf should be ignored. + * N.B. CAN_CTRLMODE_TDC_AUTO and CAN_CTRLMODE_TDC_MANUAL are + * mutually exclusive. Only one can be set at a time. If both + * CAN_TDC_CTRLMODE_AUTO and CAN_TDC_CTRLMODE_MANUAL are unset, + * TDC is disabled and all the values of this structure should be + * ignored. + * + * @tdco: Transmitter Delay Compensation Offset. Offset value, in + * clock periods, defining the distance between the start of the + * bit reception on the RX pin of the transceiver and the SSP + * position such that SSP = @tdcv + @tdco. * * @tdcf: Transmitter Delay Compensation Filter window. Defines the - * minimum value for the SSP position in time quanta. If SSP is - * less than @tdcf, then no delay compensations occur and the - * normal sampling point is used instead. The feature is enabled - * if and only if @tdcv is set to zero (automatic mode) and @tdcf - * is configured to a value greater than @tdco. + * minimum value for the SSP position in clock periods. If the + * SSP position is less than @tdcf, then no delay compensations + * occur and the normal sampling point is used instead. 
The + * feature is enabled if and only if @tdcv is set to zero + * (automatic mode) and @tdcf is configured to a value greater + * than @tdco. */ struct can_tdc { u32 tdcv; @@ -67,19 +83,32 @@ struct can_tdc { * struct can_tdc_const - CAN hardware-dependent constant for * Transmission Delay Compensation * - * @tdcv_max: Transmitter Delay Compensation Value maximum value. - * Should be set to zero if the controller does not support - * manual mode for tdcv. + * @tdcv_min: Transmitter Delay Compensation Value minimum value. If + * the controller does not support manual mode for tdcv + * (c.f. flag CAN_CTRLMODE_TDC_MANUAL) then this value is + * ignored. + * @tdcv_max: Transmitter Delay Compensation Value maximum value. If + * the controller does not support manual mode for tdcv + * (c.f. flag CAN_CTRLMODE_TDC_MANUAL) then this value is + * ignored. + * + * @tdco_min: Transmitter Delay Compensation Offset minimum value. * @tdco_max: Transmitter Delay Compensation Offset maximum value. * Should not be zero. If the controller does not support TDC, * then the pointer to this structure should be NULL. + * + * @tdcf_min: Transmitter Delay Compensation Filter window minimum + * value. If @tdcf_max is zero, this value is ignored. * @tdcf_max: Transmitter Delay Compensation Filter window maximum * value. Should be set to zero if the controller does not * support this feature. 
*/ struct can_tdc_const { + u32 tdcv_min; u32 tdcv_max; + u32 tdco_min; u32 tdco_max; + u32 tdcf_min; u32 tdcf_max; }; @@ -87,7 +116,9 @@ struct can_tdc_const { int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); -void can_calc_tdco(struct net_device *dev); +void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, + const struct can_bittiming *dbt, + u32 *ctrlmode, u32 ctrlmode_supported); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, @@ -97,7 +128,10 @@ can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, return -EINVAL; } -static inline void can_calc_tdco(struct net_device *dev) +static inline void +can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, + const struct can_bittiming *dbt, + u32 *ctrlmode, u32 ctrlmode_supported) { } #endif /* CONFIG_CAN_CALC_BITTIMING */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 2413253e54c7..c2ea47f30046 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -64,12 +64,14 @@ struct can_priv { struct gpio_desc *termination_gpio; u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX]; + unsigned int echo_skb_max; + struct sk_buff **echo_skb; + enum can_state state; /* CAN controller features - see include/uapi/linux/can/netlink.h */ u32 ctrlmode; /* current options setting */ u32 ctrlmode_supported; /* options that can be modified by netlink */ - u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; struct delayed_work restart_work; @@ -82,9 +84,7 @@ struct can_priv { enum can_state *state); int (*do_get_berr_counter)(const struct net_device *dev, struct can_berr_counter *bec); - - unsigned int echo_skb_max; - struct sk_buff **echo_skb; + int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); #ifdef CONFIG_CAN_LEDS struct led_trigger *tx_led_trig; @@ -96,20 
+96,64 @@ struct can_priv { #endif }; +static inline bool can_tdc_is_enabled(const struct can_priv *priv) +{ + return !!(priv->ctrlmode & CAN_CTRLMODE_TDC_MASK); +} + +/* + * can_get_relative_tdco() - TDCO relative to the sample point + * + * struct can_tdc::tdco represents the absolute offset from TDCV. Some + * controllers use instead an offset relative to the Sample Point (SP) + * such that: + * + * SSP = TDCV + absolute TDCO + * = TDCV + SP + relative TDCO + * + * -+----------- one bit ----------+-- TX pin + * |<--- Sample Point --->| + * + * --+----------- one bit ----------+-- RX pin + * |<-------- TDCV -------->| + * |<------------------------>| absolute TDCO + * |<--- Sample Point --->| + * | |<->| relative TDCO + * |<------------- Secondary Sample Point ------------>| + */ +static inline s32 can_get_relative_tdco(const struct can_priv *priv) +{ + const struct can_bittiming *dbt = &priv->data_bittiming; + s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + + dbt->phase_seg1) * dbt->brp; + + return (s32)priv->tdc.tdco - sample_point_in_tc; +} /* helper to define static CAN controller features at device creation time */ -static inline void can_set_static_ctrlmode(struct net_device *dev, - u32 static_mode) +static inline int __must_check can_set_static_ctrlmode(struct net_device *dev, + u32 static_mode) { struct can_priv *priv = netdev_priv(dev); /* alloc_candev() succeeded => netdev_priv() is valid at this point */ + if (priv->ctrlmode_supported & static_mode) { + netdev_warn(dev, + "Controller features can not be supported and static at the same time\n"); + return -EINVAL; + } priv->ctrlmode = static_mode; - priv->ctrlmode_static = static_mode; /* override MTU which was set by default in can_setup()? 
*/ if (static_mode & CAN_CTRLMODE_FD) dev->mtu = CANFD_MTU; + + return 0; +} + +static inline u32 can_get_static_ctrlmode(struct can_priv *priv) +{ + return priv->ctrlmode & ~priv->ctrlmode_supported; } void can_setup(struct net_device *dev); diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index d311bc369a39..fdb22b00674a 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -21,8 +21,9 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, - unsigned int *frame_len_ptr); +unsigned int __must_check can_get_echo_skb(struct net_device *dev, + unsigned int idx, + unsigned int *frame_len_ptr); void can_free_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h new file mode 100644 index 000000000000..efd8205282da --- /dev/null +++ b/include/linux/cc_platform.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <[email protected]> + */ + +#ifndef _LINUX_CC_PLATFORM_H +#define _LINUX_CC_PLATFORM_H + +#include <linux/types.h> +#include <linux/stddef.h> + +/** + * enum cc_attr - Confidential computing attributes + * + * These attributes represent confidential computing features that are + * currently active. + */ +enum cc_attr { + /** + * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active + * + * The platform/OS is running with active memory encryption. 
This + * includes running either as a bare-metal system or a hypervisor + * and actively using memory encryption or as a guest/virtual machine + * and actively using memory encryption. + * + * Examples include SME, SEV and SEV-ES. + */ + CC_ATTR_MEM_ENCRYPT, + + /** + * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active + * + * The platform/OS is running as a bare-metal system or a hypervisor + * and actively using memory encryption. + * + * Examples include SME. + */ + CC_ATTR_HOST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption. + * + * Examples include SEV and SEV-ES. + */ + CC_ATTR_GUEST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption and register state encryption. + * + * Examples include SEV-ES. + */ + CC_ATTR_GUEST_STATE_ENCRYPT, + + /** + * @CC_ATTR_GUEST_UNROLL_STRING_IO: String I/O is implemented with + * IN/OUT instructions + * + * The platform/OS is running as a guest/virtual machine and uses + * IN/OUT instructions in place of string I/O. + * + * Examples include TDX guest & SEV. + */ + CC_ATTR_GUEST_UNROLL_STRING_IO, +}; + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + +/** + * cc_platform_has() - Checks if the specified cc_attr attribute is active + * @attr: Confidential computing attribute to check + * + * The cc_platform_has() function will return an indicator as to whether the + * specified Confidential Computing attribute is currently active. 
+ * + * Context: Any context + * Return: + * * TRUE - Specified Confidential Computing attribute is active + * * FALSE - Specified Confidential Computing attribute is not active + */ +bool cc_platform_has(enum cc_attr attr); + +#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ + +static inline bool cc_platform_has(enum cc_attr attr) { return false; } + +#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ + +#endif /* _LINUX_CC_PLATFORM_H */ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index c4fef00abdf3..0a89f111e00e 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -64,6 +64,7 @@ struct cdrom_device_info { int for_data; int (*exit)(struct cdrom_device_info *); int mrw_mode_page; + __s64 last_media_change_ms; }; struct cdrom_device_ops { diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index bc2699feddbe..7ad6c3d0db7d 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -302,6 +302,8 @@ enum { CEPH_SESSION_REQUEST_FLUSH_MDLOG, }; +#define CEPH_SESSION_BLOCKLISTED (1 << 0) /* session blocklisted */ + extern const char *ceph_session_op_name(int op); struct ceph_mds_session_head { diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 409d8c29bc4f..309acbcb5a8a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -295,7 +295,6 @@ extern bool libceph_compatible(void *data); extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); -extern void *ceph_kvmalloc(size_t size, gfp_t flags); struct fs_parameter; struct fc_log; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 83fa08a06507..3431011f364d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -475,6 +475,14 @@ extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, u64 expected_object_size, u64 expected_write_size, u32 flags); +extern int 
osd_req_op_copy_from_init(struct ceph_osd_request *req, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, + u8 copy_from_flags); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, @@ -515,17 +523,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, struct page *req_page, size_t req_len, struct page **resp_pages, size_t *resp_len); -int ceph_osdc_copy_from(struct ceph_osd_client *osdc, - u64 src_snapid, u64 src_version, - struct ceph_object_id *src_oid, - struct ceph_object_locator *src_oloc, - u32 src_fadvise_flags, - struct ceph_object_id *dst_oid, - struct ceph_object_locator *dst_oloc, - u32 dst_fadvise_flags, - u32 truncate_seq, u64 truncate_size, - u8 copy_from_flags); - /* watch/notify */ struct ceph_osd_linger_request * ceph_osdc_watch(struct ceph_osd_client *osdc, diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index db2e147e069f..1bfcfb1af352 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -19,7 +19,7 @@ #include <linux/percpu-rwsem.h> #include <linux/u64_stats_sync.h> #include <linux/workqueue.h> -#include <linux/bpf-cgroup.h> +#include <linux/bpf-cgroup-defs.h> #include <linux/psi_types.h> #ifdef CONFIG_CGROUPS @@ -413,7 +413,7 @@ struct cgroup { /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_ss_mask is the effective + * "cgroup.subtree_control" while ->subtree_ss_mask is the effective * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. 
*/ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index f59c875271a0..2faa6f7aa8a8 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -490,6 +490,13 @@ struct clk_hw *__clk_hw_register_gate(struct device *dev, unsigned long flags, void __iomem *reg, u8 bit_idx, u8 clk_gate_flags, spinlock_t *lock); +struct clk_hw *__devm_clk_hw_register_gate(struct device *dev, + struct device_node *np, const char *name, + const char *parent_name, const struct clk_hw *parent_hw, + const struct clk_parent_data *parent_data, + unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate_flags, spinlock_t *lock); struct clk *clk_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, @@ -544,6 +551,22 @@ struct clk *clk_register_gate(struct device *dev, const char *name, __clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \ (flags), (reg), (bit_idx), \ (clk_gate_flags), (lock)) +/** + * devm_clk_hw_register_gate - register a gate clock with the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of this clock's parent + * @flags: framework-specific flags for this clock + * @reg: register address to control gating of this clock + * @bit_idx: which bit in the register controls gating of this clock + * @clk_gate_flags: gate-specific flags for this clock + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_gate(dev, name, parent_name, flags, reg, bit_idx,\ + clk_gate_flags, lock) \ + __devm_clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \ + NULL, (flags), (reg), (bit_idx), \ + (clk_gate_flags), (lock)) void clk_unregister_gate(struct clk *clk); void clk_hw_unregister_gate(struct clk_hw *hw); int clk_gate_is_enabled(struct clk_hw *hw); diff --git a/include/linux/clk/sunxi-ng.h b/include/linux/clk/sunxi-ng.h index 
3cd14acde0a1..cf32123b39f5 100644 --- a/include/linux/clk/sunxi-ng.h +++ b/include/linux/clk/sunxi-ng.h @@ -6,22 +6,7 @@ #ifndef _LINUX_CLK_SUNXI_NG_H_ #define _LINUX_CLK_SUNXI_NG_H_ -#include <linux/errno.h> - -#ifdef CONFIG_SUNXI_CCU int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, bool new_mode); int sunxi_ccu_get_mmc_timing_mode(struct clk *clk); -#else -static inline int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, - bool new_mode) -{ - return -ENOTSUPP; -} - -static inline int sunxi_ccu_get_mmc_timing_mode(struct clk *clk) -{ - return -ENOTSUPP; -} -#endif #endif diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index d128ad1570aa..3650e926e93f 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -42,6 +42,7 @@ struct tegra_cpu_car_ops { #endif }; +#ifdef CONFIG_ARCH_TEGRA extern struct tegra_cpu_car_ops *tegra_cpu_car_ops; static inline void tegra_wait_cpu_in_reset(u32 cpu) @@ -83,8 +84,29 @@ static inline void tegra_disable_cpu_clock(u32 cpu) tegra_cpu_car_ops->disable_clock(cpu); } +#else +static inline void tegra_wait_cpu_in_reset(u32 cpu) +{ +} -#ifdef CONFIG_PM_SLEEP +static inline void tegra_put_cpu_in_reset(u32 cpu) +{ +} + +static inline void tegra_cpu_out_of_reset(u32 cpu) +{ +} + +static inline void tegra_enable_cpu_clock(u32 cpu) +{ +} + +static inline void tegra_disable_cpu_clock(u32 cpu) +{ +} +#endif + +#if defined(CONFIG_ARCH_TEGRA) && defined(CONFIG_PM_SLEEP) static inline bool tegra_cpu_rail_off_ready(void) { if (WARN_ON(!tegra_cpu_car_ops->rail_off_ready)) diff --git a/include/linux/cma.h b/include/linux/cma.h index 53fd8c3cdbd0..bd801023504b 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -46,6 +46,7 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align, bool no_warn); +extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long 
count); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h new file mode 100644 index 000000000000..d8264417e53c --- /dev/null +++ b/include/linux/comedi/comedi_8254.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_8254.h + * Generic 8254 timer/counter support + * Copyright (C) 2014 H Hartley Sweeten <[email protected]> + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 2000 David A. Schleef <[email protected]> + */ + +#ifndef _COMEDI_8254_H +#define _COMEDI_8254_H + +#include <linux/types.h> + +struct comedi_device; +struct comedi_insn; +struct comedi_subdevice; + +/* + * Common oscillator base values in nanoseconds + */ +#define I8254_OSC_BASE_10MHZ 100 +#define I8254_OSC_BASE_5MHZ 200 +#define I8254_OSC_BASE_4MHZ 250 +#define I8254_OSC_BASE_2MHZ 500 +#define I8254_OSC_BASE_1MHZ 1000 +#define I8254_OSC_BASE_100KHZ 10000 +#define I8254_OSC_BASE_10KHZ 100000 +#define I8254_OSC_BASE_1KHZ 1000000 + +/* + * I/O access size used to read/write registers + */ +#define I8254_IO8 1 +#define I8254_IO16 2 +#define I8254_IO32 4 + +/* + * Register map for generic 8254 timer (I8254_IO8 with 0 regshift) + */ +#define I8254_COUNTER0_REG 0x00 +#define I8254_COUNTER1_REG 0x01 +#define I8254_COUNTER2_REG 0x02 +#define I8254_CTRL_REG 0x03 +#define I8254_CTRL_SEL_CTR(x) ((x) << 6) +#define I8254_CTRL_READBACK(x) (I8254_CTRL_SEL_CTR(3) | BIT(x)) +#define I8254_CTRL_READBACK_COUNT I8254_CTRL_READBACK(4) +#define I8254_CTRL_READBACK_STATUS I8254_CTRL_READBACK(5) +#define I8254_CTRL_READBACK_SEL_CTR(x) (2 << (x)) +#define I8254_CTRL_RW(x) (((x) & 0x3) << 4) +#define I8254_CTRL_LATCH I8254_CTRL_RW(0) +#define I8254_CTRL_LSB_ONLY I8254_CTRL_RW(1) +#define I8254_CTRL_MSB_ONLY I8254_CTRL_RW(2) +#define I8254_CTRL_LSB_MSB 
I8254_CTRL_RW(3) + +/* counter maps zero to 0x10000 */ +#define I8254_MAX_COUNT 0x10000 + +/** + * struct comedi_8254 - private data used by this module + * @iobase: PIO base address of the registers (in/out) + * @mmio: MMIO base address of the registers (read/write) + * @iosize: I/O size used to access the registers (b/w/l) + * @regshift: register gap shift + * @osc_base: cascaded oscillator speed in ns + * @divisor: divisor for single counter + * @divisor1: divisor loaded into first cascaded counter + * @divisor2: divisor loaded into second cascaded counter + * @next_div: next divisor for single counter + * @next_div1: next divisor to use for first cascaded counter + * @next_div2: next divisor to use for second cascaded counter + * @clock_src: current clock source for each counter (driver specific) + * @gate_src: current gate source for each counter (driver specific) + * @busy: flags used to indicate that a counter is "busy" + * @insn_config: driver specific (*insn_config) callback + */ +struct comedi_8254 { + unsigned long iobase; + void __iomem *mmio; + unsigned int iosize; + unsigned int regshift; + unsigned int osc_base; + unsigned int divisor; + unsigned int divisor1; + unsigned int divisor2; + unsigned int next_div; + unsigned int next_div1; + unsigned int next_div2; + unsigned int clock_src[3]; + unsigned int gate_src[3]; + bool busy[3]; + + int (*insn_config)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); +}; + +unsigned int comedi_8254_status(struct comedi_8254 *i8254, + unsigned int counter); +unsigned int comedi_8254_read(struct comedi_8254 *i8254, unsigned int counter); +void comedi_8254_write(struct comedi_8254 *i8254, + unsigned int counter, unsigned int val); + +int comedi_8254_set_mode(struct comedi_8254 *i8254, + unsigned int counter, unsigned int mode); +int comedi_8254_load(struct comedi_8254 *i8254, + unsigned int counter, unsigned int val, unsigned int mode); + +void 
comedi_8254_pacer_enable(struct comedi_8254 *i8254, + unsigned int counter1, unsigned int counter2, + bool enable); +void comedi_8254_update_divisors(struct comedi_8254 *i8254); +void comedi_8254_cascade_ns_to_timer(struct comedi_8254 *i8254, + unsigned int *nanosec, unsigned int flags); +void comedi_8254_ns_to_timer(struct comedi_8254 *i8254, + unsigned int *nanosec, unsigned int flags); + +void comedi_8254_set_busy(struct comedi_8254 *i8254, + unsigned int counter, bool busy); + +void comedi_8254_subdevice_init(struct comedi_subdevice *s, + struct comedi_8254 *i8254); + +struct comedi_8254 *comedi_8254_init(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); + +#endif /* _COMEDI_8254_H */ diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h new file mode 100644 index 000000000000..b2a5bc6b3a49 --- /dev/null +++ b/include/linux/comedi/comedi_8255.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_8255.h + * Generic 8255 digital I/O subdevice support + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1998 David A. 
Schleef <[email protected]> + */ + +#ifndef _COMEDI_8255_H +#define _COMEDI_8255_H + +#define I8255_SIZE 0x04 + +#define I8255_DATA_A_REG 0x00 +#define I8255_DATA_B_REG 0x01 +#define I8255_DATA_C_REG 0x02 +#define I8255_CTRL_REG 0x03 +#define I8255_CTRL_C_LO_IO BIT(0) +#define I8255_CTRL_B_IO BIT(1) +#define I8255_CTRL_B_MODE BIT(2) +#define I8255_CTRL_C_HI_IO BIT(3) +#define I8255_CTRL_A_IO BIT(4) +#define I8255_CTRL_A_MODE(x) ((x) << 5) +#define I8255_CTRL_CW BIT(7) + +struct comedi_device; +struct comedi_subdevice; + +int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long regbase), + unsigned long regbase); + +int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long regbase), + unsigned long regbase); + +unsigned long subdev_8255_regbase(struct comedi_subdevice *s); + +#endif diff --git a/include/linux/comedi/comedi_isadma.h b/include/linux/comedi/comedi_isadma.h new file mode 100644 index 000000000000..9d2b12db7e6e --- /dev/null +++ b/include/linux/comedi/comedi_isadma.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * COMEDI ISA DMA support functions + * Copyright (c) 2014 H Hartley Sweeten <[email protected]> + */ + +#ifndef _COMEDI_ISADMA_H +#define _COMEDI_ISADMA_H + +#include <linux/types.h> + +struct comedi_device; +struct device; + +/* + * These are used to avoid issues when <asm/dma.h> and the DMA_MODE_ + * defines are not available. 
+ */ +#define COMEDI_ISADMA_READ 0 +#define COMEDI_ISADMA_WRITE 1 + +/** + * struct comedi_isadma_desc - cookie for ISA DMA + * @virt_addr: virtual address of buffer + * @hw_addr: hardware (bus) address of buffer + * @chan: DMA channel + * @maxsize: allocated size of buffer (in bytes) + * @size: transfer size (in bytes) + * @mode: DMA_MODE_READ or DMA_MODE_WRITE + */ +struct comedi_isadma_desc { + void *virt_addr; + dma_addr_t hw_addr; + unsigned int chan; + unsigned int maxsize; + unsigned int size; + char mode; +}; + +/** + * struct comedi_isadma - ISA DMA data + * @dev: device to allocate non-coherent memory for + * @desc: cookie for each DMA buffer + * @n_desc: the number of cookies + * @cur_dma: the current cookie in use + * @chan: the first DMA channel requested + * @chan2: the second DMA channel requested + */ +struct comedi_isadma { + struct device *dev; + struct comedi_isadma_desc *desc; + int n_desc; + int cur_dma; + unsigned int chan; + unsigned int chan2; +}; + +#if IS_ENABLED(CONFIG_ISA_DMA_API) + +void comedi_isadma_program(struct comedi_isadma_desc *desc); +unsigned int comedi_isadma_disable(unsigned int dma_chan); +unsigned int comedi_isadma_disable_on_sample(unsigned int dma_chan, + unsigned int size); +unsigned int comedi_isadma_poll(struct comedi_isadma *dma); +void comedi_isadma_set_mode(struct comedi_isadma_desc *desc, char dma_dir); + +struct comedi_isadma *comedi_isadma_alloc(struct comedi_device *dev, + int n_desc, unsigned int dma_chan1, + unsigned int dma_chan2, + unsigned int maxsize, char dma_dir); +void comedi_isadma_free(struct comedi_isadma *dma); + +#else /* !IS_ENABLED(CONFIG_ISA_DMA_API) */ + +static inline void comedi_isadma_program(struct comedi_isadma_desc *desc) +{ +} + +static inline unsigned int comedi_isadma_disable(unsigned int dma_chan) +{ + return 0; +} + +static inline unsigned int +comedi_isadma_disable_on_sample(unsigned int dma_chan, unsigned int size) +{ + return 0; +} + +static inline unsigned int 
comedi_isadma_poll(struct comedi_isadma *dma) +{ + return 0; +} + +static inline void comedi_isadma_set_mode(struct comedi_isadma_desc *desc, + char dma_dir) +{ +} + +static inline struct comedi_isadma * +comedi_isadma_alloc(struct comedi_device *dev, int n_desc, + unsigned int dma_chan1, unsigned int dma_chan2, + unsigned int maxsize, char dma_dir) +{ + return NULL; +} + +static inline void comedi_isadma_free(struct comedi_isadma *dma) +{ +} + +#endif /* !IS_ENABLED(CONFIG_ISA_DMA_API) */ + +#endif /* #ifndef _COMEDI_ISADMA_H */ diff --git a/include/linux/comedi/comedi_pci.h b/include/linux/comedi/comedi_pci.h new file mode 100644 index 000000000000..2fb50663e3ed --- /dev/null +++ b/include/linux/comedi/comedi_pci.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_pci.h + * header file for Comedi PCI drivers + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. Schleef <[email protected]> + */ + +#ifndef _COMEDI_PCI_H +#define _COMEDI_PCI_H + +#include <linux/pci.h> +#include <linux/comedi/comedidev.h> + +/* + * PCI Vendor IDs not in <linux/pci_ids.h> + */ +#define PCI_VENDOR_ID_KOLTER 0x1001 +#define PCI_VENDOR_ID_ICP 0x104c +#define PCI_VENDOR_ID_DT 0x1116 +#define PCI_VENDOR_ID_IOTECH 0x1616 +#define PCI_VENDOR_ID_CONTEC 0x1221 +#define PCI_VENDOR_ID_RTD 0x1435 +#define PCI_VENDOR_ID_HUMUSOFT 0x186c + +struct pci_dev *comedi_to_pci_dev(struct comedi_device *dev); + +int comedi_pci_enable(struct comedi_device *dev); +void comedi_pci_disable(struct comedi_device *dev); +void comedi_pci_detach(struct comedi_device *dev); + +int comedi_pci_auto_config(struct pci_dev *pcidev, struct comedi_driver *driver, + unsigned long context); +void comedi_pci_auto_unconfig(struct pci_dev *pcidev); + +int comedi_pci_driver_register(struct comedi_driver *comedi_driver, + struct pci_driver *pci_driver); +void comedi_pci_driver_unregister(struct comedi_driver *comedi_driver, + struct pci_driver *pci_driver); + 
+/** + * module_comedi_pci_driver() - Helper macro for registering a comedi PCI driver + * @__comedi_driver: comedi_driver struct + * @__pci_driver: pci_driver struct + * + * Helper macro for comedi PCI drivers which do not do anything special + * in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces + * module_init() and module_exit() + */ +#define module_comedi_pci_driver(__comedi_driver, __pci_driver) \ + module_driver(__comedi_driver, comedi_pci_driver_register, \ + comedi_pci_driver_unregister, &(__pci_driver)) + +#endif /* _COMEDI_PCI_H */ diff --git a/include/linux/comedi/comedi_pcmcia.h b/include/linux/comedi/comedi_pcmcia.h new file mode 100644 index 000000000000..a33dfb65b869 --- /dev/null +++ b/include/linux/comedi/comedi_pcmcia.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedi_pcmcia.h + * header file for Comedi PCMCIA drivers + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. 
Schleef <[email protected]> + */ + +#ifndef _COMEDI_PCMCIA_H +#define _COMEDI_PCMCIA_H + +#include <pcmcia/cistpl.h> +#include <pcmcia/ds.h> +#include <linux/comedi/comedidev.h> + +struct pcmcia_device *comedi_to_pcmcia_dev(struct comedi_device *dev); + +int comedi_pcmcia_enable(struct comedi_device *dev, + int (*conf_check)(struct pcmcia_device *p_dev, + void *priv_data)); +void comedi_pcmcia_disable(struct comedi_device *dev); + +int comedi_pcmcia_auto_config(struct pcmcia_device *link, + struct comedi_driver *driver); +void comedi_pcmcia_auto_unconfig(struct pcmcia_device *link); + +int comedi_pcmcia_driver_register(struct comedi_driver *comedi_driver, + struct pcmcia_driver *pcmcia_driver); +void comedi_pcmcia_driver_unregister(struct comedi_driver *comedi_driver, + struct pcmcia_driver *pcmcia_driver); + +/** + * module_comedi_pcmcia_driver() - Helper macro for registering a comedi + * PCMCIA driver + * @__comedi_driver: comedi_driver struct + * @__pcmcia_driver: pcmcia_driver struct + * + * Helper macro for comedi PCMCIA drivers which do not do anything special + * in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces + * module_init() and module_exit() + */ +#define module_comedi_pcmcia_driver(__comedi_driver, __pcmcia_driver) \ + module_driver(__comedi_driver, comedi_pcmcia_driver_register, \ + comedi_pcmcia_driver_unregister, &(__pcmcia_driver)) + +#endif /* _COMEDI_PCMCIA_H */ diff --git a/include/linux/comedi/comedi_usb.h b/include/linux/comedi/comedi_usb.h new file mode 100644 index 000000000000..5d17dd425bd2 --- /dev/null +++ b/include/linux/comedi/comedi_usb.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* comedi_usb.h + * header file for USB Comedi drivers + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. 
Schleef <[email protected]> + */ + +#ifndef _COMEDI_USB_H +#define _COMEDI_USB_H + +#include <linux/usb.h> +#include <linux/comedi/comedidev.h> + +struct usb_interface *comedi_to_usb_interface(struct comedi_device *dev); +struct usb_device *comedi_to_usb_dev(struct comedi_device *dev); + +int comedi_usb_auto_config(struct usb_interface *intf, + struct comedi_driver *driver, unsigned long context); +void comedi_usb_auto_unconfig(struct usb_interface *intf); + +int comedi_usb_driver_register(struct comedi_driver *comedi_driver, + struct usb_driver *usb_driver); +void comedi_usb_driver_unregister(struct comedi_driver *comedi_driver, + struct usb_driver *usb_driver); + +/** + * module_comedi_usb_driver() - Helper macro for registering a comedi USB driver + * @__comedi_driver: comedi_driver struct + * @__usb_driver: usb_driver struct + * + * Helper macro for comedi USB drivers which do not do anything special + * in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces + * module_init() and module_exit() + */ +#define module_comedi_usb_driver(__comedi_driver, __usb_driver) \ + module_driver(__comedi_driver, comedi_usb_driver_register, \ + comedi_usb_driver_unregister, &(__usb_driver)) + +#endif /* _COMEDI_USB_H */ diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h new file mode 100644 index 000000000000..0a1150900ef3 --- /dev/null +++ b/include/linux/comedi/comedidev.h @@ -0,0 +1,1053 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedidev.h + * header file for kernel-only structures, variables, and constants + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1997-2000 David A. 
Schleef <[email protected]> + */ + +#ifndef _COMEDIDEV_H +#define _COMEDIDEV_H + +#include <linux/dma-mapping.h> +#include <linux/mutex.h> +#include <linux/spinlock_types.h> +#include <linux/rwsem.h> +#include <linux/kref.h> +#include <linux/comedi.h> + +#define COMEDI_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c)) +#define COMEDI_VERSION_CODE COMEDI_VERSION(COMEDI_MAJORVERSION, \ + COMEDI_MINORVERSION, COMEDI_MICROVERSION) +#define COMEDI_RELEASE VERSION + +#define COMEDI_NUM_BOARD_MINORS 0x30 + +/** + * struct comedi_subdevice - Working data for a COMEDI subdevice + * @device: COMEDI device to which this subdevice belongs. (Initialized by + * comedi_alloc_subdevices().) + * @index: Index of this subdevice within device's array of subdevices. + * (Initialized by comedi_alloc_subdevices().) + * @type: Type of subdevice from &enum comedi_subdevice_type. (Initialized by + * the low-level driver.) + * @n_chan: Number of channels the subdevice supports. (Initialized by the + * low-level driver.) + * @subdev_flags: Various "SDF" flags indicating aspects of the subdevice to + * the COMEDI core and user application. (Initialized by the low-level + * driver.) + * @len_chanlist: Maximum length of a channel list if the subdevice supports + * asynchronous acquisition commands. (Optionally initialized by the + * low-level driver, or changed from 0 to 1 during post-configuration.) + * @private: Private data pointer which is either set by the low-level driver + * itself, or by a call to comedi_alloc_spriv() which allocates storage. + * In the latter case, the storage is automatically freed after the + * low-level driver's "detach" handler is called for the device. + * (Initialized by the low-level driver.) + * @async: Pointer to &struct comedi_async if the subdevice supports + * asynchronous acquisition commands. (Allocated and initialized during + * post-configuration if needed.) + * @lock: Pointer to a file object that performed a %COMEDI_LOCK ioctl on the + * subdevice. 
(Initially NULL.) + * @busy: Pointer to a file object that is performing an asynchronous + * acquisition command on the subdevice. (Initially NULL.) + * @runflags: Internal flags for use by COMEDI core, mostly indicating whether + * an asynchronous acquisition command is running. + * @spin_lock: Generic spin-lock for use by the COMEDI core and the low-level + * driver. (Initialized by comedi_alloc_subdevices().) + * @io_bits: Bit-mask indicating the channel directions for a DIO subdevice + * with no more than 32 channels. A '1' at a bit position indicates the + * corresponding channel is configured as an output. (Initialized by the + * low-level driver for a DIO subdevice. Forced to all-outputs during + * post-configuration for a digital output subdevice.) + * @maxdata: If non-zero, this is the maximum raw data value of each channel. + * If zero, the maximum data value is channel-specific. (Initialized by + * the low-level driver.) + * @maxdata_list: If the maximum data value is channel-specific, this points + * to an array of maximum data values indexed by channel index. + * (Initialized by the low-level driver.) + * @range_table: If non-NULL, this points to a COMEDI range table for the + * subdevice. If NULL, the range table is channel-specific. (Initialized + * by the low-level driver, will be set to an "invalid" range table during + * post-configuration if @range_table and @range_table_list are both + * NULL.) + * @range_table_list: If the COMEDI range table is channel-specific, this + * points to an array of pointers to COMEDI range tables indexed by + * channel number. (Initialized by the low-level driver.) + * @chanlist: Not used. + * @insn_read: Optional pointer to a handler for the %INSN_READ instruction. + * (Initialized by the low-level driver, or set to a default handler + * during post-configuration.) + * @insn_write: Optional pointer to a handler for the %INSN_WRITE instruction. 
+ * (Initialized by the low-level driver, or set to a default handler + * during post-configuration.) + * @insn_bits: Optional pointer to a handler for the %INSN_BITS instruction + * for a digital input, digital output or digital input/output subdevice. + * (Initialized by the low-level driver, or set to a default handler + * during post-configuration.) + * @insn_config: Optional pointer to a handler for the %INSN_CONFIG + * instruction. (Initialized by the low-level driver, or set to a default + * handler during post-configuration.) + * @do_cmd: If the subdevice supports asynchronous acquisition commands, this + * points to a handler to set it up in hardware. (Initialized by the + * low-level driver.) + * @do_cmdtest: If the subdevice supports asynchronous acquisition commands, + * this points to a handler used to check and possibly tweak a prospective + * acquisition command without setting it up in hardware. (Initialized by + * the low-level driver.) + * @poll: If the subdevice supports asynchronous acquisition commands, this + * is an optional pointer to a handler for the %COMEDI_POLL ioctl which + * instructs the low-level driver to synchronize buffers. (Initialized by + * the low-level driver if needed.) + * @cancel: If the subdevice supports asynchronous acquisition commands, this + * points to a handler used to terminate a running command. (Initialized + * by the low-level driver.) + * @buf_change: If the subdevice supports asynchronous acquisition commands, + * this is an optional pointer to a handler that is called when the data + * buffer for handling asynchronous commands is allocated or reallocated. + * (Initialized by the low-level driver if needed.) + * @munge: If the subdevice supports asynchronous acquisition commands and + * uses DMA to transfer data from the hardware to the acquisition buffer, + * this points to a function used to "munge" the data values from the + * hardware into the format expected by COMEDI. 
(Initialized by the + * low-level driver if needed.) + * @async_dma_dir: If the subdevice supports asynchronous acquisition commands + * and uses DMA to transfer data from the hardware to the acquisition + * buffer, this sets the DMA direction for the buffer. (initialized to + * %DMA_NONE by comedi_alloc_subdevices() and changed by the low-level + * driver if necessary.) + * @state: Handy bit-mask indicating the output states for a DIO or digital + * output subdevice with no more than 32 channels. (Initialized by the + * low-level driver.) + * @class_dev: If the subdevice supports asynchronous acquisition commands, + * this points to a sysfs comediX_subdY device where X is the minor device + * number of the COMEDI device and Y is the subdevice number. The minor + * device number for the sysfs device is allocated dynamically in the + * range 48 to 255. This is used to allow the COMEDI device to be opened + * with a different default read or write subdevice. (Allocated during + * post-configuration if needed.) + * @minor: If @class_dev is set, this is its dynamically allocated minor + * device number. (Set during post-configuration if necessary.) + * @readback: Optional pointer to memory allocated by + * comedi_alloc_subdev_readback() used to hold the values written to + * analog output channels so they can be read back. The storage is + * automatically freed after the low-level driver's "detach" handler is + * called for the device. (Initialized by the low-level driver.) + * + * This is the main control structure for a COMEDI subdevice. If the subdevice + * supports asynchronous acquisition commands, additional information is stored + * in the &struct comedi_async pointed to by @async. + * + * Most of the subdevice is initialized by the low-level driver's "attach" or + * "auto_attach" handlers but parts of it are initialized by + * comedi_alloc_subdevices(), and other parts are initialized during + * post-configuration on return from that handler. 
+ * + * A low-level driver that sets @insn_bits for a digital input, digital output, + * or DIO subdevice may leave @insn_read and @insn_write uninitialized, in + * which case they will be set to a default handler during post-configuration + * that uses @insn_bits to emulate the %INSN_READ and %INSN_WRITE instructions. + */ +struct comedi_subdevice { + struct comedi_device *device; + int index; + int type; + int n_chan; + int subdev_flags; + int len_chanlist; /* maximum length of channel/gain list */ + + void *private; + + struct comedi_async *async; + + void *lock; + void *busy; + unsigned int runflags; + spinlock_t spin_lock; /* generic spin-lock for COMEDI and drivers */ + + unsigned int io_bits; + + unsigned int maxdata; /* if maxdata==0, use list */ + const unsigned int *maxdata_list; /* list is channel specific */ + + const struct comedi_lrange *range_table; + const struct comedi_lrange *const *range_table_list; + + unsigned int *chanlist; /* driver-owned chanlist (not used) */ + + int (*insn_read)(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + int (*insn_write)(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + int (*insn_bits)(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + int (*insn_config)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, + unsigned int *data); + + int (*do_cmd)(struct comedi_device *dev, struct comedi_subdevice *s); + int (*do_cmdtest)(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_cmd *cmd); + int (*poll)(struct comedi_device *dev, struct comedi_subdevice *s); + int (*cancel)(struct comedi_device *dev, struct comedi_subdevice *s); + + /* called when the buffer changes */ + int (*buf_change)(struct comedi_device *dev, + struct comedi_subdevice *s); + + void (*munge)(struct comedi_device *dev, struct 
comedi_subdevice *s, + void *data, unsigned int num_bytes, + unsigned int start_chan_index); + enum dma_data_direction async_dma_dir; + + unsigned int state; + + struct device *class_dev; + int minor; + + unsigned int *readback; +}; + +/** + * struct comedi_buf_page - Describe a page of a COMEDI buffer + * @virt_addr: Kernel address of page. + * @dma_addr: DMA address of page if in DMA coherent memory. + */ +struct comedi_buf_page { + void *virt_addr; + dma_addr_t dma_addr; +}; + +/** + * struct comedi_buf_map - Describe pages in a COMEDI buffer + * @dma_hw_dev: Low-level hardware &struct device pointer copied from the + * COMEDI device's hw_dev member. + * @page_list: Pointer to array of &struct comedi_buf_page, one for each + * page in the buffer. + * @n_pages: Number of pages in the buffer. + * @dma_dir: DMA direction used to allocate pages of DMA coherent memory, + * or %DMA_NONE if pages allocated from regular memory. + * @refcount: &struct kref reference counter used to free the buffer. + * + * A COMEDI data buffer is allocated as individual pages, either in + * conventional memory or DMA coherent memory, depending on the attached, + * low-level hardware device. (The buffer pages also get mapped into the + * kernel's contiguous virtual address space pointed to by the 'prealloc_buf' + * member of &struct comedi_async.) + * + * The buffer is normally freed when the COMEDI device is detached from the + * low-level driver (which may happen due to device removal), but if it happens + * to be mmapped at the time, the pages cannot be freed until the buffer has + * been munmapped. That is what the reference counter is for. (The virtual + * address space pointed by 'prealloc_buf' is freed when the COMEDI device is + * detached.) 
+ */ +struct comedi_buf_map { + struct device *dma_hw_dev; + struct comedi_buf_page *page_list; + unsigned int n_pages; + enum dma_data_direction dma_dir; + struct kref refcount; +}; + +/** + * struct comedi_async - Control data for asynchronous COMEDI commands + * @prealloc_buf: Kernel virtual address of allocated acquisition buffer. + * @prealloc_bufsz: Buffer size (in bytes). + * @buf_map: Map of buffer pages. + * @max_bufsize: Maximum allowed buffer size (in bytes). + * @buf_write_count: "Write completed" count (in bytes, modulo 2**32). + * @buf_write_alloc_count: "Allocated for writing" count (in bytes, + * modulo 2**32). + * @buf_read_count: "Read completed" count (in bytes, modulo 2**32). + * @buf_read_alloc_count: "Allocated for reading" count (in bytes, + * modulo 2**32). + * @buf_write_ptr: Buffer position for writer. + * @buf_read_ptr: Buffer position for reader. + * @cur_chan: Current position in chanlist for scan (for those drivers that + * use it). + * @scans_done: The number of scans completed. + * @scan_progress: Amount received or sent for current scan (in bytes). + * @munge_chan: Current position in chanlist for "munging". + * @munge_count: "Munge" count (in bytes, modulo 2**32). + * @munge_ptr: Buffer position for "munging". + * @events: Bit-vector of events that have occurred. + * @cmd: Details of comedi command in progress. + * @wait_head: Task wait queue for file reader or writer. + * @cb_mask: Bit-vector of events that should wake waiting tasks. + * @inttrig: Software trigger function for command, or NULL. + * + * Note about the ..._count and ..._ptr members: + * + * Think of the _Count values being integers of unlimited size, indexing + * into a buffer of infinite length (though only an advancing portion + * of the buffer of fixed length prealloc_bufsz is accessible at any + * time). 
Then: + * + * Buf_Read_Count <= Buf_Read_Alloc_Count <= Munge_Count <= + * Buf_Write_Count <= Buf_Write_Alloc_Count <= + * (Buf_Read_Count + prealloc_bufsz) + * + * (Those aren't the actual members, apart from prealloc_bufsz.) When the + * buffer is reset, those _Count values start at 0 and only increase in value, + * maintaining the above inequalities until the next time the buffer is + * reset. The buffer is divided into the following regions by the inequalities: + * + * [0, Buf_Read_Count): + * old region no longer accessible + * + * [Buf_Read_Count, Buf_Read_Alloc_Count): + * filled and munged region allocated for reading but not yet read + * + * [Buf_Read_Alloc_Count, Munge_Count): + * filled and munged region not yet allocated for reading + * + * [Munge_Count, Buf_Write_Count): + * filled region not yet munged + * + * [Buf_Write_Count, Buf_Write_Alloc_Count): + * unfilled region allocated for writing but not yet written + * + * [Buf_Write_Alloc_Count, Buf_Read_Count + prealloc_bufsz): + * unfilled region not yet allocated for writing + * + * [Buf_Read_Count + prealloc_bufsz, infinity): + * unfilled region not yet accessible + * + * Data needs to be written into the buffer before it can be read out, + * and may need to be converted (or "munged") between the two + * operations. Extra unfilled buffer space may need to be allocated for + * writing (advancing Buf_Write_Alloc_Count) before new data is written. + * After writing new data, the newly filled space needs to be released + * (advancing Buf_Write_Count). This also results in the new data being + * "munged" (advancing Munge_Count). Before data is read out of the + * buffer, extra space may need to be allocated for reading (advancing + * Buf_Read_Alloc_Count). After the data has been read out, the space + * needs to be released (advancing Buf_Read_Count). 
+ * + * The actual members, buf_read_count, buf_read_alloc_count, + * munge_count, buf_write_count, and buf_write_alloc_count take the + * value of the corresponding capitalized _Count values modulo 2^32 + * (UINT_MAX+1). Subtracting a "lower" _count value from a "higher" + * _count value gives the same answer as subtracting a "lower" _Count + * value from a "higher" _Count value because prealloc_bufsz < UINT_MAX+1. + * The modulo operation is done implicitly. + * + * The buf_read_ptr, munge_ptr, and buf_write_ptr members take the value + * of the corresponding capitalized _Count values modulo prealloc_bufsz. + * These correspond to byte indices in the physical buffer. The modulo + * operation is done by subtracting prealloc_bufsz when the value + * exceeds prealloc_bufsz (assuming prealloc_bufsz plus the increment is + * less than or equal to UINT_MAX). + */ +struct comedi_async { + void *prealloc_buf; + unsigned int prealloc_bufsz; + struct comedi_buf_map *buf_map; + unsigned int max_bufsize; + unsigned int buf_write_count; + unsigned int buf_write_alloc_count; + unsigned int buf_read_count; + unsigned int buf_read_alloc_count; + unsigned int buf_write_ptr; + unsigned int buf_read_ptr; + unsigned int cur_chan; + unsigned int scans_done; + unsigned int scan_progress; + unsigned int munge_chan; + unsigned int munge_count; + unsigned int munge_ptr; + unsigned int events; + struct comedi_cmd cmd; + wait_queue_head_t wait_head; + unsigned int cb_mask; + int (*inttrig)(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned int x); +}; + +/** + * enum comedi_cb - &struct comedi_async callback "events" + * @COMEDI_CB_EOS: end-of-scan + * @COMEDI_CB_EOA: end-of-acquisition/output + * @COMEDI_CB_BLOCK: data has arrived, wakes up read() / write() + * @COMEDI_CB_EOBUF: DEPRECATED: end of buffer + * @COMEDI_CB_ERROR: card error during acquisition + * @COMEDI_CB_OVERFLOW: buffer overflow/underflow + * @COMEDI_CB_ERROR_MASK: events that indicate an error has occurred + 
* @COMEDI_CB_CANCEL_MASK: events that will cancel an async command + */ +enum comedi_cb { + COMEDI_CB_EOS = BIT(0), + COMEDI_CB_EOA = BIT(1), + COMEDI_CB_BLOCK = BIT(2), + COMEDI_CB_EOBUF = BIT(3), + COMEDI_CB_ERROR = BIT(4), + COMEDI_CB_OVERFLOW = BIT(5), + /* masks */ + COMEDI_CB_ERROR_MASK = (COMEDI_CB_ERROR | COMEDI_CB_OVERFLOW), + COMEDI_CB_CANCEL_MASK = (COMEDI_CB_EOA | COMEDI_CB_ERROR_MASK) +}; + +/** + * struct comedi_driver - COMEDI driver registration + * @driver_name: Name of driver. + * @module: Owning module. + * @attach: The optional "attach" handler for manually configured COMEDI + * devices. + * @detach: The "detach" handler for deconfiguring COMEDI devices. + * @auto_attach: The optional "auto_attach" handler for automatically + * configured COMEDI devices. + * @num_names: Optional number of "board names" supported. + * @board_name: Optional pointer to a pointer to a board name. The pointer + * to a board name is embedded in an element of a driver-defined array + * of static, read-only board type information. + * @offset: Optional size of each element of the driver-defined array of + * static, read-only board type information, i.e. the offset between each + * pointer to a board name. + * + * This is used with comedi_driver_register() and comedi_driver_unregister() to + * register and unregister a low-level COMEDI driver with the COMEDI core. + * + * If @num_names is non-zero, @board_name should be non-NULL, and @offset + * should be at least sizeof(*board_name). These are used by the handler for + * the %COMEDI_DEVCONFIG ioctl to match a hardware device and its driver by + * board name. If @num_names is zero, the %COMEDI_DEVCONFIG ioctl matches a + * hardware device and its driver by driver name. This is only useful if the + * @attach handler is set. 
If @num_names is non-zero, the driver's @attach + * handler will be called with the COMEDI device structure's board_ptr member + * pointing to the matched pointer to a board name within the driver's private + * array of static, read-only board type information. + * + * The @detach handler has two roles. If a COMEDI device was successfully + * configured by the @attach or @auto_attach handler, it is called when the + * device is being deconfigured (by the %COMEDI_DEVCONFIG ioctl, or due to + * unloading of the driver, or due to device removal). It is also called when + * the @attach or @auto_attach handler returns an error. Therefore, the + * @attach or @auto_attach handlers can defer clean-up on error until the + * @detach handler is called. If the @attach or @auto_attach handlers free + * any resources themselves, they must prevent the @detach handler from + * freeing the same resources. The @detach handler must not assume that all + * resources requested by the @attach or @auto_attach handler were + * successfully allocated. + */ +struct comedi_driver { + /* private: */ + struct comedi_driver *next; /* Next in list of COMEDI drivers. */ + /* public: */ + const char *driver_name; + struct module *module; + int (*attach)(struct comedi_device *dev, struct comedi_devconfig *it); + void (*detach)(struct comedi_device *dev); + int (*auto_attach)(struct comedi_device *dev, unsigned long context); + unsigned int num_names; + const char *const *board_name; + int offset; +}; + +/** + * struct comedi_device - Working data for a COMEDI device + * @use_count: Number of open file objects. + * @driver: Low-level COMEDI driver attached to this COMEDI device. + * @pacer: Optional pointer to a dynamically allocated acquisition pacer + * control. It is freed automatically after the COMEDI device is + * detached from the low-level driver. + * @private: Optional pointer to private data allocated by the low-level + * driver. 
It is freed automatically after the COMEDI device is + * detached from the low-level driver. + * @class_dev: Sysfs comediX device. + * @minor: Minor device number of COMEDI char device (0-47). + * @detach_count: Counter incremented every time the COMEDI device is detached. + * Used for checking a previous attachment is still valid. + * @hw_dev: Optional pointer to the low-level hardware &struct device. It is + * required for automatically configured COMEDI devices and optional for + * COMEDI devices configured by the %COMEDI_DEVCONFIG ioctl, although + * the bus-specific COMEDI functions only work if it is set correctly. + * It is also passed to dma_alloc_coherent() for COMEDI subdevices that + * have their 'async_dma_dir' member set to something other than + * %DMA_NONE. + * @board_name: Pointer to a COMEDI board name or a COMEDI driver name. When + * the low-level driver's "attach" handler is called by the handler for + * the %COMEDI_DEVCONFIG ioctl, it either points to a matched board name + * string if the 'num_names' member of the &struct comedi_driver is + * non-zero, otherwise it points to the low-level driver name string. + * When the low-level driver's "auto_attach" handler is called for an + * automatically configured COMEDI device, it points to the low-level + * driver name string. The low-level driver is free to change it in its + * "attach" or "auto_attach" handler if it wishes. + * @board_ptr: Optional pointer to private, read-only board type information in + * the low-level driver. If the 'num_names' member of the &struct + * comedi_driver is non-zero, the handler for the %COMEDI_DEVCONFIG ioctl + * will point it to a pointer to a matched board name string within the + * driver's private array of static, read-only board type information when + * calling the driver's "attach" handler. The low-level driver is free to + * change it. + * @attached: Flag indicating that the COMEDI device is attached to a low-level + * driver. 
+ * @ioenabled: Flag used to indicate that a PCI device has been enabled and + * its regions requested. + * @spinlock: Generic spin-lock for use by the low-level driver. + * @mutex: Generic mutex for use by the COMEDI core module. + * @attach_lock: &struct rw_semaphore used to guard against the COMEDI device + * being detached while an operation is in progress. The down_write() + * operation is only allowed while @mutex is held and is used when + * changing @attached and @detach_count and calling the low-level driver's + * "detach" handler. The down_read() operation is generally used without + * holding @mutex. + * @refcount: &struct kref reference counter for freeing COMEDI device. + * @n_subdevices: Number of COMEDI subdevices allocated by the low-level + * driver for this device. + * @subdevices: Dynamically allocated array of COMEDI subdevices. + * @mmio: Optional pointer to a remapped MMIO region set by the low-level + * driver. + * @iobase: Optional base of an I/O port region requested by the low-level + * driver. + * @iolen: Length of I/O port region requested at @iobase. + * @irq: Optional IRQ number requested by the low-level driver. + * @read_subdev: Optional pointer to a default COMEDI subdevice operated on by + * the read() file operation. Set by the low-level driver. + * @write_subdev: Optional pointer to a default COMEDI subdevice operated on by + * the write() file operation. Set by the low-level driver. + * @async_queue: Storage for fasync_helper(). + * @open: Optional pointer to a function set by the low-level driver to be + * called when @use_count changes from 0 to 1. + * @close: Optional pointer to a function set by the low-level driver to be + * called when @use_count changed from 1 to 0. + * @insn_device_config: Optional pointer to a handler for all sub-instructions + * except %INSN_DEVICE_CONFIG_GET_ROUTES of the %INSN_DEVICE_CONFIG + * instruction. 
If this is not initialized by the low-level driver, a + * default handler will be set during post-configuration. + * @get_valid_routes: Optional pointer to a handler for the + * %INSN_DEVICE_CONFIG_GET_ROUTES sub-instruction of the + * %INSN_DEVICE_CONFIG instruction set. If this is not initialized by the + * low-level driver, a default handler that copies zero routes back to the + * user will be used. + * + * This is the main control data structure for a COMEDI device (as far as the + * COMEDI core is concerned). There are two groups of COMEDI devices - + * "legacy" devices that are configured by the handler for the + * %COMEDI_DEVCONFIG ioctl, and automatically configured devices resulting + * from a call to comedi_auto_config() as a result of a bus driver probe in + * a low-level COMEDI driver. The "legacy" COMEDI devices are allocated + * during module initialization if the "comedi_num_legacy_minors" module + * parameter is non-zero and use minor device numbers from 0 to + * comedi_num_legacy_minors minus one. The automatically configured COMEDI + * devices are allocated on demand and use minor device numbers from + * comedi_num_legacy_minors to 47. 
+ */ +struct comedi_device { + int use_count; + struct comedi_driver *driver; + struct comedi_8254 *pacer; + void *private; + + struct device *class_dev; + int minor; + unsigned int detach_count; + struct device *hw_dev; + + const char *board_name; + const void *board_ptr; + unsigned int attached:1; + unsigned int ioenabled:1; + spinlock_t spinlock; /* generic spin-lock for low-level driver */ + struct mutex mutex; /* generic mutex for COMEDI core */ + struct rw_semaphore attach_lock; + struct kref refcount; + + int n_subdevices; + struct comedi_subdevice *subdevices; + + /* dumb */ + void __iomem *mmio; + unsigned long iobase; + unsigned long iolen; + unsigned int irq; + + struct comedi_subdevice *read_subdev; + struct comedi_subdevice *write_subdev; + + struct fasync_struct *async_queue; + + int (*open)(struct comedi_device *dev); + void (*close)(struct comedi_device *dev); + int (*insn_device_config)(struct comedi_device *dev, + struct comedi_insn *insn, unsigned int *data); + unsigned int (*get_valid_routes)(struct comedi_device *dev, + unsigned int n_pairs, + unsigned int *pair_data); +}; + +/* + * function prototypes + */ + +void comedi_event(struct comedi_device *dev, struct comedi_subdevice *s); + +struct comedi_device *comedi_dev_get_from_minor(unsigned int minor); +int comedi_dev_put(struct comedi_device *dev); + +bool comedi_is_subdevice_running(struct comedi_subdevice *s); + +void *comedi_alloc_spriv(struct comedi_subdevice *s, size_t size); +void comedi_set_spriv_auto_free(struct comedi_subdevice *s); + +int comedi_check_chanlist(struct comedi_subdevice *s, + int n, + unsigned int *chanlist); + +/* range stuff */ + +#define RANGE(a, b) {(a) * 1e6, (b) * 1e6, 0} +#define RANGE_ext(a, b) {(a) * 1e6, (b) * 1e6, RF_EXTERNAL} +#define RANGE_mA(a, b) {(a) * 1e6, (b) * 1e6, UNIT_mA} +#define RANGE_unitless(a, b) {(a) * 1e6, (b) * 1e6, 0} +#define BIP_RANGE(a) {-(a) * 1e6, (a) * 1e6, 0} +#define UNI_RANGE(a) {0, (a) * 1e6, 0} + +extern const struct 
comedi_lrange range_bipolar10; +extern const struct comedi_lrange range_bipolar5; +extern const struct comedi_lrange range_bipolar2_5; +extern const struct comedi_lrange range_unipolar10; +extern const struct comedi_lrange range_unipolar5; +extern const struct comedi_lrange range_unipolar2_5; +extern const struct comedi_lrange range_0_20mA; +extern const struct comedi_lrange range_4_20mA; +extern const struct comedi_lrange range_0_32mA; +extern const struct comedi_lrange range_unknown; + +#define range_digital range_unipolar5 + +/** + * struct comedi_lrange - Describes a COMEDI range table + * @length: Number of entries in the range table. + * @range: Array of &struct comedi_krange, one for each range. + * + * Each element of @range[] describes the minimum and maximum physical range + * and the type of units. Typically, the type of unit is %UNIT_volt + * (i.e. volts) and the minimum and maximum are in millionths of a volt. + * There may also be a flag that indicates the minimum and maximum are merely + * scale factors for an unknown, external reference. + */ +struct comedi_lrange { + int length; + struct comedi_krange range[]; +}; + +/** + * comedi_range_is_bipolar() - Test if subdevice range is bipolar + * @s: COMEDI subdevice. + * @range: Index of range within a range table. + * + * Tests whether a range is bipolar by checking whether its minimum value + * is negative. + * + * Assumes @range is valid. Does not work for subdevices using a + * channel-specific range table list. + * + * Return: + * %true if the range is bipolar. + * %false if the range is unipolar. + */ +static inline bool comedi_range_is_bipolar(struct comedi_subdevice *s, + unsigned int range) +{ + return s->range_table->range[range].min < 0; +} + +/** + * comedi_range_is_unipolar() - Test if subdevice range is unipolar + * @s: COMEDI subdevice. + * @range: Index of range within a range table. + * + * Tests whether a range is unipolar by checking whether its minimum value + * is at least 0. 
+ * + * Assumes @range is valid. Does not work for subdevices using a + * channel-specific range table list. + * + * Return: + * %true if the range is unipolar. + * %false if the range is bipolar. + */ +static inline bool comedi_range_is_unipolar(struct comedi_subdevice *s, + unsigned int range) +{ + return s->range_table->range[range].min >= 0; +} + +/** + * comedi_range_is_external() - Test if subdevice range is external + * @s: COMEDI subdevice. + * @range: Index of range within a range table. + * + * Tests whether a range is externally referenced by checking whether its + * %RF_EXTERNAL flag is set. + * + * Assumes @range is valid. Does not work for subdevices using a + * channel-specific range table list. + * + * Return: + * %true if the range is external. + * %false if the range is internal. + */ +static inline bool comedi_range_is_external(struct comedi_subdevice *s, + unsigned int range) +{ + return !!(s->range_table->range[range].flags & RF_EXTERNAL); +} + +/** + * comedi_chan_range_is_bipolar() - Test if channel-specific range is bipolar + * @s: COMEDI subdevice. + * @chan: The channel number. + * @range: Index of range within a range table. + * + * Tests whether a range is bipolar by checking whether its minimum value + * is negative. + * + * Assumes @chan and @range are valid. Only works for subdevices with a + * channel-specific range table list. + * + * Return: + * %true if the range is bipolar. + * %false if the range is unipolar. + */ +static inline bool comedi_chan_range_is_bipolar(struct comedi_subdevice *s, + unsigned int chan, + unsigned int range) +{ + return s->range_table_list[chan]->range[range].min < 0; +} + +/** + * comedi_chan_range_is_unipolar() - Test if channel-specific range is unipolar + * @s: COMEDI subdevice. + * @chan: The channel number. + * @range: Index of range within a range table. + * + * Tests whether a range is unipolar by checking whether its minimum value + * is at least 0. + * + * Assumes @chan and @range are valid. 
Only works for subdevices with a + * channel-specific range table list. + * + * Return: + * %true if the range is unipolar. + * %false if the range is bipolar. + */ +static inline bool comedi_chan_range_is_unipolar(struct comedi_subdevice *s, + unsigned int chan, + unsigned int range) +{ + return s->range_table_list[chan]->range[range].min >= 0; +} + +/** + * comedi_chan_range_is_external() - Test if channel-specific range is external + * @s: COMEDI subdevice. + * @chan: The channel number. + * @range: Index of range within a range table. + * + * Tests whether a range is externally referenced by checking whether its + * %RF_EXTERNAL flag is set. + * + * Assumes @chan and @range are valid. Only works for subdevices with a + * channel-specific range table list. + * + * Return: + * %true if the range is external. + * %false if the range is internal. + */ +static inline bool comedi_chan_range_is_external(struct comedi_subdevice *s, + unsigned int chan, + unsigned int range) +{ + return !!(s->range_table_list[chan]->range[range].flags & RF_EXTERNAL); +} + +/** + * comedi_offset_munge() - Convert between offset binary and 2's complement + * @s: COMEDI subdevice. + * @val: Value to be converted. + * + * Toggles the highest bit of a sample value to toggle between offset binary + * and 2's complement. Assumes that @s->maxdata is a power of 2 minus 1. + * + * Return: The converted value. + */ +static inline unsigned int comedi_offset_munge(struct comedi_subdevice *s, + unsigned int val) +{ + return val ^ s->maxdata ^ (s->maxdata >> 1); +} + +/** + * comedi_bytes_per_sample() - Determine subdevice sample size + * @s: COMEDI subdevice. + * + * The sample size will be 4 (sizeof int) or 2 (sizeof short) depending on + * whether the %SDF_LSAMPL subdevice flag is set or not. + * + * Return: The subdevice sample size. + */ +static inline unsigned int comedi_bytes_per_sample(struct comedi_subdevice *s) +{ + return s->subdev_flags & SDF_LSAMPL ? 
sizeof(int) : sizeof(short); +} + +/** + * comedi_sample_shift() - Determine log2 of subdevice sample size + * @s: COMEDI subdevice. + * + * The sample size will be 4 (sizeof int) or 2 (sizeof short) depending on + * whether the %SDF_LSAMPL subdevice flag is set or not. The log2 of the + * sample size will be 2 or 1 and can be used as the right operand of a + * bit-shift operator to multiply or divide something by the sample size. + * + * Return: log2 of the subdevice sample size. + */ +static inline unsigned int comedi_sample_shift(struct comedi_subdevice *s) +{ + return s->subdev_flags & SDF_LSAMPL ? 2 : 1; +} + +/** + * comedi_bytes_to_samples() - Convert a number of bytes to a number of samples + * @s: COMEDI subdevice. + * @nbytes: Number of bytes + * + * Return: The number of bytes divided by the subdevice sample size. + */ +static inline unsigned int comedi_bytes_to_samples(struct comedi_subdevice *s, + unsigned int nbytes) +{ + return nbytes >> comedi_sample_shift(s); +} + +/** + * comedi_samples_to_bytes() - Convert a number of samples to a number of bytes + * @s: COMEDI subdevice. + * @nsamples: Number of samples. + * + * Return: The number of samples multiplied by the subdevice sample size. + * (Does not check for arithmetic overflow.) + */ +static inline unsigned int comedi_samples_to_bytes(struct comedi_subdevice *s, + unsigned int nsamples) +{ + return nsamples << comedi_sample_shift(s); +} + +/** + * comedi_check_trigger_src() - Trivially validate a comedi_cmd trigger source + * @src: Pointer to the trigger source to validate. + * @flags: Bitmask of valid %TRIG_* for the trigger. + * + * This is used in "step 1" of the do_cmdtest functions of comedi drivers + * to validate the comedi_cmd triggers. The mask of the @src against the + * @flags allows the userspace comedilib to pass all the comedi_cmd + * triggers as %TRIG_ANY and get back a bitmask of the valid trigger sources. + * + * Return: + * 0 if trigger sources in *@src are all supported. 
+ * -EINVAL if any trigger source in *@src is unsupported. + */ +static inline int comedi_check_trigger_src(unsigned int *src, + unsigned int flags) +{ + unsigned int orig_src = *src; + + *src = orig_src & flags; + if (*src == TRIG_INVALID || *src != orig_src) + return -EINVAL; + return 0; +} + +/** + * comedi_check_trigger_is_unique() - Make sure a trigger source is unique + * @src: The trigger source to check. + * + * Return: + * 0 if no more than one trigger source is set. + * -EINVAL if more than one trigger source is set. + */ +static inline int comedi_check_trigger_is_unique(unsigned int src) +{ + /* this test is true if more than one _src bit is set */ + if ((src & (src - 1)) != 0) + return -EINVAL; + return 0; +} + +/** + * comedi_check_trigger_arg_is() - Trivially validate a trigger argument + * @arg: Pointer to the trigger arg to validate. + * @val: The value the argument should be. + * + * Forces *@arg to be @val. + * + * Return: + * 0 if *@arg was already @val. + * -EINVAL if *@arg differed from @val. + */ +static inline int comedi_check_trigger_arg_is(unsigned int *arg, + unsigned int val) +{ + if (*arg != val) { + *arg = val; + return -EINVAL; + } + return 0; +} + +/** + * comedi_check_trigger_arg_min() - Trivially validate a trigger argument min + * @arg: Pointer to the trigger arg to validate. + * @val: The minimum value the argument should be. + * + * Forces *@arg to be at least @val, setting it to @val if necessary. + * + * Return: + * 0 if *@arg was already at least @val. + * -EINVAL if *@arg was less than @val. + */ +static inline int comedi_check_trigger_arg_min(unsigned int *arg, + unsigned int val) +{ + if (*arg < val) { + *arg = val; + return -EINVAL; + } + return 0; +} + +/** + * comedi_check_trigger_arg_max() - Trivially validate a trigger argument max + * @arg: Pointer to the trigger arg to validate. + * @val: The maximum value the argument should be. + * + * Forces *@arg to be no more than @val, setting it to @val if necessary. 
+ * + * Return: + * 0 if *@arg was already no more than @val. + * -EINVAL if *@arg was greater than @val. + */ +static inline int comedi_check_trigger_arg_max(unsigned int *arg, + unsigned int val) +{ + if (*arg > val) { + *arg = val; + return -EINVAL; + } + return 0; +} + +/* + * Must set dev->hw_dev if you wish to dma directly into comedi's buffer. + * Also useful for retrieving a previously configured hardware device of + * known bus type. Set automatically for auto-configured devices. + * Automatically set to NULL when detaching hardware device. + */ +int comedi_set_hw_dev(struct comedi_device *dev, struct device *hw_dev); + +/** + * comedi_buf_n_bytes_ready - Determine amount of unread data in buffer + * @s: COMEDI subdevice. + * + * Determines the number of bytes of unread data in the asynchronous + * acquisition data buffer for a subdevice. The data in question might not + * have been fully "munged" yet. + * + * Returns: The amount of unread data in bytes. + */ +static inline unsigned int comedi_buf_n_bytes_ready(struct comedi_subdevice *s) +{ + return s->async->buf_write_count - s->async->buf_read_count; +} + +unsigned int comedi_buf_write_alloc(struct comedi_subdevice *s, unsigned int n); +unsigned int comedi_buf_write_free(struct comedi_subdevice *s, unsigned int n); + +unsigned int comedi_buf_read_n_available(struct comedi_subdevice *s); +unsigned int comedi_buf_read_alloc(struct comedi_subdevice *s, unsigned int n); +unsigned int comedi_buf_read_free(struct comedi_subdevice *s, unsigned int n); + +unsigned int comedi_buf_write_samples(struct comedi_subdevice *s, + const void *data, unsigned int nsamples); +unsigned int comedi_buf_read_samples(struct comedi_subdevice *s, + void *data, unsigned int nsamples); + +/* drivers.c - general comedi driver functions */ + +#define COMEDI_TIMEOUT_MS 1000 + +int comedi_timeout(struct comedi_device *dev, struct comedi_subdevice *s, + struct comedi_insn *insn, + int (*cb)(struct comedi_device *dev, + struct 
comedi_subdevice *s, + struct comedi_insn *insn, unsigned long context), + unsigned long context); + +unsigned int comedi_handle_events(struct comedi_device *dev, + struct comedi_subdevice *s); + +int comedi_dio_insn_config(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data, + unsigned int mask); +unsigned int comedi_dio_update_state(struct comedi_subdevice *s, + unsigned int *data); +unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, + struct comedi_cmd *cmd); +unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s); +unsigned int comedi_nscans_left(struct comedi_subdevice *s, + unsigned int nscans); +unsigned int comedi_nsamples_left(struct comedi_subdevice *s, + unsigned int nsamples); +void comedi_inc_scan_progress(struct comedi_subdevice *s, + unsigned int num_bytes); + +void *comedi_alloc_devpriv(struct comedi_device *dev, size_t size); +int comedi_alloc_subdevices(struct comedi_device *dev, int num_subdevices); +int comedi_alloc_subdev_readback(struct comedi_subdevice *s); + +int comedi_readback_insn_read(struct comedi_device *dev, + struct comedi_subdevice *s, + struct comedi_insn *insn, unsigned int *data); + +int comedi_load_firmware(struct comedi_device *dev, struct device *hw_dev, + const char *name, + int (*cb)(struct comedi_device *dev, + const u8 *data, size_t size, + unsigned long context), + unsigned long context); + +int __comedi_request_region(struct comedi_device *dev, + unsigned long start, unsigned long len); +int comedi_request_region(struct comedi_device *dev, + unsigned long start, unsigned long len); +void comedi_legacy_detach(struct comedi_device *dev); + +int comedi_auto_config(struct device *hardware_device, + struct comedi_driver *driver, unsigned long context); +void comedi_auto_unconfig(struct device *hardware_device); + +int comedi_driver_register(struct comedi_driver *driver); +void comedi_driver_unregister(struct comedi_driver *driver); + +/** + * 
module_comedi_driver() - Helper macro for registering a comedi driver + * @__comedi_driver: comedi_driver struct + * + * Helper macro for comedi drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only use + * this macro once, and calling it replaces module_init() and module_exit(). + */ +#define module_comedi_driver(__comedi_driver) \ + module_driver(__comedi_driver, comedi_driver_register, \ + comedi_driver_unregister) + +#endif /* _COMEDIDEV_H */ diff --git a/include/linux/comedi/comedilib.h b/include/linux/comedi/comedilib.h new file mode 100644 index 000000000000..0223c9cd9215 --- /dev/null +++ b/include/linux/comedi/comedilib.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * comedilib.h + * Header file for kcomedilib + * + * COMEDI - Linux Control and Measurement Device Interface + * Copyright (C) 1998-2001 David A. Schleef <[email protected]> + */ + +#ifndef _LINUX_COMEDILIB_H +#define _LINUX_COMEDILIB_H + +struct comedi_device *comedi_open(const char *path); +int comedi_close(struct comedi_device *dev); +int comedi_dio_get_config(struct comedi_device *dev, unsigned int subdev, + unsigned int chan, unsigned int *io); +int comedi_dio_config(struct comedi_device *dev, unsigned int subdev, + unsigned int chan, unsigned int io); +int comedi_dio_bitfield2(struct comedi_device *dev, unsigned int subdev, + unsigned int mask, unsigned int *bits, + unsigned int base_channel); +int comedi_find_subdevice_by_type(struct comedi_device *dev, int type, + unsigned int subd); +int comedi_get_n_channels(struct comedi_device *dev, unsigned int subdevice); + +#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index bd2b881c6b63..ccbbd31b3aae 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -41,8 +41,6 @@ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) -#define __compiletime_object_size(obj) 
__builtin_object_size(obj, 0) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif @@ -124,6 +122,14 @@ #endif /* + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel, + * matching the defines used by Clang. + */ +#ifdef __SANITIZE_HWADDRESS__ +#define __SANITIZE_ADDRESS__ +#endif + +/* * Turn individual warnings and errors on and off locally, depending * on version. */ @@ -144,3 +150,11 @@ #else #define __diag_GCC_8(s) #endif + +/* + * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" + * attribute) do not work, and must be disabled. + */ +#if GCC_VERSION < 90100 +#undef __alloc_size__ +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d5af56337bd..429dcebe2b99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -121,7 +121,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_reachable() __annotate_reachable(__COUNTER__) @@ -129,7 +129,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e6ec63403965..37e260020221 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -34,6 +34,15 @@ #define __aligned_largest __attribute__((__aligned__)) /* + * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally + * available and includes other attributes. 
+ * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size + */ +#define __alloc_size__(x, ...) __attribute__((__alloc_size__(x, ## __VA_ARGS__))) + +/* * Note: users of __always_inline currently do not write "inline" themselves, * which seems to be required by gcc to apply the attribute according * to its docs (and also "warning: always_inline function might not be @@ -104,7 +113,6 @@ #define __deprecated /* - * Optional: only supported since gcc >= 5.1 * Optional: not supported by clang * Optional: not supported by icc * @@ -153,6 +161,7 @@ /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc */ #define __malloc __attribute__((__malloc__)) @@ -300,6 +309,24 @@ #endif /* + * Optional: only supported since clang >= 14.0 + * + * clang: https://clang.llvm.org/docs/AttributeReference.html#disable-sanitizer-instrumentation + * + * disable_sanitizer_instrumentation is not always similar to + * no_sanitize((<sanitizer-name>)): the latter may still let specific sanitizers + * insert code into functions to prevent false positives. Unlike that, + * disable_sanitizer_instrumentation prevents all kinds of instrumentation to + * functions with the attribute. 
+ */ +#if __has_attribute(disable_sanitizer_instrumentation) +# define __disable_sanitizer_instrumentation \ + __attribute__((disable_sanitizer_instrumentation)) +#else +# define __disable_sanitizer_instrumentation +#endif + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index b6ff83a714ca..3c1795fdb568 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -198,9 +198,20 @@ struct ftrace_likely_data { # define __no_kasan_or_inline __always_inline #endif -#define __no_kcsan __no_sanitize_thread #ifdef __SANITIZE_THREAD__ +/* + * Clang still emits instrumentation for __tsan_func_{entry,exit}() and builtin + * atomics even with __no_sanitize_thread (to avoid false positives in userspace + * ThreadSanitizer). The kernel's requirements are stricter and we really do not + * want any instrumentation with __no_kcsan. + * + * Therefore we add __disable_sanitizer_instrumentation where available to + * disable all instrumentation. See Kconfig.kcsan where this is mandatory. + */ +# define __no_kcsan __no_sanitize_thread __disable_sanitizer_instrumentation # define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused +#else +# define __no_kcsan #endif #ifndef __no_sanitize_or_inline @@ -250,6 +261,18 @@ struct ftrace_likely_data { # define __cficanonical #endif +/* + * Any place that could be marked with the "alloc_size" attribute is also + * a place to be marked with the "malloc" attribute. Do this as part of the + * __alloc_size macro to avoid redundant attributes and to avoid missing a + * __malloc marking. + */ +#ifdef __alloc_size__ +# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc +#else +# define __alloc_size(x, ...) 
__malloc +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif @@ -290,15 +313,16 @@ struct ftrace_likely_data { (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) -/* Compile time object size, -1 for unknown */ -#ifndef __compiletime_object_size -# define __compiletime_object_size(obj) -1 -#endif - #ifdef __OPTIMIZE__ # define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ - extern void prefix ## suffix(void) __compiletime_error(msg); \ + /* \ + * __noreturn is needed to give the compiler enough \ + * information to avoid certain possibly-uninitialized \ + * warnings (regardless of the build failing). \ + */ \ + __noreturn extern void prefix ## suffix(void) \ + __compiletime_error(msg); \ if (!(condition)) \ prefix ## suffix(); \ } while (0) diff --git a/include/linux/console.h b/include/linux/console.h index 20874db50bc8..7cd758a4f44e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -149,6 +149,8 @@ struct console { short flags; short index; int cflag; + uint ispeed; + uint ospeed; void *data; struct console *next; }; @@ -217,12 +219,6 @@ extern atomic_t ignore_console_lock_warning; #define VESA_HSYNC_SUSPEND 2 #define VESA_POWERDOWN 3 -#ifdef CONFIG_VGA_CONSOLE -extern bool vgacon_text_force(void); -#else -static inline bool vgacon_text_force(void) { return false; } -#endif - extern void console_init(void); /* For deferred console takeover */ diff --git a/include/linux/container_of.h b/include/linux/container_of.h new file mode 100644 index 000000000000..2f4944b791b8 --- /dev/null +++ b/include/linux/container_of.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CONTAINER_OF_H +#define _LINUX_CONTAINER_OF_H + +#include <linux/build_bug.h> +#include <linux/err.h> + +#define typeof_member(T, m) typeof(((T*)0)->m) + +/** + * container_of - cast a member of a structure out to the containing structure + * 
@ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + void *__mptr = (void *)(ptr); \ + static_assert(__same_type(*(ptr), ((type *)0)->member) || \ + __same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type *)(__mptr - offsetof(type, member))); }) + +/** + * container_of_safe - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged. + */ +#define container_of_safe(ptr, type, member) ({ \ + void *__mptr = (void *)(ptr); \ + static_assert(__same_type(*(ptr), ((type *)0)->member) || \ + __same_type(*(ptr), void), \ + "pointer type mismatch in container_of_safe()"); \ + IS_ERR_OR_NULL(__mptr) ? 
ERR_CAST(__mptr) : \ + ((type *)(__mptr - offsetof(type, member))); }) + +#endif /* _LINUX_CONTAINER_OF_H */ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 4d7fced3a39f..7a14807c9d1a 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -105,7 +105,7 @@ static inline void user_exit_irqoff(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } -static inline bool context_tracking_guest_enter(void) { return false; } +static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } #endif /* !CONFIG_CONTEXT_TRACKING */ diff --git a/include/linux/counter.h b/include/linux/counter.h index d16ce2819b48..1fe17f5adb09 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -6,417 +6,280 @@ #ifndef _COUNTER_H_ #define _COUNTER_H_ -#include <linux/counter_enum.h> +#include <linux/cdev.h> #include <linux/device.h> +#include <linux/kernel.h> +#include <linux/kfifo.h> +#include <linux/mutex.h> +#include <linux/spinlock_types.h> #include <linux/types.h> - -enum counter_count_direction { - COUNTER_COUNT_DIRECTION_FORWARD = 0, - COUNTER_COUNT_DIRECTION_BACKWARD -}; -extern const char *const counter_count_direction_str[2]; - -enum counter_count_mode { - COUNTER_COUNT_MODE_NORMAL = 0, - COUNTER_COUNT_MODE_RANGE_LIMIT, - COUNTER_COUNT_MODE_NON_RECYCLE, - COUNTER_COUNT_MODE_MODULO_N -}; -extern const char *const counter_count_mode_str[4]; +#include <linux/wait.h> +#include <uapi/linux/counter.h> struct counter_device; +struct counter_count; +struct counter_synapse; struct counter_signal; +enum counter_comp_type { + COUNTER_COMP_U8, + COUNTER_COMP_U64, + COUNTER_COMP_BOOL, + COUNTER_COMP_SIGNAL_LEVEL, + COUNTER_COMP_FUNCTION, + COUNTER_COMP_SYNAPSE_ACTION, + 
COUNTER_COMP_ENUM, + COUNTER_COMP_COUNT_DIRECTION, + COUNTER_COMP_COUNT_MODE, +}; + /** - * struct counter_signal_ext - Counter Signal extensions - * @name: attribute name - * @read: read callback for this attribute; may be NULL - * @write: write callback for this attribute; may be NULL - * @priv: data private to the driver + * struct counter_comp - Counter component node + * @type: Counter component data type + * @name: device-specific component name + * @priv: component-relevant data + * @action_read: Synapse action mode read callback. The read value of the + * respective Synapse action mode should be passed back via + * the action parameter. + * @device_u8_read: Device u8 component read callback. The read value of the + * respective Device u8 component should be passed back via + * the val parameter. + * @count_u8_read: Count u8 component read callback. The read value of the + * respective Count u8 component should be passed back via + * the val parameter. + * @signal_u8_read: Signal u8 component read callback. The read value of the + * respective Signal u8 component should be passed back via + * the val parameter. + * @device_u32_read: Device u32 component read callback. The read value of + * the respective Device u32 component should be passed + * back via the val parameter. + * @count_u32_read: Count u32 component read callback. The read value of the + * respective Count u32 component should be passed back via + * the val parameter. + * @signal_u32_read: Signal u32 component read callback. The read value of + * the respective Signal u32 component should be passed + * back via the val parameter. + * @device_u64_read: Device u64 component read callback. The read value of + * the respective Device u64 component should be passed + * back via the val parameter. + * @count_u64_read: Count u64 component read callback. The read value of the + * respective Count u64 component should be passed back via + * the val parameter. 
+ * @signal_u64_read: Signal u64 component read callback. The read value of + * the respective Signal u64 component should be passed + * back via the val parameter. + * @action_write: Synapse action mode write callback. The write value of + * the respective Synapse action mode is passed via the + * action parameter. + * @device_u8_write: Device u8 component write callback. The write value of + * the respective Device u8 component is passed via the val + * parameter. + * @count_u8_write: Count u8 component write callback. The write value of + * the respective Count u8 component is passed via the val + * parameter. + * @signal_u8_write: Signal u8 component write callback. The write value of + * the respective Signal u8 component is passed via the val + * parameter. + * @device_u32_write: Device u32 component write callback. The write value of + * the respective Device u32 component is passed via the + * val parameter. + * @count_u32_write: Count u32 component write callback. The write value of + * the respective Count u32 component is passed via the val + * parameter. + * @signal_u32_write: Signal u32 component write callback. The write value of + * the respective Signal u32 component is passed via the + * val parameter. + * @device_u64_write: Device u64 component write callback. The write value of + * the respective Device u64 component is passed via the + * val parameter. + * @count_u64_write: Count u64 component write callback. The write value of + * the respective Count u64 component is passed via the val + * parameter. + * @signal_u64_write: Signal u64 component write callback. The write value of + * the respective Signal u64 component is passed via the + * val parameter. 
*/ -struct counter_signal_ext { +struct counter_comp { + enum counter_comp_type type; const char *name; - ssize_t (*read)(struct counter_device *counter, - struct counter_signal *signal, void *priv, char *buf); - ssize_t (*write)(struct counter_device *counter, - struct counter_signal *signal, void *priv, - const char *buf, size_t len); void *priv; + union { + int (*action_read)(struct counter_device *counter, + struct counter_count *count, + struct counter_synapse *synapse, + enum counter_synapse_action *action); + int (*device_u8_read)(struct counter_device *counter, u8 *val); + int (*count_u8_read)(struct counter_device *counter, + struct counter_count *count, u8 *val); + int (*signal_u8_read)(struct counter_device *counter, + struct counter_signal *signal, u8 *val); + int (*device_u32_read)(struct counter_device *counter, + u32 *val); + int (*count_u32_read)(struct counter_device *counter, + struct counter_count *count, u32 *val); + int (*signal_u32_read)(struct counter_device *counter, + struct counter_signal *signal, u32 *val); + int (*device_u64_read)(struct counter_device *counter, + u64 *val); + int (*count_u64_read)(struct counter_device *counter, + struct counter_count *count, u64 *val); + int (*signal_u64_read)(struct counter_device *counter, + struct counter_signal *signal, u64 *val); + }; + union { + int (*action_write)(struct counter_device *counter, + struct counter_count *count, + struct counter_synapse *synapse, + enum counter_synapse_action action); + int (*device_u8_write)(struct counter_device *counter, u8 val); + int (*count_u8_write)(struct counter_device *counter, + struct counter_count *count, u8 val); + int (*signal_u8_write)(struct counter_device *counter, + struct counter_signal *signal, u8 val); + int (*device_u32_write)(struct counter_device *counter, + u32 val); + int (*count_u32_write)(struct counter_device *counter, + struct counter_count *count, u32 val); + int (*signal_u32_write)(struct counter_device *counter, + struct 
counter_signal *signal, u32 val); + int (*device_u64_write)(struct counter_device *counter, + u64 val); + int (*count_u64_write)(struct counter_device *counter, + struct counter_count *count, u64 val); + int (*signal_u64_write)(struct counter_device *counter, + struct counter_signal *signal, u64 val); + }; }; /** * struct counter_signal - Counter Signal node - * @id: unique ID used to identify signal - * @name: device-specific Signal name; ideally, this should match the name - * as it appears in the datasheet documentation - * @ext: optional array of Counter Signal extensions - * @num_ext: number of Counter Signal extensions specified in @ext - * @priv: optional private data supplied by driver + * @id: unique ID used to identify the Signal + * @name: device-specific Signal name + * @ext: optional array of Signal extensions + * @num_ext: number of Signal extensions specified in @ext */ struct counter_signal { int id; const char *name; - const struct counter_signal_ext *ext; + struct counter_comp *ext; size_t num_ext; - - void *priv; -}; - -/** - * struct counter_signal_enum_ext - Signal enum extension attribute - * @items: Array of strings - * @num_items: Number of items specified in @items - * @set: Set callback function; may be NULL - * @get: Get callback function; may be NULL - * - * The counter_signal_enum_ext structure can be used to implement enum style - * Signal extension attributes. Enum style attributes are those which have a set - * of strings that map to unsigned integer values. The Generic Counter Signal - * enum extension helper code takes care of mapping between value and string, as - * well as generating a "_available" file which contains a list of all available - * items. The get callback is used to query the currently active item; the index - * of the item within the respective items array is returned via the 'item' - * parameter. 
The set callback is called when the attribute is updated; the - * 'item' parameter contains the index of the newly activated item within the - * respective items array. - */ -struct counter_signal_enum_ext { - const char * const *items; - size_t num_items; - int (*get)(struct counter_device *counter, - struct counter_signal *signal, size_t *item); - int (*set)(struct counter_device *counter, - struct counter_signal *signal, size_t item); -}; - -/** - * COUNTER_SIGNAL_ENUM() - Initialize Signal enum extension - * @_name: Attribute name - * @_e: Pointer to a counter_signal_enum_ext structure - * - * This should usually be used together with COUNTER_SIGNAL_ENUM_AVAILABLE() - */ -#define COUNTER_SIGNAL_ENUM(_name, _e) \ -{ \ - .name = (_name), \ - .read = counter_signal_enum_read, \ - .write = counter_signal_enum_write, \ - .priv = (_e) \ -} - -/** - * COUNTER_SIGNAL_ENUM_AVAILABLE() - Initialize Signal enum available extension - * @_name: Attribute name ("_available" will be appended to the name) - * @_e: Pointer to a counter_signal_enum_ext structure - * - * Creates a read only attribute that lists all the available enum items in a - * newline separated list. 
This should usually be used together with - * COUNTER_SIGNAL_ENUM() - */ -#define COUNTER_SIGNAL_ENUM_AVAILABLE(_name, _e) \ -{ \ - .name = (_name "_available"), \ - .read = counter_signal_enum_available_read, \ - .priv = (_e) \ -} - -enum counter_synapse_action { - COUNTER_SYNAPSE_ACTION_NONE = 0, - COUNTER_SYNAPSE_ACTION_RISING_EDGE, - COUNTER_SYNAPSE_ACTION_FALLING_EDGE, - COUNTER_SYNAPSE_ACTION_BOTH_EDGES }; /** * struct counter_synapse - Counter Synapse node - * @action: index of current action mode * @actions_list: array of available action modes * @num_actions: number of action modes specified in @actions_list - * @signal: pointer to associated signal + * @signal: pointer to the associated Signal */ struct counter_synapse { - size_t action; const enum counter_synapse_action *actions_list; size_t num_actions; struct counter_signal *signal; }; -struct counter_count; - -/** - * struct counter_count_ext - Counter Count extension - * @name: attribute name - * @read: read callback for this attribute; may be NULL - * @write: write callback for this attribute; may be NULL - * @priv: data private to the driver - */ -struct counter_count_ext { - const char *name; - ssize_t (*read)(struct counter_device *counter, - struct counter_count *count, void *priv, char *buf); - ssize_t (*write)(struct counter_device *counter, - struct counter_count *count, void *priv, - const char *buf, size_t len); - void *priv; -}; - -enum counter_function { - COUNTER_FUNCTION_INCREASE = 0, - COUNTER_FUNCTION_DECREASE, - COUNTER_FUNCTION_PULSE_DIRECTION, - COUNTER_FUNCTION_QUADRATURE_X1_A, - COUNTER_FUNCTION_QUADRATURE_X1_B, - COUNTER_FUNCTION_QUADRATURE_X2_A, - COUNTER_FUNCTION_QUADRATURE_X2_B, - COUNTER_FUNCTION_QUADRATURE_X4 -}; - /** * struct counter_count - Counter Count node - * @id: unique ID used to identify Count - * @name: device-specific Count name; ideally, this should match - * the name as it appears in the datasheet documentation - * @function: index of current function mode - * 
@functions_list: array available function modes + * @id: unique ID used to identify the Count + * @name: device-specific Count name + * @functions_list: array of available function modes * @num_functions: number of function modes specified in @functions_list - * @synapses: array of synapses for initialization - * @num_synapses: number of synapses specified in @synapses - * @ext: optional array of Counter Count extensions - * @num_ext: number of Counter Count extensions specified in @ext - * @priv: optional private data supplied by driver + * @synapses: array of Synapses for initialization + * @num_synapses: number of Synapses specified in @synapses + * @ext: optional array of Count extensions + * @num_ext: number of Count extensions specified in @ext */ struct counter_count { int id; const char *name; - size_t function; const enum counter_function *functions_list; size_t num_functions; struct counter_synapse *synapses; size_t num_synapses; - const struct counter_count_ext *ext; + struct counter_comp *ext; size_t num_ext; - - void *priv; -}; - -/** - * struct counter_count_enum_ext - Count enum extension attribute - * @items: Array of strings - * @num_items: Number of items specified in @items - * @set: Set callback function; may be NULL - * @get: Get callback function; may be NULL - * - * The counter_count_enum_ext structure can be used to implement enum style - * Count extension attributes. Enum style attributes are those which have a set - * of strings that map to unsigned integer values. The Generic Counter Count - * enum extension helper code takes care of mapping between value and string, as - * well as generating a "_available" file which contains a list of all available - * items. The get callback is used to query the currently active item; the index - * of the item within the respective items array is returned via the 'item' - * parameter. 
The set callback is called when the attribute is updated; the - * 'item' parameter contains the index of the newly activated item within the - * respective items array. - */ -struct counter_count_enum_ext { - const char * const *items; - size_t num_items; - int (*get)(struct counter_device *counter, struct counter_count *count, - size_t *item); - int (*set)(struct counter_device *counter, struct counter_count *count, - size_t item); -}; - -/** - * COUNTER_COUNT_ENUM() - Initialize Count enum extension - * @_name: Attribute name - * @_e: Pointer to a counter_count_enum_ext structure - * - * This should usually be used together with COUNTER_COUNT_ENUM_AVAILABLE() - */ -#define COUNTER_COUNT_ENUM(_name, _e) \ -{ \ - .name = (_name), \ - .read = counter_count_enum_read, \ - .write = counter_count_enum_write, \ - .priv = (_e) \ -} - -/** - * COUNTER_COUNT_ENUM_AVAILABLE() - Initialize Count enum available extension - * @_name: Attribute name ("_available" will be appended to the name) - * @_e: Pointer to a counter_count_enum_ext structure - * - * Creates a read only attribute that lists all the available enum items in a - * newline separated list. 
This should usually be used together with - * COUNTER_COUNT_ENUM() - */ -#define COUNTER_COUNT_ENUM_AVAILABLE(_name, _e) \ -{ \ - .name = (_name "_available"), \ - .read = counter_count_enum_available_read, \ - .priv = (_e) \ -} - -/** - * struct counter_device_attr_group - internal container for attribute group - * @attr_group: Counter sysfs attributes group - * @attr_list: list to keep track of created Counter sysfs attributes - * @num_attr: number of Counter sysfs attributes - */ -struct counter_device_attr_group { - struct attribute_group attr_group; - struct list_head attr_list; - size_t num_attr; }; /** - * struct counter_device_state - internal state container for a Counter device - * @id: unique ID used to identify the Counter - * @dev: internal device structure - * @groups_list: attribute groups list (for Signals, Counts, and ext) - * @num_groups: number of attribute groups containers - * @groups: Counter sysfs attribute groups (to populate @dev.groups) + * struct counter_event_node - Counter Event node + * @l: list of current watching Counter events + * @event: event that triggers + * @channel: event channel + * @comp_list: list of components to watch when event triggers */ -struct counter_device_state { - int id; - struct device dev; - struct counter_device_attr_group *groups_list; - size_t num_groups; - const struct attribute_group **groups; -}; - -enum counter_signal_level { - COUNTER_SIGNAL_LEVEL_LOW, - COUNTER_SIGNAL_LEVEL_HIGH, +struct counter_event_node { + struct list_head l; + u8 event; + u8 channel; + struct list_head comp_list; }; /** * struct counter_ops - Callbacks from driver - * @signal_read: optional read callback for Signal attribute. The read - * level of the respective Signal should be passed back via - * the level parameter. - * @count_read: optional read callback for Count attribute. The read - * value of the respective Count should be passed back via - * the val parameter. 
- * @count_write: optional write callback for Count attribute. The write - * value for the respective Count is passed in via the val + * @signal_read: optional read callback for Signals. The read level of + * the respective Signal should be passed back via the + * level parameter. + * @count_read: read callback for Counts. The read value of the + * respective Count should be passed back via the value + * parameter. + * @count_write: optional write callback for Counts. The write value for + * the respective Count is passed in via the value * parameter. - * @function_get: function to get the current count function mode. Returns - * 0 on success and negative error code on error. The index - * of the respective Count's returned function mode should - * be passed back via the function parameter. - * @function_set: function to set the count function mode. function is the - * index of the requested function mode from the respective - * Count's functions_list array. - * @action_get: function to get the current action mode. Returns 0 on - * success and negative error code on error. The index of - * the respective Synapse's returned action mode should be + * @function_read: read callback the Count function modes. The read + * function mode of the respective Count should be passed + * back via the function parameter. + * @function_write: optional write callback for Count function modes. The + * function mode to write for the respective Count is + * passed in via the function parameter. + * @action_read: optional read callback the Synapse action modes. The + * read action mode of the respective Synapse should be * passed back via the action parameter. - * @action_set: function to set the action mode. action is the index of - * the requested action mode from the respective Synapse's - * actions_list array. + * @action_write: optional write callback for Synapse action modes. The + * action mode to write for the respective Synapse is + * passed in via the action parameter. 
+ * @events_configure: optional write callback to configure events. The list of + * struct counter_event_node may be accessed via the + * events_list member of the counter parameter. + * @watch_validate: optional callback to validate a watch. The Counter + * component watch configuration is passed in via the watch + * parameter. A return value of 0 indicates a valid Counter + * component watch configuration. */ struct counter_ops { int (*signal_read)(struct counter_device *counter, struct counter_signal *signal, enum counter_signal_level *level); int (*count_read)(struct counter_device *counter, - struct counter_count *count, unsigned long *val); + struct counter_count *count, u64 *value); int (*count_write)(struct counter_device *counter, - struct counter_count *count, unsigned long val); - int (*function_get)(struct counter_device *counter, - struct counter_count *count, size_t *function); - int (*function_set)(struct counter_device *counter, - struct counter_count *count, size_t function); - int (*action_get)(struct counter_device *counter, - struct counter_count *count, - struct counter_synapse *synapse, size_t *action); - int (*action_set)(struct counter_device *counter, - struct counter_count *count, - struct counter_synapse *synapse, size_t action); + struct counter_count *count, u64 value); + int (*function_read)(struct counter_device *counter, + struct counter_count *count, + enum counter_function *function); + int (*function_write)(struct counter_device *counter, + struct counter_count *count, + enum counter_function function); + int (*action_read)(struct counter_device *counter, + struct counter_count *count, + struct counter_synapse *synapse, + enum counter_synapse_action *action); + int (*action_write)(struct counter_device *counter, + struct counter_count *count, + struct counter_synapse *synapse, + enum counter_synapse_action action); + int (*events_configure)(struct counter_device *counter); + int (*watch_validate)(struct counter_device *counter, + 
const struct counter_watch *watch); }; /** - * struct counter_device_ext - Counter device extension - * @name: attribute name - * @read: read callback for this attribute; may be NULL - * @write: write callback for this attribute; may be NULL - * @priv: data private to the driver - */ -struct counter_device_ext { - const char *name; - ssize_t (*read)(struct counter_device *counter, void *priv, char *buf); - ssize_t (*write)(struct counter_device *counter, void *priv, - const char *buf, size_t len); - void *priv; -}; - -/** - * struct counter_device_enum_ext - Counter enum extension attribute - * @items: Array of strings - * @num_items: Number of items specified in @items - * @set: Set callback function; may be NULL - * @get: Get callback function; may be NULL - * - * The counter_device_enum_ext structure can be used to implement enum style - * Counter extension attributes. Enum style attributes are those which have a - * set of strings that map to unsigned integer values. The Generic Counter enum - * extension helper code takes care of mapping between value and string, as well - * as generating a "_available" file which contains a list of all available - * items. The get callback is used to query the currently active item; the index - * of the item within the respective items array is returned via the 'item' - * parameter. The set callback is called when the attribute is updated; the - * 'item' parameter contains the index of the newly activated item within the - * respective items array. 
- */ -struct counter_device_enum_ext { - const char * const *items; - size_t num_items; - int (*get)(struct counter_device *counter, size_t *item); - int (*set)(struct counter_device *counter, size_t item); -}; - -/** - * COUNTER_DEVICE_ENUM() - Initialize Counter enum extension - * @_name: Attribute name - * @_e: Pointer to a counter_device_enum_ext structure - * - * This should usually be used together with COUNTER_DEVICE_ENUM_AVAILABLE() - */ -#define COUNTER_DEVICE_ENUM(_name, _e) \ -{ \ - .name = (_name), \ - .read = counter_device_enum_read, \ - .write = counter_device_enum_write, \ - .priv = (_e) \ -} - -/** - * COUNTER_DEVICE_ENUM_AVAILABLE() - Initialize Counter enum available extension - * @_name: Attribute name ("_available" will be appended to the name) - * @_e: Pointer to a counter_device_enum_ext structure - * - * Creates a read only attribute that lists all the available enum items in a - * newline separated list. This should usually be used together with - * COUNTER_DEVICE_ENUM() - */ -#define COUNTER_DEVICE_ENUM_AVAILABLE(_name, _e) \ -{ \ - .name = (_name "_available"), \ - .read = counter_device_enum_available_read, \ - .priv = (_e) \ -} - -/** * struct counter_device - Counter data structure - * @name: name of the device as it appears in the datasheet + * @name: name of the device * @parent: optional parent device providing the counters - * @device_state: internal device state container * @ops: callbacks from driver * @signals: array of Signals * @num_signals: number of Signals specified in @signals @@ -425,11 +288,21 @@ struct counter_device_enum_ext { * @ext: optional array of Counter device extensions * @num_ext: number of Counter device extensions specified in @ext * @priv: optional private data supplied by driver + * @dev: internal device structure + * @chrdev: internal character device structure + * @events_list: list of current watching Counter events + * @events_list_lock: lock to protect Counter events list operations + * 
@next_events_list: list of next watching Counter events + * @n_events_list_lock: lock to protect Counter next events list operations + * @events: queue of detected Counter events + * @events_wait: wait queue to allow blocking reads of Counter events + * @events_in_lock: lock to protect Counter events queue in operations + * @events_out_lock: lock to protect Counter events queue out operations + * @ops_exist_lock: lock to prevent use during removal */ struct counter_device { const char *name; struct device *parent; - struct counter_device_state *device_state; const struct counter_ops *ops; @@ -438,17 +311,176 @@ struct counter_device { struct counter_count *counts; size_t num_counts; - const struct counter_device_ext *ext; + struct counter_comp *ext; size_t num_ext; - void *priv; + struct device dev; + struct cdev chrdev; + struct list_head events_list; + spinlock_t events_list_lock; + struct list_head next_events_list; + struct mutex n_events_list_lock; + DECLARE_KFIFO_PTR(events, struct counter_event); + wait_queue_head_t events_wait; + spinlock_t events_in_lock; + struct mutex events_out_lock; + struct mutex ops_exist_lock; }; -int counter_register(struct counter_device *const counter); +void *counter_priv(const struct counter_device *const counter); + +struct counter_device *counter_alloc(size_t sizeof_priv); +void counter_put(struct counter_device *const counter); +int counter_add(struct counter_device *const counter); + void counter_unregister(struct counter_device *const counter); -int devm_counter_register(struct device *dev, - struct counter_device *const counter); -void devm_counter_unregister(struct device *dev, - struct counter_device *const counter); +struct counter_device *devm_counter_alloc(struct device *dev, + size_t sizeof_priv); +int devm_counter_add(struct device *dev, + struct counter_device *const counter); +void counter_push_event(struct counter_device *const counter, const u8 event, + const u8 channel); + +#define 
COUNTER_COMP_DEVICE_U8(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_U8, \ + .name = (_name), \ + .device_u8_read = (_read), \ + .device_u8_write = (_write), \ +} +#define COUNTER_COMP_COUNT_U8(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_U8, \ + .name = (_name), \ + .count_u8_read = (_read), \ + .count_u8_write = (_write), \ +} +#define COUNTER_COMP_SIGNAL_U8(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_U8, \ + .name = (_name), \ + .signal_u8_read = (_read), \ + .signal_u8_write = (_write), \ +} + +#define COUNTER_COMP_DEVICE_U64(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_U64, \ + .name = (_name), \ + .device_u64_read = (_read), \ + .device_u64_write = (_write), \ +} +#define COUNTER_COMP_COUNT_U64(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_U64, \ + .name = (_name), \ + .count_u64_read = (_read), \ + .count_u64_write = (_write), \ +} +#define COUNTER_COMP_SIGNAL_U64(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_U64, \ + .name = (_name), \ + .signal_u64_read = (_read), \ + .signal_u64_write = (_write), \ +} + +#define COUNTER_COMP_DEVICE_BOOL(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_BOOL, \ + .name = (_name), \ + .device_u8_read = (_read), \ + .device_u8_write = (_write), \ +} +#define COUNTER_COMP_COUNT_BOOL(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_BOOL, \ + .name = (_name), \ + .count_u8_read = (_read), \ + .count_u8_write = (_write), \ +} +#define COUNTER_COMP_SIGNAL_BOOL(_name, _read, _write) \ +{ \ + .type = COUNTER_COMP_BOOL, \ + .name = (_name), \ + .signal_u8_read = (_read), \ + .signal_u8_write = (_write), \ +} + +struct counter_available { + union { + const u32 *enums; + const char *const *strs; + }; + size_t num_items; +}; + +#define DEFINE_COUNTER_AVAILABLE(_name, _enums) \ + struct counter_available _name = { \ + .enums = (_enums), \ + .num_items = ARRAY_SIZE(_enums), \ + } + +#define DEFINE_COUNTER_ENUM(_name, _strs) \ + struct counter_available _name = { \ + .strs = (_strs), \ + 
.num_items = ARRAY_SIZE(_strs), \ + } + +#define COUNTER_COMP_DEVICE_ENUM(_name, _get, _set, _available) \ +{ \ + .type = COUNTER_COMP_ENUM, \ + .name = (_name), \ + .device_u32_read = (_get), \ + .device_u32_write = (_set), \ + .priv = &(_available), \ +} +#define COUNTER_COMP_COUNT_ENUM(_name, _get, _set, _available) \ +{ \ + .type = COUNTER_COMP_ENUM, \ + .name = (_name), \ + .count_u32_read = (_get), \ + .count_u32_write = (_set), \ + .priv = &(_available), \ +} +#define COUNTER_COMP_SIGNAL_ENUM(_name, _get, _set, _available) \ +{ \ + .type = COUNTER_COMP_ENUM, \ + .name = (_name), \ + .signal_u32_read = (_get), \ + .signal_u32_write = (_set), \ + .priv = &(_available), \ +} + +#define COUNTER_COMP_CEILING(_read, _write) \ + COUNTER_COMP_COUNT_U64("ceiling", _read, _write) + +#define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \ +{ \ + .type = COUNTER_COMP_COUNT_MODE, \ + .name = "count_mode", \ + .count_u32_read = (_read), \ + .count_u32_write = (_write), \ + .priv = &(_available), \ +} + +#define COUNTER_COMP_DIRECTION(_read) \ +{ \ + .type = COUNTER_COMP_COUNT_DIRECTION, \ + .name = "direction", \ + .count_u32_read = (_read), \ +} + +#define COUNTER_COMP_ENABLE(_read, _write) \ + COUNTER_COMP_COUNT_BOOL("enable", _read, _write) + +#define COUNTER_COMP_FLOOR(_read, _write) \ + COUNTER_COMP_COUNT_U64("floor", _read, _write) + +#define COUNTER_COMP_PRESET(_read, _write) \ + COUNTER_COMP_COUNT_U64("preset", _read, _write) + +#define COUNTER_COMP_PRESET_ENABLE(_read, _write) \ + COUNTER_COMP_COUNT_BOOL("preset_enable", _read, _write) #endif /* _COUNTER_H_ */ diff --git a/include/linux/counter_enum.h b/include/linux/counter_enum.h deleted file mode 100644 index 9f917298a88f..000000000000 --- a/include/linux/counter_enum.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Counter interface enum functions - * Copyright (C) 2018 William Breathitt Gray - */ -#ifndef _COUNTER_ENUM_H_ -#define _COUNTER_ENUM_H_ - -#include 
<linux/types.h> - -struct counter_device; -struct counter_signal; -struct counter_count; - -ssize_t counter_signal_enum_read(struct counter_device *counter, - struct counter_signal *signal, void *priv, - char *buf); -ssize_t counter_signal_enum_write(struct counter_device *counter, - struct counter_signal *signal, void *priv, - const char *buf, size_t len); - -ssize_t counter_signal_enum_available_read(struct counter_device *counter, - struct counter_signal *signal, - void *priv, char *buf); - -ssize_t counter_count_enum_read(struct counter_device *counter, - struct counter_count *count, void *priv, - char *buf); -ssize_t counter_count_enum_write(struct counter_device *counter, - struct counter_count *count, void *priv, - const char *buf, size_t len); - -ssize_t counter_count_enum_available_read(struct counter_device *counter, - struct counter_count *count, - void *priv, char *buf); - -ssize_t counter_device_enum_read(struct counter_device *counter, void *priv, - char *buf); -ssize_t counter_device_enum_write(struct counter_device *counter, void *priv, - const char *buf, size_t len); - -ssize_t counter_device_enum_available_read(struct counter_device *counter, - void *priv, char *buf); - -#endif /* _COUNTER_ENUM_H_ */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ff88bb3e44fc..1ab29e61b078 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -119,6 +119,13 @@ struct cpufreq_policy { bool strict_target; /* + * Set if inefficient frequencies were found in the frequency table. + * This indicates if the relation flag CPUFREQ_RELATION_E can be + * honored. + */ + bool efficiencies_available; + + /* * Preferred average time interval between consecutive invocations of * the driver to set the frequency for this policy. To be set by the * scaling driver (0, which is the default, means no preference). 
@@ -273,6 +280,12 @@ static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy #define CPUFREQ_RELATION_L 0 /* lowest frequency at or above target */ #define CPUFREQ_RELATION_H 1 /* highest frequency below or at target */ #define CPUFREQ_RELATION_C 2 /* closest frequency to target */ +/* relation flags */ +#define CPUFREQ_RELATION_E BIT(2) /* Get if possible an efficient frequency */ + +#define CPUFREQ_RELATION_LE (CPUFREQ_RELATION_L | CPUFREQ_RELATION_E) +#define CPUFREQ_RELATION_HE (CPUFREQ_RELATION_H | CPUFREQ_RELATION_E) +#define CPUFREQ_RELATION_CE (CPUFREQ_RELATION_C | CPUFREQ_RELATION_E) struct freq_attr { struct attribute attr; @@ -385,7 +398,7 @@ struct cpufreq_driver { /* flags */ /* - * Set by drivers that need to update internale upper and lower boundaries along + * Set by drivers that need to update internal upper and lower boundaries along * with the target frequency and so the core and governors should also invoke * the diver if the target frequency does not change, but the policy min or max * may have changed. 
@@ -627,9 +640,11 @@ struct cpufreq_governor *cpufreq_fallback_governor(void); static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) { if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_HE); else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_LE); } /* Governor attribute set */ @@ -660,10 +675,11 @@ struct governor_attr { *********************************************************************/ /* Special Values of .frequency field */ -#define CPUFREQ_ENTRY_INVALID ~0u -#define CPUFREQ_TABLE_END ~1u +#define CPUFREQ_ENTRY_INVALID ~0u +#define CPUFREQ_TABLE_END ~1u /* Special Values of .flags field */ -#define CPUFREQ_BOOST_FREQ (1 << 0) +#define CPUFREQ_BOOST_FREQ (1 << 0) +#define CPUFREQ_INEFFICIENT_FREQ (1 << 1) struct cpufreq_frequency_table { unsigned int flags; @@ -740,6 +756,22 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, continue; \ else +/** + * cpufreq_for_each_efficient_entry_idx - iterate with index over a cpufreq + * frequency_table excluding CPUFREQ_ENTRY_INVALID and + * CPUFREQ_INEFFICIENT_FREQ frequencies. + * @pos: the &struct cpufreq_frequency_table to use as a loop cursor. + * @table: the &struct cpufreq_frequency_table to iterate over. + * @idx: the table entry currently being processed. + * @efficiencies: set to true to only iterate over efficient frequencies. 
+ */ + +#define cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) \ + cpufreq_for_each_valid_entry_idx(pos, table, idx) \ + if (efficiencies && (pos->flags & CPUFREQ_INEFFICIENT_FREQ)) \ + continue; \ + else + int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); @@ -764,14 +796,15 @@ bool policy_has_boost_freq(struct cpufreq_policy *policy); /* Find lowest freq at or above target in a table in ascending order */ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { struct cpufreq_frequency_table *table = policy->freq_table; struct cpufreq_frequency_table *pos; unsigned int freq; int idx, best = -1; - cpufreq_for_each_valid_entry_idx(pos, table, idx) { + cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) { freq = pos->frequency; if (freq >= target_freq) @@ -785,14 +818,15 @@ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, /* Find lowest freq at or above target in a table in descending order */ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { struct cpufreq_frequency_table *table = policy->freq_table; struct cpufreq_frequency_table *pos; unsigned int freq; int idx, best = -1; - cpufreq_for_each_valid_entry_idx(pos, table, idx) { + cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) { freq = pos->frequency; if (freq == target_freq) @@ -815,26 +849,30 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, /* Works only on sorted freq-tables */ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { target_freq = clamp_val(target_freq, policy->min, policy->max); if (policy->freq_table_sorted == 
CPUFREQ_TABLE_SORTED_ASCENDING) - return cpufreq_table_find_index_al(policy, target_freq); + return cpufreq_table_find_index_al(policy, target_freq, + efficiencies); else - return cpufreq_table_find_index_dl(policy, target_freq); + return cpufreq_table_find_index_dl(policy, target_freq, + efficiencies); } /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { struct cpufreq_frequency_table *table = policy->freq_table; struct cpufreq_frequency_table *pos; unsigned int freq; int idx, best = -1; - cpufreq_for_each_valid_entry_idx(pos, table, idx) { + cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) { freq = pos->frequency; if (freq == target_freq) @@ -857,14 +895,15 @@ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, /* Find highest freq at or below target in a table in descending order */ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { struct cpufreq_frequency_table *table = policy->freq_table; struct cpufreq_frequency_table *pos; unsigned int freq; int idx, best = -1; - cpufreq_for_each_valid_entry_idx(pos, table, idx) { + cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) { freq = pos->frequency; if (freq <= target_freq) @@ -878,26 +917,30 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, /* Works only on sorted freq-tables */ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { target_freq = clamp_val(target_freq, policy->min, policy->max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) - return cpufreq_table_find_index_ah(policy, target_freq); + return 
cpufreq_table_find_index_ah(policy, target_freq, + efficiencies); else - return cpufreq_table_find_index_dh(policy, target_freq); + return cpufreq_table_find_index_dh(policy, target_freq, + efficiencies); } /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { struct cpufreq_frequency_table *table = policy->freq_table; struct cpufreq_frequency_table *pos; unsigned int freq; int idx, best = -1; - cpufreq_for_each_valid_entry_idx(pos, table, idx) { + cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) { freq = pos->frequency; if (freq == target_freq) @@ -924,14 +967,15 @@ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, /* Find closest freq to target in a table in descending order */ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { struct cpufreq_frequency_table *table = policy->freq_table; struct cpufreq_frequency_table *pos; unsigned int freq; int idx, best = -1; - cpufreq_for_each_valid_entry_idx(pos, table, idx) { + cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) { freq = pos->frequency; if (freq == target_freq) @@ -958,35 +1002,58 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, /* Works only on sorted freq-tables */ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq) + unsigned int target_freq, + bool efficiencies) { target_freq = clamp_val(target_freq, policy->min, policy->max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) - return cpufreq_table_find_index_ac(policy, target_freq); + return cpufreq_table_find_index_ac(policy, target_freq, + efficiencies); else - return cpufreq_table_find_index_dc(policy, target_freq); + return 
cpufreq_table_find_index_dc(policy, target_freq, + efficiencies); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { + bool efficiencies = policy->efficiencies_available && + (relation & CPUFREQ_RELATION_E); + int idx; + + /* cpufreq_table_index_unsorted() has no use for this flag anyway */ + relation &= ~CPUFREQ_RELATION_E; + if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) return cpufreq_table_index_unsorted(policy, target_freq, relation); - +retry: switch (relation) { case CPUFREQ_RELATION_L: - return cpufreq_table_find_index_l(policy, target_freq); + idx = cpufreq_table_find_index_l(policy, target_freq, + efficiencies); + break; case CPUFREQ_RELATION_H: - return cpufreq_table_find_index_h(policy, target_freq); + idx = cpufreq_table_find_index_h(policy, target_freq, + efficiencies); + break; case CPUFREQ_RELATION_C: - return cpufreq_table_find_index_c(policy, target_freq); + idx = cpufreq_table_find_index_c(policy, target_freq, + efficiencies); + break; default: WARN_ON_ONCE(1); return 0; } + + if (idx < 0 && efficiencies) { + efficiencies = false; + goto retry; + } + + return idx; } static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy) @@ -1003,6 +1070,37 @@ static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy return count; } +/** + * cpufreq_table_set_inefficient() - Mark a frequency as inefficient + * @policy: the &struct cpufreq_policy containing the inefficient frequency + * @frequency: the inefficient frequency + * + * The &struct cpufreq_policy must use a sorted frequency table + * + * Return: %0 on success or a negative errno code + */ + +static inline int +cpufreq_table_set_inefficient(struct cpufreq_policy *policy, + unsigned int frequency) +{ + struct cpufreq_frequency_table *pos; + + /* Not supported */ + if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED) + return -EINVAL; + + 
cpufreq_for_each_valid_entry(pos, policy->freq_table) { + if (pos->frequency == frequency) { + pos->flags |= CPUFREQ_INEFFICIENT_FREQ; + policy->efficiencies_available = true; + return 0; + } + } + + return -EINVAL; +} + static inline int parse_perf_domain(int cpu, const char *list_name, const char *cell_name) { @@ -1041,7 +1139,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ if (cpu == pcpu) continue; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name); if (ret < 0) continue; @@ -1071,6 +1169,13 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) return false; } +static inline int +cpufreq_table_set_inefficient(struct cpufreq_policy *policy, + unsigned int frequency) +{ + return -EINVAL; +} + static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, const char *cell_name, struct cpumask *cpumask) { diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 991911048857..411a428ace4d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -99,6 +99,7 @@ enum cpuhp_state { CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, + CPUHP_AP_DTPM_CPU_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -224,6 +225,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, @@ -246,7 +248,6 @@ enum cpuhp_state { CPUHP_AP_MM_DEMOTION_ONLINE, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, - CPUHP_AP_DTPM_CPU_ONLINE, CPUHP_AP_ACTIVE, CPUHP_ONLINE, }; diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index d2b9c41c8edf..d58e0476ee8e 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -34,6 +34,8 @@ */ extern 
struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; +extern struct static_key_false cpusets_insane_config_key; + static inline bool cpusets_enabled(void) { return static_branch_unlikely(&cpusets_enabled_key); @@ -51,6 +53,19 @@ static inline void cpuset_dec(void) static_branch_dec_cpuslocked(&cpusets_pre_enable_key); } +/* + * This will get enabled whenever a cpuset configuration is considered + * unsupportable in general. E.g. movable only node which cannot satisfy + * any non movable allocations (see update_nodemask). Page allocator + * needs to make additional checks for those configurations and this + * check is meant to guard those checks without any overhead for sane + * configurations. + */ +static inline bool cpusets_insane_config(void) +{ + return static_branch_unlikely(&cpusets_insane_config_key); +} + extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); @@ -167,6 +182,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) static inline bool cpusets_enabled(void) { return false; } +static inline bool cpusets_insane_config(void) { return false; } + static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2618577a4d6d..620821549b23 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -8,8 +8,6 @@ #include <linux/pgtable.h> #include <uapi/linux/vmcore.h> -#include <linux/pgtable.h> /* for pgprot_t */ - /* For IS_ENABLED(CONFIG_CRASH_DUMP) */ #define ELFCORE_ADDR_MAX (-1ULL) #define ELFCORE_ADDR_ERR (-2ULL) @@ -91,12 +89,32 @@ static inline void vmcore_unusable(void) elfcorehdr_addr = ELFCORE_ADDR_ERR; } -#define HAVE_OLDMEM_PFN_IS_RAM 1 -extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)); -extern void unregister_oldmem_pfn_is_ram(void); +/** + * struct vmcore_cb - driver callbacks for /proc/vmcore 
handling + * @pfn_is_ram: check whether a PFN really is RAM and should be accessed when + * reading the vmcore. Will return "true" if it is RAM or if the + * callback cannot tell. If any callback returns "false", it's not + * RAM and the page must not be accessed; zeroes should be + * indicated in the vmcore instead. For example, a ballooned page + * contains no data and reading from such a page will cause high + * load in the hypervisor. + * @next: List head to manage registered callbacks internally; initialized by + * register_vmcore_cb(). + * + * vmcore callbacks allow drivers managing physical memory ranges to + * coordinate with vmcore handling code, for example, to prevent accessing + * physical memory ranges that should not be accessed when reading the vmcore, + * although included in the vmcore header as memory ranges to dump. + */ +struct vmcore_cb { + bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn); + struct list_head next; +}; +extern void register_vmcore_cb(struct vmcore_cb *cb); +extern void unregister_vmcore_cb(struct vmcore_cb *cb); #else /* !CONFIG_CRASH_DUMP */ -static inline bool is_kdump_kernel(void) { return 0; } +static inline bool is_kdump_kernel(void) { return false; } #endif /* CONFIG_CRASH_DUMP */ /* Device Dump information to be filled by drivers */ diff --git a/include/linux/cuda.h b/include/linux/cuda.h index 45bfe9d61271..daf3e6f98444 100644 --- a/include/linux/cuda.h +++ b/include/linux/cuda.h @@ -12,7 +12,7 @@ #include <uapi/linux/cuda.h> -extern int find_via_cuda(void); +extern int __init find_via_cuda(void); extern int cuda_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); extern void cuda_poll(void); diff --git a/include/linux/damon.h b/include/linux/damon.h index d68b67b8d458..5e1e3a128b77 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -11,9 +11,18 @@ #include <linux/mutex.h> #include <linux/time64.h> #include <linux/types.h> +#include <linux/random.h> /* Minimal 
region size. Every damon_region is aligned by this. */ #define DAMON_MIN_REGION PAGE_SIZE +/* Max priority score for DAMON-based operation schemes */ +#define DAMOS_MAX_SCORE (99) + +/* Get a random number in [l, r) */ +static inline unsigned long damon_rand(unsigned long l, unsigned long r) +{ + return l + prandom_u32_max(r - l); +} /** * struct damon_addr_range - Represents an address region of [@start, @end). @@ -31,12 +40,22 @@ struct damon_addr_range { * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. * @list: List head for siblings. + * @age: Age of this region. + * + * @age is initially zero, increased for each aggregation interval, and reset + * to zero again if the access frequency is significantly changed. If two + * regions are merged into a new region, both @nr_accesses and @age of the new + * region are set as region size-weighted average of those of the two regions. */ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; struct list_head list; + + unsigned int age; +/* private: Internal value for age calculation. */ + unsigned int last_nr_accesses; }; /** @@ -59,16 +78,194 @@ struct damon_target { struct list_head list; }; +/** + * enum damos_action - Represents an action of a Data Access Monitoring-based + * Operation Scheme. + * + * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. + * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. + * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. + * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_STAT: Do nothing but count the stat. 
+ */ +enum damos_action { + DAMOS_WILLNEED, + DAMOS_COLD, + DAMOS_PAGEOUT, + DAMOS_HUGEPAGE, + DAMOS_NOHUGEPAGE, + DAMOS_STAT, /* Do nothing but only record the stat */ +}; + +/** + * struct damos_quota - Controls the aggressiveness of the given scheme. + * @ms: Maximum milliseconds that the scheme can use. + * @sz: Maximum bytes of memory that the action can be applied. + * @reset_interval: Charge reset interval in milliseconds. + * + * @weight_sz: Weight of the region's size for prioritization. + * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. + * @weight_age: Weight of the region's age for prioritization. + * + * To avoid consuming too much CPU time or IO resources for applying the + * &struct damos->action to large memory, DAMON allows users to set time and/or + * size quotas. The quotas can be set by writing non-zero values to &ms and + * &sz, respectively. If the time quota is set, DAMON tries to use only up to + * &ms milliseconds within &reset_interval for applying the action. If the + * size quota is set, DAMON tries to apply the action only up to &sz bytes + * within &reset_interval. + * + * Internally, the time quota is transformed to a size quota using estimated + * throughput of the scheme's action. DAMON then compares it against &sz and + * uses smaller one as the effective quota. + * + * For selecting regions within the quota, DAMON prioritizes current scheme's + * target memory regions using the &struct damon_primitive->get_scheme_score. + * You could customize the prioritization logic by setting &weight_sz, + * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * encouraged to respect those. 
+ */ +struct damos_quota { + unsigned long ms; + unsigned long sz; + unsigned long reset_interval; + + unsigned int weight_sz; + unsigned int weight_nr_accesses; + unsigned int weight_age; + +/* private: */ + /* For throughput estimation */ + unsigned long total_charged_sz; + unsigned long total_charged_ns; + + unsigned long esz; /* Effective size quota in bytes */ + + /* For charging the quota */ + unsigned long charged_sz; + unsigned long charged_from; + struct damon_target *charge_target_from; + unsigned long charge_addr_from; + + /* For prioritization */ + unsigned long histogram[DAMOS_MAX_SCORE + 1]; + unsigned int min_score; +}; + +/** + * enum damos_wmark_metric - Represents the watermark metric. + * + * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. + * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + */ +enum damos_wmark_metric { + DAMOS_WMARK_NONE, + DAMOS_WMARK_FREE_MEM_RATE, +}; + +/** + * struct damos_watermarks - Controls when a given scheme should be activated. + * @metric: Metric for the watermarks. + * @interval: Watermarks check time interval in microseconds. + * @high: High watermark. + * @mid: Middle watermark. + * @low: Low watermark. + * + * If &metric is &DAMOS_WMARK_NONE, the scheme is always active. Being active + * means DAMON does monitoring and applying the action of the scheme to + * appropriate memory regions. Else, DAMON checks &metric of the system for at + * least every &interval microseconds and works as below. + * + * If &metric is higher than &high, the scheme is inactivated. If &metric is + * between &mid and &low, the scheme is activated. If &metric is lower than + * &low, the scheme is inactivated. + */ +struct damos_watermarks { + enum damos_wmark_metric metric; + unsigned long interval; + unsigned long high; + unsigned long mid; + unsigned long low; + +/* private: */ + bool activated; +}; + +/** + * struct damos_stat - Statistics on a given scheme. 
+ * @nr_tried: Total number of regions that the scheme is tried to be applied. + * @sz_tried: Total size of regions that the scheme is tried to be applied. + * @nr_applied: Total number of regions that the scheme is applied. + * @sz_applied: Total size of regions that the scheme is applied. + * @qt_exceeds: Total number of times the quota of the scheme has exceeded. + */ +struct damos_stat { + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long qt_exceeds; +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @min_sz_region: Minimum size of target regions. + * @max_sz_region: Maximum size of target regions. + * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. + * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. + * @min_age_region: Minimum age of target regions. + * @max_age_region: Maximum age of target regions. + * @action: &damo_action to be applied to the target regions. + * @quota: Control the aggressiveness of this scheme. + * @wmarks: Watermarks for automated (in)activation of this scheme. + * @stat: Statistics of this scheme. + * @list: List head for siblings. + * + * For each aggregation interval, DAMON finds regions which fit in the + * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, + * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to + * those. To avoid consuming too much CPU time or IO resources for the + * &action, &quota is used. + * + * To do the work only when needed, schemes can be activated for specific + * system situations using &wmarks. If all schemes that registered to the + * monitoring context are inactive, DAMON stops monitoring either, and just + * repeatedly checks the watermarks. + * + * If all schemes that registered to a &struct damon_ctx are inactive, DAMON + * stops monitoring and just repeatedly checks the watermarks. 
+ * + * After applying the &action to each region, &stat_count and &stat_sz is + * updated to reflect the number of regions and total size of regions that the + * &action is applied. + */ +struct damos { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; + enum damos_action action; + struct damos_quota quota; + struct damos_watermarks wmarks; + struct damos_stat stat; + struct list_head list; +}; + struct damon_ctx; /** - * struct damon_primitive Monitoring primitives for given use cases. + * struct damon_primitive - Monitoring primitives for given use cases. * * @init: Initialize primitive-internal data structures. * @update: Update primitive-internal data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. * @reset_aggregated: Reset aggregated accesses monitoring results. + * @get_scheme_score: Get the score of a region for a scheme. + * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. * @cleanup: Clean up the context. * @@ -94,6 +291,12 @@ struct damon_ctx; * of its update. The value will be used for regions adjustment threshold. * @reset_aggregated should reset the access monitoring results that aggregated * by @check_accesses. + * @get_scheme_score should return the priority score of a region for a scheme + * as an integer in [0, &DAMOS_MAX_SCORE]. + * @apply_scheme is called from @kdamond when a region for user provided + * DAMON-based operation scheme is found. It should apply the scheme's action + * to the region and return bytes of the region that the action is successfully + * applied. * @target_valid should check whether the target is still valid for the * monitoring. * @cleanup is called from @kdamond just before its termination. 
@@ -104,12 +307,18 @@ struct damon_primitive { void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); void (*reset_aggregated)(struct damon_ctx *context); + int (*get_scheme_score)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + unsigned long (*apply_scheme)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); bool (*target_valid)(void *target); void (*cleanup)(struct damon_ctx *context); }; -/* - * struct damon_callback Monitoring events notification callbacks. +/** + * struct damon_callback - Monitoring events notification callbacks. * * @before_start: Called before starting the monitoring. * @after_sampling: Called after each sampling. @@ -136,7 +345,7 @@ struct damon_callback { int (*before_start)(struct damon_ctx *context); int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); - int (*before_terminate)(struct damon_ctx *context); + void (*before_terminate)(struct damon_ctx *context); }; /** @@ -182,6 +391,7 @@ struct damon_callback { * @min_nr_regions: The minimum number of adaptive monitoring regions. * @max_nr_regions: The maximum number of adaptive monitoring regions. * @adaptive_targets: Head of monitoring targets (&damon_target) list. + * @schemes: Head of schemes (&damos) list. 
*/ struct damon_ctx { unsigned long sample_interval; @@ -194,7 +404,6 @@ struct damon_ctx { /* public: */ struct task_struct *kdamond; - bool kdamond_stop; struct mutex kdamond_lock; struct damon_primitive primitive; @@ -203,13 +412,23 @@ struct damon_ctx { unsigned long min_nr_regions; unsigned long max_nr_regions; struct list_head adaptive_targets; + struct list_head schemes; }; -#define damon_next_region(r) \ - (container_of(r->list.next, struct damon_region, list)) +static inline struct damon_region *damon_next_region(struct damon_region *r) +{ + return container_of(r->list.next, struct damon_region, list); +} -#define damon_prev_region(r) \ - (container_of(r->list.prev, struct damon_region, list)) +static inline struct damon_region *damon_prev_region(struct damon_region *r) +{ + return container_of(r->list.prev, struct damon_region, list); +} + +static inline struct damon_region *damon_last_region(struct damon_target *t) +{ + return list_last_entry(&t->regions_list, struct damon_region, list); +} #define damon_for_each_region(r, t) \ list_for_each_entry(r, &t->regions_list, list) @@ -223,17 +442,42 @@ struct damon_ctx { #define damon_for_each_target_safe(t, next, ctx) \ list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) +#define damon_for_each_scheme(s, ctx) \ + list_for_each_entry(s, &(ctx)->schemes, list) + +#define damon_for_each_scheme_safe(s, next, ctx) \ + list_for_each_entry_safe(s, next, &(ctx)->schemes, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); -inline void damon_insert_region(struct damon_region *r, + +/* + * Add a region between two other regions + */ +static inline void damon_insert_region(struct damon_region *r, struct damon_region *prev, struct damon_region *next, - struct damon_target *t); + struct damon_target *t) +{ + __list_add(&r->list, &prev->list, &next->list); + t->nr_regions++; +} + void damon_add_region(struct damon_region *r, struct damon_target *t); void 
damon_destroy_region(struct damon_region *r, struct damon_target *t); +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); +void damon_destroy_scheme(struct damos *s); + struct damon_target *damon_new_target(unsigned long id); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); +bool damon_targets_empty(struct damon_ctx *ctx); void damon_free_target(struct damon_target *t); void damon_destroy_target(struct damon_target *t); unsigned int damon_nr_regions(struct damon_target *t); @@ -245,6 +489,8 @@ int damon_set_targets(struct damon_ctx *ctx, int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, unsigned long aggr_int, unsigned long primitive_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg); +int damon_set_schemes(struct damon_ctx *ctx, + struct damos **schemes, ssize_t nr_schemes); int damon_nr_running_ctxs(void); int damon_start(struct damon_ctx **ctxs, int nr_ctxs); @@ -253,16 +499,13 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #endif /* CONFIG_DAMON */ #ifdef CONFIG_DAMON_VADDR - -/* Monitoring primitives for virtual memory address spaces */ -void damon_va_init(struct damon_ctx *ctx); -void damon_va_update(struct damon_ctx *ctx); -void damon_va_prepare_access_checks(struct damon_ctx *ctx); -unsigned int damon_va_check_accesses(struct damon_ctx *ctx); bool damon_va_target_valid(void *t); -void damon_va_cleanup(struct damon_ctx *ctx); void damon_va_set_primitives(struct damon_ctx *ctx); - #endif /* CONFIG_DAMON_VADDR */ +#ifdef CONFIG_DAMON_PADDR +bool damon_pa_target_valid(void *t); +void damon_pa_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_PADDR */ + #endif /* _DAMON_H */ 
diff --git a/include/linux/dax.h b/include/linux/dax.h index 2619d94c308d..9fc5f99a0ae2 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -6,14 +6,14 @@ #include <linux/mm.h> #include <linux/radix-tree.h> -/* Flag for synchronous flush */ -#define DAXDEV_F_SYNC (1UL << 0) - typedef unsigned long dax_entry_t; +struct dax_device; +struct gendisk; struct iomap_ops; +struct iomap_iter; struct iomap; -struct dax_device; + struct dax_operations { /* * direct_access: translate a device-relative @@ -28,35 +28,18 @@ struct dax_operations { */ bool (*dax_supported)(struct dax_device *, struct block_device *, int, sector_t, sector_t); - /* copy_from_iter: required operation for fs-dax direct-i/o */ - size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); - /* copy_to_iter: required operation for fs-dax direct-i/o */ - size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, - struct iov_iter *); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); }; -extern struct attribute_group dax_attribute_group; - #if IS_ENABLED(CONFIG_DAX) -struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops, unsigned long flags); +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); -bool __dax_synchronous(struct dax_device *dax_dev); -static inline bool dax_synchronous(struct dax_device *dax_dev) -{ - return __dax_synchronous(dax_dev); -} -void __set_dax_synchronous(struct dax_device *dax_dev); -static inline void set_dax_synchronous(struct dax_device *dax_dev) -{ - __set_dax_synchronous(dax_dev); -} +bool dax_synchronous(struct dax_device *dax_dev); +void set_dax_synchronous(struct dax_device *dax_dev); 
/* * Check if given mapping is supported by the file / underlying device. */ @@ -70,8 +53,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return dax_synchronous(dax_dev); } #else -static inline struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops, unsigned long flags) +static inline struct dax_device *alloc_dax(void *private, + const struct dax_operations *ops) { /* * Callers should check IS_ENABLED(CONFIG_DAX) to know if this @@ -106,48 +89,46 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, } #endif -struct writeback_control; -int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); -#if IS_ENABLED(CONFIG_FS_DAX) -bool generic_fsdax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors); - -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len); +void set_dax_nocache(struct dax_device *dax_dev); +void set_dax_nomc(struct dax_device *dax_dev); +struct writeback_control; +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); +void dax_remove_host(struct gendisk *disk); +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, + u64 *start_off); static inline void fs_put_dax(struct dax_device *dax_dev) { put_dax(dax_dev); } - -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); -int dax_writeback_mapping_range(struct address_space *mapping, - struct dax_device *dax_dev, struct writeback_control *wbc); - -struct page *dax_layout_busy_page(struct address_space *mapping); -struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); #else -#define generic_fsdax_supported NULL - -static inline 
bool dax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t len) +static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { - return false; + return 0; } - -static inline void fs_put_dax(struct dax_device *dax_dev) +static inline void dax_remove_host(struct gendisk *disk) { } - -static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) +static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, + u64 *start_off) { return NULL; } +static inline void fs_put_dax(struct dax_device *dax_dev) +{ +} +#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ + +#if IS_ENABLED(CONFIG_FS_DAX) +int dax_writeback_mapping_range(struct address_space *mapping, + struct dax_device *dax_dev, struct writeback_control *wbc); +struct page *dax_layout_busy_page(struct address_space *mapping); +struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); +dax_entry_t dax_lock_page(struct page *page); +void dax_unlock_page(struct page *page, dax_entry_t cookie); +#else static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; @@ -176,6 +157,11 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) } #endif +int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, + const struct iomap_ops *ops); +int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, + const struct iomap_ops *ops); + #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); @@ -210,7 +196,6 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && 
IS_DAX(mapping->host); diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3f49e65169c6..dbb409d77d4f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -47,8 +47,6 @@ extern int debug_locks_off(void); # define locking_selftest() do { } while (0) #endif -struct task_struct; - #ifdef CONFIG_LOCKDEP extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 868e9eacd69e..9192986b1a73 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -25,13 +25,21 @@ #define STATIC_RW_DATA static #endif +/* + * When an architecture needs to share the malloc()/free() implementation + * between compilation units, it needs to have non-local visibility. + */ +#ifndef MALLOC_VISIBLE +#define MALLOC_VISIBLE static +#endif + /* A trivial malloc implementation, adapted from * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 */ STATIC_RW_DATA unsigned long malloc_ptr; STATIC_RW_DATA int malloc_count; -static void *malloc(int size) +MALLOC_VISIBLE void *malloc(int size) { void *p; @@ -52,7 +60,7 @@ static void *malloc(int size) return p; } -static void free(void *where) +MALLOC_VISIBLE void free(void *where) { malloc_count--; if (!malloc_count) diff --git a/include/linux/delay.h b/include/linux/delay.h index 1d0e2ce6b6d9..039e7e0c7378 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -19,7 +19,8 @@ * https://lists.openwall.net/linux-kernel/2011/01/09/56 */ -#include <linux/kernel.h> +#include <linux/math.h> +#include <linux/sched.h> extern unsigned long loops_per_jiffy; @@ -58,7 +59,18 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -void usleep_range(unsigned long min, unsigned long max); +void usleep_range_state(unsigned long min, 
unsigned long max, + unsigned int state); + +static inline void usleep_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); +} + +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} static inline void ssleep(unsigned int seconds) { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 114553b487ef..b26fecf6c8e8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); @@ -200,8 +198,6 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; - dm_dax_copy_iter_fn dax_copy_from_iter; - dm_dax_copy_iter_fn dax_copy_to_iter; dm_dax_zero_page_range_fn dax_zero_page_range; /* For internal device-mapper use. */ @@ -576,9 +572,9 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *t); /* - * Table keyslot manager functions + * Table blk_crypto_profile functions */ -void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); +void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); /*----------------------------------------------------------------- * Macros. 
diff --git a/include/linux/device.h b/include/linux/device.h index e270cb740b9e..93459724dcde 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -45,6 +45,7 @@ struct iommu_ops; struct iommu_group; struct dev_pin_info; struct dev_iommu; +struct msi_device_data; /** * struct subsys_interface - interfaces to device functions @@ -372,6 +373,20 @@ struct dev_links_info { }; /** + * struct dev_msi_info - Device data related to MSI + * @domain: The MSI interrupt domain associated to the device + * @data: Pointer to MSI device data + */ +struct dev_msi_info { +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + struct irq_domain *domain; +#endif +#ifdef CONFIG_GENERIC_MSI_IRQ + struct msi_device_data *data; +#endif +}; + +/** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. * In most cases, a parent device is some sort of bus or host @@ -407,9 +422,7 @@ struct dev_links_info { * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. - * @msi_lock: Lock to protect MSI mask cache and mask register - * @msi_list: Hosts MSI descriptors - * @msi_domain: The generic MSI domain this device is using. + * @msi: MSI related data * @numa_node: NUMA node this device is close to. * @dma_ops: DMA mapping operations for this device. * @dma_mask: Dma mask (if dma'ble device). 
@@ -501,16 +514,10 @@ struct device { struct em_perf_domain *em_pd; #endif -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - struct irq_domain *msi_domain; -#endif #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif -#ifdef CONFIG_GENERIC_MSI_IRQ - raw_spinlock_t msi_lock; - struct list_head msi_list; -#endif + struct dev_msi_info msi; #ifdef CONFIG_DMA_OPS const struct dma_map_ops *dma_ops; #endif @@ -668,7 +675,7 @@ static inline void set_dev_node(struct device *dev, int node) static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - return dev->msi_domain; + return dev->msi.domain; #else return NULL; #endif @@ -677,7 +684,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - dev->msi_domain = d; + dev->msi.domain = d; #endif } diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 062777a45a74..a039ab809753 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -143,6 +143,7 @@ int device_match_of_node(struct device *dev, const void *np); int device_match_fwnode(struct device *dev, const void *fwnode); int device_match_devt(struct device *dev, const void *pdevt); int device_match_acpi_dev(struct device *dev, const void *adev); +int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); /* iterator helpers for buses */ diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index a498ebcf4993..15e7c5e15d62 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -18,6 +18,7 @@ #include <linux/klist.h> #include <linux/pm.h> #include <linux/device/bus.h> +#include <linux/module.h> /** * enum probe_type - device driver probe type to try diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 
8b32b4bdd590..7ab50076e7a6 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -86,8 +86,8 @@ struct dma_buf_ops { * @pin: * * This is called by dma_buf_pin() and lets the exporter know that the - * DMA-buf can't be moved any more. The exporter should pin the buffer - * into system memory to make sure it is generally accessible by other + * DMA-buf can't be moved any more. Ideally, the exporter should + * pin the buffer so that it is generally accessible by all * devices. * * This is called with the &dmabuf.resv object locked and is mutual @@ -420,20 +420,27 @@ struct dma_buf { * - Dynamic importers should set fences for any access that they can't * disable immediately from their &dma_buf_attach_ops.move_notify * callback. + * + * IMPORTANT: + * + * All drivers must obey the struct dma_resv rules, specifically the + * rules for updating fences, see &dma_resv.fence_excl and + * &dma_resv.fence. If these dependency rules are broken access tracking + * can be lost resulting in use after free issues. */ struct dma_resv *resv; /** @poll: for userspace poll support */ wait_queue_head_t poll; - /** @cb_excl: for userspace poll support */ - /** @cb_shared: for userspace poll support */ + /** @cb_in: for userspace poll support */ + /** @cb_out: for userspace poll support */ struct dma_buf_poll_cb_t { struct dma_fence_cb cb; wait_queue_head_t *poll; __poll_t active; - } cb_excl, cb_shared; + } cb_in, cb_out; #ifdef CONFIG_DMABUF_SYSFS_STATS /** * @sysfs_entry: diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 6ffb4b2c6371..1ea691753bd3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -214,19 +214,15 @@ struct dma_fence_ops { * Custom wait implementation, defaults to dma_fence_default_wait() if * not set. * - * The dma_fence_default_wait implementation should work for any fence, as long - * as @enable_signaling works correctly. 
This hook allows drivers to - * have an optimized version for the case where a process context is - * already available, e.g. if @enable_signaling for the general case - * needs to set up a worker thread. + * Deprecated and should not be used by new implementations. Only used + * by existing implementations which need special handling for their + * hardware reset procedure. * * Must return -ERESTARTSYS if the wait is intr = true and the wait was * interrupted, and remaining jiffies if fence has signaled, or 0 if wait * timed out. Can also return other error values on custom implementations, * which should be treated as if the fence is signaled. For example a hardware * lockup could be reported like that. - * - * This callback is optional. */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); @@ -268,6 +264,7 @@ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, void dma_fence_release(struct kref *kref); void dma_fence_free(struct dma_fence *fence); +void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq); /** * dma_fence_put - decreases refcount of the fence @@ -590,26 +587,4 @@ struct dma_fence *dma_fence_get_stub(void); struct dma_fence *dma_fence_allocate_private_stub(void); u64 dma_fence_context_alloc(unsigned num); -#define DMA_FENCE_TRACE(f, fmt, args...) \ - do { \ - struct dma_fence *__ff = (f); \ - if (IS_ENABLED(CONFIG_DMA_FENCE_TRACE)) \ - pr_info("f %llu#%llu: " fmt, \ - __ff->context, __ff->seqno, ##args); \ - } while (0) - -#define DMA_FENCE_WARN(f, fmt, args...) \ - do { \ - struct dma_fence *__ff = (f); \ - pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ - ##args); \ - } while (0) - -#define DMA_FENCE_ERR(f, fmt, args...) 
\ - do { \ - struct dma_fence *__ff = (f); \ - pr_err("f %llu#%llu: " fmt, __ff->context, __ff->seqno, \ - ##args); \ - } while (0) - #endif /* __LINUX_DMA_FENCE_H */ diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index e1ca2080a1ff..eebf04325b34 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -62,19 +62,211 @@ struct dma_resv_list { /** * struct dma_resv - a reservation object manages fences for a buffer - * @lock: update side lock - * @seq: sequence count for managing RCU read-side synchronization - * @fence_excl: the exclusive fence, if there is one currently - * @fence: list of current shared fences + * + * There are multiple uses for this, with sometimes slightly different rules in + * how the fence slots are used. + * + * One use is to synchronize cross-driver access to a struct dma_buf, either for + * dynamic buffer management or just to handle implicit synchronization between + * different users of the buffer in userspace. See &dma_buf.resv for a more + * in-depth discussion. + * + * The other major use is to manage access and locking within a driver in a + * buffer based memory manager. struct ttm_buffer_object is the canonical + * example here, since this is where reservation objects originated from. But + * use in drivers is spreading and some drivers also manage struct + * drm_gem_object with the same scheme. */ struct dma_resv { + /** + * @lock: + * + * Update side lock. Don't use directly, instead use the wrapper + * functions like dma_resv_lock() and dma_resv_unlock(). + * + * Drivers which use the reservation object to manage memory dynamically + * also use this lock to protect buffer object state like placement, + * allocation policies or throughout command submission. + */ struct ww_mutex lock; + + /** + * @seq: + * + * Sequence count for managing RCU read-side synchronization, allows + * read-only access to @fence_excl and @fence while ensuring we take a + * consistent snapshot. 
+ */ seqcount_ww_mutex_t seq; + /** + * @fence_excl: + * + * The exclusive fence, if there is one currently. + * + * There are two ways to update this fence: + * + * - First by calling dma_resv_add_excl_fence(), which replaces all + * fences attached to the reservation object. To guarantee that no + * fences are lost, this new fence must signal only after all previous + * fences, both shared and exclusive, have signalled. In some cases it + * is convenient to achieve that by attaching a struct dma_fence_array + * with all the new and old fences. + * + * - Alternatively the fence can be set directly, which leaves the + * shared fences unchanged. To guarantee that no fences are lost, this + * new fence must signal only after the previous exclusive fence has + * signalled. Since the shared fences are staying intact, it is not + * necessary to maintain any ordering against those. If semantically + * only a new access is added without actually treating the previous + * one as a dependency the exclusive fences can be strung together + * using struct dma_fence_chain. + * + * Note that actual semantics of what an exclusive or shared fence mean + * is defined by the user, for reservation objects shared across drivers + * see &dma_buf.resv. + */ struct dma_fence __rcu *fence_excl; + + /** + * @fence: + * + * List of current shared fences. + * + * There are no ordering constraints of shared fences against the + * exclusive fence slot. If a waiter needs to wait for all access, it + * has to wait for both sets of fences to signal. + * + * A new fence is added by calling dma_resv_add_shared_fence(). Since + * this often needs to be done past the point of no return in command + * submission it cannot fail, and therefore sufficient slots need to be + * reserved by calling dma_resv_reserve_shared(). + * + * Note that actual semantics of what an exclusive or shared fence mean + * is defined by the user, for reservation objects shared across drivers + * see &dma_buf.resv. 
+ */ struct dma_resv_list __rcu *fence; }; +/** + * struct dma_resv_iter - current position into the dma_resv fences + * + * Don't touch this directly in the driver, use the accessor function instead. + */ +struct dma_resv_iter { + /** @obj: The dma_resv object we iterate over */ + struct dma_resv *obj; + + /** @all_fences: If all fences should be returned */ + bool all_fences; + + /** @fence: the currently handled fence */ + struct dma_fence *fence; + + /** @seq: sequence number to check for modifications */ + unsigned int seq; + + /** @index: index into the shared fences */ + unsigned int index; + + /** @fences: the shared fences; private, *MUST* not dereference */ + struct dma_resv_list *fences; + + /** @shared_count: number of shared fences */ + unsigned int shared_count; + + /** @is_restarted: true if this is the first returned fence */ + bool is_restarted; +}; + +struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor); +struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor); +struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor); +struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor); + +/** + * dma_resv_iter_begin - initialize a dma_resv_iter object + * @cursor: The dma_resv_iter object to initialize + * @obj: The dma_resv object which we want to iterate over + * @all_fences: If all fences should be returned or just the exclusive one + */ +static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor, + struct dma_resv *obj, + bool all_fences) +{ + cursor->obj = obj; + cursor->all_fences = all_fences; + cursor->fence = NULL; +} + +/** + * dma_resv_iter_end - cleanup a dma_resv_iter object + * @cursor: the dma_resv_iter object which should be cleaned up + * + * Make sure that the reference to the fence in the cursor is properly + * dropped. 
+ */ +static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) +{ + dma_fence_put(cursor->fence); +} + +/** + * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * @cursor: the cursor of the current position + * + * Returns true if the currently returned fence is the exclusive one. + */ +static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{ + return cursor->index == 0; +} + +/** + * dma_resv_iter_is_restarted - test if this is the first fence after a restart + * @cursor: the cursor with the current position + * + * Return true if this is the first fence in an iteration after a restart. + */ +static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) +{ + return cursor->is_restarted; +} + +/** + * dma_resv_for_each_fence_unlocked - unlocked fence iterator + * @cursor: a struct dma_resv_iter pointer + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object without holding the + * &dma_resv.lock and using RCU instead. The cursor needs to be initialized + * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside + * the iterator a reference to the dma_fence is held and the RCU lock dropped. + * When the dma_resv is modified the iteration starts over again. + */ +#define dma_resv_for_each_fence_unlocked(cursor, fence) \ + for (fence = dma_resv_iter_first_unlocked(cursor); \ + fence; fence = dma_resv_iter_next_unlocked(cursor)) + +/** + * dma_resv_for_each_fence - fence iterator + * @cursor: a struct dma_resv_iter pointer + * @obj: a dma_resv object pointer + * @all_fences: true if all fences should be returned + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object while holding the + * &dma_resv.lock. @all_fences controls if the shared fences are returned as + * well. 
The cursor initialisation is part of the iterator and the fence stays + * valid as long as the lock is held and so no extra reference to the fence is + * taken. + */ +#define dma_resv_for_each_fence(cursor, obj, all_fences, fence) \ + for (dma_resv_iter_begin(cursor, obj, all_fences), \ + fence = dma_resv_iter_first(cursor); fence; \ + fence = dma_resv_iter_next(cursor)) + #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) @@ -98,6 +290,13 @@ static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {} * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation * object may be locked by itself by passing NULL as @ctx. + * + * When a die situation is indicated by returning -EDEADLK all locks held by + * @ctx must be unlocked and then dma_resv_lock_slow() called on @obj. + * + * Unlocked by calling dma_resv_unlock(). + * + * See also dma_resv_lock_interruptible() for the interruptible variant. */ static inline int dma_resv_lock(struct dma_resv *obj, struct ww_acquire_ctx *ctx) @@ -119,6 +318,12 @@ static inline int dma_resv_lock(struct dma_resv *obj, * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation * object may be locked by itself by passing NULL as @ctx. + * + * When a die situation is indicated by returning -EDEADLK all locks held by + * @ctx must be unlocked and then dma_resv_lock_slow_interruptible() called on + * @obj. + * + * Unlocked by calling dma_resv_unlock(). */ static inline int dma_resv_lock_interruptible(struct dma_resv *obj, struct ww_acquire_ctx *ctx) @@ -134,6 +339,8 @@ static inline int dma_resv_lock_interruptible(struct dma_resv *obj, * Acquires the reservation object after a die case. This function * will sleep until the lock becomes available. See dma_resv_lock() as * well. 
+ * + * See also dma_resv_lock_slow_interruptible() for the interruptible variant. */ static inline void dma_resv_lock_slow(struct dma_resv *obj, struct ww_acquire_ctx *ctx) @@ -167,13 +374,13 @@ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj, * if they overlap with a writer. * * Also note that since no context is provided, no deadlock protection is - * possible. + * possible, which is also not needed for a trylock. * * Returns true if the lock was acquired, false otherwise. */ static inline bool __must_check dma_resv_trylock(struct dma_resv *obj) { - return ww_mutex_trylock(&obj->lock); + return ww_mutex_trylock(&obj->lock, NULL); } /** @@ -193,6 +400,11 @@ static inline bool dma_resv_is_locked(struct dma_resv *obj) * * Returns the context used to lock a reservation object or NULL if no context * was used or the object is not locked at all. + * + * WARNING: This interface is pretty horrible, but TTM needs it because it + * doesn't pass the struct ww_acquire_ctx around in some very long callchains. + * Everyone else just uses it to check whether they're holding a reservation or + * not. */ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj) { @@ -229,32 +441,6 @@ dma_resv_excl_fence(struct dma_resv *obj) } /** - * dma_resv_get_excl_unlocked - get the reservation object's - * exclusive fence, without lock held. - * @obj: the reservation object - * - * If there is an exclusive fence, this atomically increments it's - * reference count and returns it. 
- * - * RETURNS - * The exclusive fence or NULL if none - */ -static inline struct dma_fence * -dma_resv_get_excl_unlocked(struct dma_resv *obj) -{ - struct dma_fence *fence; - - if (!rcu_access_pointer(obj->fence_excl)) - return NULL; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&obj->fence_excl); - rcu_read_unlock(); - - return fence; -} - -/** * dma_resv_shared_list - get the reservation object's shared fence list * @obj: the reservation object * @@ -278,5 +464,6 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout); bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all); +void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq); #endif /* _LINUX_RESERVATION_H */ diff --git a/include/linux/dma/qcom_adm.h b/include/linux/dma/qcom_adm.h new file mode 100644 index 000000000000..af20df674f0c --- /dev/null +++ b/include/linux/dma/qcom_adm.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_DMA_QCOM_ADM_H +#define __LINUX_DMA_QCOM_ADM_H + +#include <linux/types.h> + +struct qcom_adm_peripheral_config { + u32 crci; + u32 mux; +}; + +#endif /* __LINUX_DMA_QCOM_ADM_H */ diff --git a/include/linux/dma/xilinx_dpdma.h b/include/linux/dma/xilinx_dpdma.h new file mode 100644 index 000000000000..83a1377f03f8 --- /dev/null +++ b/include/linux/dma/xilinx_dpdma.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __LINUX_DMA_XILINX_DPDMA_H +#define __LINUX_DMA_XILINX_DPDMA_H + +#include <linux/types.h> + +struct xilinx_dpdma_peripheral_config { + bool video_group; +}; + +#endif /* __LINUX_DMA_XILINX_DPDMA_H */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e5c2c9e71bf1..0349b35235e6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -418,9 +418,6 @@ enum dma_slave_buswidth { * @device_fc: Flow Controller Settings. Only valid for slave channels. 
Fill * with 'true' if peripheral should be flow controller. Direction will be * selected at Runtime. - * @slave_id: Slave requester id. Only valid for slave channels. The dma - * slave peripheral will have unique id as dma requester which need to be - * pass as slave config. * @peripheral_config: peripheral configuration for programming peripheral * for dmaengine transfer * @peripheral_size: peripheral configuration buffer size @@ -448,7 +445,6 @@ struct dma_slave_config { u32 src_port_window_size; u32 dst_port_window_size; bool device_fc; - unsigned int slave_id; void *peripheral_config; size_t peripheral_size; }; @@ -944,10 +940,8 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); void (*device_release)(struct dma_device *dev); /* debugfs support */ -#ifdef CONFIG_DEBUG_FS void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev); struct dentry *dbg_dev_root; -#endif }; static inline int dmaengine_slave_config(struct dma_chan *chan, diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e04436a7ff27..45e903d84733 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -131,6 +131,14 @@ static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg) return 0; } +#ifdef CONFIG_DMAR_DEBUG +void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, + unsigned long long addr, u32 pasid); +#else +static inline void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, + unsigned long long addr, u32 pasid) {} +#endif + #ifdef CONFIG_INTEL_IOMMU extern int iommu_detected, no_iommu; extern int intel_iommu_init(void); diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index 0aad774beaec..b87c3b85a166 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -26,7 +26,7 @@ struct dnotify_struct { FS_MODIFY | FS_MODIFY_CHILD |\ FS_ACCESS | FS_ACCESS_CHILD |\ FS_ATTRIB | FS_ATTRIB_CHILD |\ - FS_CREATE | FS_DN_RENAME |\ + FS_CREATE | FS_RENAME |\ FS_MOVED_FROM | FS_MOVED_TO) 
extern int dir_notify_enable; diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index c7fa4a3498fe..939a1beaddf7 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -7,8 +7,10 @@ #include <linux/refcount.h> #include <linux/types.h> +#include <net/dsa.h> struct dsa_switch; +struct dsa_port; struct sk_buff; struct net_device; @@ -36,18 +38,16 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, - struct net_device *br, - int bridge_num); + struct dsa_bridge bridge); void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, - struct net_device *br, - int bridge_num); + struct dsa_bridge bridge); -u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num); +u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num); -u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); +u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp); -u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); +u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp); int dsa_8021q_rx_switch_id(u16 vid); diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h index 5a3470bcc8a7..b8fef35591aa 100644 --- a/include/linux/dsa/loop.h +++ b/include/linux/dsa/loop.h @@ -2,6 +2,7 @@ #ifndef DSA_LOOP_H #define DSA_LOOP_H +#include <linux/if_vlan.h> #include <linux/types.h> #include <linux/ethtool.h> #include <net/dsa.h> diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 8ae999f587c4..dca2969015d8 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -8,10 +8,12 @@ #include <linux/kthread.h> #include <linux/packing.h> #include <linux/skbuff.h> +#include <net/dsa.h> struct ocelot_skb_cb { struct sk_buff *clone; unsigned int ptp_class; /* valid only for clones */ + u32 tstamp_lo; u8 ptp_cmd; u8 ts_id; }; @@ -167,11 +169,18 @@ 
struct felix_deferred_xmit_work { struct kthread_work work; }; -struct felix_port { +struct ocelot_8021q_tagger_data { void (*xmit_work_fn)(struct kthread_work *work); - struct kthread_worker *xmit_worker; }; +static inline struct ocelot_8021q_tagger_data * +ocelot_8021q_tagger_data(struct dsa_switch *ds) +{ + BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_OCELOT_8021Q); + + return ds->tagger_data; +} + static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val) { packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0); @@ -242,9 +251,9 @@ static inline void ocelot_ifh_set_tag_type(void *injection, u64 tag_type) packing(injection, &tag_type, 16, 16, OCELOT_TAG_LEN, PACK, 0); } -static inline void ocelot_ifh_set_vid(void *injection, u64 vid) +static inline void ocelot_ifh_set_vlan_tci(void *injection, u64 vlan_tci) { - packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0); + packing(injection, &vlan_tci, 15, 0, OCELOT_TAG_LEN, PACK, 0); } /* Determine the PTP REW_OP to use for injecting the given skb */ diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 9e07079528a5..159e43171ccc 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -35,23 +35,26 @@ #define SJA1105_META_SMAC 0x222222222222ull #define SJA1105_META_DMAC 0x0180C200000Eull -#define SJA1105_HWTS_RX_EN 0 +enum sja1110_meta_tstamp { + SJA1110_META_TSTAMP_TX = 0, + SJA1110_META_TSTAMP_RX = 1, +}; -/* Global tagger data: each struct sja1105_port has a reference to - * the structure defined in struct sja1105_private. 
- */ +struct sja1105_deferred_xmit_work { + struct dsa_port *dp; + struct sk_buff *skb; + struct kthread_work work; +}; + +/* Global tagger data */ struct sja1105_tagger_data { - struct sk_buff *stampable_skb; - /* Protects concurrent access to the meta state machine - * from taggers running on multiple ports on SMP systems - */ - spinlock_t meta_lock; - unsigned long state; - u8 ts_id; - /* Used on SJA1110 where meta frames are generated only for - * 2-step TX timestamps - */ - struct sk_buff_head skb_txtstamp_queue; + /* Tagger to switch */ + void (*xmit_work_fn)(struct kthread_work *work); + void (*meta_tstamp_handler)(struct dsa_switch *ds, int port, u8 ts_id, + enum sja1110_meta_tstamp dir, u64 tstamp); + /* Switch to tagger */ + bool (*rxtstamp_get_state)(struct dsa_switch *ds); + void (*rxtstamp_set_state)(struct dsa_switch *ds, bool on); }; struct sja1105_skb_cb { @@ -64,33 +67,13 @@ struct sja1105_skb_cb { #define SJA1105_SKB_CB(skb) \ ((struct sja1105_skb_cb *)((skb)->cb)) -struct sja1105_port { - struct kthread_worker *xmit_worker; - struct kthread_work xmit_work; - struct sk_buff_head xmit_queue; - struct sja1105_tagger_data *data; - struct dsa_port *dp; - bool hwts_tx_en; -}; - -/* Timestamps are in units of 8 ns clock ticks (equivalent to - * a fixed 125 MHz clock). 
- */ -#define SJA1105_TICK_NS 8 - -static inline s64 ns_to_sja1105_ticks(s64 ns) -{ - return ns / SJA1105_TICK_NS; -} - -static inline s64 sja1105_ticks_to_ns(s64 ticks) +static inline struct sja1105_tagger_data * +sja1105_tagger_data(struct dsa_switch *ds) { - return ticks * SJA1105_TICK_NS; -} + BUG_ON(ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1105 && + ds->dst->tag_ops->proto != DSA_TAG_PROTO_SJA1110); -static inline bool dsa_port_is_sja1105(struct dsa_port *dp) -{ - return true; + return ds->tagger_data; } #endif /* _NET_DSA_SJA1105_H */ diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h index e80a332e3d8a..d37e5d06a357 100644 --- a/include/linux/dtpm.h +++ b/include/linux/dtpm.h @@ -23,34 +23,32 @@ struct dtpm { u64 power_max; u64 power_min; int weight; - void *private; }; struct dtpm_ops { u64 (*set_power_uw)(struct dtpm *, u64); u64 (*get_power_uw)(struct dtpm *); + int (*update_power_uw)(struct dtpm *); void (*release)(struct dtpm *); }; -struct dtpm_descr; - -typedef int (*dtpm_init_t)(struct dtpm_descr *); +typedef int (*dtpm_init_t)(void); struct dtpm_descr { - struct dtpm *parent; - const char *name; dtpm_init_t init; }; /* Init section thermal table */ -extern struct dtpm_descr *__dtpm_table[]; -extern struct dtpm_descr *__dtpm_table_end[]; +extern struct dtpm_descr __dtpm_table[]; +extern struct dtpm_descr __dtpm_table_end[]; -#define DTPM_TABLE_ENTRY(name) \ - static typeof(name) *__dtpm_table_entry_##name \ - __used __section("__dtpm_table") = &name +#define DTPM_TABLE_ENTRY(name, __init) \ + static struct dtpm_descr __dtpm_table_entry_##name \ + __used __section("__dtpm_table") = { \ + .init = __init, \ + } -#define DTPM_DECLARE(name) DTPM_TABLE_ENTRY(name) +#define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init) #define for_each_dtpm_table(__dtpm) \ for (__dtpm = __dtpm_table; \ @@ -62,16 +60,14 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone) return container_of(zone, struct dtpm, zone); } -int 
dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max); +int dtpm_update_power(struct dtpm *dtpm); int dtpm_release_zone(struct powercap_zone *pcz); -struct dtpm *dtpm_alloc(struct dtpm_ops *ops); +void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops); void dtpm_unregister(struct dtpm *dtpm); int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent); -int dtpm_register_cpu(struct dtpm *parent); - #endif diff --git a/include/linux/edac.h b/include/linux/edac.h index 4207d06996a4..e730b3468719 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -182,6 +182,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_LPDDR4: Low-Power DDR4 memory. * @MEM_DDR5: Unbuffered DDR5 RAM + * @MEM_RDDR5: Registered DDR5 RAM + * @MEM_LRDDR5: Load-Reduced DDR5 memory. * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. * @MEM_HBM2: High bandwidth Memory Gen 2. @@ -211,6 +213,8 @@ enum mem_type { MEM_LRDDR4, MEM_LPDDR4, MEM_DDR5, + MEM_RDDR5, + MEM_LRDDR5, MEM_NVDIMM, MEM_WIO2, MEM_HBM2, @@ -239,6 +243,8 @@ enum mem_type { #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) #define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) diff --git a/include/linux/efi.h b/include/linux/efi.h index 6b5d36babfcc..0de9fb1fdc5a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -362,6 +362,7 @@ void efi_native_runtime_setup(void); /* OEM GUIDs */ #define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55) +#define AMD_SEV_MEM_ENCRYPT_GUID EFI_GUID(0x0cf29b71, 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75) typedef struct { efi_guid_t guid; @@ -569,8 +570,8 @@ extern struct efi { unsigned long flags; } 
efi; -#define EFI_RT_SUPPORTED_GET_TIME 0x0001 -#define EFI_RT_SUPPORTED_SET_TIME 0x0002 +#define EFI_RT_SUPPORTED_GET_TIME 0x0001 +#define EFI_RT_SUPPORTED_SET_TIME 0x0002 #define EFI_RT_SUPPORTED_GET_WAKEUP_TIME 0x0004 #define EFI_RT_SUPPORTED_SET_WAKEUP_TIME 0x0008 #define EFI_RT_SUPPORTED_GET_VARIABLE 0x0010 @@ -837,7 +838,7 @@ extern int efi_status_to_err(efi_status_t status); #define EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS 0x0000000000000020 #define EFI_VARIABLE_APPEND_WRITE 0x0000000000000040 -#define EFI_VARIABLE_MASK (EFI_VARIABLE_NON_VOLATILE | \ +#define EFI_VARIABLE_MASK (EFI_VARIABLE_NON_VOLATILE | \ EFI_VARIABLE_BOOTSERVICE_ACCESS | \ EFI_VARIABLE_RUNTIME_ACCESS | \ EFI_VARIABLE_HARDWARE_ERROR_RECORD | \ @@ -1282,4 +1283,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( } #endif +#ifdef CONFIG_SYSFB +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); +#else +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } +#endif + #endif /* _LINUX_EFI_H */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h deleted file mode 100644 index ef9ceead3db1..000000000000 --- a/include/linux/elevator.h +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_ELEVATOR_H -#define _LINUX_ELEVATOR_H - -#include <linux/percpu.h> -#include <linux/hashtable.h> - -#ifdef CONFIG_BLOCK - -struct io_cq; -struct elevator_type; -#ifdef CONFIG_BLK_DEBUG_FS -struct blk_mq_debugfs_attr; -#endif - -/* - * Return values from elevator merger - */ -enum elv_merge { - ELEVATOR_NO_MERGE = 0, - ELEVATOR_FRONT_MERGE = 1, - ELEVATOR_BACK_MERGE = 2, - ELEVATOR_DISCARD_MERGE = 3, -}; - -struct blk_mq_alloc_data; -struct blk_mq_hw_ctx; - -struct elevator_mq_ops { - int (*init_sched)(struct request_queue *, struct elevator_type *); - void (*exit_sched)(struct elevator_queue *); - int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); - void (*exit_hctx)(struct 
blk_mq_hw_ctx *, unsigned int); - void (*depth_updated)(struct blk_mq_hw_ctx *); - - bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int); - int (*request_merge)(struct request_queue *q, struct request **, struct bio *); - void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); - void (*requests_merged)(struct request_queue *, struct request *, struct request *); - void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *); - void (*prepare_request)(struct request *); - void (*finish_request)(struct request *); - void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); - struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); - bool (*has_work)(struct blk_mq_hw_ctx *); - void (*completed_request)(struct request *, u64); - void (*requeue_request)(struct request *); - struct request *(*former_request)(struct request_queue *, struct request *); - struct request *(*next_request)(struct request_queue *, struct request *); - void (*init_icq)(struct io_cq *); - void (*exit_icq)(struct io_cq *); -}; - -#define ELV_NAME_MAX (16) - -struct elv_fs_entry { - struct attribute attr; - ssize_t (*show)(struct elevator_queue *, char *); - ssize_t (*store)(struct elevator_queue *, const char *, size_t); -}; - -/* - * identifies an elevator type, such as AS or deadline - */ -struct elevator_type -{ - /* managed by elevator core */ - struct kmem_cache *icq_cache; - - /* fields provided by elevator implementation */ - struct elevator_mq_ops ops; - - size_t icq_size; /* see iocontext.h */ - size_t icq_align; /* ditto */ - struct elv_fs_entry *elevator_attrs; - const char *elevator_name; - const char *elevator_alias; - const unsigned int elevator_features; - struct module *elevator_owner; -#ifdef CONFIG_BLK_DEBUG_FS - const struct blk_mq_debugfs_attr *queue_debugfs_attrs; - const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; -#endif - - /* 
managed by elevator core */ - char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */ - struct list_head list; -}; - -#define ELV_HASH_BITS 6 - -void elv_rqhash_del(struct request_queue *q, struct request *rq); -void elv_rqhash_add(struct request_queue *q, struct request *rq); -void elv_rqhash_reposition(struct request_queue *q, struct request *rq); -struct request *elv_rqhash_find(struct request_queue *q, sector_t offset); - -/* - * each queue has an elevator_queue associated with it - */ -struct elevator_queue -{ - struct elevator_type *type; - void *elevator_data; - struct kobject kobj; - struct mutex sysfs_lock; - unsigned int registered:1; - DECLARE_HASHTABLE(hash, ELV_HASH_BITS); -}; - -/* - * block elevator interface - */ -extern enum elv_merge elv_merge(struct request_queue *, struct request **, - struct bio *); -extern void elv_merge_requests(struct request_queue *, struct request *, - struct request *); -extern void elv_merged_request(struct request_queue *, struct request *, - enum elv_merge); -extern bool elv_attempt_insert_merge(struct request_queue *, struct request *, - struct list_head *); -extern struct request *elv_former_request(struct request_queue *, struct request *); -extern struct request *elv_latter_request(struct request_queue *, struct request *); -void elevator_init_mq(struct request_queue *q); - -/* - * io scheduler registration - */ -extern int elv_register(struct elevator_type *); -extern void elv_unregister(struct elevator_type *); - -/* - * io scheduler sysfs switching - */ -extern ssize_t elv_iosched_show(struct request_queue *, char *); -extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); - -extern bool elv_bio_merge_ok(struct request *, struct bio *); -extern struct elevator_queue *elevator_alloc(struct request_queue *, - struct elevator_type *); - -/* - * Helper functions. 
- */ -extern struct request *elv_rb_former_request(struct request_queue *, struct request *); -extern struct request *elv_rb_latter_request(struct request_queue *, struct request *); - -/* - * rb support functions. - */ -extern void elv_rb_add(struct rb_root *, struct request *); -extern void elv_rb_del(struct rb_root *, struct request *); -extern struct request *elv_rb_find(struct rb_root *, sector_t); - -/* - * Insertion selection - */ -#define ELEVATOR_INSERT_FRONT 1 -#define ELEVATOR_INSERT_BACK 2 -#define ELEVATOR_INSERT_SORT 3 -#define ELEVATOR_INSERT_REQUEUE 4 -#define ELEVATOR_INSERT_FLUSH 5 -#define ELEVATOR_INSERT_SORT_MERGE 6 - -#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) -#define rb_entry_rq(node) rb_entry((node), struct request, rb_node) - -#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) - -/* - * Elevator features. - */ - -/* Supports zoned block devices sequential write constraint */ -#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) -/* Supports scheduling on multiple hardware queues */ -#define ELEVATOR_F_MQ_AWARE (1U << 1) - -#endif /* CONFIG_BLOCK */ -#endif diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 39dcadd492b5..6377adc3b78d 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -17,19 +17,30 @@ * device). It can be a total power: static and dynamic. * @cost: The cost coefficient associated with this level, used during * energy calculation. Equal to: power * max_frequency / frequency + * @flags: see "em_perf_state flags" description below. */ struct em_perf_state { unsigned long frequency; unsigned long power; unsigned long cost; + unsigned long flags; }; +/* + * em_perf_state flags: + * + * EM_PERF_STATE_INEFFICIENT: The performance state is inefficient. There is + * in this em_perf_domain, another performance state with a higher frequency + * but a lower or equal power cost. 
Such inefficient states are ignored when + * using em_pd_get_efficient_*() functions. + */ +#define EM_PERF_STATE_INEFFICIENT BIT(0) + /** * struct em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states - * @milliwatts: Flag indicating the power values are in milli-Watts - * or some other scale. + * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here * for performance reasons to avoid potential cache * misses during energy calculations in the scheduler @@ -44,10 +55,22 @@ struct em_perf_state { struct em_perf_domain { struct em_perf_state *table; int nr_perf_states; - int milliwatts; + unsigned long flags; unsigned long cpus[]; }; +/* + * em_perf_domain flags: + * + * EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some + * other scale. + * + * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating + * energy consumption. + */ +#define EM_PERF_DOMAIN_MILLIWATTS BIT(0) +#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) + #define em_span_cpus(em) (to_cpumask((em)->cpus)) #ifdef CONFIG_ENERGY_MODEL @@ -102,6 +125,37 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, void em_dev_unregister_perf_domain(struct device *dev); /** + * em_pd_get_efficient_state() - Get an efficient performance state from the EM + * @pd : Performance domain for which we want an efficient frequency + * @freq : Frequency to map with the EM + * + * It is called from the scheduler code quite frequently and as a consequence + * doesn't implement any check. + * + * Return: An efficient performance state, high enough to meet @freq + * requirement. 
+ */ +static inline +struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd, + unsigned long freq) +{ + struct em_perf_state *ps; + int i; + + for (i = 0; i < pd->nr_perf_states; i++) { + ps = &pd->table[i]; + if (ps->frequency >= freq) { + if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && + ps->flags & EM_PERF_STATE_INEFFICIENT) + continue; + break; + } + } + + return ps; +} + +/** * em_cpu_energy() - Estimates the energy consumed by the CPUs of a * performance domain * @pd : performance domain for which energy has to be estimated @@ -123,7 +177,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, { unsigned long freq, scale_cpu; struct em_perf_state *ps; - int i, cpu; + int cpu; if (!sum_util) return 0; @@ -148,11 +202,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * Find the lowest performance state of the Energy Model above the * requested frequency. */ - for (i = 0; i < pd->nr_perf_states; i++) { - ps = &pd->table[i]; - if (ps->frequency >= freq) - break; - } + ps = em_pd_get_efficient_state(pd, freq); /* * The capacity of a CPU in the domain at the performance state (ps) diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0d7865a0731c..07c878d6e323 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -75,7 +75,7 @@ static inline void xfer_to_guest_mode_prepare(void) */ static inline bool __xfer_to_guest_mode_work_pending(void) { - unsigned long ti_work = READ_ONCE(current_thread_info()->flags); + unsigned long ti_work = read_thread_flags(); return !!(ti_work & XFER_TO_GUEST_MODE_WORK); } diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c58d50451485..2ad71cc90b37 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -26,9 +26,16 @@ #ifdef __KERNEL__ struct device; +struct fwnode_handle; + int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); +int platform_get_ethdev_address(struct 
device *dev, struct net_device *netdev); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); +int device_get_mac_address(struct device *dev, char *addr); +int device_get_ethdev_address(struct device *dev, struct net_device *netdev); +int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr); + u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; @@ -227,8 +234,6 @@ static inline void eth_random_addr(u8 *addr) addr[0] |= 0x02; /* set local assignment bit (IEEE802) */ } -#define random_ether_addr(addr) eth_random_addr(addr) - /** * eth_broadcast_addr - Assign broadcast address * @addr: Pointer to a six-byte array containing the Ethernet address @@ -262,8 +267,11 @@ static inline void eth_zero_addr(u8 *addr) */ static inline void eth_hw_addr_random(struct net_device *dev) { + u8 addr[ETH_ALEN]; + + eth_random_addr(addr); + __dev_addr_set(dev, addr, ETH_ALEN); dev->addr_assign_type = NET_ADDR_RANDOM; - eth_random_addr(dev->dev_addr); } /** @@ -323,7 +331,7 @@ static inline void eth_hw_addr_inherit(struct net_device *dst, struct net_device *src) { dst->addr_assign_type = src->addr_assign_type; - ether_addr_copy(dst->dev_addr, src->dev_addr); + eth_hw_addr_set(dst, src->dev_addr); } /** @@ -544,6 +552,27 @@ static inline unsigned long compare_ether_header(const void *a, const void *b) } /** + * eth_hw_addr_gen - Generate and assign Ethernet address to a port + * @dev: pointer to port's net_device structure + * @base_addr: base Ethernet address + * @id: offset to add to the base address + * + * Generate a MAC address using a base address and an offset and assign it + * to a net_device. Commonly used by switch drivers which need to compute + * addresses for all their ports. addr_assign_type is not changed. 
+ */ +static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, + unsigned int id) +{ + u64 u = ether_addr_to_u64(base_addr); + u8 addr[ETH_ALEN]; + + u += id; + u64_to_ether_addr(u, addr); + eth_hw_addr_set(dev, addr); +} + +/** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad * diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 849524b55d89..a26f37a27167 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -67,6 +67,22 @@ enum { ETH_RSS_HASH_FUNCS_COUNT }; +/** + * struct kernel_ethtool_ringparam - RX/TX ring configuration + * @rx_buf_len: Current length of buffers on the rx ring. + */ +struct kernel_ethtool_ringparam { + u32 rx_buf_len; +}; + +/** + * enum ethtool_supported_ring_param - indicator caps for setting ring params + * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len + */ +enum ethtool_supported_ring_param { + ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), +}; + #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) #define __ETH_RSS_HASH(name) __ETH_RSS_HASH_BIT(ETH_RSS_HASH_##name##_BIT) @@ -94,6 +110,7 @@ struct ethtool_link_ext_state_info { enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch; enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; enum ethtool_link_ext_substate_cable_issue cable_issue; + enum ethtool_link_ext_substate_module module; u8 __link_ext_substate; }; }; @@ -416,10 +433,22 @@ struct ethtool_module_eeprom { }; /** + * struct ethtool_module_power_mode_params - module power mode parameters + * @policy: The power mode policy enforced by the host for the plug-in module. + * @mode: The operational power mode of the plug-in module. Should be filled by + * device drivers on get operations. 
+ */ +struct ethtool_module_power_mode_params { + enum ethtool_module_power_mode_policy policy; + enum ethtool_module_power_mode mode; +}; + +/** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. + * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -580,6 +609,11 @@ struct ethtool_module_eeprom { * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. * @get_rmon_stats: Query some of the RMON (RFC 2819) statistics. * Set %ranges to a pointer to zero-terminated array of byte ranges. + * @get_module_power_mode: Get the power mode policy for the plug-in module + * used by the network device and its operational power mode, if + * plugged-in. + * @set_module_power_mode: Set the power mode policy for the plug-in module + * used by the network device. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. 
Callers must @@ -596,6 +630,7 @@ struct ethtool_module_eeprom { struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 supported_coalesce_params; + u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); @@ -621,9 +656,13 @@ struct ethtool_ops { struct kernel_ethtool_coalesce *, struct netlink_ext_ack *); void (*get_ringparam)(struct net_device *, - struct ethtool_ringparam *); + struct ethtool_ringparam *, + struct kernel_ethtool_ringparam *, + struct netlink_ext_ack *); int (*set_ringparam)(struct net_device *, - struct ethtool_ringparam *); + struct ethtool_ringparam *, + struct kernel_ethtool_ringparam *, + struct netlink_ext_ack *); void (*get_pause_stats)(struct net_device *dev, struct ethtool_pause_stats *pause_stats); void (*get_pauseparam)(struct net_device *, @@ -705,6 +744,12 @@ struct ethtool_ops { void (*get_rmon_stats)(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats, const struct ethtool_rmon_hist_range **ranges); + int (*get_module_power_mode)(struct net_device *dev, + struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack); + int (*set_module_power_mode)(struct net_device *dev, + const struct ethtool_module_power_mode_params *params, + struct netlink_ext_ack *extack); }; int ethtool_check_ops(const struct ethtool_ops *ops); diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 1e7bf78cb382..aba348d58ff6 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -10,6 +10,9 @@ #define __ETHTOOL_LINK_MODE_MASK_NWORDS \ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) +#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \ + ETHTOOL_A_PAUSE_STAT_TX_FRAMES) + enum ethtool_multicast_groups { ETHNL_MCGRP_MONITOR, }; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 
3260fe714846..fe848901fcc3 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -221,8 +221,6 @@ struct export_operations { #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply atomic attribute updates */ -#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do - asychronous blocking locks */ unsigned long flags; }; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index eec3b7c40811..3afdf339d53c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES) -#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) +#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) @@ -82,15 +82,23 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. 
*/ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_RENAME) + +/* Events that can be reported with event->fd */ +#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF) +/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ +#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) + /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ - FANOTIFY_INODE_EVENTS) + FANOTIFY_INODE_EVENTS | \ + FANOTIFY_ERROR_EVENTS) /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ diff --git a/include/linux/fb.h b/include/linux/fb.h index 5950f8f5dc74..3da95842b207 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -262,7 +262,7 @@ struct fb_ops { /* Draws a rectangle */ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); - /* Copy data from area to another */ + /* Copy data from area to another. Obsolete. 
*/ void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region); /* Draws a image to the display */ void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image); @@ -610,6 +610,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary); +extern bool is_firmware_framebuffer(struct apertures_struct *a); extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); diff --git a/include/linux/filter.h b/include/linux/filter.h index ef03ff34234d..71fa57b88bfc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #define __LINUX_FILTER_H__ #include <linux/atomic.h> +#include <linux/bpf.h> #include <linux/refcount.h> #include <linux/compat.h> #include <linux/skbuff.h> @@ -26,7 +27,6 @@ #include <asm/byteorder.h> #include <uapi/linux/filter.h> -#include <uapi/linux/bpf.h> struct sk_buff; struct sock; @@ -360,10 +360,9 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = 0, \ .imm = TGT }) -/* Function call */ +/* Convert function address to BPF immediate */ -#define BPF_CAST_CALL(x) \ - ((u64 (*)(u64, u64, u64, u64, u64))(x)) +#define BPF_CALL_IMM(x) ((void *)(x) - (void *)__bpf_call_base) #define BPF_EMIT_CALL(FUNC) \ ((struct bpf_insn) { \ @@ -371,7 +370,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ - .imm = ((FUNC) - __bpf_call_base) }) + .imm = BPF_CALL_IMM(FUNC) }) /* Raw code statement block */ @@ -554,9 +553,9 @@ struct bpf_binary_header { }; struct bpf_prog_stats { - u64 cnt; - u64 nsecs; - u64 misses; + u64_stats_t cnt; + u64_stats_t nsecs; + u64_stats_t misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -586,8 +585,10 @@ 
struct bpf_prog { struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ /* Instructions for interpreter */ - struct sock_filter insns[0]; - struct bpf_insn insnsi[]; + union { + DECLARE_FLEX_ARRAY(struct sock_filter, insns); + DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); + }; }; struct sk_filter { @@ -613,13 +614,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, if (static_branch_unlikely(&bpf_stats_enabled_key)) { struct bpf_prog_stats *stats; u64 start = sched_clock(); + unsigned long flags; ret = dfunc(ctx, prog->insnsi, prog->bpf_func); stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; - u64_stats_update_end(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); } @@ -638,9 +640,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void * This uses migrate_disable/enable() explicitly to document that the * invocation of a BPF program does not require reentrancy protection * against a BPF program which is invoked from a preempting task. - * - * For non RT enabled kernels migrate_disable/enable() maps to - * preempt_disable/enable(), i.e. it disables also preemption. 
*/ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, const void *ctx) @@ -1020,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); +int xdp_do_redirect_frame(struct net_device *dev, + struct xdp_buff *xdp, + struct xdp_frame *xdpf, + struct bpf_prog *prog); void xdp_do_flush(void); /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as @@ -1028,7 +1031,7 @@ void xdp_do_flush(void); */ #define xdp_do_flush_map xdp_do_flush -void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, @@ -1372,6 +1375,7 @@ struct bpf_sk_lookup_kern { const struct in6_addr *daddr; } v6; struct sock *selected_sk; + u32 ingress_ifindex; bool no_reuseport; }; @@ -1434,7 +1438,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1450,6 +1454,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, .v4.daddr = daddr, .sport = sport, .dport = dport, + .ingress_ifindex = ifindex, }; u32 act; @@ -1472,7 +1477,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, const __be16 sport, const struct in6_addr *daddr, const u16 dport, - struct sock **psk) + const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; @@ -1488,6 +1493,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, .v6.daddr = daddr, .sport = sport, .dport = dport, + .ingress_ifindex = ifindex, }; u32 
act; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index aec8f30ab200..07967a450eaa 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -436,6 +436,12 @@ typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, void *header, void *data); typedef void (*fw_iso_mc_callback_t)(struct fw_iso_context *context, dma_addr_t completed, void *data); + +union fw_iso_callback { + fw_iso_callback_t sc; + fw_iso_mc_callback_t mc; +}; + struct fw_iso_context { struct fw_card *card; int type; @@ -443,10 +449,7 @@ struct fw_iso_context { int speed; bool drop_overflow_headers; size_t header_size; - union { - fw_iso_callback_t sc; - fw_iso_mc_callback_t mc; - } callback; + union fw_iso_callback callback; void *callback_data; }; diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 25109192cebe..3b057dfc8284 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -20,23 +20,19 @@ struct firmware { struct module; struct device; -struct builtin_fw { - char *name; - void *data; - unsigned long size; -}; - -/* We have to play tricks here much like stringify() to get the - __COUNTER__ macro to be expanded as we want it */ -#define __fw_concat1(x, y) x##y -#define __fw_concat(x, y) __fw_concat1(x, y) - -#define DECLARE_BUILTIN_FIRMWARE(name, blob) \ - DECLARE_BUILTIN_FIRMWARE_SIZE(name, &(blob), sizeof(blob)) - -#define DECLARE_BUILTIN_FIRMWARE_SIZE(name, blob, size) \ - static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \ - __used __section(".builtin_fw") = { name, blob, size } +/* + * Built-in firmware functionality is only available if FW_LOADER=y, but not + * FW_LOADER=m + */ +#ifdef CONFIG_FW_LOADER +bool firmware_request_builtin(struct firmware *fw, const char *name); +#else +static inline bool firmware_request_builtin(struct firmware *fw, + const char *name) +{ + return false; +} +#endif #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int 
request_firmware(const struct firmware **fw, const char *name, diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h new file mode 100644 index 000000000000..38b4da3ddfe4 --- /dev/null +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * cs_dsp.h -- Cirrus Logic DSP firmware support + * + * Based on sound/soc/codecs/wm_adsp.h + * + * Copyright 2012 Wolfson Microelectronics plc + * Copyright (C) 2015-2021 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ +#ifndef __CS_DSP_H +#define __CS_DSP_H + +#include <linux/device.h> +#include <linux/firmware.h> +#include <linux/list.h> +#include <linux/regmap.h> + +#define CS_ADSP2_REGION_0 BIT(0) +#define CS_ADSP2_REGION_1 BIT(1) +#define CS_ADSP2_REGION_2 BIT(2) +#define CS_ADSP2_REGION_3 BIT(3) +#define CS_ADSP2_REGION_4 BIT(4) +#define CS_ADSP2_REGION_5 BIT(5) +#define CS_ADSP2_REGION_6 BIT(6) +#define CS_ADSP2_REGION_7 BIT(7) +#define CS_ADSP2_REGION_8 BIT(8) +#define CS_ADSP2_REGION_9 BIT(9) +#define CS_ADSP2_REGION_1_9 (CS_ADSP2_REGION_1 | \ + CS_ADSP2_REGION_2 | CS_ADSP2_REGION_3 | \ + CS_ADSP2_REGION_4 | CS_ADSP2_REGION_5 | \ + CS_ADSP2_REGION_6 | CS_ADSP2_REGION_7 | \ + CS_ADSP2_REGION_8 | CS_ADSP2_REGION_9) +#define CS_ADSP2_REGION_ALL (CS_ADSP2_REGION_0 | CS_ADSP2_REGION_1_9) + +#define CS_DSP_DATA_WORD_SIZE 3 + +#define CS_DSP_ACKED_CTL_TIMEOUT_MS 100 +#define CS_DSP_ACKED_CTL_N_QUICKPOLLS 10 +#define CS_DSP_ACKED_CTL_MIN_VALUE 0 +#define CS_DSP_ACKED_CTL_MAX_VALUE 0xFFFFFF + +/** + * struct cs_dsp_region - Describes a logical memory region in DSP address space + * @type: Memory region type + * @base: Address of region + */ +struct cs_dsp_region { + int type; + unsigned int base; +}; + +/** + * struct cs_dsp_alg_region - Describes a logical algorithm region in DSP address space + * @list: List node for internal use + * @alg: Algorithm id + * @ver: Expected algorithm version + * @type: 
Memory region type + * @base: Address of region + */ +struct cs_dsp_alg_region { + struct list_head list; + unsigned int alg; + unsigned int ver; + int type; + unsigned int base; +}; + +/** + * struct cs_dsp_coeff_ctl - Describes a coefficient control + * @fw_name: Name of the firmware + * @subname: Name of the control parsed from the WMFW + * @subname_len: Length of subname + * @alg_region: Logical region associated with this control + * @dsp: DSP instance associated with this control + * @enabled: Flag indicating whether control is enabled + * @list: List node for internal use + * @cache: Cached value of the control + * @offset: Offset of control within alg_region in words + * @len: Length of the cached value in bytes + * @set: Flag indicating the value has been written by the user + * @flags: Bitfield of WMFW_CTL_FLAG_ control flags defined in wmfw.h + * @type: One of the WMFW_CTL_TYPE_ control types defined in wmfw.h + * @priv: For use by the client + */ +struct cs_dsp_coeff_ctl { + const char *fw_name; + /* Subname is needed to match with firmware */ + const char *subname; + unsigned int subname_len; + struct cs_dsp_alg_region alg_region; + struct cs_dsp *dsp; + unsigned int enabled:1; + struct list_head list; + void *cache; + unsigned int offset; + size_t len; + unsigned int set:1; + unsigned int flags; + unsigned int type; + + void *priv; +}; + +struct cs_dsp_ops; +struct cs_dsp_client_ops; + +/** + * struct cs_dsp - Configuration and state of a Cirrus Logic DSP + * @name: The name of the DSP instance + * @rev: Revision of the DSP + * @num: DSP instance number + * @type: Type of DSP + * @dev: Driver model representation of the device + * @regmap: Register map of the device + * @ops: Function pointers for internal callbacks + * @client_ops: Function pointers for client callbacks + * @base: Address of the DSP registers + * @base_sysinfo: Address of the sysinfo register (Halo only) + * @sysclk_reg: Address of the sysclk register (ADSP1 only) + * @sysclk_mask: 
Mask of frequency bits within sysclk register (ADSP1 only) + * @sysclk_shift: Shift of frequency bits within sysclk register (ADSP1 only) + * @alg_regions: List of currently loaded algorithm regions + * @fw_file_name: Filename of the current firmware + * @fw_name: Name of the current firmware + * @fw_id: ID of the current firmware, obtained from the wmfw + * @fw_id_version: Version of the firmware, obtained from the wmfw + * @fw_vendor_id: Vendor of the firmware, obtained from the wmfw + * @mem: DSP memory region descriptions + * @num_mems: Number of memory regions in this DSP + * @fw_ver: Version of the wmfw file format + * @booted: Flag indicating DSP has been configured + * @running: Flag indicating DSP is executing firmware + * @ctl_list: Controls defined within the loaded DSP firmware + * @lock_regions: Enable MPU traps on specified memory regions + * @pwr_lock: Lock used to serialize accesses + * @debugfs_root: Debugfs directory for this DSP instance + * @wmfw_file_name: Filename of the currently loaded firmware + * @bin_file_name: Filename of the currently loaded coefficients + */ +struct cs_dsp { + const char *name; + int rev; + int num; + int type; + struct device *dev; + struct regmap *regmap; + + const struct cs_dsp_ops *ops; + const struct cs_dsp_client_ops *client_ops; + + unsigned int base; + unsigned int base_sysinfo; + unsigned int sysclk_reg; + unsigned int sysclk_mask; + unsigned int sysclk_shift; + + struct list_head alg_regions; + + const char *fw_name; + unsigned int fw_id; + unsigned int fw_id_version; + unsigned int fw_vendor_id; + + const struct cs_dsp_region *mem; + int num_mems; + + int fw_ver; + + bool booted; + bool running; + + struct list_head ctl_list; + + struct mutex pwr_lock; + + unsigned int lock_regions; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_root; + char *wmfw_file_name; + char *bin_file_name; +#endif +}; + +/** + * struct cs_dsp_client_ops - client callbacks + * @control_add: Called under the pwr_lock when a control 
is created + * @control_remove: Called under the pwr_lock when a control is destroyed + * @pre_run: Called under the pwr_lock by cs_dsp_run() before the core is started + * @post_run: Called under the pwr_lock by cs_dsp_run() after the core is started + * @post_stop: Called under the pwr_lock by cs_dsp_stop() + * @watchdog_expired: Called when a watchdog expiry is detected + * + * These callbacks give the cs_dsp client an opportunity to respond to events + * or to perform actions atomically. + */ +struct cs_dsp_client_ops { + int (*control_add)(struct cs_dsp_coeff_ctl *ctl); + void (*control_remove)(struct cs_dsp_coeff_ctl *ctl); + int (*pre_run)(struct cs_dsp *dsp); + int (*post_run)(struct cs_dsp *dsp); + void (*post_stop)(struct cs_dsp *dsp); + void (*watchdog_expired)(struct cs_dsp *dsp); +}; + +int cs_dsp_adsp1_init(struct cs_dsp *dsp); +int cs_dsp_adsp2_init(struct cs_dsp *dsp); +int cs_dsp_halo_init(struct cs_dsp *dsp); + +int cs_dsp_adsp1_power_up(struct cs_dsp *dsp, + const struct firmware *wmfw_firmware, char *wmfw_filename, + const struct firmware *coeff_firmware, char *coeff_filename, + const char *fw_name); +void cs_dsp_adsp1_power_down(struct cs_dsp *dsp); +int cs_dsp_power_up(struct cs_dsp *dsp, + const struct firmware *wmfw_firmware, char *wmfw_filename, + const struct firmware *coeff_firmware, char *coeff_filename, + const char *fw_name); +void cs_dsp_power_down(struct cs_dsp *dsp); +int cs_dsp_run(struct cs_dsp *dsp); +void cs_dsp_stop(struct cs_dsp *dsp); + +void cs_dsp_remove(struct cs_dsp *dsp); + +int cs_dsp_set_dspclk(struct cs_dsp *dsp, unsigned int freq); +void cs_dsp_adsp2_bus_error(struct cs_dsp *dsp); +void cs_dsp_halo_bus_error(struct cs_dsp *dsp); +void cs_dsp_halo_wdt_expire(struct cs_dsp *dsp); + +void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root); +void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp); + +int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int event_id); +int 
cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, + const void *buf, size_t len); +int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, + void *buf, size_t len); +struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp, const char *name, int type, + unsigned int alg); + +int cs_dsp_read_raw_data_block(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, + unsigned int num_words, __be32 *data); +int cs_dsp_read_data_word(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, u32 *data); +int cs_dsp_write_data_word(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, u32 data); +void cs_dsp_remove_padding(u32 *buf, int nwords); + +struct cs_dsp_alg_region *cs_dsp_find_alg_region(struct cs_dsp *dsp, + int type, unsigned int id); + +const char *cs_dsp_mem_region_name(unsigned int type); + +#endif diff --git a/include/linux/firmware/cirrus/wmfw.h b/include/linux/firmware/cirrus/wmfw.h new file mode 100644 index 000000000000..74e5a4f6c13a --- /dev/null +++ b/include/linux/firmware/cirrus/wmfw.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * wmfw.h - Wolfson firmware format information + * + * Copyright 2012 Wolfson Microelectronics plc + * + * Author: Mark Brown <broonie@opensource.wolfsonmicro.com> + */ + +#ifndef __WMFW_H +#define __WMFW_H + +#include <linux/types.h> + +#define WMFW_MAX_ALG_NAME 256 +#define WMFW_MAX_ALG_DESCR_NAME 256 + +#define WMFW_MAX_COEFF_NAME 256 +#define WMFW_MAX_COEFF_DESCR_NAME 256 + +#define WMFW_CTL_FLAG_SYS 0x8000 +#define WMFW_CTL_FLAG_VOLATILE 0x0004 +#define WMFW_CTL_FLAG_WRITEABLE 0x0002 +#define WMFW_CTL_FLAG_READABLE 0x0001 + +#define WMFW_CTL_TYPE_BYTES 0x0004 /* byte control */ + +/* Non-ALSA coefficient types start at 0x1000 */ +#define WMFW_CTL_TYPE_ACKED 0x1000 /* acked control */ +#define WMFW_CTL_TYPE_HOSTEVENT 0x1001 /* event control */ +#define WMFW_CTL_TYPE_HOST_BUFFER 0x1002 /* host buffer pointer */ +#define WMFW_CTL_TYPE_FWEVENT 0x1004 /* firmware event
control */ + +struct wmfw_header { + char magic[4]; + __le32 len; + __le16 rev; + u8 core; + u8 ver; +} __packed; + +struct wmfw_footer { + __le64 timestamp; + __le32 checksum; +} __packed; + +struct wmfw_adsp1_sizes { + __le32 dm; + __le32 pm; + __le32 zm; +} __packed; + +struct wmfw_adsp2_sizes { + __le32 xm; + __le32 ym; + __le32 pm; + __le32 zm; +} __packed; + +struct wmfw_region { + union { + __be32 type; + __le32 offset; + }; + __le32 len; + u8 data[]; +} __packed; + +struct wmfw_id_hdr { + __be32 core_id; + __be32 core_rev; + __be32 id; + __be32 ver; +} __packed; + +struct wmfw_v3_id_hdr { + __be32 core_id; + __be32 block_rev; + __be32 vendor_id; + __be32 id; + __be32 ver; +} __packed; + +struct wmfw_adsp1_id_hdr { + struct wmfw_id_hdr fw; + __be32 zm; + __be32 dm; + __be32 n_algs; +} __packed; + +struct wmfw_adsp2_id_hdr { + struct wmfw_id_hdr fw; + __be32 zm; + __be32 xm; + __be32 ym; + __be32 n_algs; +} __packed; + +struct wmfw_halo_id_hdr { + struct wmfw_v3_id_hdr fw; + __be32 xm_base; + __be32 xm_size; + __be32 ym_base; + __be32 ym_size; + __be32 n_algs; +} __packed; + +struct wmfw_alg_hdr { + __be32 id; + __be32 ver; +} __packed; + +struct wmfw_adsp1_alg_hdr { + struct wmfw_alg_hdr alg; + __be32 zm; + __be32 dm; +} __packed; + +struct wmfw_adsp2_alg_hdr { + struct wmfw_alg_hdr alg; + __be32 zm; + __be32 xm; + __be32 ym; +} __packed; + +struct wmfw_halo_alg_hdr { + struct wmfw_alg_hdr alg; + __be32 xm_base; + __be32 xm_size; + __be32 ym_base; + __be32 ym_size; +} __packed; + +struct wmfw_adsp_alg_data { + __le32 id; + u8 name[WMFW_MAX_ALG_NAME]; + u8 descr[WMFW_MAX_ALG_DESCR_NAME]; + __le32 ncoeff; + u8 data[]; +} __packed; + +struct wmfw_adsp_coeff_data { + struct { + __le16 offset; + __le16 type; + __le32 size; + } hdr; + u8 name[WMFW_MAX_COEFF_NAME]; + u8 descr[WMFW_MAX_COEFF_DESCR_NAME]; + __le16 ctl_type; + __le16 flags; + __le32 len; + u8 data[]; +} __packed; + +struct wmfw_coeff_hdr { + u8 magic[4]; + __le32 len; + union { + __be32 rev; + __le32 
ver; + }; + union { + __be32 core; + __le32 core_ver; + }; + u8 data[]; +} __packed; + +struct wmfw_coeff_item { + __le16 offset; + __le16 type; + __le32 id; + __le32 ver; + __le32 sr; + __le32 len; + u8 data[]; +} __packed; + +#define WMFW_ADSP1 1 +#define WMFW_ADSP2 2 +#define WMFW_HALO 4 + +#define WMFW_ABSOLUTE 0xf0 +#define WMFW_ALGORITHM_DATA 0xf2 +#define WMFW_METADATA 0xfc +#define WMFW_NAME_TEXT 0xfe +#define WMFW_INFO_TEXT 0xff + +#define WMFW_ADSP1_PM 2 +#define WMFW_ADSP1_DM 3 +#define WMFW_ADSP1_ZM 4 + +#define WMFW_ADSP2_PM 2 +#define WMFW_ADSP2_ZM 4 +#define WMFW_ADSP2_XM 5 +#define WMFW_ADSP2_YM 6 + +#define WMFW_HALO_PM_PACKED 0x10 +#define WMFW_HALO_XM_PACKED 0x11 +#define WMFW_HALO_YM_PACKED 0x12 + +#endif diff --git a/include/linux/firmware/imx/s4.h b/include/linux/firmware/imx/s4.h new file mode 100644 index 000000000000..9e34923ae1d6 --- /dev/null +++ b/include/linux/firmware/imx/s4.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2021 NXP + * + * Header file for the IPC implementation. 
+ */ + +#ifndef _S4_IPC_H +#define _S4_IPC_H + +struct imx_s4_ipc; + +struct imx_s4_rpc_msg { + uint8_t ver; + uint8_t size; + uint8_t cmd; + uint8_t tag; +} __packed; + +#endif /* _S4_IPC_H */ diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h new file mode 100644 index 000000000000..3f87c4929d21 --- /dev/null +++ b/include/linux/firmware/xlnx-event-manager.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_ +#define _FIRMWARE_XLNX_EVENT_MANAGER_H_ + +#include <linux/firmware/xlnx-zynqmp.h> + +#define CB_MAX_PAYLOAD_SIZE (4U) /*In payload maximum 32bytes */ + +/************************** Exported Function *****************************/ + +typedef void (*event_cb_func_t)(const u32 *payload, void *data); + +#if IS_REACHABLE(CONFIG_XLNX_EVENT_MANAGER) +int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, const bool wake, + event_cb_func_t cb_fun, void *data); + +int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, event_cb_func_t cb_fun); +#else +static inline int xlnx_register_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, const bool wake, + event_cb_func_t cb_fun, void *data) +{ + return -ENODEV; +} + +static inline int xlnx_unregister_event(const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, event_cb_func_t cb_fun) +{ + return -ENODEV; +} +#endif + +#endif /* _FIRMWARE_XLNX_EVENT_MANAGER_H_ */ diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 56b426fe020c..907cb01890cf 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -2,7 +2,7 @@ /* * Xilinx Zynq MPSoC Firmware layer * - * Copyright (C) 2014-2019 Xilinx + * Copyright (C) 2014-2021 Xilinx * * Michal Simek <michal.simek@xilinx.com> * Davorin Mista <davorin.mista@aggios.com> @@ -64,14 +64,31 @@ #define
XILINX_ZYNQMP_PM_FPGA_FULL 0x0U #define XILINX_ZYNQMP_PM_FPGA_PARTIAL BIT(0) +/* + * Node IDs for the Error Events. + */ +#define EVENT_ERROR_PMC_ERR1 (0x28100000U) +#define EVENT_ERROR_PMC_ERR2 (0x28104000U) +#define EVENT_ERROR_PSM_ERR1 (0x28108000U) +#define EVENT_ERROR_PSM_ERR2 (0x2810C000U) + +enum pm_api_cb_id { + PM_INIT_SUSPEND_CB = 30, + PM_ACKNOWLEDGE_CB = 31, + PM_NOTIFY_CB = 32, +}; + enum pm_api_id { PM_GET_API_VERSION = 1, + PM_REGISTER_NOTIFIER = 5, PM_SYSTEM_SHUTDOWN = 12, PM_REQUEST_NODE = 13, PM_RELEASE_NODE = 14, PM_SET_REQUIREMENT = 15, PM_RESET_ASSERT = 17, PM_RESET_GET_STATUS = 18, + PM_MMIO_WRITE = 19, + PM_MMIO_READ = 20, PM_PM_INIT_FINALIZE = 21, PM_FPGA_LOAD = 22, PM_FPGA_GET_STATUS = 23, @@ -123,6 +140,9 @@ enum pm_ioctl_id { IOCTL_READ_PGGS = 15, /* Set healthy bit value */ IOCTL_SET_BOOT_HEALTH_STATUS = 17, + IOCTL_OSPI_MUX_SELECT = 21, + /* Register SGI to ATF */ + IOCTL_REGISTER_SGI = 25, }; enum pm_query_id { @@ -351,6 +371,11 @@ enum zynqmp_pm_shutdown_subtype { ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM = 2, }; +enum ospi_mux_select_type { + PM_OSPI_MUX_SEL_DMA = 0, + PM_OSPI_MUX_SEL_LINEAR = 1, +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID @@ -387,9 +412,12 @@ int zynqmp_pm_set_pll_frac_data(u32 clk_id, u32 data); int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data); int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value); int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type); +int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select); int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, const enum zynqmp_pm_reset_action assert_flag); int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status); +unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode); +int zynqmp_pm_bootmode_write(u32 ps_mode); int zynqmp_pm_init_finalize(void); int zynqmp_pm_set_suspend_mode(u32 mode); int zynqmp_pm_request_node(const u32 node, const u32 capabilities, @@ -416,6 +444,9 @@ int 
zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param, int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param, u32 value); int zynqmp_pm_load_pdi(const u32 src, const u64 address); +int zynqmp_pm_register_notifier(const u32 node, const u32 event, + const u32 wake, const u32 enable); +int zynqmp_pm_feature(const u32 api_id); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -508,6 +539,11 @@ static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type) return -ENODEV; } +static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select) +{ + return -ENODEV; +} + static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, const enum zynqmp_pm_reset_action assert_flag) { @@ -520,6 +556,16 @@ static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, return -ENODEV; } +static inline unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_bootmode_write(u32 ps_mode) +{ + return -ENODEV; +} + static inline int zynqmp_pm_init_finalize(void) { return -ENODEV; @@ -632,6 +678,17 @@ static inline int zynqmp_pm_load_pdi(const u32 src, const u64 address) { return -ENODEV; } + +static inline int zynqmp_pm_register_notifier(const u32 node, const u32 event, + const u32 wake, const u32 enable) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_feature(const u32 api_id) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index c12df59d3f5f..3e378b1fb0bc 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -83,9 +83,10 @@ struct fprop_local_percpu { int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); -void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); -void __fprop_inc_percpu_max(struct fprop_global *p, struct 
fprop_local_percpu *pl, - int max_frac); +void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, + long nr); +void __fprop_add_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac, long nr); void fprop_fraction_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, unsigned long *numerator, unsigned long *denominator); @@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) unsigned long flags; local_irq_save(flags); - __fprop_inc_percpu(p, pl); + __fprop_add_percpu(p, pl, 1); local_irq_restore(flags); } diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index c1be37437e77..a6cd6815f249 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,27 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) +#define __RENAME(x) __asm__(#x) + +void fortify_panic(const char *name) __noreturn __cold; +void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); +void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); +void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)"); + +#define __compiletime_strlen(p) \ +({ \ + unsigned char *__p = (unsigned char *)(p); \ + size_t __ret = (size_t)-1; \ + size_t __p_size = __builtin_object_size(p, 1); \ + if (__p_size != (size_t)-1) { \ + size_t __p_len = __p_size - 1; \ + if (__builtin_constant_p(__p[__p_len]) && \ + __p[__p_len] == '\0') \ + __ret = __builtin_strlen(__p); \ + } \ + __ret; \ +}) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); @@ -49,28 +70,38 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) return p; } 
-__FORTIFY_INLINE __kernel_size_t strlen(const char *p) +extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) { - __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); + size_t p_len = __compiletime_strlen(p); + size_t ret; - /* Work around gcc excess stack consumption issue */ - if (p_size == (size_t)-1 || - (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0')) - return __underlying_strlen(p); - ret = strnlen(p, p_size); - if (p_size <= ret) + /* We can take compile-time actions when maxlen is const. */ + if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) { + /* If p is const, we can use its compile-time-known len. */ + if (maxlen >= p_size) + return p_len; + } + + /* Do not check characters beyond the end of p. */ + ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); + if (p_size <= ret && maxlen != ret) fortify_panic(__func__); return ret; } -extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); -__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) +/* defined after fortified strnlen to reuse it. */ +__FORTIFY_INLINE __kernel_size_t strlen(const char *p) { + __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 1); - __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); - if (p_size <= ret && maxlen != ret) + /* Give up if we don't know how large p is. 
*/ + if (p_size == (size_t)-1) + return __underlying_strlen(p); + ret = strnlen(p, p_size); + if (p_size <= ret) fortify_panic(__func__); return ret; } @@ -79,24 +110,27 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) { - size_t ret; size_t p_size = __builtin_object_size(p, 1); size_t q_size = __builtin_object_size(q, 1); + size_t q_len; /* Full count of source string length. */ + size_t len; /* Count of characters going into destination. */ if (p_size == (size_t)-1 && q_size == (size_t)-1) return __real_strlcpy(p, q, size); - ret = strlen(q); - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - - if (__builtin_constant_p(len) && len >= p_size) + q_len = strlen(q); + len = (q_len >= size) ? size - 1 : q_len; + if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) { + /* Write size is always larger than destination. */ + if (len >= p_size) __write_overflow(); + } + if (size) { if (len >= p_size) fortify_panic(__func__); __underlying_memcpy(p, q, len); p[len] = '\0'; } - return ret; + return q_len; } /* defined after fortified strnlen to reuse it */ @@ -280,7 +314,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strcpy(p, q); size = strlen(q) + 1; - /* test here to use the more stringent object size */ + /* Compile-time check for const size overflow. */ + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + /* Run-time check for dynamic size overflow. 
*/ if (p_size < size) fortify_panic(__func__); memcpy(p, q, size); diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h index 6c3c28806ff1..223da48a6d18 100644 --- a/include/linux/fpga/fpga-bridge.h +++ b/include/linux/fpga/fpga-bridge.h @@ -23,6 +23,23 @@ struct fpga_bridge_ops { }; /** + * struct fpga_bridge_info - collection of parameters an FPGA Bridge + * @name: fpga bridge name + * @br_ops: pointer to structure of fpga bridge ops + * @priv: fpga bridge private data + * + * fpga_bridge_info contains parameters for the register function. These + * are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. + */ +struct fpga_bridge_info { + const char *name; + const struct fpga_bridge_ops *br_ops; + void *priv; +}; + +/** * struct fpga_bridge - FPGA bridge structure * @name: name of low level FPGA bridge * @dev: FPGA bridge device @@ -62,15 +79,10 @@ int of_fpga_bridge_get_to_list(struct device_node *np, struct fpga_image_info *info, struct list_head *bridge_list); -struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name, - const struct fpga_bridge_ops *br_ops, - void *priv); -void fpga_bridge_free(struct fpga_bridge *br); -int fpga_bridge_register(struct fpga_bridge *br); +struct fpga_bridge * +fpga_bridge_register(struct device *parent, const char *name, + const struct fpga_bridge_ops *br_ops, + void *priv); void fpga_bridge_unregister(struct fpga_bridge *br); -struct fpga_bridge -*devm_fpga_bridge_create(struct device *dev, const char *name, - const struct fpga_bridge_ops *br_ops, void *priv); - #endif /* _LINUX_FPGA_BRIDGE_H */ diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 474c1f506307..0f9468771bb9 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -106,6 +106,36 @@ struct fpga_image_info { }; /** + * struct fpga_compat_id - id for 
compatibility check + * + * @id_h: high 64bit of the compat_id + * @id_l: low 64bit of the compat_id + */ +struct fpga_compat_id { + u64 id_h; + u64 id_l; +}; + +/** + * struct fpga_manager_info - collection of parameters for an FPGA Manager + * @name: fpga manager name + * @compat_id: FPGA manager id for compatibility check. + * @mops: pointer to structure of fpga manager ops + * @priv: fpga manager private data + * + * fpga_manager_info contains parameters for the register_full function. + * These are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. + */ +struct fpga_manager_info { + const char *name; + struct fpga_compat_id *compat_id; + const struct fpga_manager_ops *mops; + void *priv; +}; + +/** * struct fpga_manager_ops - ops for low level fpga manager drivers * @initial_header_size: Maximum number of bytes that should be passed into write_init * @state: returns an enum value of the FPGA's state @@ -144,17 +174,6 @@ struct fpga_manager_ops { #define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR BIT(4) /** - * struct fpga_compat_id - id for compatibility check - * - * @id_h: high 64bit of the compat_id - * @id_l: low 64bit of the compat_id - */ -struct fpga_compat_id { - u64 id_h; - u64 id_l; -}; - -/** * struct fpga_manager - fpga manager structure * @name: name of low level fpga manager * @dev: fpga manager device @@ -191,17 +210,18 @@ struct fpga_manager *fpga_mgr_get(struct device *dev); void fpga_mgr_put(struct fpga_manager *mgr); -struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name, - const struct fpga_manager_ops *mops, - void *priv); -void fpga_mgr_free(struct fpga_manager *mgr); -int fpga_mgr_register(struct fpga_manager *mgr); -void fpga_mgr_unregister(struct fpga_manager *mgr); +struct fpga_manager * +fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info); -int devm_fpga_mgr_register(struct 
device *dev, struct fpga_manager *mgr); +struct fpga_manager * +fpga_mgr_register(struct device *parent, const char *name, + const struct fpga_manager_ops *mops, void *priv); +void fpga_mgr_unregister(struct fpga_manager *mgr); -struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name, - const struct fpga_manager_ops *mops, - void *priv); +struct fpga_manager * +devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info); +struct fpga_manager * +devm_fpga_mgr_register(struct device *parent, const char *name, + const struct fpga_manager_ops *mops, void *priv); #endif /*_LINUX_FPGA_MGR_H */ diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h index 27cb706275db..3b87f232425c 100644 --- a/include/linux/fpga/fpga-region.h +++ b/include/linux/fpga/fpga-region.h @@ -7,6 +7,27 @@ #include <linux/fpga/fpga-mgr.h> #include <linux/fpga/fpga-bridge.h> +struct fpga_region; + +/** + * struct fpga_region_info - collection of parameters an FPGA Region + * @mgr: fpga region manager + * @compat_id: FPGA region id for compatibility check. + * @priv: fpga region private data + * @get_bridges: optional function to get bridges to a list + * + * fpga_region_info contains parameters for the register_full function. + * These are separated into an info structure because they some are optional + * others could be added to in the future. The info structure facilitates + * maintaining a stable API. 
+ */ +struct fpga_region_info { + struct fpga_manager *mgr; + struct fpga_compat_id *compat_id; + void *priv; + int (*get_bridges)(struct fpga_region *region); +}; + /** * struct fpga_region - FPGA Region structure * @dev: FPGA Region device @@ -37,15 +58,12 @@ struct fpga_region *fpga_region_class_find( int fpga_region_program_fpga(struct fpga_region *region); -struct fpga_region -*fpga_region_create(struct device *dev, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); -void fpga_region_free(struct fpga_region *region); -int fpga_region_register(struct fpga_region *region); -void fpga_region_unregister(struct fpga_region *region); +struct fpga_region * +fpga_region_register_full(struct device *parent, const struct fpga_region_info *info); -struct fpga_region -*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); +struct fpga_region * +fpga_region_register(struct device *parent, struct fpga_manager *mgr, + int (*get_bridges)(struct fpga_region *)); +void fpga_region_unregister(struct fpga_region *region); #endif /* _FPGA_REGION_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd2..c8510da6cc6d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -41,6 +41,7 @@ #include <linux/stddef.h> #include <linux/mount.h> #include <linux/cred.h> +#include <linux/mnt_idmapping.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -48,6 +49,7 @@ struct backing_dev_info; struct bdi_writeback; struct bio; +struct io_comp_batch; struct export_operations; struct fiemap_extent_info; struct hd_geometry; @@ -329,16 +331,12 @@ struct kiocb { randomized_struct_fields_start loff_t ki_pos; - void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - union { - unsigned int ki_cookie; /* for ->iopoll */ - struct wait_page_queue 
*ki_waitq; /* for async buffered IO */ - }; - + struct wait_page_queue *ki_waitq; /* for async buffered IO */ randomized_struct_fields_end }; @@ -1223,13 +1221,13 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file, #if BITS_PER_LONG == 32 static inline int fcntl_getlk64(struct file *file, unsigned int cmd, - struct flock64 __user *user) + struct flock64 *user) { return -EINVAL; } static inline int fcntl_setlk64(unsigned int fd, struct file *file, - unsigned int cmd, struct flock64 __user *user) + unsigned int cmd, struct flock64 *user) { return -EACCES; } @@ -1443,6 +1441,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_UNTRUSTED_MOUNTER 0x00000040 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ +#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ /* Possible states of 'frozen' field */ enum { @@ -1601,6 +1600,11 @@ struct super_block { struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; +static inline struct user_namespace *i_user_ns(const struct inode *inode) +{ + return inode->i_sb->s_user_ns; +} + /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored @@ -1608,50 +1612,22 @@ struct super_block { */ static inline uid_t i_uid_read(const struct inode *inode) { - return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); + return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { - return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); + return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { - inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); + inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { - inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); -} - -/** - * kuid_into_mnt - 
map a kuid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return make_kuid(mnt_userns, __kuid_val(kuid)); -} - -/** - * kgid_into_mnt - map a kgid down into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return make_kgid(mnt_userns, __kgid_val(kgid)); + inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** @@ -1665,7 +1641,7 @@ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kuid_into_mnt(mnt_userns, inode->i_uid); + return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); } /** @@ -1679,69 +1655,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return kgid_into_mnt(mnt_userns, inode->i_gid); -} - -/** - * kuid_from_mnt - map a kuid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kuid: kuid to be mapped - * - * Return: @kuid mapped up according to @mnt_userns. - * If @kuid has no mapping INVALID_UID is returned. - */ -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, - kuid_t kuid) -{ - return KUIDT_INIT(from_kuid(mnt_userns, kuid)); -} - -/** - * kgid_from_mnt - map a kgid up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * @kgid: kgid to be mapped - * - * Return: @kgid mapped up according to @mnt_userns. 
- * If @kgid has no mapping INVALID_GID is returned. - */ -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, - kgid_t kgid) -{ - return KGIDT_INIT(from_kgid(mnt_userns, kgid)); -} - -/** - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsuid. A common example is initializing the i_uid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsuid mapped up according to @mnt_userns. - */ -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) -{ - return kuid_from_mnt(mnt_userns, current_fsuid()); -} - -/** - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns - * @mnt_userns: user namespace of the relevant mount - * - * Use this helper to initialize a new vfs or filesystem object based on - * the caller's fsgid. A common example is initializing the i_gid field of - * a newly allocated inode triggered by a creation event such as mkdir or - * O_CREAT. Other examples include the allocation of quotas for a specific - * user. - * - * Return: the caller's current fsgid mapped up according to @mnt_userns. 
- */ -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) -{ - return kgid_from_mnt(mnt_userns, current_fsgid()); + return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); } /** @@ -1755,7 +1669,7 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) static inline void inode_fsuid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_uid = mapped_fsuid(mnt_userns); + inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); } /** @@ -1769,7 +1683,7 @@ static inline void inode_fsuid_set(struct inode *inode, static inline void inode_fsgid_set(struct inode *inode, struct user_namespace *mnt_userns) { - inode->i_gid = mapped_fsgid(mnt_userns); + inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); } /** @@ -1786,10 +1700,18 @@ static inline void inode_fsgid_set(struct inode *inode, static inline bool fsuidgid_has_mapping(struct super_block *sb, struct user_namespace *mnt_userns) { - struct user_namespace *s_user_ns = sb->s_user_ns; + struct user_namespace *fs_userns = sb->s_user_ns; + kuid_t kuid; + kgid_t kgid; - return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && - kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); + kuid = mapped_fsuid(mnt_userns, fs_userns); + if (!uid_valid(kuid)) + return false; + kgid = mapped_fsgid(mnt_userns, fs_userns); + if (!gid_valid(kgid)) + return false; + return kuid_has_mapping(fs_userns, kuid) && + kgid_has_mapping(fs_userns, kgid); } extern struct timespec64 current_time(struct inode *inode); @@ -2075,7 +1997,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, bool spin); + int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, + unsigned int flags); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, 
struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); @@ -2250,6 +2173,7 @@ struct super_operations { #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ +#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2419,6 +2343,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * Used to detect that mark_inode_dirty() should not move * inode between dirty lists. * + * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2441,6 +2367,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) +#define I_PINNING_FSCACHE_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2498,6 +2425,8 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); +int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); + static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) @@ -2517,7 +2446,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2724,6 +2652,21 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) { return mnt_user_ns(file->f_path.mnt); } + +/** + * is_idmapped_mnt - check whether a mount is mapped + * @mnt: the mount to check + * + * If @mnt has an idmapping attached different from the + * filesystem's idmapping then @mnt is mapped. + * + * Return: true if mount is mapped, false if not. + */ +static inline bool is_idmapped_mnt(const struct vfsmount *mnt) +{ + return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; +} + extern long vfs_truncate(const struct path *, loff_t); int do_truncate(struct user_namespace *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); @@ -2847,8 +2790,6 @@ static inline int filemap_fdatawait(struct address_space *mapping) extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); -extern bool filemap_range_needs_writeback(struct address_space *, - loff_t lstart, loff_t lend); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); extern int __filemap_fdatawrite_range(struct address_space *mapping, @@ -3152,6 +3093,7 @@ extern void unlock_new_inode(struct inode *); extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); +void dump_mapping(const struct address_space *); /* * Userspace may rely on the the inode number being non-zero. 
For example, glibc @@ -3192,6 +3134,7 @@ static inline void remove_inode_hash(struct inode *inode) } extern void inode_sb_list_add(struct inode *inode); +extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@ -3383,6 +3326,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index aab0ffc6bac6..f103c91139d4 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -42,7 +42,7 @@ struct fs_parameter_spec { u8 opt; /* Option number (returned by fs_parse()) */ unsigned short flags; #define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */ -#define fs_param_neg_with_empty 0x0004 /* "xxx=" is negative param */ +#define fs_param_can_be_empty 0x0004 /* "xxx=" is allowed */ #define fs_param_deprecated 0x0008 /* The param is deprecated */ const void *data; }; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 8d39491c5f9f..a174cedf4d90 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* General filesystem caching backing cache interface * - * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells ([email protected]) * * NOTE!!! 
See: @@ -15,207 +15,34 @@ #define _LINUX_FSCACHE_CACHE_H #include <linux/fscache.h> -#include <linux/sched.h> -#include <linux/workqueue.h> -#define NR_MAXCACHES BITS_PER_LONG - -struct fscache_cache; -struct fscache_cache_ops; -struct fscache_object; -struct fscache_operation; - -enum fscache_obj_ref_trace { - fscache_obj_get_add_to_deps, - fscache_obj_get_queue, - fscache_obj_put_alloc_fail, - fscache_obj_put_attach_fail, - fscache_obj_put_drop_obj, - fscache_obj_put_enq_dep, - fscache_obj_put_queue, - fscache_obj_put_work, - fscache_obj_ref__nr_traces +enum fscache_cache_trace; +enum fscache_cookie_trace; +enum fscache_access_trace; + +enum fscache_cache_state { + FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ + FSCACHE_CACHE_IS_PREPARING, /* A cache is preparing to come live */ + FSCACHE_CACHE_IS_ACTIVE, /* Attached cache is active and can be used */ + FSCACHE_CACHE_GOT_IOERROR, /* Attached cache stopped on I/O error */ + FSCACHE_CACHE_IS_WITHDRAWN, /* Attached cache is being withdrawn */ +#define NR__FSCACHE_CACHE_STATE (FSCACHE_CACHE_IS_WITHDRAWN + 1) }; /* - * cache tag definition - */ -struct fscache_cache_tag { - struct list_head link; - struct fscache_cache *cache; /* cache referred to by this tag */ - unsigned long flags; -#define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ - atomic_t usage; - char name[]; /* tag name */ -}; - -/* - * cache definition + * Cache cookie. 
*/ struct fscache_cache { const struct fscache_cache_ops *ops; - struct fscache_cache_tag *tag; /* tag representing this cache */ - struct kobject *kobj; /* system representation of this cache */ - struct list_head link; /* link in list of caches */ - size_t max_index_size; /* maximum size of index data */ - char identifier[36]; /* cache label */ - - /* node management */ - struct work_struct op_gc; /* operation garbage collector */ - struct list_head object_list; /* list of data/index objects */ - struct list_head op_gc_list; /* list of ops to be deleted */ - spinlock_t object_list_lock; - spinlock_t op_gc_list_lock; + struct list_head cache_link; /* Link in cache list */ + void *cache_priv; /* Private cache data (or NULL) */ + refcount_t ref; + atomic_t n_volumes; /* Number of active volumes; */ + atomic_t n_accesses; /* Number of in-progress accesses on the cache */ atomic_t object_count; /* no. of live objects in this cache */ - struct fscache_object *fsdef; /* object for the fsdef index */ - unsigned long flags; -#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ -#define FSCACHE_CACHE_WITHDRAWN 1 /* cache has been withdrawn */ -}; - -extern wait_queue_head_t fscache_cache_cleared_wq; - -/* - * operation to be applied to a cache object - * - retrieval initiation operations are done in the context of the process - * that issued them, and not in an async thread pool - */ -typedef void (*fscache_operation_release_t)(struct fscache_operation *op); -typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); -typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op); - -enum fscache_operation_state { - FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ - FSCACHE_OP_ST_INITIALISED, /* Op is initialised */ - FSCACHE_OP_ST_PENDING, /* Op is blocked from running */ - FSCACHE_OP_ST_IN_PROGRESS, /* Op is in progress */ - FSCACHE_OP_ST_COMPLETE, /* Op is complete */ - FSCACHE_OP_ST_CANCELLED, /* Op has been cancelled */ - 
FSCACHE_OP_ST_DEAD /* Op is now dead */ -}; - -struct fscache_operation { - struct work_struct work; /* record for async ops */ - struct list_head pend_link; /* link in object->pending_ops */ - struct fscache_object *object; /* object to be operated upon */ - - unsigned long flags; -#define FSCACHE_OP_TYPE 0x000f /* operation type */ -#define FSCACHE_OP_ASYNC 0x0001 /* - async op, processor may sleep for disk */ -#define FSCACHE_OP_MYTHREAD 0x0002 /* - processing is done be issuing thread, not pool */ -#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ -#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ -#define FSCACHE_OP_DEC_READ_CNT 6 /* decrement object->n_reads on destruction */ -#define FSCACHE_OP_UNUSE_COOKIE 7 /* call fscache_unuse_cookie() on completion */ -#define FSCACHE_OP_KEEP_FLAGS 0x00f0 /* flags to keep when repurposing an op */ - - enum fscache_operation_state state; - atomic_t usage; - unsigned debug_id; /* debugging ID */ - - /* operation processor callback - * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform - * the op in a non-pool thread */ - fscache_operation_processor_t processor; - - /* Operation cancellation cleanup (optional) */ - fscache_operation_cancel_t cancel; - - /* operation releaser */ - fscache_operation_release_t release; -}; - -extern atomic_t fscache_op_debug_id; -extern void fscache_op_work_func(struct work_struct *work); - -extern void fscache_enqueue_operation(struct fscache_operation *); -extern void fscache_op_complete(struct fscache_operation *, bool); -extern void fscache_put_operation(struct fscache_operation *); -extern void fscache_operation_init(struct fscache_cookie *, - struct fscache_operation *, - fscache_operation_processor_t, - fscache_operation_cancel_t, - fscache_operation_release_t); - -/* - * data read operation - */ -struct fscache_retrieval { - struct fscache_operation op; - struct fscache_cookie *cookie; /* The netfs cookie */ - struct address_space 
*mapping; /* netfs pages */ - fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ - void *context; /* netfs read context (pinned) */ - struct list_head to_do; /* list of things to be done by the backend */ - atomic_t n_pages; /* number of pages to be retrieved */ -}; - -typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp); - -typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp); - -/** - * fscache_get_retrieval - Get an extra reference on a retrieval operation - * @op: The retrieval operation to get a reference on - * - * Get an extra reference on a retrieval operation. - */ -static inline -struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op) -{ - atomic_inc(&op->op.usage); - return op; -} - -/** - * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing - * @op: The retrieval operation affected - * - * Enqueue a retrieval operation for processing by the FS-Cache thread pool. - */ -static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) -{ - fscache_enqueue_operation(&op->op); -} - -/** - * fscache_retrieval_complete - Record (partial) completion of a retrieval - * @op: The retrieval operation affected - * @n_pages: The number of pages to account for - */ -static inline void fscache_retrieval_complete(struct fscache_retrieval *op, - int n_pages) -{ - if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0) - fscache_op_complete(&op->op, false); -} - -/** - * fscache_put_retrieval - Drop a reference to a retrieval operation - * @op: The retrieval operation affected - * - * Drop a reference to a retrieval operation. 
- */ -static inline void fscache_put_retrieval(struct fscache_retrieval *op) -{ - fscache_put_operation(&op->op); -} - -/* - * cached page storage work item - * - used to do three things: - * - batch writes to the cache - * - do cache writes asynchronously - * - defer writes until cache object lookup completion - */ -struct fscache_storage { - struct fscache_operation op; - pgoff_t store_limit; /* don't write more than this */ + unsigned int debug_id; + enum fscache_cache_state state; + char *name; }; /* @@ -225,341 +52,154 @@ struct fscache_cache_ops { /* name of cache provider */ const char *name; - /* allocate an object record for a cookie */ - struct fscache_object *(*alloc_object)(struct fscache_cache *cache, - struct fscache_cookie *cookie); - - /* look up the object for a cookie - * - return -ETIMEDOUT to be requeued - */ - int (*lookup_object)(struct fscache_object *object); - - /* finished looking up */ - void (*lookup_complete)(struct fscache_object *object); - - /* increment the usage count on this object (may fail if unmounting) */ - struct fscache_object *(*grab_object)(struct fscache_object *object, - enum fscache_obj_ref_trace why); + /* Acquire a volume */ + void (*acquire_volume)(struct fscache_volume *volume); - /* pin an object in the cache */ - int (*pin_object)(struct fscache_object *object); + /* Free the cache's data attached to a volume */ + void (*free_volume)(struct fscache_volume *volume); - /* unpin an object in the cache */ - void (*unpin_object)(struct fscache_object *object); + /* Look up a cookie in the cache */ + bool (*lookup_cookie)(struct fscache_cookie *cookie); - /* check the consistency between the backing cache and the FS-Cache - * cookie */ - int (*check_consistency)(struct fscache_operation *op); + /* Withdraw an object without any cookie access counts held */ + void (*withdraw_cookie)(struct fscache_cookie *cookie); - /* store the updated auxiliary data on an object */ - void (*update_object)(struct fscache_object 
*object); + /* Change the size of a data object */ + void (*resize_cookie)(struct netfs_cache_resources *cres, + loff_t new_size); /* Invalidate an object */ - void (*invalidate_object)(struct fscache_operation *op); - - /* discard the resources pinned by an object and effect retirement if - * necessary */ - void (*drop_object)(struct fscache_object *object); - - /* dispose of a reference to an object */ - void (*put_object)(struct fscache_object *object, - enum fscache_obj_ref_trace why); - - /* sync a cache */ - void (*sync_cache)(struct fscache_cache *cache); - - /* notification that the attributes of a non-index object (such as - * i_size) have changed */ - int (*attr_changed)(struct fscache_object *object); - - /* reserve space for an object's data and associated metadata */ - int (*reserve_space)(struct fscache_object *object, loff_t i_size); - - /* request a backing block for a page be read or allocated in the - * cache */ - fscache_page_retrieval_func_t read_or_alloc_page; - - /* request backing blocks for a list of pages be read or allocated in - * the cache */ - fscache_pages_retrieval_func_t read_or_alloc_pages; - - /* request a backing block for a page be allocated in the cache so that - * it can be written directly */ - fscache_page_retrieval_func_t allocate_page; - - /* request backing blocks for pages be allocated in the cache so that - * they can be written directly */ - fscache_pages_retrieval_func_t allocate_pages; - - /* write a page to its backing block in the cache */ - int (*write_page)(struct fscache_storage *op, struct page *page); - - /* detach backing block from a page (optional) - * - must release the cookie lock before returning - * - may sleep - */ - void (*uncache_page)(struct fscache_object *object, - struct page *page); - - /* dissociate a cache from all the pages it was backing */ - void (*dissociate_pages)(struct fscache_cache *cache); + bool (*invalidate_cookie)(struct fscache_cookie *cookie); - /* Begin a read operation for the 
netfs lib */ - int (*begin_read_operation)(struct netfs_read_request *rreq, - struct fscache_retrieval *op); -}; - -extern struct fscache_cookie fscache_fsdef_index; + /* Begin an operation for the netfs lib */ + bool (*begin_operation)(struct netfs_cache_resources *cres, + enum fscache_want_state want_state); -/* - * Event list for fscache_object::{event_mask,events} - */ -enum { - FSCACHE_OBJECT_EV_NEW_CHILD, /* T if object has a new child */ - FSCACHE_OBJECT_EV_PARENT_READY, /* T if object's parent is ready */ - FSCACHE_OBJECT_EV_UPDATE, /* T if object should be updated */ - FSCACHE_OBJECT_EV_INVALIDATE, /* T if cache requested object invalidation */ - FSCACHE_OBJECT_EV_CLEARED, /* T if accessors all gone */ - FSCACHE_OBJECT_EV_ERROR, /* T if fatal error occurred during processing */ - FSCACHE_OBJECT_EV_KILL, /* T if netfs relinquished or cache withdrew object */ - NR_FSCACHE_OBJECT_EVENTS -}; - -#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1) - -/* - * States for object state machine. 
- */ -struct fscache_transition { - unsigned long events; - const struct fscache_state *transit_to; + /* Prepare to write to a live cache object */ + void (*prepare_to_write)(struct fscache_cookie *cookie); }; -struct fscache_state { - char name[24]; - char short_name[8]; - const struct fscache_state *(*work)(struct fscache_object *object, - int event); - const struct fscache_transition transitions[]; -}; +extern struct workqueue_struct *fscache_wq; +extern wait_queue_head_t fscache_clearance_waiters; /* - * on-disk cache file or index handle + * out-of-line cache backend functions */ -struct fscache_object { - const struct fscache_state *state; /* Object state machine state */ - const struct fscache_transition *oob_table; /* OOB state transition table */ - int debug_id; /* debugging ID */ - int n_children; /* number of child objects */ - int n_ops; /* number of extant ops on object */ - int n_obj_ops; /* number of object ops outstanding on object */ - int n_in_progress; /* number of ops in progress */ - int n_exclusive; /* number of exclusive ops queued or in progress */ - atomic_t n_reads; /* number of read ops in progress */ - spinlock_t lock; /* state and operations lock */ - - unsigned long lookup_jif; /* time at which lookup started */ - unsigned long oob_event_mask; /* OOB events this object is interested in */ - unsigned long event_mask; /* events this object is interested in */ - unsigned long events; /* events to be processed by this object - * (order is important - using fls) */ - - unsigned long flags; -#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ -#define FSCACHE_OBJECT_PENDING_WRITE 1 /* T if object has pending write */ -#define FSCACHE_OBJECT_WAITING 2 /* T if object is waiting on its parent */ -#define FSCACHE_OBJECT_IS_LIVE 3 /* T if object is not withdrawn or relinquished */ -#define FSCACHE_OBJECT_IS_LOOKED_UP 4 /* T if object has been looked up */ -#define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active 
*/ -#define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ -#define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ -#define FSCACHE_OBJECT_RUN_AFTER_DEAD 8 /* T if object has been dispatched after death */ - - struct list_head cache_link; /* link in cache->object_list */ - struct hlist_node cookie_link; /* link in cookie->backing_objects */ - struct fscache_cache *cache; /* cache that supplied this object */ - struct fscache_cookie *cookie; /* netfs's file/index object */ - struct fscache_object *parent; /* parent object */ - struct work_struct work; /* attention scheduling record */ - struct list_head dependents; /* FIFO of dependent objects */ - struct list_head dep_link; /* link in parent's dependents list */ - struct list_head pending_ops; /* unstarted operations on this object */ - pgoff_t store_limit; /* current storage limit */ - loff_t store_limit_l; /* current storage limit */ -}; - -extern void fscache_object_init(struct fscache_object *, struct fscache_cookie *, - struct fscache_cache *); -extern void fscache_object_destroy(struct fscache_object *); - -extern void fscache_object_lookup_negative(struct fscache_object *object); -extern void fscache_obtained_object(struct fscache_object *object); - -static inline bool fscache_object_is_live(struct fscache_object *object) -{ - return test_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); -} - -static inline bool fscache_object_is_dying(struct fscache_object *object) -{ - return !fscache_object_is_live(object); -} - -static inline bool fscache_object_is_available(struct fscache_object *object) -{ - return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); -} +extern struct rw_semaphore fscache_addremove_sem; +extern struct fscache_cache *fscache_acquire_cache(const char *name); +extern void fscache_relinquish_cache(struct fscache_cache *cache); +extern int fscache_add_cache(struct fscache_cache *cache, + const struct fscache_cache_ops *ops, + void 
*cache_priv); +extern void fscache_withdraw_cache(struct fscache_cache *cache); +extern void fscache_withdraw_volume(struct fscache_volume *volume); +extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); -static inline bool fscache_cache_is_broken(struct fscache_object *object) -{ - return test_bit(FSCACHE_IOERROR, &object->cache->flags); -} +extern void fscache_io_error(struct fscache_cache *cache); -static inline bool fscache_object_is_active(struct fscache_object *object) -{ - return fscache_object_is_available(object) && - fscache_object_is_live(object) && - !fscache_cache_is_broken(object); -} +extern void fscache_end_volume_access(struct fscache_volume *volume, + struct fscache_cookie *cookie, + enum fscache_access_trace why); + +extern struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie, + enum fscache_cookie_trace where); +extern void fscache_put_cookie(struct fscache_cookie *cookie, + enum fscache_cookie_trace where); +extern void fscache_end_cookie_access(struct fscache_cookie *cookie, + enum fscache_access_trace why); +extern void fscache_cookie_lookup_negative(struct fscache_cookie *cookie); +extern void fscache_resume_after_invalidation(struct fscache_cookie *cookie); +extern void fscache_caching_failed(struct fscache_cookie *cookie); +extern bool fscache_wait_for_operation(struct netfs_cache_resources *cred, + enum fscache_want_state state); /** - * fscache_object_destroyed - Note destruction of an object in a cache - * @cache: The cache from which the object came + * fscache_cookie_state - Read the state of a cookie + * @cookie: The cookie to query * - * Note the destruction and deallocation of an object record in a cache. + * Get the state of a cookie, imposing an ordering between the cookie contents + * and the state value. Paired with fscache_set_cookie_state(). 
*/ -static inline void fscache_object_destroyed(struct fscache_cache *cache) +static inline +enum fscache_cookie_state fscache_cookie_state(struct fscache_cookie *cookie) { - if (atomic_dec_and_test(&cache->object_count)) - wake_up_all(&fscache_cache_cleared_wq); + return smp_load_acquire(&cookie->state); } /** - * fscache_object_lookup_error - Note an object encountered an error - * @object: The object on which the error was encountered + * fscache_get_key - Get a pointer to the cookie key + * @cookie: The cookie to query * - * Note that an object encountered a fatal error (usually an I/O error) and - * that it should be withdrawn as soon as possible. + * Return a pointer to where a cookie's key is stored. */ -static inline void fscache_object_lookup_error(struct fscache_object *object) +static inline void *fscache_get_key(struct fscache_cookie *cookie) { - set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events); + if (cookie->key_len <= sizeof(cookie->inline_key)) + return cookie->inline_key; + else + return cookie->key; } -/** - * fscache_set_store_limit - Set the maximum size to be stored in an object - * @object: The object to set the maximum on - * @i_size: The limit to set in bytes - * - * Set the maximum size an object is permitted to reach, implying the highest - * byte that may be written. Intended to be called by the attr_changed() op. - * - * See Documentation/filesystems/caching/backend-api.rst for a complete - * description.
- */ -static inline -void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) +static inline struct fscache_cookie *fscache_cres_cookie(struct netfs_cache_resources *cres) { - object->store_limit_l = i_size; - object->store_limit = i_size >> PAGE_SHIFT; - if (i_size & ~PAGE_MASK) - object->store_limit++; + return cres->cache_priv; } /** - * fscache_end_io - End a retrieval operation on a page - * @op: The FS-Cache operation covering the retrieval - * @page: The page that was to be fetched - * @error: The error code (0 if successful) + * fscache_count_object - Tell fscache that an object has been added + * @cache: The cache to account to * - * Note the end of an operation to retrieve a page, as covered by a particular - * operation record. + * Tell fscache that an object has been added to the cache. This prevents the + * cache from tearing down the cache structure until the object is uncounted. */ -static inline void fscache_end_io(struct fscache_retrieval *op, - struct page *page, int error) +static inline void fscache_count_object(struct fscache_cache *cache) { - op->end_io_func(page, op->context, error); -} - -static inline void __fscache_use_cookie(struct fscache_cookie *cookie) -{ - atomic_inc(&cookie->n_active); + atomic_inc(&cache->object_count); } /** - * fscache_use_cookie - Request usage of cookie attached to an object - * @object: Object description - * - * Request usage of the cookie attached to an object. NULL is returned if the - * relinquishment had reduced the cookie usage count to 0. + * fscache_uncount_object - Tell fscache that an object has been removed + * @cache: The cache to account to + * + * Tell fscache that an object has been removed from the cache and will no + * longer be accessed. After this point, the cache cookie may be destroyed. 
*/ -static inline bool fscache_use_cookie(struct fscache_object *object) -{ - struct fscache_cookie *cookie = object->cookie; - return atomic_inc_not_zero(&cookie->n_active) != 0; -} - -static inline bool __fscache_unuse_cookie(struct fscache_cookie *cookie) -{ - return atomic_dec_and_test(&cookie->n_active); -} - -static inline void __fscache_wake_unused_cookie(struct fscache_cookie *cookie) +static inline void fscache_uncount_object(struct fscache_cache *cache) { - wake_up_var(&cookie->n_active); + if (atomic_dec_and_test(&cache->object_count)) + wake_up_all(&fscache_clearance_waiters); } /** - * fscache_unuse_cookie - Cease usage of cookie attached to an object - * @object: Object description - * - * Cease usage of the cookie attached to an object. When the users count - * reaches zero then the cookie relinquishment will be permitted to proceed. - */ -static inline void fscache_unuse_cookie(struct fscache_object *object) -{ - struct fscache_cookie *cookie = object->cookie; - if (__fscache_unuse_cookie(cookie)) - __fscache_wake_unused_cookie(cookie); -} - -/* - * out-of-line cache backend functions - */ -extern __printf(3, 4) -void fscache_init_cache(struct fscache_cache *cache, - const struct fscache_cache_ops *ops, - const char *idfmt, ...); - -extern int fscache_add_cache(struct fscache_cache *cache, - struct fscache_object *fsdef, - const char *tagname); -extern void fscache_withdraw_cache(struct fscache_cache *cache); - -extern void fscache_io_error(struct fscache_cache *cache); - -extern void fscache_mark_page_cached(struct fscache_retrieval *op, - struct page *page); - -extern void fscache_mark_pages_cached(struct fscache_retrieval *op, - struct pagevec *pagevec); - -extern bool fscache_object_sleep_till_congested(signed long *timeoutp); - -extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, - const void *data, - uint16_t datalen, - loff_t object_size); - -extern void fscache_object_retrying_stale(struct fscache_object *object); - 
-enum fscache_why_object_killed { - FSCACHE_OBJECT_IS_STALE, - FSCACHE_OBJECT_NO_SPACE, - FSCACHE_OBJECT_WAS_RETIRED, - FSCACHE_OBJECT_WAS_CULLED, -}; -extern void fscache_object_mark_killed(struct fscache_object *object, - enum fscache_why_object_killed why); + * fscache_wait_for_objects - Wait for all objects to be withdrawn + * @cache: The cache to query + * + * Wait for all extant objects in a cache to finish being withdrawn + * and go away. + */ +static inline void fscache_wait_for_objects(struct fscache_cache *cache) +{ + wait_event(fscache_clearance_waiters, + atomic_read(&cache->object_count) == 0); +} + +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_read; +extern atomic_t fscache_n_write; +extern atomic_t fscache_n_no_write_space; +extern atomic_t fscache_n_no_create_space; +extern atomic_t fscache_n_culled; +#define fscache_count_read() atomic_inc(&fscache_n_read) +#define fscache_count_write() atomic_inc(&fscache_n_write) +#define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space) +#define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space) +#define fscache_count_culled() atomic_inc(&fscache_n_culled) +#else +#define fscache_count_read() do {} while(0) +#define fscache_count_write() do {} while(0) +#define fscache_count_no_write_space() do {} while(0) +#define fscache_count_no_create_space() do {} while(0) +#define fscache_count_culled() do {} while(0) +#endif #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index a4dab5998613..ede50406bcb0 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* General filesystem caching interface * - * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * NOTE!!!
See: @@ -15,146 +15,126 @@ #define _LINUX_FSCACHE_H #include <linux/fs.h> -#include <linux/list.h> -#include <linux/pagemap.h> -#include <linux/pagevec.h> -#include <linux/list_bl.h> #include <linux/netfs.h> +#include <linux/writeback.h> #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) +#define __fscache_available (1) #define fscache_available() (1) +#define fscache_volume_valid(volume) (volume) #define fscache_cookie_valid(cookie) (cookie) +#define fscache_resources_valid(cres) ((cres)->cache_priv) +#define fscache_cookie_enabled(cookie) (cookie && !test_bit(FSCACHE_COOKIE_DISABLED, &cookie->flags)) #else +#define __fscache_available (0) #define fscache_available() (0) +#define fscache_volume_valid(volume) (0) #define fscache_cookie_valid(cookie) (0) +#define fscache_resources_valid(cres) (false) +#define fscache_cookie_enabled(cookie) (0) #endif +struct fscache_cookie; -/* pattern used to fill dead space in an index entry */ -#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 +#define FSCACHE_ADV_SINGLE_CHUNK 0x01 /* The object is a single chunk of data */ +#define FSCACHE_ADV_WRITE_CACHE 0x00 /* Do cache if written to locally */ +#define FSCACHE_ADV_WRITE_NOCACHE 0x02 /* Don't cache if written to locally */ -struct pagevec; -struct fscache_cache_tag; -struct fscache_cookie; -struct fscache_netfs; -struct netfs_read_request; - -typedef void (*fscache_rw_complete_t)(struct page *page, - void *context, - int error); - -/* result of index entry consultation */ -enum fscache_checkaux { - FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ - FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ - FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ +#define FSCACHE_INVAL_DIO_WRITE 0x01 /* Invalidate due to DIO write */ + +enum fscache_want_state { + FSCACHE_WANT_PARAMS, + FSCACHE_WANT_WRITE, + FSCACHE_WANT_READ, }; /* - * fscache cookie definition - */ -struct fscache_cookie_def { - /* name of cookie type */ - char name[16]; - - /* cookie type */ - uint8_t 
type; -#define FSCACHE_COOKIE_TYPE_INDEX 0 -#define FSCACHE_COOKIE_TYPE_DATAFILE 1 - - /* select the cache into which to insert an entry in this index - * - optional - * - should return a cache identifier or NULL to cause the cache to be - * inherited from the parent if possible or the first cache picked - * for a non-index file if not - */ - struct fscache_cache_tag *(*select_cache)( - const void *parent_netfs_data, - const void *cookie_netfs_data); - - /* consult the netfs about the state of an object - * - this function can be absent if the index carries no state data - * - the netfs data from the cookie being used as the target is - * presented, as is the auxiliary data and the object size - */ - enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, - const void *data, - uint16_t datalen, - loff_t object_size); - - /* get an extra reference on a read context - * - this function can be absent if the completion function doesn't - * require a context - */ - void (*get_context)(void *cookie_netfs_data, void *context); - - /* release an extra reference on a read context - * - this function can be absent if the completion function doesn't - * require a context - */ - void (*put_context)(void *cookie_netfs_data, void *context); - - /* indicate page that now have cache metadata retained - * - this function should mark the specified page as now being cached - * - the page will have been marked with PG_fscache before this is - * called, so this is optional - */ - void (*mark_page_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct page *page); -}; + * Data object state. 
+ */ +enum fscache_cookie_state { + FSCACHE_COOKIE_STATE_QUIESCENT, /* The cookie is uncached */ + FSCACHE_COOKIE_STATE_LOOKING_UP, /* The cache object is being looked up */ + FSCACHE_COOKIE_STATE_CREATING, /* The cache object is being created */ + FSCACHE_COOKIE_STATE_ACTIVE, /* The cache is active, readable and writable */ + FSCACHE_COOKIE_STATE_INVALIDATING, /* The cache is being invalidated */ + FSCACHE_COOKIE_STATE_FAILED, /* The cache failed, withdraw to clear */ + FSCACHE_COOKIE_STATE_LRU_DISCARDING, /* The cookie is being discarded by the LRU */ + FSCACHE_COOKIE_STATE_WITHDRAWING, /* The cookie is being withdrawn */ + FSCACHE_COOKIE_STATE_RELINQUISHING, /* The cookie is being relinquished */ + FSCACHE_COOKIE_STATE_DROPPED, /* The cookie has been dropped */ +#define FSCACHE_COOKIE_STATE__NR (FSCACHE_COOKIE_STATE_DROPPED + 1) +} __attribute__((mode(byte))); /* - * fscache cached network filesystem type - * - name, version and ops must be filled in before registration - * - all other fields will be set during registration + * Volume representation cookie. */ -struct fscache_netfs { - uint32_t version; /* indexing version */ - const char *name; /* filesystem name */ - struct fscache_cookie *primary_index; +struct fscache_volume { + refcount_t ref; + atomic_t n_cookies; /* Number of data cookies in volume */ + atomic_t n_accesses; /* Number of cache accesses in progress */ + unsigned int debug_id; + unsigned int key_hash; /* Hash of key string */ + char *key; /* Volume ID, eg.
"afs@example.com@1234" */ + struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ + struct hlist_bl_node hash_link; /* Link in hash table */ + struct work_struct work; + struct fscache_cache *cache; /* The cache in which this resides */ + void *cache_priv; /* Cache private data */ + spinlock_t lock; + unsigned long flags; +#define FSCACHE_VOLUME_RELINQUISHED 0 /* Volume is being cleaned up */ +#define FSCACHE_VOLUME_INVALIDATE 1 /* Volume was invalidated */ +#define FSCACHE_VOLUME_COLLIDED_WITH 2 /* Volume was collided with */ +#define FSCACHE_VOLUME_ACQUIRE_PENDING 3 /* Volume is waiting to complete acquisition */ +#define FSCACHE_VOLUME_CREATING 4 /* Volume is being created on disk */ + u8 coherency_len; /* Length of the coherency data */ + u8 coherency[]; /* Coherency data */ }; /* - * data file or index object cookie + * Data file representation cookie. * - a file will only appear in one cache * - a request to cache a file may or may not be honoured, subject to * constraints such as disk space * - indices are created on disk just-in-time */ struct fscache_cookie { - refcount_t ref; /* number of users of this cookie */ - atomic_t n_children; /* number of children of this cookie */ - atomic_t n_active; /* number of active users of netfs ptrs */ + refcount_t ref; + atomic_t n_active; /* number of active users of cookie */ + atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; + unsigned int inval_counter; /* Number of invalidations made */ spinlock_t lock; - spinlock_t stores_lock; /* lock on page store tree */ - struct hlist_head backing_objects; /* object(s) backing this file/index */ - const struct fscache_cookie_def *def; /* definition */ - struct fscache_cookie *parent; /* parent of this entry */ + struct fscache_volume *volume; /* Parent volume of this file.
*/ + void *cache_priv; /* Cache-side representation */ struct hlist_bl_node hash_link; /* Link in hash table */ struct list_head proc_link; /* Link in proc list */ - void *netfs_data; /* back pointer to netfs */ - struct radix_tree_root stores; /* pages to be stored on this cookie */ -#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ -#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */ - + struct list_head commit_link; /* Link in commit queue */ + struct work_struct work; /* Commit/relinq/withdraw work */ + loff_t object_size; /* Size of the netfs object */ + unsigned long unused_at; /* Time at which unused (jiffies) */ unsigned long flags; -#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ -#define FSCACHE_COOKIE_NO_DATA_YET 1 /* T if new object with no cached data yet */ -#define FSCACHE_COOKIE_UNAVAILABLE 2 /* T if cookie is unavailable (error, etc) */ -#define FSCACHE_COOKIE_INVALIDATING 3 /* T if cookie is being invalidated */ -#define FSCACHE_COOKIE_RELINQUISHED 4 /* T if cookie has been relinquished */ -#define FSCACHE_COOKIE_ENABLED 5 /* T if cookie is enabled */ -#define FSCACHE_COOKIE_ENABLEMENT_LOCK 6 /* T if cookie is being en/disabled */ -#define FSCACHE_COOKIE_AUX_UPDATED 8 /* T if the auxiliary data was updated */ -#define FSCACHE_COOKIE_ACQUIRED 9 /* T if cookie is in use */ -#define FSCACHE_COOKIE_RELINQUISHING 10 /* T if cookie is being relinquished */ - - u8 type; /* Type of object */ +#define FSCACHE_COOKIE_RELINQUISHED 0 /* T if cookie has been relinquished */ +#define FSCACHE_COOKIE_RETIRED 1 /* T if this cookie has retired on relinq */ +#define FSCACHE_COOKIE_IS_CACHING 2 /* T if this cookie is cached */ +#define FSCACHE_COOKIE_NO_DATA_TO_READ 3 /* T if this cookie has nothing to read */ +#define FSCACHE_COOKIE_NEEDS_UPDATE 4 /* T if attrs have been updated */ +#define FSCACHE_COOKIE_HAS_BEEN_CACHED 5 /* T if cookie needs withdraw-on-relinq */ +#define 
FSCACHE_COOKIE_DISABLED 6 /* T if cookie has been disabled */ +#define FSCACHE_COOKIE_LOCAL_WRITE 7 /* T if cookie has been modified locally */ +#define FSCACHE_COOKIE_NO_ACCESS_WAKE 8 /* T if no wake when n_accesses goes 0 */ +#define FSCACHE_COOKIE_DO_RELINQUISH 9 /* T if this cookie needs relinquishment */ +#define FSCACHE_COOKIE_DO_WITHDRAW 10 /* T if this cookie needs withdrawing */ +#define FSCACHE_COOKIE_DO_LRU_DISCARD 11 /* T if this cookie needs LRU discard */ +#define FSCACHE_COOKIE_DO_PREP_TO_WRITE 12 /* T if cookie needs write preparation */ +#define FSCACHE_COOKIE_HAVE_DATA 13 /* T if this cookie has data stored */ +#define FSCACHE_COOKIE_IS_HASHED 14 /* T if this cookie is hashed */ + + enum fscache_cookie_state state; + u8 advice; /* FSCACHE_ADV_* */ u8 key_len; /* Length of index key */ u8 aux_len; /* Length of auxiliary data */ - u32 key_hash; /* Hash of parent, type, key, len */ + u32 key_hash; /* Hash of volume, key, len */ union { void *key; /* Index key */ u8 inline_key[16]; /* - If the key is short enough */ @@ -165,11 +145,6 @@ struct fscache_cookie { }; }; -static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) -{ - return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); -} - /* * slow-path functions for when there is actually caching available, and the * netfs does actually have a valid token @@ -177,699 +152,523 @@ static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) * - these are undefined symbols when FS-Cache is not configured and the * optimiser takes care of not using them */ -extern int __fscache_register_netfs(struct fscache_netfs *); -extern void __fscache_unregister_netfs(struct fscache_netfs *); -extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *); -extern void __fscache_release_cache_tag(struct fscache_cache_tag *); +extern struct fscache_volume *__fscache_acquire_volume(const char *, const char *, + const void *, size_t); +extern void 
__fscache_relinquish_volume(struct fscache_volume *, const void *, bool); extern struct fscache_cookie *__fscache_acquire_cookie( - struct fscache_cookie *, - const struct fscache_cookie_def *, + struct fscache_volume *, + u8, const void *, size_t, const void *, size_t, - void *, loff_t, bool); -extern void __fscache_relinquish_cookie(struct fscache_cookie *, const void *, bool); -extern int __fscache_check_consistency(struct fscache_cookie *, const void *); -extern void __fscache_update_cookie(struct fscache_cookie *, const void *); -extern int __fscache_attr_changed(struct fscache_cookie *); -extern void __fscache_invalidate(struct fscache_cookie *); -extern void __fscache_wait_on_invalidate(struct fscache_cookie *); - -#ifdef FSCACHE_USE_NEW_IO_API -extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *); -#else -extern int __fscache_read_or_alloc_page(struct fscache_cookie *, - struct page *, - fscache_rw_complete_t, - void *, - gfp_t); -extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, - struct address_space *, - struct list_head *, - unsigned *, - fscache_rw_complete_t, - void *, - gfp_t); -extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); -extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t); -extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); -extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); -extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); -extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, - gfp_t); -extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, - struct inode *); -extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages); -#endif /* FSCACHE_USE_NEW_IO_API */ - -extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); -extern void 
__fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, - bool (*)(void *), void *); + loff_t); +extern void __fscache_use_cookie(struct fscache_cookie *, bool); +extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *); +extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); +extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t); +extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, unsigned int); +extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); +extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); + +extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, + loff_t, size_t, loff_t, netfs_io_terminated_t, void *, + bool); +extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); /** - * fscache_register_netfs - Register a filesystem as desiring caching services - * @netfs: The description of the filesystem + * fscache_acquire_volume - Register a volume as desiring caching services + * @volume_key: An identification string for the volume + * @cache_name: The name of the cache to use (or NULL for the default) + * @coherency_data: Piece of arbitrary coherency data to check (or NULL) + * @coherency_len: The size of the coherency data * - * Register a filesystem as desiring caching services if they're available. + * Register a volume as desiring caching services if they're available. The + * caller must provide an identifier for the volume and may also indicate which + * cache it should be in. If a preexisting volume entry is found in the cache, + * the coherency data must match otherwise the entry will be invalidated. * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. 
+ * Returns a cookie pointer on success, -ENOMEM if out of memory or -EBUSY if a + * cache volume of that name is already acquired. Note that "NULL" is a valid + * cookie pointer and can be returned if caching is refused. */ static inline -int fscache_register_netfs(struct fscache_netfs *netfs) +struct fscache_volume *fscache_acquire_volume(const char *volume_key, + const char *cache_name, + const void *coherency_data, + size_t coherency_len) { - if (fscache_available()) - return __fscache_register_netfs(netfs); - else - return 0; + if (!fscache_available()) + return NULL; + return __fscache_acquire_volume(volume_key, cache_name, + coherency_data, coherency_len); } /** - * fscache_unregister_netfs - Indicate that a filesystem no longer desires - * caching services - * @netfs: The description of the filesystem + * fscache_relinquish_volume - Cease caching a volume + * @volume: The volume cookie + * @coherency_data: Piece of arbitrary coherency data to set (or NULL) + * @invalidate: True if the volume should be invalidated * - * Indicate that a filesystem no longer desires caching services for the - * moment. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. + * Indicate that a filesystem no longer desires caching services for a volume. + * The caller must have relinquished all file cookies prior to calling this. + * The stored coherency data is updated. 
*/ static inline -void fscache_unregister_netfs(struct fscache_netfs *netfs) +void fscache_relinquish_volume(struct fscache_volume *volume, + const void *coherency_data, + bool invalidate) { - if (fscache_available()) - __fscache_unregister_netfs(netfs); + if (fscache_volume_valid(volume)) + __fscache_relinquish_volume(volume, coherency_data, invalidate); } /** - * fscache_lookup_cache_tag - Look up a cache tag - * @name: The name of the tag to search for + * fscache_acquire_cookie - Acquire a cookie to represent a cache object + * @volume: The volume in which to locate/create this cookie + * @advice: Advice flags (FSCACHE_ADV_*) + * @index_key: The index key for this cookie + * @index_key_len: Size of the index key + * @aux_data: The auxiliary data for the cookie (may be NULL) + * @aux_data_len: Size of the auxiliary data buffer + * @object_size: The initial size of object * - * Acquire a specific cache referral tag that can be used to select a specific - * cache in which to cache an index. + * Acquire a cookie to represent a data file within the given cache volume. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline -struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) +struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, + u8 advice, + const void *index_key, + size_t index_key_len, + const void *aux_data, + size_t aux_data_len, + loff_t object_size) { - if (fscache_available()) - return __fscache_lookup_cache_tag(name); - else + if (!fscache_volume_valid(volume)) return NULL; + return __fscache_acquire_cookie(volume, advice, + index_key, index_key_len, + aux_data, aux_data_len, + object_size); } /** - * fscache_release_cache_tag - Release a cache tag - * @tag: The tag to release - * - * Release a reference to a cache referral tag previously looked up.
+ * fscache_use_cookie - Request usage of cookie attached to an object + * @cookie: The cookie representing the cache object + * @will_modify: If cache is expected to be modified locally * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. + * Request usage of the cookie attached to an object. The caller should tell + * the cache if the object's contents are about to be modified locally and then + * the cache can apply the policy that has been set to handle this case. */ -static inline -void fscache_release_cache_tag(struct fscache_cache_tag *tag) +static inline void fscache_use_cookie(struct fscache_cookie *cookie, + bool will_modify) { - if (fscache_available()) - __fscache_release_cache_tag(tag); + if (fscache_cookie_valid(cookie)) + __fscache_use_cookie(cookie, will_modify); } /** - * fscache_acquire_cookie - Acquire a cookie to represent a cache object - * @parent: The cookie that's to be the parent of this one - * @def: A description of the cache object, including callback operations - * @index_key: The index key for this cookie - * @index_key_len: Size of the index key - * @aux_data: The auxiliary data for the cookie (may be NULL) - * @aux_data_len: Size of the auxiliary data buffer - * @netfs_data: An arbitrary piece of data to be kept in the cookie to - * represent the cache object to the netfs - * @object_size: The initial size of object - * @enable: Whether or not to enable a data cookie immediately + * fscache_unuse_cookie - Cease usage of cookie attached to an object + * @cookie: The cookie representing the cache object + * @aux_data: Updated auxiliary data (or NULL) + * @object_size: Revised size of the object (or NULL) * - * This function is used to inform FS-Cache about part of an index hierarchy - * that can be used to locate files. This is done by requesting a cookie for - * each index in the path to the file. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. + * Cease usage of the cookie attached to an object.
When the users count + * reaches zero then the cookie relinquishment will be permitted to proceed. */ -static inline -struct fscache_cookie *fscache_acquire_cookie( - struct fscache_cookie *parent, - const struct fscache_cookie_def *def, - const void *index_key, - size_t index_key_len, - const void *aux_data, - size_t aux_data_len, - void *netfs_data, - loff_t object_size, - bool enable) +static inline void fscache_unuse_cookie(struct fscache_cookie *cookie, + const void *aux_data, + const loff_t *object_size) { - if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent)) - return __fscache_acquire_cookie(parent, def, - index_key, index_key_len, - aux_data, aux_data_len, - netfs_data, object_size, enable); - else - return NULL; + if (fscache_cookie_valid(cookie)) + __fscache_unuse_cookie(cookie, aux_data, object_size); } /** * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding * it * @cookie: The cookie being returned - * @aux_data: The updated auxiliary data for the cookie (may be NULL) * @retire: True if the cache object the cookie represents is to be discarded * * This function returns a cookie to the cache, forcibly discarding the - * associated cache object if retire is set to true. The opportunity is - * provided to update the auxiliary data in the cache before the object is - * disconnected. + * associated cache object if retire is set to true. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. 
*/ static inline -void fscache_relinquish_cookie(struct fscache_cookie *cookie, - const void *aux_data, - bool retire) +void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) { if (fscache_cookie_valid(cookie)) - __fscache_relinquish_cookie(cookie, aux_data, retire); + __fscache_relinquish_cookie(cookie, retire); } -/** - * fscache_check_consistency - Request validation of a cache's auxiliary data - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * - * Request an consistency check from fscache, which passes the request to the - * backing cache. The auxiliary data on the cookie will be updated first if - * @aux_data is set. - * - * Returns 0 if consistent and -ESTALE if inconsistent. May also - * return -ENOMEM and -ERESTARTSYS. +/* + * Find the auxiliary data on a cookie. */ -static inline -int fscache_check_consistency(struct fscache_cookie *cookie, - const void *aux_data) +static inline void *fscache_get_aux(struct fscache_cookie *cookie) { - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_check_consistency(cookie, aux_data); + if (cookie->aux_len <= sizeof(cookie->inline_aux)) + return cookie->inline_aux; else - return 0; + return cookie->aux; } -/** - * fscache_update_cookie - Request that a cache object be updated - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * - * Request an update of the index data for the cache object associated with the - * cookie. The auxiliary data on the cookie will be updated first if @aux_data - * is set. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. +/* + * Update the auxiliary data on a cookie. 
*/ static inline -void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data) +void fscache_update_aux(struct fscache_cookie *cookie, + const void *aux_data, const loff_t *object_size) { - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_update_cookie(cookie, aux_data); + void *p = fscache_get_aux(cookie); + + if (aux_data && p) + memcpy(p, aux_data, cookie->aux_len); + if (object_size) + cookie->object_size = *object_size; } -/** - * fscache_pin_cookie - Pin a data-storage cache object in its cache - * @cookie: The cookie representing the cache object - * - * Permit data-storage cache objects to be pinned in the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ +#ifdef CONFIG_FSCACHE_STATS +extern atomic_t fscache_n_updates; +#endif + static inline -int fscache_pin_cookie(struct fscache_cookie *cookie) +void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, + const loff_t *object_size) { - return -ENOBUFS; +#ifdef CONFIG_FSCACHE_STATS + atomic_inc(&fscache_n_updates); +#endif + fscache_update_aux(cookie, aux_data, object_size); + smp_wmb(); + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags); } /** - * fscache_pin_cookie - Unpin a data-storage cache object in its cache + * fscache_update_cookie - Request that a cache object be updated * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) + * @object_size: The current size of the object (may be NULL) * - * Permit data-storage cache objects to be unpinned from the cache. + * Request an update of the index data for the cache object associated with the + * cookie. The auxiliary data on the cookie will be updated first if @aux_data + * is set and the object size will be updated and the object possibly trimmed + * if @object_size is set. 
* * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline -void fscache_unpin_cookie(struct fscache_cookie *cookie) +void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, + const loff_t *object_size) { + if (fscache_cookie_enabled(cookie)) + __fscache_update_cookie(cookie, aux_data, object_size); } /** - * fscache_attr_changed - Notify cache that an object's attributes changed + * fscache_resize_cookie - Request that a cache object be resized * @cookie: The cookie representing the cache object + * @new_size: The new size of the object (may be NULL) * - * Send a notification to the cache indicating that an object's attributes have - * changed. This includes the data size. These attributes will be obtained - * through the get_attr() cookie definition op. + * Request that the size of an object be changed. * - * See Documentation/filesystems/caching/netfs-api.rst for a complete + * See Documentation/filesystems/caching/netfs-api.txt for a complete * description. */ static inline -int fscache_attr_changed(struct fscache_cookie *cookie) +void fscache_resize_cookie(struct fscache_cookie *cookie, loff_t new_size) { - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_attr_changed(cookie); - else - return -ENOBUFS; + if (fscache_cookie_enabled(cookie)) + __fscache_resize_cookie(cookie, new_size); } /** * fscache_invalidate - Notify cache that an object needs invalidation * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) + * @size: The revised size of the object. + * @flags: Invalidation flags (FSCACHE_INVAL_*) * * Notify the cache that an object is needs to be invalidated and that it - * should abort any retrievals or stores it is doing on the cache. The object - * is then marked non-caching until such time as the invalidation is complete. - * - * This can be called with spinlocks held. 
- * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_invalidate(struct fscache_cookie *cookie) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_invalidate(cookie); -} - -/** - * fscache_wait_on_invalidate - Wait for invalidation to complete - * @cookie: The cookie representing the cache object + * should abort any retrievals or stores it is doing on the cache. This + * increments inval_counter on the cookie which can be used by the caller to + * reconsider I/O requests as they complete. * - * Wait for the invalidation of an object to complete. + * If @flags has FSCACHE_INVAL_DIO_WRITE set, this indicates that this is due + * to a direct I/O write and will cause caching to be disabled on this cookie + * until it is completely unused. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline -void fscache_wait_on_invalidate(struct fscache_cookie *cookie) +void fscache_invalidate(struct fscache_cookie *cookie, + const void *aux_data, loff_t size, unsigned int flags) { - if (fscache_cookie_valid(cookie)) - __fscache_wait_on_invalidate(cookie); + if (fscache_cookie_enabled(cookie)) + __fscache_invalidate(cookie, aux_data, size, flags); } /** - * fscache_reserve_space - Reserve data space for a cached object - * @cookie: The cookie representing the cache object - * @i_size: The amount of space to be reserved - * - * Reserve an amount of space in the cache for the cache object attached to a - * cookie so that a write to that object within the space can always be - * honoured. + * fscache_operation_valid - Return true if operations resources are usable + * @cres: The resources to check. * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. + * Returns a pointer to the operations table if usable or NULL if not. 
*/ static inline -int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) +const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_resources *cres) { - return -ENOBUFS; + return fscache_resources_valid(cres) ? cres->ops : NULL; } -#ifdef FSCACHE_USE_NEW_IO_API - /** * fscache_begin_read_operation - Begin a read operation for the netfs lib - * @rreq: The read request being undertaken + * @cres: The cache resources for the read being performed * @cookie: The cookie representing the cache object * - * Begin a read operation on behalf of the netfs helper library. @rreq - * indicates the read request to which the operation state should be attached; - * @cookie indicates the cache object that will be accessed. + * Begin a read operation on behalf of the netfs helper library. @cres + * indicates the cache resources to which the operation state should be + * attached; @cookie indicates the cache object that will be accessed. * * This is intended to be called from the ->begin_cache_operation() netfs lib * operation as implemented by the network filesystem. * + * @cres->inval_counter is set from @cookie->inval_counter for comparison at + * the end of the operation. This allows invalidation during the operation to + * be detected by the caller. + * * Returns: * * 0 - Success * * -ENOBUFS - No caching available * * Other error code from the cache, such as -ENOMEM. 
*/ static inline -int fscache_begin_read_operation(struct netfs_read_request *rreq, +int fscache_begin_read_operation(struct netfs_cache_resources *cres, struct fscache_cookie *cookie) { - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_begin_read_operation(rreq, cookie); + if (fscache_cookie_enabled(cookie)) + return __fscache_begin_read_operation(cres, cookie); return -ENOBUFS; } -#else /* FSCACHE_USE_NEW_IO_API */ - -/** - * fscache_read_or_alloc_page - Read a page from the cache or allocate a block - * in which to store it - * @cookie: The cookie representing the cache object - * @page: The netfs page to fill if possible - * @end_io_func: The callback to invoke when and if the page is filled - * @context: An arbitrary piece of data to pass on to end_io_func() - * @gfp: The conditions under which memory allocation should be made - * - * Read a page from the cache, or if that's not possible make a potential - * one-block reservation in the cache into which the page may be stored once - * fetched from the server. - * - * If the page is not backed by the cache object, or if it there's some reason - * it can't be, -ENOBUFS will be returned and nothing more will be done for - * that page. - * - * Else, if that page is backed by the cache, a read will be initiated directly - * to the netfs's page and 0 will be returned by this function. The - * end_io_func() callback will be invoked when the operation terminates on a - * completion or failure. Note that the callback may be invoked before the - * return. - * - * Else, if the page is unbacked, -ENODATA is returned and a block may have - * been allocated in the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. 
- */ -static inline -int fscache_read_or_alloc_page(struct fscache_cookie *cookie, - struct page *page, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_read_or_alloc_page(cookie, page, end_io_func, - context, gfp); - else - return -ENOBUFS; -} - /** - * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate - * blocks in which to store them - * @cookie: The cookie representing the cache object - * @mapping: The netfs inode mapping to which the pages will be attached - * @pages: A list of potential netfs pages to be filled - * @nr_pages: Number of pages to be read and/or allocated - * @end_io_func: The callback to invoke when and if each page is filled - * @context: An arbitrary piece of data to pass on to end_io_func() - * @gfp: The conditions under which memory allocation should be made - * - * Read a set of pages from the cache, or if that's not possible, attempt to - * make a potential one-block reservation for each page in the cache into which - * that page may be stored once fetched from the server. - * - * If some pages are not backed by the cache object, or if it there's some - * reason they can't be, -ENOBUFS will be returned and nothing more will be - * done for that pages. - * - * Else, if some of the pages are backed by the cache, a read will be initiated - * directly to the netfs's page and 0 will be returned by this function. The - * end_io_func() callback will be invoked when the operation terminates on a - * completion or failure. Note that the callback may be invoked before the - * return. - * - * Else, if a page is unbacked, -ENODATA is returned and a block may have - * been allocated in the cache. - * - * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in - * regard to different pages, the return values are prioritised in that order. 
- * Any pages submitted for reading are removed from the pages list. + * fscache_read - Start a read from the cache. + * @cres: The cache resources to use + * @start_pos: The beginning file offset in the cache file + * @iter: The buffer to fill - and also the length + * @read_hole: How to handle a hole in the data. + * @term_func: The function to call upon completion + * @term_func_priv: The private data for @term_func * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_read_or_alloc_pages(cookie, mapping, pages, - nr_pages, end_io_func, - context, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_alloc_page - Allocate a block in which to store a page - * @cookie: The cookie representing the cache object - * @page: The netfs page to allocate a page for - * @gfp: The conditions under which memory allocation should be made + * Start a read from the cache. @cres indicates the cache object to read from + * and must be obtained by a call to fscache_begin_operation() beforehand. * - * Request Allocation a block in the cache in which to store a netfs page - * without retrieving any contents from the cache. + * The data is read into the iterator, @iter, and that also indicates the size + * of the operation. @start_pos is the start position in the file, though if + * @seek_data is set appropriately, the cache can use SEEK_DATA to find the + * next piece of data, writing zeros for the hole into the iterator. * - * If the page is not backed by a file then -ENOBUFS will be returned and - * nothing more will be done, and no reservation will be made. 
+ * Upon termination of the operation, @term_func will be called and supplied + * with @term_func_priv plus the amount of data written, if successful, or the + * error code otherwise. * - * Else, a block will be allocated if one wasn't already, and 0 will be - * returned + * @read_hole indicates how a partially populated region in the cache should be + * handled. It can be one of a number of settings: * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_alloc_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_alloc_page(cookie, page, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_readpages_cancel - Cancel read/alloc on pages - * @cookie: The cookie representing the inode's cache object. - * @pages: The netfs pages that we canceled write on in readpages() + * NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read). * - * Uncache/unreserve the pages reserved earlier in readpages() via - * fscache_readpages_or_alloc() and similar. In most successful caches in - * readpages() this doesn't do anything. In cases when the underlying netfs's - * readahead failed we need to clean up the pagelist (unmark and uncache). + * NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer + * skipped over, then do as for IGNORE. * - * This function may sleep as it may have to clean up disk state. + * NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole. 
*/ static inline -void fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages) +int fscache_read(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + enum netfs_read_from_hole read_hole, + netfs_io_terminated_t term_func, + void *term_func_priv) { - if (fscache_cookie_valid(cookie)) - __fscache_readpages_cancel(cookie, pages); + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); + return ops->read(cres, start_pos, iter, read_hole, + term_func, term_func_priv); } /** - * fscache_write_page - Request storage of a page in the cache + * fscache_begin_write_operation - Begin a write operation for the netfs lib + * @cres: The cache resources for the write being performed * @cookie: The cookie representing the cache object - * @page: The netfs page to store - * @object_size: Updated size of object - * @gfp: The conditions under which memory allocation should be made * - * Request the contents of the netfs page be written into the cache. This - * request may be ignored if no cache block is currently allocated, in which - * case it will return -ENOBUFS. + * Begin a write operation on behalf of the netfs helper library. @cres + * indicates the cache resources to which the operation state should be + * attached; @cookie indicates the cache object that will be accessed. * - * If a cache block was already allocated, a write will be initiated and 0 will - * be returned. The PG_fscache_write page bit is set immediately and will then - * be cleared at the completion of the write to indicate the success or failure - * of the operation. Note that the completion may happen before the return. + * @cres->inval_counter is set from @cookie->inval_counter for comparison at + * the end of the operation. This allows invalidation during the operation to + * be detected by the caller. * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. 
+ * Returns: + * * 0 - Success + * * -ENOBUFS - No caching available + * * Other error code from the cache, such as -ENOMEM. */ static inline -int fscache_write_page(struct fscache_cookie *cookie, - struct page *page, - loff_t object_size, - gfp_t gfp) +int fscache_begin_write_operation(struct netfs_cache_resources *cres, + struct fscache_cookie *cookie) { - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_write_page(cookie, page, object_size, gfp); - else - return -ENOBUFS; + if (fscache_cookie_enabled(cookie)) + return __fscache_begin_write_operation(cres, cookie); + return -ENOBUFS; } /** - * fscache_uncache_page - Indicate that caching is no longer required on a page - * @cookie: The cookie representing the cache object - * @page: The netfs page that was being cached. + * fscache_write - Start a write to the cache. + * @cres: The cache resources to use + * @start_pos: The beginning file offset in the cache file + * @iter: The data to write - and also the length + * @term_func: The function to call upon completion + * @term_func_priv: The private data for @term_func * - * Tell the cache that we no longer want a page to be cached and that it should - * remove any knowledge of the netfs page it may have. + * Start a write to the cache. @cres indicates the cache object to write to and + * must be obtained by a call to fscache_begin_operation() beforehand. * - * Note that this cannot cancel any outstanding I/O operations between this - * page and the cache. + * The data to be written is obtained from the iterator, @iter, and that also + * indicates the size of the operation. @start_pos is the start position in + * the file. * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. + * Upon termination of the operation, @term_func will be called and supplied + * with @term_func_priv plus the amount of data written, if successful, or the + * error code otherwise. 
*/ static inline -void fscache_uncache_page(struct fscache_cookie *cookie, - struct page *page) +int fscache_write(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) { - if (fscache_cookie_valid(cookie)) - __fscache_uncache_page(cookie, page); + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); + return ops->write(cres, start_pos, iter, term_func, term_func_priv); } /** - * fscache_check_page_write - Ask if a page is being writing to the cache + * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * - * Ask the cache if a page is being written to the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -bool fscache_check_page_write(struct fscache_cookie *cookie, - struct page *page) + * @mapping: The netfs inode to use as the source + * @start: The start position in @mapping + * @len: The amount of data to unlock + * @caching: If PG_fscache has been set + * + * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's + * waiting. + */ +static inline void fscache_clear_page_bits(struct fscache_cookie *cookie, + struct address_space *mapping, + loff_t start, size_t len, + bool caching) { - if (fscache_cookie_valid(cookie)) - return __fscache_check_page_write(cookie, page); - return false; + if (caching) + __fscache_clear_page_bits(mapping, start, len); } /** - * fscache_wait_on_page_write - Wait for a page to complete writing to the cache + * fscache_write_to_cache - Save a write to the cache and clear PG_fscache * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * - * Ask the cache to wake us up when a page is no longer being written to the - * cache. 
- * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_wait_on_page_write(struct fscache_cookie *cookie, - struct page *page) + * @mapping: The netfs inode to use as the source + * @start: The start position in @mapping + * @len: The amount of data to write back + * @i_size: The new size of the inode + * @term_func: The function to call upon completion + * @term_func_priv: The private data for @term_func + * @caching: If PG_fscache has been set + * + * Helper function for a netfs to write dirty data from an inode into the cache + * object that's backing it. + * + * @start and @len describe the range of the data. This does not need to be + * page-aligned, but to satisfy DIO requirements, the cache may expand it up to + * the page boundaries on either end. All the pages covering the range must be + * marked with PG_fscache. + * + * If given, @term_func will be called upon completion and supplied with + * @term_func_priv. Note that the PG_fscache flags will have been cleared by + * this point, so the netfs must retain its own pin on the mapping. + */ +static inline void fscache_write_to_cache(struct fscache_cookie *cookie, + struct address_space *mapping, + loff_t start, size_t len, loff_t i_size, + netfs_io_terminated_t term_func, + void *term_func_priv, + bool caching) { - if (fscache_cookie_valid(cookie)) - __fscache_wait_on_page_write(cookie, page); -} + if (caching) + __fscache_write_to_cache(cookie, mapping, start, len, i_size, + term_func, term_func_priv, caching); + else if (term_func) + term_func(term_func_priv, -ENOBUFS, false); -/** - * fscache_maybe_release_page - Consider releasing a page, cancelling a store - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * @gfp: The gfp flags passed to releasepage() - * - * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's - * releasepage() call. 
A storage request on the page may cancelled if it is - * not currently being processed. - * - * The function returns true if the page no longer has a storage request on it, - * and false if a storage request is left in place. If true is returned, the - * page will have been passed to fscache_uncache_page(). If false is returned - * the page cannot be freed yet. - */ -static inline -bool fscache_maybe_release_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && PageFsCache(page)) - return __fscache_maybe_release_page(cookie, page, gfp); - return true; } +#if __fscache_available +extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie); +#else +#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE))) +#endif + /** - * fscache_uncache_all_inode_pages - Uncache all an inode's pages - * @cookie: The cookie representing the inode's cache object. - * @inode: The inode to uncache pages from. - * - * Uncache all the pages in an inode that are marked PG_fscache, assuming them - * to be associated with the given cookie. + * fscache_unpin_writeback - Unpin writeback resources + * @wbc: The writeback control + * @cookie: The cookie referring to the cache object * - * This function may sleep. It will wait for pages that are being written out - * and will wait whilst the PG_fscache mark is removed by the cache. + * Unpin the writeback resources pinned by fscache_set_page_dirty(). This is + * intended to be called by the netfs's ->write_inode() method. 
*/ -static inline -void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, - struct inode *inode) +static inline void fscache_unpin_writeback(struct writeback_control *wbc, + struct fscache_cookie *cookie) { - if (fscache_cookie_valid(cookie)) - __fscache_uncache_all_inode_pages(cookie, inode); + if (wbc->unpinned_fscache_wb) + fscache_unuse_cookie(cookie, NULL, NULL); } -#endif /* FSCACHE_USE_NEW_IO_API */ - /** - * fscache_disable_cookie - Disable a cookie - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * @invalidate: Invalidate the backing object - * - * Disable a cookie from accepting further alloc, read, write, invalidate, - * update or acquire operations. Outstanding operations can still be waited - * upon and pages can still be uncached and the cookie relinquished. - * - * This will not return until all outstanding operations have completed. + * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode + * @cookie: The cookie referring to the cache object + * @inode: The inode to clean up + * @aux: Auxiliary data to apply to the inode * - * If @invalidate is set, then the backing object will be invalidated and - * detached, otherwise it will just be detached. - * - * If @aux_data is set, then auxiliary data will be updated from that. + * Clear any writeback resources held by an inode when the inode is evicted. + * This must be called before clear_inode() is called. 
*/ -static inline -void fscache_disable_cookie(struct fscache_cookie *cookie, - const void *aux_data, - bool invalidate) +static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, + struct inode *inode, + const void *aux) { - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_disable_cookie(cookie, aux_data, invalidate); + if (inode->i_state & I_PINNING_FSCACHE_WB) { + loff_t i_size = i_size_read(inode); + fscache_unuse_cookie(cookie, aux, &i_size); + } } /** - * fscache_enable_cookie - Reenable a cookie - * @cookie: The cookie representing the cache object - * @aux_data: The updated auxiliary data for the cookie (may be NULL) - * @object_size: Current size of object - * @can_enable: A function to permit enablement once lock is held - * @data: Data for can_enable() - * - * Reenable a previously disabled cookie, allowing it to accept further alloc, - * read, write, invalidate, update or acquire operations. An attempt will be - * made to immediately reattach the cookie to a backing object. If @aux_data - * is set, the auxiliary data attached to the cookie will be updated. + * fscache_note_page_release - Note that a netfs page got released + * @cookie: The cookie corresponding to the file * - * The can_enable() function is called (if not NULL) once the enablement lock - * is held to rule on whether enablement is still permitted to go ahead. + * Note that a page that has been copied to the cache has been released. This + * means that future reads will need to look in the cache to see if it's there. 
*/ static inline -void fscache_enable_cookie(struct fscache_cookie *cookie, - const void *aux_data, - loff_t object_size, - bool (*can_enable)(void *data), - void *data) +void fscache_note_page_release(struct fscache_cookie *cookie) { - if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie)) - __fscache_enable_cookie(cookie, aux_data, object_size, - can_enable, data); + if (cookie && + test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) && + test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) + clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); } #endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index e912ed9141d9..91ea9477e9bd 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -118,9 +118,6 @@ struct fscrypt_operations { */ bool (*empty_dir)(struct inode *inode); - /* The filesystem's maximum ciphertext filename length, in bytes */ - unsigned int max_namelen; - /* * Check whether the filesystem's inode numbers and UUID are stable, * meaning that they will never be changed even by offline operations diff --git a/include/linux/fsi-occ.h b/include/linux/fsi-occ.h index d4cdc2aa6e33..7ee3dbd7f4b3 100644 --- a/include/linux/fsi-occ.h +++ b/include/linux/fsi-occ.h @@ -19,6 +19,8 @@ struct device; #define OCC_RESP_CRIT_OCB 0xE3 #define OCC_RESP_CRIT_HW 0xE4 +#define OCC_MAX_RESP_WORDS 2048 + int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, void *response, size_t *resp_len); diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 30ece3ae6df7..7b6c42bfb660 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -91,13 +91,13 @@ struct fsl_mc_resource { /** * struct fsl_mc_device_irq - MC object device message-based interrupt - * @msi_desc: pointer to MSI descriptor allocated by fsl_mc_msi_alloc_descs() + * @virq: Linux virtual interrupt number * @mc_dev: MC object device that owns this interrupt * @dev_irq_index: device-relative IRQ 
index * @resource: MC generic resource associated with the interrupt */ struct fsl_mc_device_irq { - struct msi_desc *msi_desc; + unsigned int virq; struct fsl_mc_device *mc_dev; u8 dev_irq_index; struct fsl_mc_resource resource; @@ -620,6 +620,20 @@ int dpcon_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); +int fsl_mc_obj_open(struct fsl_mc_io *mc_io, + u32 cmd_flags, + int obj_id, + char *obj_type, + u16 *token); + +int fsl_mc_obj_close(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int fsl_mc_obj_reset(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + /** * struct dpcon_attr - Structure representing DPCON attributes * @id: DPCON object ID diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 12d3a7d308ab..3a2d7dc3c607 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -26,20 +26,20 @@ * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent. */ -static inline void fsnotify_name(struct inode *dir, __u32 mask, - struct inode *child, - const struct qstr *name, u32 cookie) +static inline int fsnotify_name(__u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *name, + u32 cookie) { if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) - return; + return 0; - fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie); + return fsnotify(mask, data, data_type, dir, name, NULL, cookie); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, __u32 mask) { - fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); + fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0); } static inline void fsnotify_inode(struct inode *inode, __u32 mask) @@ -86,7 +86,7 @@ notify_child: */ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { - fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE); + fsnotify_parent(dentry, 
mask, dentry, FSNOTIFY_EVENT_DENTRY); } static inline int fsnotify_file(struct file *file, __u32 mask) @@ -144,18 +144,23 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; + __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; - if (old_dir == new_dir) - old_dir_mask |= FS_DN_RENAME; - if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; + rename_mask |= FS_ISDIR; } - fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); - fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie); + /* Event with information about both old and new parent+name */ + fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, + old_dir, old_name, 0); + + fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, + old_dir, old_name, fs_cookie); + fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, + new_dir, new_name, fs_cookie); if (target) fsnotify_link_count(target); @@ -190,16 +195,22 @@ static inline void fsnotify_inoderemove(struct inode *inode) /* * fsnotify_create - 'name' was linked in + * + * Caller must make sure that dentry->d_name is stable. + * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate + * ->d_inode later */ -static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) +static inline void fsnotify_create(struct inode *dir, struct dentry *dentry) { - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); + audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_dirent(inode, dentry, FS_CREATE); + fsnotify_dirent(dir, dentry, FS_CREATE); } /* * fsnotify_link - new hardlink in 'inode' directory + * + * Caller must make sure that new_dentry->d_name is stable. 
* Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ @@ -209,7 +220,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); + fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, + dir, &new_dentry->d_name, 0); } /* @@ -227,12 +239,16 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) /* * fsnotify_mkdir - directory 'name' was created + * + * Caller must make sure that dentry->d_name is stable. + * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate + * ->d_inode later */ -static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) +static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) { - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); + audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); + fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); } /* @@ -326,4 +342,17 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) fsnotify_dentry(dentry, mask); } +static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, + int error) +{ + struct fs_error_report report = { + .error = error, + .inode = inode, + .sb = sb, + }; + + return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, + NULL, NULL, NULL, 0); +} + #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1ce66748a2d2..790c31844db5 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -19,6 +19,7 @@ #include <linux/atomic.h> #include <linux/user_namespace.h> #include <linux/refcount.h> +#include <linux/mempool.h> /* * IN_* from inotfy.h lines up 
EXACTLY with FS_*, this is so we can easily @@ -42,6 +43,12 @@ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FS_ERROR 0x00008000 /* Filesystem Error (fanotify) */ + +/* + * FS_IN_IGNORED overloads FS_ERROR. It is only used internally by inotify + * which does not support FS_ERROR. + */ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ @@ -56,7 +63,7 @@ */ #define FS_EVENT_ON_CHILD 0x08000000 -#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_RENAME 0x10000000 /* File was renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_IN_ONESHOT 0x80000000 /* only send event once */ @@ -69,7 +76,7 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) @@ -94,8 +101,9 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ - FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) + FS_DELETE_SELF | FS_MOVE_SELF | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ + FS_ERROR) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ @@ -136,6 +144,7 @@ struct mem_cgroup; * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to. + * Either @inode or @dir must be non-NULL. 
* @file_name: optional file name associated with event * @cookie: inotify rename cookie * @@ -155,7 +164,7 @@ struct fsnotify_ops { const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); - void (*free_event)(struct fsnotify_event *event); + void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event); /* called on final put+free to free memory */ void (*free_mark)(struct fsnotify_mark *mark); }; @@ -238,6 +247,7 @@ struct fsnotify_group { int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; + mempool_t error_events_pool; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; @@ -248,6 +258,14 @@ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, + FSNOTIFY_EVENT_DENTRY, + FSNOTIFY_EVENT_ERROR, +}; + +struct fs_error_report { + int error; + struct inode *inode; + struct super_block *sb; }; static inline struct inode *fsnotify_data_inode(const void *data, int data_type) @@ -255,8 +273,25 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) switch (data_type) { case FSNOTIFY_EVENT_INODE: return (struct inode *)data; + case FSNOTIFY_EVENT_DENTRY: + return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); + case FSNOTIFY_EVENT_ERROR: + return ((struct fs_error_report *)data)->inode; + default: + return NULL; + } +} + +static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_DENTRY: + /* Non const is needed for dget() */ + return (struct dentry *)data; + case FSNOTIFY_EVENT_PATH: + return ((const struct path *)data)->dentry; default: return NULL; } @@ -273,58 +308,98 @@ static inline const struct path *fsnotify_data_path(const void *data, } } +static inline struct super_block 
*fsnotify_data_sb(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_INODE: + return ((struct inode *)data)->i_sb; + case FSNOTIFY_EVENT_DENTRY: + return ((struct dentry *)data)->d_sb; + case FSNOTIFY_EVENT_PATH: + return ((const struct path *)data)->dentry->d_sb; + case FSNOTIFY_EVENT_ERROR: + return ((struct fs_error_report *) data)->sb; + default: + return NULL; + } +} + +static inline struct fs_error_report *fsnotify_data_error_report( + const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_ERROR: + return (struct fs_error_report *) data; + default: + return NULL; + } +} + +/* + * Index to merged marks iterator array that correlates to a type of watch. + * The type of watched object can be deduced from the iterator type, but not + * the other way around, because an event can match different watched objects + * of the same object type. + * For example, both parent and child are watching an object of type inode. + */ +enum fsnotify_iter_type { + FSNOTIFY_ITER_TYPE_INODE, + FSNOTIFY_ITER_TYPE_VFSMOUNT, + FSNOTIFY_ITER_TYPE_SB, + FSNOTIFY_ITER_TYPE_PARENT, + FSNOTIFY_ITER_TYPE_INODE2, + FSNOTIFY_ITER_TYPE_COUNT +}; + +/* The type of object that a mark is attached to */ enum fsnotify_obj_type { + FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; -#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) -#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) -#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) -#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) - -static inline bool fsnotify_valid_obj_type(unsigned int type) +static inline bool fsnotify_valid_obj_type(unsigned int obj_type) { - return (type < 
FSNOTIFY_OBJ_TYPE_COUNT); + return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT); } struct fsnotify_iter_info { - struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT]; + struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( - struct fsnotify_iter_info *iter_info, int type) + struct fsnotify_iter_info *iter_info, int iter_type) { - return (iter_info->report_mask & (1U << type)); + return (iter_info->report_mask & (1U << iter_type)); } static inline void fsnotify_iter_set_report_type( - struct fsnotify_iter_info *iter_info, int type) + struct fsnotify_iter_info *iter_info, int iter_type) { - iter_info->report_mask |= (1U << type); + iter_info->report_mask |= (1U << iter_type); } static inline void fsnotify_iter_set_report_type_mark( - struct fsnotify_iter_info *iter_info, int type, + struct fsnotify_iter_info *iter_info, int iter_type, struct fsnotify_mark *mark) { - iter_info->marks[type] = mark; - iter_info->report_mask |= (1U << type); + iter_info->marks[iter_type] = mark; + iter_info->report_mask |= (1U << iter_type); } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ - return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \ - iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \ + return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? 
\ + iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \ } FSNOTIFY_ITER_FUNCS(inode, INODE) @@ -332,8 +407,8 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -#define fsnotify_foreach_obj_type(type) \ - for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) +#define fsnotify_foreach_iter_type(type) \ + for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached @@ -482,16 +557,30 @@ extern int fsnotify_fasync(int fd, struct file *file, int on); extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_add_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *), - void (*insert)(struct fsnotify_group *, - struct fsnotify_event *)); +extern int fsnotify_insert_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *), + void (*insert)(struct fsnotify_group *, + struct fsnotify_event *)); + +static inline int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *)) +{ + return fsnotify_insert_event(group, event, merge, NULL); +} + /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { - fsnotify_add_event(group, group->overflow_event, NULL, NULL); + fsnotify_add_event(group, group->overflow_event, NULL); +} + +static inline bool fsnotify_is_overflow_event(u32 mask) +{ + return mask & FS_Q_OVERFLOW; } static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) @@ -526,11 +615,11 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, __kernel_fsid_t *fsid); /* 
attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int type, + fsnotify_connp_t *connp, unsigned int obj_type, int allow_dups, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int type, int allow_dups, + unsigned int obj_type, int allow_dups, __kernel_fsid_t *fsid); /* attach the mark to the inode */ @@ -559,22 +648,23 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); -/* run all the marks in a group, and clear all of the marks attached to given object type */ -extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); +/* Clear all of the marks of a group attached to a given object type */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned int obj_type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); 
extern void fsnotify_put_mark(struct fsnotify_mark *mark); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 832e65f06754..9999e29187de 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -30,16 +30,26 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +#ifdef CONFIG_FUNCTION_TRACER +struct ftrace_ops; +struct ftrace_regs; /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that * does. Or at least does enough to prevent any unwelcome side effects. + * + * Also define the function prototype that these architectures use + * to call the ftrace_ops_list_func(). */ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); #else # define FTRACE_FORCE_LIST_FUNC 0 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); #endif +#endif /* CONFIG_FUNCTION_TRACER */ /* Main tracing buffer and events set up */ #ifdef CONFIG_TRACING @@ -88,8 +98,6 @@ extern int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -struct ftrace_ops; - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_regs { @@ -316,7 +324,12 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, unsigned long old_addr, unsigned long new_addr); unsigned long ftrace_find_rec_direct(unsigned long ip); +int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); + #else +struct ftrace_ops; # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) { @@ -346,6 +359,18 @@ static inline unsigned long ftrace_find_rec_direct(unsigned long ip) { return 0; } +static inline int 
register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} +static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} +static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS @@ -795,6 +820,15 @@ static inline bool is_ftrace_trampoline(unsigned long addr) } #endif /* CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef ftrace_graph_func +#define ftrace_graph_func ftrace_stub +#define FTRACE_OPS_GRAPH_STUB FTRACE_OPS_FL_STUB +#else +#define FTRACE_OPS_GRAPH_STUB 0 +#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9f4ad719bfe3..3a532ba66f6c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/list.h> +#include <linux/bits.h> #include <linux/err.h> struct fwnode_operations; diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index bfd00320c7f3..107613f7d792 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -38,8 +38,9 @@ #include <asm/page.h> #include <linux/bug.h> -#include <linux/kernel.h> #include <linux/log2.h> +#include <linux/math.h> +#include <linux/types.h> struct genradix_root; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0f5315c2b5a3..6906a45bc761 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -12,12 +12,10 @@ #include <linux/types.h> #include <linux/kdev_t.h> -#include <linux/rcupdate.h> -#include <linux/slab.h> -#include <linux/percpu-refcount.h> #include <linux/uuid.h> #include <linux/blk_types.h> -#include <asm/local.h> +#include <linux/device.h> 
+#include <linux/xarray.h> extern const struct device_type disk_type; extern struct device_type part_type; @@ -26,14 +24,6 @@ extern struct class block_class; #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 -#include <linux/major.h> -#include <linux/device.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/workqueue.h> -#include <linux/xarray.h> - #define PARTITION_META_INFO_VOLNAMELTH 64 /* * Enough for the string representation of any kind of UUID plus NULL. @@ -49,57 +39,24 @@ struct partition_meta_info { /** * DOC: genhd capability flags * - * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device - * gives access to removable media. - * When set, the device remains present even when media is not - * inserted. - * Must not be set for devices which are removed entirely when the + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the * media is removed. * - * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style - * device. - * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl. - * - * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include - * partition information in ``/proc/partitions`` or in the output of - * printk_all_partitions(). - * Used for the null block device and some MMC devices. - * - * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended - * dynamic ``dev_t``, i.e. it wants extended device numbers - * (``BLOCK_EXT_MAJOR``). - * This affects the maximum number of partitions. + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. Used for the underlying components of multipath devices. 
* - * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the - * partition table, the device's capacity has been extended to its - * native capacity; i.e. the device has hidden capacity used by one - * of the partitions (this is a flag used so that native capacity is - * only ever unlocked once). + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. * - * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is - * blocked whenever a writer holds an exclusive lock. - * - * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled. - * Used for loop devices in their default settings and some MMC - * devices. - * - * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it - * doesn't produce events, doesn't appear in sysfs, and doesn't have - * an associated ``bdev``. - * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and - * ``GENHD_FL_NO_PART_SCAN``. - * Used for multipath devices. 
*/ -#define GENHD_FL_REMOVABLE 0x0001 -/* 2 is unused (used to be GENHD_FL_DRIVERFS) */ -/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ -#define GENHD_FL_CD 0x0008 -#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 -#define GENHD_FL_EXT_DEVT 0x0040 -#define GENHD_FL_NATIVE_CAPACITY 0x0080 -#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100 -#define GENHD_FL_NO_PART_SCAN 0x0200 -#define GENHD_FL_HIDDEN 0x0400 +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ @@ -111,6 +68,8 @@ enum { DISK_EVENT_FLAG_POLL = 1 << 0, /* Forward events to udev */ DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, }; struct disk_events; @@ -125,13 +84,13 @@ struct blk_integrity { }; struct gendisk { - /* major, first_minor and minors are input parameters only, - * don't use directly. Use disk_devt() and disk_max_parts(). + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. */ - int major; /* major number of driver */ + int major; int first_minor; - int minors; /* maximum number of minors, =1 for - * disks that can't be partitioned. 
*/ + int minors; char disk_name[DISK_NAME_LEN]; /* name of major driver */ @@ -150,6 +109,7 @@ struct gendisk { #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 #define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -194,19 +154,6 @@ static inline bool disk_live(struct gendisk *disk) #define disk_to_cdi(disk) NULL #endif -static inline int disk_max_parts(struct gendisk *disk) -{ - if (disk->flags & GENHD_FL_EXT_DEVT) - return DISK_MAX_PARTS; - return disk->minors; -} - -static inline bool disk_part_scan_enabled(struct gendisk *disk) -{ - return disk_max_parts(disk) > 1 && - !(disk->flags & GENHD_FL_NO_PART_SCAN); -} - static inline dev_t disk_devt(struct gendisk *disk) { return MKDEV(disk->major, disk->first_minor); @@ -215,14 +162,16 @@ static inline dev_t disk_devt(struct gendisk *disk) void disk_uevent(struct gendisk *disk, enum kobject_action action); /* block/genhd.c */ -int device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups); -static inline int add_disk(struct gendisk *disk) +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int __must_check add_disk(struct gendisk *disk) { return device_add_disk(NULL, disk, NULL); } extern void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); + void set_disk_ro(struct gendisk *disk, bool read_only); static inline int get_disk_ro(struct gendisk *disk) @@ -231,6 +180,11 @@ static inline int get_disk_ro(struct gendisk *disk) test_bit(GD_READ_ONLY, &disk->state); } +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int 
mask); @@ -248,7 +202,12 @@ static inline sector_t get_start_sect(struct block_device *bdev) static inline sector_t bdev_nr_sectors(struct block_device *bdev) { - return i_size_read(bdev->bd_inode) >> 9; + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; } static inline sector_t get_capacity(struct gendisk *disk) @@ -256,6 +215,12 @@ static inline sector_t get_capacity(struct gendisk *disk) return bdev_nr_sectors(disk->part0); } +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void blk_drop_partitions(struct gendisk *disk); @@ -291,10 +256,6 @@ bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void set_capacity(struct gendisk *disk, sector_t size); -/* for drivers/char/raw.c: */ -int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); - #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 55b2ec1f965a..80f63c862be5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -302,7 +302,9 @@ struct vm_area_struct; * lowest zone as a type of emergency reserve. * * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit - * address. + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). 
* * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, * do not need to be directly accessible by the kernel but that cannot @@ -520,21 +522,21 @@ static inline void arch_free_page(struct page *page, int order) { } #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask); unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct list_head *page_list, struct page **page_array); +unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, + unsigned long nr_pages, + struct page **page_array); + /* Bulk allocate order-0 pages */ static inline unsigned long alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list) @@ -570,6 +572,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) return __alloc_pages(gfp_mask, order, nid, NULL); } +static inline +struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) +{ + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + + return __folio_alloc(gfp, order, nid, NULL); +} + /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. 
Otherwise node must be valid and @@ -586,31 +597,36 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); +struct folio *folio_alloc(gfp_t gfp, unsigned order); extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node, bool hugepage); + bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) + alloc_pages_vma(gfp_mask, order, vma, addr, true) #else static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ +static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) +{ + return __folio_alloc_node(gfp, order, numa_node_id()); +} +#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) + alloc_pages_vma(gfp_mask, 0, vma, addr, false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -void *alloc_pages_exact(size_t size, gfp_t gfp_mask); +void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); +__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 97a28ad3393b..3ad67b4a72be 100644 --- 
a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -690,7 +690,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -void devm_acpi_dev_remove_driver_gpios(struct device *dev); struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label); @@ -708,7 +707,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, { return -ENXIO; } -static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {} #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a0f9901dcae6..b0728c8ad90c 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -168,11 +168,18 @@ struct gpio_irq_chip { /** * @parent_handler_data: + * @parent_handler_data_array: * * Data associated, and passed to, the handler for the parent - * interrupt. + * interrupt. Can either be a single pointer if @per_parent_data + * is false, or an array of @num_parents pointers otherwise. If + * @per_parent_data is true, @parent_handler_data_array cannot be + * NULL. */ - void *parent_handler_data; + union { + void *parent_handler_data; + void **parent_handler_data_array; + }; /** * @num_parents: @@ -204,6 +211,14 @@ struct gpio_irq_chip { bool threaded; /** + * @per_parent_data: + * + * True if parent_handler_data_array describes a @num_parents + * sized array to be used as parent data. + */ + bool per_parent_data; + + /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when * a particular driver wants to clear IRQ related registers @@ -274,6 +289,7 @@ struct gpio_irq_chip { * number or the name of the SoC IP-block implementing it. 
* @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs + * @fwnode: optional fwnode providing this controller's properties * @owner: helps prevent removal of modules exporting active GPIOs * @request: optional hook for chip-specific activation, such as * enabling module power and clock; may sleep @@ -362,6 +378,7 @@ struct gpio_chip { const char *label; struct gpio_device *gpiodev; struct device *parent; + struct fwnode_handle *fwnode; struct module *owner; int (*request)(struct gpio_chip *gc, diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index d755e529c1e3..2647dd10b541 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -100,6 +100,7 @@ void gpiod_add_lookup_table(struct gpiod_lookup_table *table); void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); void gpiod_add_hogs(struct gpiod_hog *hogs); +void gpiod_remove_hogs(struct gpiod_hog *hogs); #else /* ! 
CONFIG_GPIOLIB */ static inline void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} @@ -108,6 +109,7 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {} static inline void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} static inline void gpiod_add_hogs(struct gpiod_hog *hogs) {} +static inline void gpiod_remove_hogs(struct gpiod_hog *hogs) {} #endif /* CONFIG_GPIOLIB */ #endif /* __LINUX_GPIO_MACHINE_H */ diff --git a/include/linux/hid.h b/include/linux/hid.h index 9e067f937dbc..7487b0586fe6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -241,6 +241,7 @@ struct hid_item { #define HID_DG_TOUCH 0x000d0033 #define HID_DG_UNTOUCH 0x000d0034 #define HID_DG_TAP 0x000d0035 +#define HID_DG_TRANSDUCER_INDEX 0x000d0038 #define HID_DG_TABLETFUNCTIONKEY 0x000d0039 #define HID_DG_PROGRAMCHANGEKEY 0x000d003a #define HID_DG_BATTERYSTRENGTH 0x000d003b @@ -253,6 +254,15 @@ struct hid_item { #define HID_DG_BARRELSWITCH 0x000d0044 #define HID_DG_ERASER 0x000d0045 #define HID_DG_TABLETPICK 0x000d0046 +#define HID_DG_PEN_COLOR 0x000d005c +#define HID_DG_PEN_LINE_WIDTH 0x000d005e +#define HID_DG_PEN_LINE_STYLE 0x000d0070 +#define HID_DG_PEN_LINE_STYLE_INK 0x000d0072 +#define HID_DG_PEN_LINE_STYLE_PENCIL 0x000d0073 +#define HID_DG_PEN_LINE_STYLE_HIGHLIGHTER 0x000d0074 +#define HID_DG_PEN_LINE_STYLE_CHISEL_MARKER 0x000d0075 +#define HID_DG_PEN_LINE_STYLE_BRUSH 0x000d0076 +#define HID_DG_PEN_LINE_STYLE_NO_PREFERENCE 0x000d0077 #define HID_CP_CONSUMERCONTROL 0x000c0001 #define HID_CP_NUMERICKEYPAD 0x000c0002 @@ -349,6 +359,8 @@ struct hid_item { /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL BIT(10) #define HID_QUIRK_INPUT_PER_APP BIT(11) +#define HID_QUIRK_X_INVERT BIT(12) +#define HID_QUIRK_Y_INVERT BIT(13) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) @@ 
-788,7 +800,7 @@ struct hid_driver { container_of(pdrv, struct hid_driver, driver) /** - * hid_ll_driver - low level driver callbacks + * struct hid_ll_driver - low level driver callbacks * @start: called on probe to start the device * @stop: called on remove * @open: called by input layer on open @@ -840,6 +852,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 @@ -889,7 +906,6 @@ extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *, int type, u8 *, u32, int); -int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); @@ -923,6 +939,16 @@ s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); +#ifdef CONFIG_PM +int hid_driver_suspend(struct hid_device *hdev, pm_message_t state); +int hid_driver_reset_resume(struct hid_device *hdev); +int hid_driver_resume(struct hid_device *hdev); +#else +static inline int hid_driver_suspend(struct hid_device *hdev, pm_message_t state) { return 0; } +static inline int hid_driver_reset_resume(struct hid_device *hdev) { return 0; } +static inline int hid_driver_resume(struct hid_device *hdev) { return 0; } +#endif + /** * hid_device_io_start - enable HID input during probe, remove * @@ -1000,6 +1026,10 @@ static inline void hid_map_usage(struct hid_input *hidinput, bmap = input->ledbit; limit = LED_MAX; break; + case EV_MSC: + bmap = input->mscbit; + limit = MSC_MAX; + break; } 
if (unlikely(c > limit || !bmap)) { @@ -1056,6 +1086,12 @@ int __must_check hid_hw_start(struct hid_device *hdev, void hid_hw_stop(struct hid_device *hdev); int __must_check hid_hw_open(struct hid_device *hdev); void hid_hw_close(struct hid_device *hdev); +void hid_hw_request(struct hid_device *hdev, + struct hid_report *report, int reqtype); +int hid_hw_raw_request(struct hid_device *hdev, + unsigned char reportnum, __u8 *buf, + size_t len, unsigned char rtype, int reqtype); +int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len); /** * hid_hw_power - requests underlying HW to go into given power mode @@ -1074,68 +1110,6 @@ static inline int hid_hw_power(struct hid_device *hdev, int level) /** - * hid_hw_request - send report request to device - * - * @hdev: hid device - * @report: report to send - * @reqtype: hid request type - */ -static inline void hid_hw_request(struct hid_device *hdev, - struct hid_report *report, int reqtype) -{ - if (hdev->ll_driver->request) - return hdev->ll_driver->request(hdev, report, reqtype); - - __hid_request(hdev, report, reqtype); -} - -/** - * hid_hw_raw_request - send report request to device - * - * @hdev: hid device - * @reportnum: report ID - * @buf: in/out data to transfer - * @len: length of buf - * @rtype: HID report type - * @reqtype: HID_REQ_GET_REPORT or HID_REQ_SET_REPORT - * - * Return: count of data transferred, negative if error - * - * Same behavior as hid_hw_request, but with raw buffers instead. 
- */ -static inline int hid_hw_raw_request(struct hid_device *hdev, - unsigned char reportnum, __u8 *buf, - size_t len, unsigned char rtype, int reqtype) -{ - if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf) - return -EINVAL; - - return hdev->ll_driver->raw_request(hdev, reportnum, buf, len, - rtype, reqtype); -} - -/** - * hid_hw_output_report - send output report to device - * - * @hdev: hid device - * @buf: raw data to transfer - * @len: length of buf - * - * Return: count of data transferred, negative if error - */ -static inline int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, - size_t len) -{ - if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf) - return -EINVAL; - - if (hdev->ll_driver->output_report) - return hdev->ll_driver->output_report(hdev, buf, len); - - return -ENOSYS; -} - -/** * hid_hw_idle - send idle request to device * * @hdev: hid device @@ -1153,7 +1127,7 @@ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle, } /** - * hid_may_wakeup - return if the hid device may act as a wakeup source during system-suspend + * hid_hw_may_wakeup - return if the hid device may act as a wakeup source during system-suspend * * @hdev: hid device */ diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 4aa1031d3e4c..0a0b2b09b1b8 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + struct page *page = folio_page(folio, offset / PAGE_SIZE); + return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); @@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void 
*kmap_local_folio(struct folio *folio, size_t offset) +{ + return page_address(&folio->page) + offset; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return kmap_local_page(page); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b4c49f9cc379..39bb9b47fa9c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -5,12 +5,11 @@ #include <linux/fs.h> #include <linux/kernel.h> #include <linux/bug.h> +#include <linux/cacheflush.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> -#include <asm/cacheflush.h> - #include "highmem-internal.h" /** @@ -97,6 +96,43 @@ static inline void kmap_flush_unused(void); static inline void *kmap_local_page(struct page *page); /** + * kmap_local_folio - Map a page in this folio for temporary usage + * @folio: The folio containing the page. + * @offset: The byte offset within the folio which identifies the page. + * + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation:: + * + * addr1 = kmap_local_folio(folio1, offset1); + * addr2 = kmap_local_folio(folio2, offset2); + * ... + * kunmap_local(addr2); + * kunmap_local(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() for the highmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. 
+ * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_folio() can rely on this side effect. + * + * Context: Can be invoked from any context. + * Return: The virtual address of @offset. + */ +static inline void *kmap_local_folio(struct folio *folio, size_t offset); + +/** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped * @@ -143,9 +179,9 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { - void *addr = kmap_atomic(page); + void *addr = kmap_local_page(page); clear_user_page(addr, vaddr, page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif @@ -177,9 +213,9 @@ alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, static inline void clear_highpage(struct page *page) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); clear_page(kaddr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE @@ -194,15 +230,15 @@ static inline void tag_clear_highpage(struct page *page) * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. 
*/ -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#ifdef CONFIG_HIGHMEM void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); -#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ +#else static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); @@ -213,11 +249,11 @@ static inline void zero_user_segments(struct page *page, if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); - kunmap_atomic(kaddr); + kunmap_local(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } -#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ +#endif static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) @@ -238,11 +274,11 @@ static inline void copy_user_highpage(struct page *to, struct page *from, { char *vfrom, *vto; - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); - kunmap_atomic(vto); - kunmap_atomic(vfrom); + kunmap_local(vto); + kunmap_local(vfrom); } #endif @@ -253,11 +289,11 @@ static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_page(vto, vfrom); - kunmap_atomic(vto); - kunmap_atomic(vfrom); + kunmap_local(vto); + kunmap_local(vfrom); } #endif @@ -327,4 +363,42 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * folio_zero_segments() - Zero two byte ranges in a folio. + * @folio: The folio to write to. + * @start1: The first byte to zero. + * @xend1: One more than the last byte in the first range. 
+ * @start2: The first byte to zero in the second range. + * @xend2: One more than the last byte in the second range. + */ +static inline void folio_zero_segments(struct folio *folio, + size_t start1, size_t xend1, size_t start2, size_t xend2) +{ + zero_user_segments(&folio->page, start1, xend1, start2, xend2); +} + +/** + * folio_zero_segment() - Zero a byte range in a folio. + * @folio: The folio to write to. + * @start: The first byte to zero. + * @xend: One more than the last byte to zero. + */ +static inline void folio_zero_segment(struct folio *folio, + size_t start, size_t xend) +{ + zero_user_segments(&folio->page, start, xend, 0, 0); +} + +/** + * folio_zero_range() - Zero a byte range in a folio. + * @folio: The folio to write to. + * @start: The first byte to zero. + * @length: The number of bytes to zero. + */ +static inline void folio_zero_range(struct folio *folio, + size_t start, size_t length) +{ + zero_user_segments(&folio->page, start, start + length, 0, 0); +} + #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 7bccf589aba7..e8dc5bc41f79 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -7,6 +7,8 @@ #define __LINUX_HOST1X_H #include <linux/device.h> +#include <linux/dma-direction.h> +#include <linux/spinlock.h> #include <linux/types.h> enum host1x_class { @@ -15,6 +17,8 @@ enum host1x_class { HOST1X_CLASS_GR2D_SB = 0x52, HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, + HOST1X_CLASS_NVDEC = 0xF0, + HOST1X_CLASS_NVDEC1 = 0xF5, }; struct host1x; @@ -24,6 +28,28 @@ struct iommu_group; u64 host1x_get_dma_mask(struct host1x *host1x); /** + * struct host1x_bo_cache - host1x buffer object cache + * @mappings: list of mappings + * @lock: synchronizes accesses to the list of mappings + */ +struct host1x_bo_cache { + struct list_head mappings; + struct mutex lock; +}; + +static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache) +{ + INIT_LIST_HEAD(&cache->mappings); + 
mutex_init(&cache->lock); +} + +static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache) +{ + /* XXX warn if not empty? */ + mutex_destroy(&cache->lock); +} + +/** * struct host1x_client_ops - host1x client operations * @early_init: host1x client early initialization code * @init: host1x client initialization code @@ -73,6 +99,8 @@ struct host1x_client { struct host1x_client *parent; unsigned int usecount; struct mutex lock; + + struct host1x_bo_cache cache; }; /* @@ -82,23 +110,48 @@ struct host1x_client { struct host1x_bo; struct sg_table; +struct host1x_bo_mapping { + struct kref ref; + struct dma_buf_attachment *attach; + enum dma_data_direction direction; + struct list_head list; + struct host1x_bo *bo; + struct sg_table *sgt; + unsigned int chunks; + struct device *dev; + dma_addr_t phys; + size_t size; + + struct host1x_bo_cache *cache; + struct list_head entry; +}; + +static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref) +{ + return container_of(ref, struct host1x_bo_mapping, ref); +} + struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); - struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys); - void (*unpin)(struct device *dev, struct sg_table *sgt); + struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir); + void (*unpin)(struct host1x_bo_mapping *map); void *(*mmap)(struct host1x_bo *bo); void (*munmap)(struct host1x_bo *bo, void *addr); }; struct host1x_bo { const struct host1x_bo_ops *ops; + struct list_head mappings; + spinlock_t lock; }; static inline void host1x_bo_init(struct host1x_bo *bo, const struct host1x_bo_ops *ops) { + INIT_LIST_HEAD(&bo->mappings); + spin_lock_init(&bo->lock); bo->ops = ops; } @@ -112,18 +165,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline struct sg_table *host1x_bo_pin(struct device *dev, - struct 
host1x_bo *bo, - dma_addr_t *phys) -{ - return bo->ops->pin(dev, bo, phys); -} - -static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo, - struct sg_table *sgt) -{ - bo->ops->unpin(dev, sgt); -} +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir, + struct host1x_bo_cache *cache); +void host1x_bo_unpin(struct host1x_bo_mapping *map); static inline void *host1x_bo_mmap(struct host1x_bo *bo) { @@ -181,6 +226,7 @@ struct host1x_job; struct host1x_channel *host1x_channel_request(struct host1x_client *client); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); +void host1x_channel_stop(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int host1x_job_submit(struct host1x_job *job); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f123e15d966e..e4c18ba8d3bf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -251,15 +251,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, } /** - * thp_head - Head page of a transparent huge page. - * @page: Any page (tail, head or regular) found in the page cache. - */ -static inline struct page *thp_head(struct page *page) -{ - return compound_head(page); -} - -/** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. */ @@ -283,6 +274,15 @@ static inline int thp_nr_pages(struct page *page) return 1; } +/** + * folio_test_pmd_mappable - Can we map this folio with a PMD? 
+ * @folio: The folio to test + */ +static inline bool folio_test_pmd_mappable(struct folio *folio) +{ + return folio_order(folio) >= HPAGE_PMD_ORDER; +} + struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, @@ -336,12 +336,6 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -static inline struct page *thp_head(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return page; -} - static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); @@ -354,6 +348,11 @@ static inline int thp_nr_pages(struct page *page) return 1; } +static inline bool folio_test_pmd_mappable(struct folio *folio) +{ + return false; +} + static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { return false; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1faebe1cd0ed..d1897a69c540 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -124,6 +124,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); +void clear_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, loff_t *); @@ -132,6 +133,10 @@ int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +int move_hugetlb_page_tables(struct vm_area_struct *vma, + struct vm_area_struct *new_vma, + unsigned long old_addr, unsigned long new_addr, + unsigned long len); 
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, @@ -143,9 +148,6 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page); -void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct page *ref_page); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo(void); @@ -218,6 +220,10 @@ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } +static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) +{ +} + static inline unsigned long hugetlb_total_pages(void) { return 0; @@ -265,6 +271,16 @@ static inline int copy_hugetlb_page_range(struct mm_struct *dst, return 0; } +static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, + struct vm_area_struct *new_vma, + unsigned long old_addr, + unsigned long new_addr, + unsigned long len) +{ + BUG(); + return 0; +} + static inline void hugetlb_report_meminfo(struct seq_file *m) { } @@ -385,13 +401,6 @@ static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, BUG(); } -static inline void __unmap_hugepage_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page) -{ - BUG(); -} - static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -468,8 +477,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, - 
struct ucounts **ucounts, int creat_flags, - int page_size_log); + int creat_flags, int page_size_log); static inline bool is_file_hugepages(struct file *file) { @@ -488,8 +496,7 @@ static inline struct hstate *hstate_inode(struct inode *i) #define is_file_hugepages(file) false static inline struct file * hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, - struct ucounts **ucounts, int creat_flags, - int page_size_log) + int creat_flags, int page_size_log) { return ERR_PTR(-ENOSYS); } @@ -596,6 +603,7 @@ struct hstate { int next_nid_to_alloc; int next_nid_to_free; unsigned int order; + unsigned int demote_order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; @@ -605,6 +613,7 @@ struct hstate { unsigned long nr_overcommit_huge_pages; struct list_head hugepage_activelist; struct list_head hugepage_freelists[MAX_NUMNODES]; + unsigned int max_huge_pages_node[MAX_NUMNODES]; unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; @@ -613,8 +622,8 @@ struct hstate { #endif #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ - struct cftype cgroup_files_dfl[7]; - struct cftype cgroup_files_legacy[9]; + struct cftype cgroup_files_dfl[8]; + struct cftype cgroup_files_legacy[10]; #endif char name[HSTATE_NAME_LEN]; }; @@ -637,8 +646,9 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct page *page); /* arch callback */ -int __init __alloc_bootmem_huge_page(struct hstate *h); -int __init alloc_bootmem_huge_page(struct hstate *h); +int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); +int __init alloc_bootmem_huge_page(struct hstate *h, int nid); +bool __init hugetlb_node_alloc_supported(void); void __init hugetlb_add_hstate(unsigned order); bool __init arch_hugetlb_valid_size(unsigned long size); diff --git a/include/linux/hugetlb_cgroup.h 
b/include/linux/hugetlb_cgroup.h index c137396129db..379344828e78 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -36,6 +36,11 @@ enum hugetlb_memory_event { HUGETLB_NR_MEMORY_EVENTS, }; +struct hugetlb_cgroup_per_node { + /* hugetlb usage in pages over all hstates. */ + unsigned long usage[HUGE_MAX_HSTATE]; +}; + struct hugetlb_cgroup { struct cgroup_subsys_state css; @@ -57,6 +62,8 @@ struct hugetlb_cgroup { /* Handle for "hugetlb.events.local" */ struct cgroup_file events_local_file[HUGE_MAX_HSTATE]; + + struct hugetlb_cgroup_per_node *nodeinfo[]; }; static inline struct hugetlb_cgroup * @@ -128,6 +135,13 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info( css_get(resv_map->css); } +static inline void resv_map_put_hugetlb_cgroup_uncharge_info( + struct resv_map *resv_map) +{ + if (resv_map->css) + css_put(resv_map->css); +} + extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, @@ -211,6 +225,11 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info( { } +static inline void resv_map_put_hugetlb_cgroup_uncharge_info( + struct resv_map *resv_map) +{ +} + static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 1e8d6ea8992e..fad1f1df26df 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -403,7 +403,7 @@ struct hwmon_ops { }; /** - * Channel information + * struct hwmon_channel_info - Channel information * @type: Channel type. * @config: Pointer to NULL-terminated list of channel parameters. * Use for per-channel attributes. @@ -422,7 +422,7 @@ struct hwmon_channel_info { }) /** - * Chip configuration + * struct hwmon_chip_info - Chip configuration * @ops: Pointer to hwmon operations. * @info: Null-terminated list of channel information. 
*/ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index ddc8713ce57b..f565a8938836 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -803,6 +803,12 @@ struct vmbus_device { #define VMBUS_DEFAULT_MAX_PKT_SIZE 4096 +struct vmbus_gpadl { + u32 gpadl_handle; + u32 size; + void *buffer; +}; + struct vmbus_channel { struct list_head listentry; @@ -822,7 +828,7 @@ struct vmbus_channel { bool rescind_ref; /* got rescind msg, got channel reference */ struct completion rescind_event; - u32 ringbuffer_gpadlhandle; + struct vmbus_gpadl ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ struct page *ringbuffer_page; @@ -1100,19 +1106,6 @@ void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, void (*chn_rescind_cb)(struct vmbus_channel *)); -/* - * Check if sub-channels have already been offerred. This API will be useful - * when the driver is unloaded after establishing sub-channels. In this case, - * when the driver is re-loaded, the driver would have to check if the - * subchannels have already been established before attempting to request - * the creation of sub-channels. - * This function returns TRUE to indicate that subchannels have already been - * created. - * This function should be invoked after setting the callback function for - * sub-channel creation. 
- */ -bool vmbus_are_subchannels_present(struct vmbus_channel *primary); - /* The format must be the same as struct vmdata_gpa_direct */ struct vmbus_channel_packet_page_buffer { u16 type; @@ -1192,10 +1185,10 @@ extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, extern int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, u32 size, - u32 *gpadl_handle); + struct vmbus_gpadl *gpadl); extern int vmbus_teardown_gpadl(struct vmbus_channel *channel, - u32 gpadl_handle); + struct vmbus_gpadl *gpadl); void vmbus_reset_channel_cb(struct vmbus_channel *channel); @@ -1268,6 +1261,7 @@ struct hv_device { struct vmbus_channel *channel; struct kset *channels_kset; + struct device_dma_parameters dma_parms; /* place holder to keep track of the dir for hv device in debugfs */ struct dentry *debug_dir; @@ -1590,6 +1584,11 @@ struct hyperv_service_callback { void (*callback)(void *context); }; +struct hv_dma_range { + dma_addr_t dma; + u32 mapping_size; +}; + #define MAX_SRV_VER 0x7ffffff extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen, const int *fw_version, int fw_vercnt, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 2ce3efbe9198..7d4f52ceb7b5 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -11,6 +11,7 @@ #define _LINUX_I2C_H #include <linux/acpi.h> /* for acpi_handle */ +#include <linux/bits.h> #include <linux/mod_devicetable.h> #include <linux/device.h> /* for struct device */ #include <linux/sched.h> /* for completion */ @@ -223,6 +224,15 @@ enum i2c_alert_protocol { }; /** + * enum i2c_driver_flags - Flags for an I2C device driver + * + * @I2C_DRV_ACPI_WAIVE_D0_PROBE: Don't put the device in D0 state for probe + */ +enum i2c_driver_flags { + I2C_DRV_ACPI_WAIVE_D0_PROBE = BIT(0), +}; + +/** * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding - soon to be deprecated @@ -236,6 
+246,7 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) + * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. @@ -294,6 +305,8 @@ struct i2c_driver { int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; + + u32 flags; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) @@ -1012,9 +1025,11 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); u32 i2c_acpi_find_bus_speed(struct device *dev); -struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, - struct i2c_board_info *info); +struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode, + int index, + struct i2c_board_info *info); struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle); +bool i2c_acpi_waive_d0_probe(struct device *dev); #else static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c) @@ -1029,8 +1044,9 @@ static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; } -static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, - int index, struct i2c_board_info *info) +static inline struct i2c_client *i2c_acpi_new_device_by_fwnode( + struct fwnode_handle *fwnode, int index, + struct i2c_board_info *info) { return ERR_PTR(-ENODEV); } @@ -1038,6 +1054,17 @@ static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle ha { return NULL; } +static inline bool i2c_acpi_waive_d0_probe(struct device *dev) +{ + return false; +} #endif /* CONFIG_ACPI */ +static 
inline struct i2c_client *i2c_acpi_new_device(struct device *dev, + int index, + struct i2c_board_info *info) +{ + return i2c_acpi_new_device_by_fwnode(dev_fwnode(dev), index, info); +} + #endif /* _LINUX_I2C_H */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 694264503119..559b6c644938 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1143,7 +1143,7 @@ struct ieee80211_mgmt { __le16 auth_transaction; __le16 status_code; /* possibly followed by Challenge text */ - u8 variable[0]; + u8 variable[]; } __packed auth; struct { __le16 reason_code; @@ -1152,26 +1152,26 @@ struct ieee80211_mgmt { __le16 capab_info; __le16 listen_interval; /* followed by SSID and Supported rates */ - u8 variable[0]; + u8 variable[]; } __packed assoc_req; struct { __le16 capab_info; __le16 status_code; __le16 aid; /* followed by Supported rates */ - u8 variable[0]; + u8 variable[]; } __packed assoc_resp, reassoc_resp; struct { __le16 capab_info; __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed s1g_assoc_resp, s1g_reassoc_resp; struct { __le16 capab_info; __le16 listen_interval; u8 current_ap[ETH_ALEN]; /* followed by SSID and Supported rates */ - u8 variable[0]; + u8 variable[]; } __packed reassoc_req; struct { __le16 reason_code; @@ -1182,11 +1182,11 @@ struct ieee80211_mgmt { __le16 capab_info; /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params, TIM */ - u8 variable[0]; + u8 variable[]; } __packed beacon; struct { /* only variable items: SSID, Supported rates */ - u8 variable[0]; + DECLARE_FLEX_ARRAY(u8, variable); } __packed probe_req; struct { __le64 timestamp; @@ -1194,7 +1194,7 @@ struct ieee80211_mgmt { __le16 capab_info; /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params */ - u8 variable[0]; + u8 variable[]; } __packed probe_resp; struct { u8 category; @@ -1203,16 +1203,16 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; u8 
status_code; - u8 variable[0]; + u8 variable[]; } __packed wme_action; struct{ u8 action_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch; struct{ u8 action_code; struct ieee80211_ext_chansw_ie data; - u8 variable[0]; + u8 variable[]; } __packed ext_chan_switch; struct{ u8 action_code; @@ -1228,7 +1228,7 @@ struct ieee80211_mgmt { __le16 timeout; __le16 start_seq_num; /* followed by BA Extension */ - u8 variable[0]; + u8 variable[]; } __packed addba_req; struct{ u8 action_code; @@ -1244,11 +1244,11 @@ struct ieee80211_mgmt { } __packed delba; struct { u8 action_code; - u8 variable[0]; + u8 variable[]; } __packed self_prot; struct{ u8 action_code; - u8 variable[0]; + u8 variable[]; } __packed mesh_action; struct { u8 action; @@ -1292,7 +1292,7 @@ struct ieee80211_mgmt { u8 toa[6]; __le16 tod_error; __le16 toa_error; - u8 variable[0]; + u8 variable[]; } __packed ftm; struct { u8 action_code; @@ -1988,6 +1988,44 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, int mcs, bool ext_nss_bw_capable, unsigned int max_vht_nss); +/** + * enum ieee80211_ap_reg_power - regulatory power for an Access Point + * + * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode + * @IEEE80211_REG_LPI_AP: Indoor Access Point + * @IEEE80211_REG_SP_AP: Standard power Access Point + * @IEEE80211_REG_VLP_AP: Very low power Access Point + * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal + * @IEEE80211_REG_AP_POWER_MAX: maximum value + */ +enum ieee80211_ap_reg_power { + IEEE80211_REG_UNSET_AP, + IEEE80211_REG_LPI_AP, + IEEE80211_REG_SP_AP, + IEEE80211_REG_VLP_AP, + IEEE80211_REG_AP_POWER_AFTER_LAST, + IEEE80211_REG_AP_POWER_MAX = + IEEE80211_REG_AP_POWER_AFTER_LAST - 1, +}; + +/** + * enum ieee80211_client_reg_power - regulatory power for a client + * + * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode + * @IEEE80211_REG_DEFAULT_CLIENT: Default Client + * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client + * 
@IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal + * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value + */ +enum ieee80211_client_reg_power { + IEEE80211_REG_UNSET_CLIENT, + IEEE80211_REG_DEFAULT_CLIENT, + IEEE80211_REG_SUBORDINATE_CLIENT, + IEEE80211_REG_CLIENT_POWER_AFTER_LAST, + IEEE80211_REG_CLIENT_POWER_MAX = + IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1, +}; + /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 @@ -2084,6 +2122,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_VHT_MAX_AMPDU_FACTOR 20 #define IEEE80211_HE_HT_MAX_AMPDU_FACTOR 16 +#define IEEE80211_HE_6GHZ_MAX_AMPDU_FACTOR 13 /* 802.11ax HE PHY capabilities */ #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G 0x02 @@ -2219,11 +2258,12 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US 0x00 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US 0x40 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US 0x80 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED 0xc0 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK 0xc0 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US 0x0 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US 0x1 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US 0x2 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED 0x3 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS 6 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK 0xc0 #define IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF 0x01 diff --git a/include/linux/if_eql.h b/include/linux/if_eql.h index d984694c384d..d75601d613cc 100644 --- a/include/linux/if_eql.h +++ b/include/linux/if_eql.h @@ -26,6 
+26,7 @@ typedef struct slave { struct list_head list; struct net_device *dev; + netdevice_tracker dev_tracker; long priority; long priority_bps; long priority_Bps; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 41a518336673..8420fe504927 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -162,6 +162,7 @@ struct netpoll; * @vlan_id: VLAN identifier * @flags: device flags * @real_dev: underlying netdevice + * @dev_tracker: refcount tracker for @real_dev reference * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry * @vlan_pcpu_stats: ptr to percpu rx stats @@ -177,6 +178,8 @@ struct vlan_dev_priv { u16 flags; struct net_device *real_dev; + netdevice_tracker dev_tracker; + unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index ff15c61bf319..6564bdcdac66 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -17,11 +17,6 @@ struct iio_dma_buffer_queue; struct iio_dma_buffer_ops; struct device; -struct iio_buffer_block { - u32 size; - u32 bytes_used; -}; - /** * enum iio_block_state - State of a struct iio_dma_buffer_block * @IIO_BLOCK_STATE_DEQUEUED: Block is not queued diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index b6928ac5c63d..418b1307d3f2 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -11,8 +11,15 @@ struct iio_buffer; +enum iio_buffer_direction { + IIO_BUFFER_DIRECTION_IN, + IIO_BUFFER_DIRECTION_OUT, +}; + int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data); +int iio_pop_from_buffer(struct iio_buffer *buffer, void *data); + /** * iio_push_to_buffers_with_timestamp() - push data and timestamp to buffers * @indio_dev: iio_dev structure for device. 
@@ -38,6 +45,10 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, return iio_push_to_buffers(indio_dev, data); } +int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev, + const void *data, size_t data_sz, + int64_t timestamp); + bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, const unsigned long *mask); diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index 245b32918ae1..e2ca8ea23e19 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -7,6 +7,7 @@ #ifdef CONFIG_IIO_BUFFER #include <uapi/linux/iio/buffer.h> +#include <linux/iio/buffer.h> struct iio_dev; struct iio_buffer; @@ -23,6 +24,10 @@ struct iio_buffer; * @read: try to get a specified number of bytes (must exist) * @data_available: indicates how much data is available for reading from * the buffer. + * @remove_from: remove scan from buffer. Drivers should calls this to + * remove a scan from a buffer. + * @write: try to write a number of bytes + * @space_available: returns the amount of bytes available in a buffer * @request_update: if a parameter change has been marked, update underlying * storage. * @set_bytes_per_datum:set number of bytes per datum @@ -49,6 +54,9 @@ struct iio_buffer_access_funcs { int (*store_to)(struct iio_buffer *buffer, const void *data); int (*read)(struct iio_buffer *buffer, size_t n, char __user *buf); size_t (*data_available)(struct iio_buffer *buffer); + int (*remove_from)(struct iio_buffer *buffer, void *data); + int (*write)(struct iio_buffer *buffer, size_t n, const char __user *buf); + size_t (*space_available)(struct iio_buffer *buffer); int (*request_update)(struct iio_buffer *buffer); @@ -80,6 +88,9 @@ struct iio_buffer { /** @bytes_per_datum: Size of individual datum including timestamp. */ size_t bytes_per_datum; + /* @direction: Direction of the data stream (in/out). 
*/ + enum iio_buffer_direction direction; + /** * @access: Buffer access functions associated with the * implementation. diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 8bdbaf3f3796..22f67845cdd3 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -220,7 +220,6 @@ struct st_sensor_settings { /** * struct st_sensor_data - ST sensor device status - * @dev: Pointer to instance of struct device (I2C or SPI). * @trig: The trigger in use by the core driver. * @mount_matrix: The mounting matrix of the sensor. * @sensor_settings: Pointer to the specific sensor settings in use. @@ -240,7 +239,6 @@ struct st_sensor_settings { * @buffer_data: Data used by buffer part. */ struct st_sensor_data { - struct device *dev; struct iio_trigger *trig; struct iio_mount_matrix mount_matrix; struct st_sensor_settings *sensor_settings; @@ -273,7 +271,6 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p); int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops); -void st_sensors_deallocate_trigger(struct iio_dev *indio_dev); int st_sensors_validate_device(struct iio_trigger *trig, struct iio_dev *indio_dev); #else @@ -282,10 +279,6 @@ static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev, { return 0; } -static inline void st_sensors_deallocate_trigger(struct iio_dev *indio_dev) -{ - return; -} #define st_sensors_validate_device NULL #endif @@ -298,8 +291,6 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable); int st_sensors_power_enable(struct iio_dev *indio_dev); -void st_sensors_power_disable(struct iio_dev *indio_dev); - int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev, unsigned reg, unsigned writeval, unsigned *readval); @@ -330,21 +321,17 @@ void st_sensors_dev_name_probe(struct device *dev, char *name, int len); /* Accelerometer */ const struct st_sensor_settings 
*st_accel_get_settings(const char *name); int st_accel_common_probe(struct iio_dev *indio_dev); -void st_accel_common_remove(struct iio_dev *indio_dev); /* Gyroscope */ const struct st_sensor_settings *st_gyro_get_settings(const char *name); int st_gyro_common_probe(struct iio_dev *indio_dev); -void st_gyro_common_remove(struct iio_dev *indio_dev); /* Magnetometer */ const struct st_sensor_settings *st_magn_get_settings(const char *name); int st_magn_common_probe(struct iio_dev *indio_dev); -void st_magn_common_remove(struct iio_dev *indio_dev); /* Pressure */ const struct st_sensor_settings *st_press_get_settings(const char *name); int st_press_common_probe(struct iio_dev *indio_dev); -void st_press_common_remove(struct iio_dev *indio_dev); #endif /* ST_SENSORS_H */ diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h index 36de60a5da7a..7a157ed218f6 100644 --- a/include/linux/iio/driver.h +++ b/include/linux/iio/driver.h @@ -8,6 +8,7 @@ #ifndef _IIO_INKERN_H_ #define _IIO_INKERN_H_ +struct device; struct iio_dev; struct iio_map; @@ -26,4 +27,17 @@ int iio_map_array_register(struct iio_dev *indio_dev, */ int iio_map_array_unregister(struct iio_dev *indio_dev); +/** + * devm_iio_map_array_register - device-managed version of iio_map_array_register + * @dev: Device object to which to bind the unwinding of this registration + * @indio_dev: Pointer to the iio_dev structure + * @maps: Pointer to an IIO map object which is to be registered to this IIO device + * + * This function will call iio_map_array_register() to register an IIO map object + * and will also hook a callback to the iio_map_array_unregister() function to + * handle de-registration of the IIO map object when the device's refcount goes to + * zero. 
+ */ +int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps); + #endif diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index c9504e9da571..2be12b7b5dc5 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -23,6 +23,8 @@ * @groupcounter: index of next attribute group * @legacy_scan_el_group: attribute group for legacy scan elements attribute group * @legacy_buffer_group: attribute group for legacy buffer attributes group + * @bounce_buffer: for devices that call iio_push_to_buffers_with_timestamp_unaligned() + * @bounce_buffer_size: size of currently allocate bounce buffer * @scan_index_timestamp: cache of the index to the timestamp * @clock_id: timestamping clock posix identifier * @chrdev: associated character device @@ -50,6 +52,8 @@ struct iio_dev_opaque { int groupcounter; struct attribute_group legacy_scan_el_group; struct attribute_group legacy_buffer_group; + void *bounce_buffer; + size_t bounce_buffer_size; unsigned int scan_index_timestamp; clockid_t clock_id; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 324561b7a5e8..07025d6b3de1 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -103,15 +103,16 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev, /** * IIO_ENUM_AVAILABLE() - Initialize enum available extended channel attribute * @_name: Attribute name ("_available" will be appended to the name) + * @_shared: Whether the attribute is shared between all channels * @_e: Pointer to an iio_enum struct * * Creates a read only attribute which lists all the available enum items in a * space separated list. 
This should usually be used together with IIO_ENUM() */ -#define IIO_ENUM_AVAILABLE(_name, _e) \ +#define IIO_ENUM_AVAILABLE(_name, _shared, _e) \ { \ .name = (_name "_available"), \ - .shared = IIO_SHARED_BY_TYPE, \ + .shared = _shared, \ .read = iio_enum_available_read, \ .private = (uintptr_t)(_e), \ } diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index cf49997d5903..7c02f5292eea 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -49,6 +49,7 @@ struct adis_timeout { * @status_error_mask: Bitmask of errors supported by the device * @timeouts: Chip specific delays * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable + * @unmasked_drdy: True for devices that cannot mask/unmask the data ready pin * @has_paging: True if ADIS device has paged registers * @burst_reg_cmd: Register command that triggers burst * @burst_len: Burst size in the SPI RX buffer. If @burst_max_len is defined, @@ -78,6 +79,7 @@ struct adis_data { unsigned int status_error_mask; int (*enable_irq)(struct adis *adis, bool enable); + bool unmasked_drdy; bool has_paging; diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 096f68dd2e0c..4c69b144677b 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -55,6 +55,7 @@ struct iio_trigger_ops { * @attached_own_device:[INTERN] if we are using our own device as trigger, * i.e. if we registered a poll function to the same * device as the one providing the trigger. + * @reenable_work: [INTERN] work item used to ensure reenable can sleep. 
**/ struct iio_trigger { const struct iio_trigger_ops *ops; @@ -74,6 +75,7 @@ struct iio_trigger { unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)]; struct mutex pool_lock; bool attached_own_device; + struct work_struct reenable_work; }; diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h index 7f154d1f8739..7490b05fc5b2 100644 --- a/include/linux/iio/triggered_buffer.h +++ b/include/linux/iio/triggered_buffer.h @@ -2,6 +2,7 @@ #ifndef _LINUX_IIO_TRIGGERED_BUFFER_H_ #define _LINUX_IIO_TRIGGERED_BUFFER_H_ +#include <linux/iio/buffer.h> #include <linux/interrupt.h> struct attribute; @@ -11,21 +12,27 @@ struct iio_buffer_setup_ops; int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, irqreturn_t (*h)(int irq, void *p), irqreturn_t (*thread)(int irq, void *p), + enum iio_buffer_direction direction, const struct iio_buffer_setup_ops *setup_ops, const struct attribute **buffer_attrs); void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev); #define iio_triggered_buffer_setup(indio_dev, h, thread, setup_ops) \ - iio_triggered_buffer_setup_ext((indio_dev), (h), (thread), (setup_ops), NULL) + iio_triggered_buffer_setup_ext((indio_dev), (h), (thread), \ + IIO_BUFFER_DIRECTION_IN, (setup_ops), \ + NULL) int devm_iio_triggered_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, irqreturn_t (*h)(int irq, void *p), irqreturn_t (*thread)(int irq, void *p), + enum iio_buffer_direction direction, const struct iio_buffer_setup_ops *ops, const struct attribute **buffer_attrs); #define devm_iio_triggered_buffer_setup(dev, indio_dev, h, thread, setup_ops) \ - devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread), (setup_ops), NULL) + devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread), \ + IIO_BUFFER_DIRECTION_IN, \ + (setup_ops), NULL) #endif diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 84b3f8175cc6..a7aa91f3a8dc 100644 --- 
a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -24,6 +24,7 @@ enum iio_event_info { #define IIO_VAL_INT_PLUS_NANO 3 #define IIO_VAL_INT_PLUS_MICRO_DB 4 #define IIO_VAL_INT_MULTIPLE 5 +#define IIO_VAL_INT_64 6 /* 64-bit data, val is lower 32 bits */ #define IIO_VAL_FRACTIONAL 10 #define IIO_VAL_FRACTIONAL_LOG2 11 #define IIO_VAL_CHAR 12 diff --git a/include/linux/ima.h b/include/linux/ima.h index b6ab66a546ae..426b1744215e 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -50,21 +50,6 @@ static inline void ima_appraise_parse_cmdline(void) {} extern void ima_add_kexec_buffer(struct kimage *image); #endif -#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT -extern bool arch_ima_get_secureboot(void); -extern const char * const *arch_get_ima_policy(void); -#else -static inline bool arch_ima_get_secureboot(void) -{ - return false; -} - -static inline const char * const *arch_get_ima_policy(void) -{ - return NULL; -} -#endif - #else static inline enum hash_algo ima_get_current_hash_algo(void) { @@ -155,6 +140,21 @@ static inline int ima_measure_critical_data(const char *event_label, #endif /* CONFIG_IMA */ +#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT +extern bool arch_ima_get_secureboot(void); +extern const char * const *arch_get_ima_policy(void); +#else +static inline bool arch_ima_get_secureboot(void) +{ + return false; +} + +static inline const char * const *arch_get_ima_policy(void) +{ + return NULL; +} +#endif + #ifndef CONFIG_IMA_KEXEC struct kimage; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a038feb63f23..674aeead6260 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -24,6 +24,8 @@ struct ipv4_devconf { struct in_device { struct net_device *dev; + netdevice_tracker dev_tracker; + refcount_t refcnt; int dead; struct in_ifaddr __rcu *ifa_list;/* IP ifaddr chain */ @@ -133,6 +135,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARP_ANNOUNCE(in_dev) 
IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) +#define IN_DEV_ARP_EVICT_NOCARRIER(in_dev) IN_DEV_ANDCONF((in_dev), \ + ARP_EVICT_NOCARRIER) struct in_ifaddr { struct hlist_node hash; diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h new file mode 100644 index 000000000000..cda1f706eaeb --- /dev/null +++ b/include/linux/instruction_pointer.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_INSTRUCTION_POINTER_H +#define _LINUX_INSTRUCTION_POINTER_H + +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + +#endif /* _LINUX_INSTRUCTION_POINTER_H */ diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index fa2cd8c63dcc..24359b4a9605 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -11,7 +11,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -50,7 +50,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_end\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 05a65eb155f7..69230fd695ea 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -517,9 +517,6 @@ struct context_entry { u64 hi; }; -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) - /* * When VT-d works in the scalable mode, it allows DMA translation to * happen through either first level 
or second level page table. This @@ -708,9 +705,15 @@ static inline bool dma_pte_superpage(struct dma_pte *pte) return (pte->val & DMA_PTE_LARGE_PAGE); } -static inline int first_pte_in_page(struct dma_pte *pte) +static inline bool first_pte_in_page(struct dma_pte *pte) +{ + return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE); +} + +static inline int nr_pte_to_next_page(struct dma_pte *pte) { - return !((unsigned long)pte & ~VTD_PAGE_MASK); + return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) : + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte; } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index aee8ff4739b1..f45f13304add 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -9,7 +9,7 @@ #define _INTEL_ISH_CLIENT_IF_H_ #include <linux/device.h> -#include <linux/uuid.h> +#include <linux/mod_devicetable.h> struct ishtp_cl_device; struct ishtp_device; @@ -40,7 +40,7 @@ enum cl_state { struct ishtp_cl_driver { struct device_driver driver; const char *name; - const guid_t *guid; + const struct ishtp_device_id *id; int (*probe)(struct ishtp_cl_device *dev); void (*remove)(struct ishtp_cl_device *dev); int (*reset)(struct ishtp_cl_device *dev); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 57cceecbe37f..1b73bab7eeff 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,12 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -/* Values for rxwp in fault_cb callback */ -#define SVM_REQ_READ (1<<3) -#define SVM_REQ_WRITE (1<<2) -#define SVM_REQ_EXEC (1<<1) -#define SVM_REQ_PRIV (1<<0) - /* Page Request Queue depth */ #define PRQ_ORDER 2 #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 93780834fc8f..9f4b6f5b822f 100644 --- a/include/linux/intel_rapl.h +++ 
b/include/linux/intel_rapl.h @@ -58,6 +58,12 @@ enum rapl_primitives { THROTTLED_TIME, PRIORITY_LEVEL, + PSYS_POWER_LIMIT1, + PSYS_POWER_LIMIT2, + PSYS_PL1_ENABLE, + PSYS_PL2_ENABLE, + PSYS_TIME_WINDOW1, + PSYS_TIME_WINDOW2, /* below are not raw primitive data */ AVERAGE_POWER, NR_RAPL_PRIMITIVES, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 1f22a30c0963..9367f1cb2e3c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -329,7 +329,46 @@ extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); -extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); +extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity); + +/** + * irq_update_affinity_hint - Update the affinity hint + * @irq: Interrupt to update + * @m: cpumask pointer (NULL to clear the hint) + * + * Updates the affinity hint, but does not change the affinity of the interrupt. + */ +static inline int +irq_update_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + return __irq_apply_affinity_hint(irq, m, false); +} + +/** + * irq_set_affinity_and_hint - Update the affinity hint and apply the provided + * cpumask to the interrupt + * @irq: Interrupt to update + * @m: cpumask pointer (NULL to clear the hint) + * + * Updates the affinity hint and if @m is not NULL it applies it as the + * affinity of that interrupt. + */ +static inline int +irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m) +{ + return __irq_apply_affinity_hint(irq, m, true); +} + +/* + * Deprecated. Use irq_update_affinity_hint() or irq_set_affinity_and_hint() + * instead. 
+ */ +static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + return irq_set_affinity_and_hint(irq, m); +} + extern int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity); @@ -361,6 +400,18 @@ static inline int irq_can_set_affinity(unsigned int irq) static inline int irq_select_affinity(unsigned int irq) { return 0; } +static inline int irq_update_affinity_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} + +static inline int irq_set_affinity_and_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} + static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index e9743cfd8585..66a774d2710e 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -132,13 +132,7 @@ io_mapping_init_wc(struct io_mapping *iomap, iomap->base = base; iomap->size = size; -#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */ - iomap->prot = pgprot_noncached_wc(PAGE_KERNEL); -#elif defined(pgprot_writecombine) iomap->prot = pgprot_writecombine(PAGE_KERNEL); -#else - iomap->prot = pgprot_noncached(PAGE_KERNEL); -#endif return iomap; } diff --git a/include/linux/io.h b/include/linux/io.h index 9595151d800d..5fc800390fe4 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -132,6 +132,8 @@ static inline int arch_phys_wc_index(int handle) #endif #endif +int devm_arch_phys_wc_add(struct device *dev, unsigned long base, unsigned long size); + enum { /* See memremap() kernel-doc for usage description... 
*/ MEMREMAP_WB = 1 << 0, @@ -166,4 +168,7 @@ static inline void arch_io_free_memtype_wc(resource_size_t base, } #endif +int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, + resource_size_t size); + #endif /* _LINUX_IO_H */ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 0a9dc40b7be8..14f7eaf1b443 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -99,55 +99,40 @@ struct io_cq { struct io_context { atomic_long_t refcount; atomic_t active_ref; - atomic_t nr_tasks; + unsigned short ioprio; + +#ifdef CONFIG_BLK_ICQ /* all the fields below are protected by this lock */ spinlock_t lock; - unsigned short ioprio; - struct radix_tree_root icq_tree; struct io_cq __rcu *icq_hint; struct hlist_head icq_list; struct work_struct release_work; +#endif /* CONFIG_BLK_ICQ */ }; -/** - * get_io_context_active - get active reference on ioc - * @ioc: ioc of interest - * - * Only iocs with active reference can issue new IOs. This function - * acquires an active reference on @ioc. The caller must already have an - * active reference on @ioc. 
- */ -static inline void get_io_context_active(struct io_context *ioc) -{ - WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0); - WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0); - atomic_long_inc(&ioc->refcount); - atomic_inc(&ioc->active_ref); -} - -static inline void ioc_task_link(struct io_context *ioc) -{ - get_io_context_active(ioc); - - WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0); - atomic_inc(&ioc->nr_tasks); -} - struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); -void put_io_context_active(struct io_context *ioc); void exit_io_context(struct task_struct *task); -struct io_context *get_task_io_context(struct task_struct *task, - gfp_t gfp_flags, int node); +int __copy_io(unsigned long clone_flags, struct task_struct *tsk); +static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +{ + if (!current->io_context) + return 0; + return __copy_io(clone_flags, tsk); +} #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } static inline void exit_io_context(struct task_struct *task) { } -#endif +static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +{ + return 0; +} +#endif /* CONFIG_BLOCK */ -#endif +#endif /* IOCONTEXT_H */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 24f8489583ca..b55bd49e55f5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -141,6 +141,11 @@ struct iomap_page_ops { #define IOMAP_NOWAIT (1 << 5) /* do not block */ #define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */ #define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */ +#ifdef CONFIG_FS_DAX +#define IOMAP_DAX (1 << 8) /* DAX mapping */ +#else +#define IOMAP_DAX 0 +#endif /* CONFIG_FS_DAX */ struct iomap_ops { /* @@ -225,6 +230,7 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); int iomap_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count); int 
iomap_releasepage(struct page *page, gfp_t gfp_mask); +void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); void iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len); #ifdef CONFIG_MIGRATION @@ -284,7 +290,7 @@ struct iomap_writeback_ops { * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ - void (*discard_page)(struct page *page, loff_t fileoff); + void (*discard_folio)(struct folio *folio, loff_t pos); }; struct iomap_writepage_ctx { @@ -313,8 +319,8 @@ int iomap_writepages(struct address_space *mapping, struct iomap_dio_ops { int (*end_io)(struct kiocb *iocb, ssize_t size, int error, unsigned flags); - blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio, - loff_t file_offset); + void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, + loff_t file_offset); }; /* @@ -330,14 +336,20 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1) +/* + * When a page fault occurs, return a partial synchronous result and allow + * the caller to retry the rest of the operation after dealing with the page + * fault. 
+ */ +#define IOMAP_DIO_PARTIAL (1 << 2) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags); + unsigned int dio_flags, size_t done_before); struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags); + unsigned int dio_flags, size_t done_before); ssize_t iomap_dio_complete(struct iomap_dio *dio); -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP struct file; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d2f3435e7d17..de0c57a567c8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -186,7 +186,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; - struct page *freelist; + struct list_head freelist; bool queued; }; @@ -399,6 +399,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { .start = ULONG_MAX, + .freelist = LIST_HEAD_INIT(gather->freelist), }; } diff --git a/include/linux/iova.h b/include/linux/iova.h index 71d8a2de6635..cea79cb9f26c 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -12,7 +12,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/rbtree.h> -#include <linux/atomic.h> #include <linux/dma-mapping.h> /* iova structure */ @@ -35,35 +34,6 @@ struct iova_rcache { struct iova_cpu_rcache __percpu *cpu_rcaches; }; -struct iova_domain; - -/* Call-Back from IOVA code into IOMMU drivers */ -typedef void (* iova_flush_cb)(struct iova_domain *domain); - -/* Destructor for per-entry data */ -typedef void (* iova_entry_dtor)(unsigned long data); - -/* Number of entries per Flush Queue */ -#define IOVA_FQ_SIZE 256 - -/* Timeout (in ms) after which entries are flushed from the Flush-Queue */ -#define IOVA_FQ_TIMEOUT 10 - -/* Flush Queue entry for defered flushing 
*/ -struct iova_fq_entry { - unsigned long iova_pfn; - unsigned long pages; - unsigned long data; - u64 counter; /* Flush counter when this entrie was added */ -}; - -/* Per-CPU Flush Queue structure */ -struct iova_fq { - struct iova_fq_entry entries[IOVA_FQ_SIZE]; - unsigned head, tail; - spinlock_t lock; -}; - /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ @@ -74,27 +44,9 @@ struct iova_domain { unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; unsigned long max32_alloc_size; /* Size of last failed allocation */ - struct iova_fq __percpu *fq; /* Flush Queue */ - - atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that - have been started */ - - atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that - have been finished */ - struct iova anchor; /* rbtree lookup anchor */ - struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ - - iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU - TLBs */ - iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for - iova entry */ - - struct timer_list fq_timer; /* Timer to regularily empty the - flush-queues */ - atomic_t fq_timer_on; /* 1 when timer is active, 0 - when not */ + struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ struct hlist_node cpuhp_dead; }; @@ -144,17 +96,12 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, bool size_aligned); void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); -void queue_iova(struct iova_domain *iovad, - unsigned long pfn, unsigned long pages, - unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void init_iova_domain(struct 
iova_domain *iovad, unsigned long granule, unsigned long start_pfn); -int init_iova_flush_queue(struct iova_domain *iovad, - iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); #else @@ -189,12 +136,6 @@ static inline void free_iova_fast(struct iova_domain *iovad, { } -static inline void queue_iova(struct iova_domain *iovad, - unsigned long pfn, unsigned long pages, - unsigned long data) -{ -} - static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, @@ -216,13 +157,6 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } -static inline int init_iova_flush_queue(struct iova_domain *iovad, - iova_flush_cb flush_cb, - iova_entry_dtor entry_dtor) -{ - return -ENODEV; -} - static inline struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 05e22770af51..b75395ec8d52 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + if (ns) { + if (refcount_inc_not_zero(&ns->ns.count)) + return ns; + } + + return NULL; +} + extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, @@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + return ns; +} + static inline void put_ipc_ns(struct ipc_namespace *ns) { } diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 52850a02a3d0..163831a087ef 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -335,4 +335,7 @@ 
extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data); #define GET_DEVICE_ID_MAX_RETRY 5 +/* Helper function for computing the IPMB checksum of some data. */ +unsigned char ipmb_checksum(unsigned char *data, int size); + #endif /* __LINUX_IPMI_H */ diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index deec18b8944a..9277d21c2690 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -39,6 +39,59 @@ struct ipmi_smi; #define IPMI_WATCH_MASK_CHECK_COMMANDS (1 << 2) /* + * SMI messages + * + * When communicating with an SMI, messages come in two formats: + * + * * Normal (to a BMC over a BMC interface) + * + * * IPMB (over an IPMB to another MC) + * + * When normal, commands are sent using the format defined by a + * standard message over KCS (NetFn must be even): + * + * +-----------+-----+------+ + * | NetFn/LUN | Cmd | Data | + * +-----------+-----+------+ + * + * And responses, similarly, with a completion code added (NetFn must + * be odd): + * + * +-----------+-----+------+------+ + * | NetFn/LUN | Cmd | CC | Data | + * +-----------+-----+------+------+ + * + * With normal messages, only commands are sent and only responses are + * received. + * + * In IPMB mode, we are acting as an IPMB device. Commands will be in + * the following format (NetFn must be even): + * + * +-------------+------+-------------+-----+------+ + * | NetFn/rsLUN | Addr | rqSeq/rqLUN | Cmd | Data | + * +-------------+------+-------------+-----+------+ + * + * Responses will use the following format: + * + * +-------------+------+-------------+-----+------+------+ + * | NetFn/rqLUN | Addr | rqSeq/rsLUN | Cmd | CC | Data | + * +-------------+------+-------------+-----+------+------+ + * + * This is similar to the format defined in the IPMB manual section + * 2.11.1 with the checksums and the first address removed. Also, the + * address is always the remote address. + * + * IPMB messages can be commands and responses in both directions.
+ * Received commands are handled as received commands from the message + * queue. + */ + +enum ipmi_smi_msg_type { + IPMI_SMI_MSG_TYPE_NORMAL = 0, + IPMI_SMI_MSG_TYPE_IPMB_DIRECT +}; + +/* * Messages to/from the lower layer. The smi interface will take one * of these to send. After the send has occurred and a response has * been received, it will report this same data structure back up to @@ -54,6 +107,8 @@ struct ipmi_smi; struct ipmi_smi_msg { struct list_head link; + enum ipmi_smi_msg_type type; + long msgid; void *user_data; @@ -73,6 +128,10 @@ struct ipmi_smi_msg { struct ipmi_smi_handlers { struct module *owner; + /* Capabilities of the SMI. */ +#define IPMI_SMI_CAN_HANDLE_IPMB_DIRECT (1 << 0) + unsigned int flags; + /* * The low-level interface cannot start sending messages to * the upper layer until this function is called. This may diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ef4a69865737..a59d25f19385 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -79,6 +79,7 @@ struct ipv6_devconf { __u32 ioam6_id; __u32 ioam6_id_wide; __u8 ioam6_enabled; + __u8 ndisc_evict_nocarrier; struct ctl_table_header *sysctl_header; }; @@ -132,6 +133,7 @@ struct inet6_skb_parm { __u16 dsthao; #endif __u16 frag_max_size; + __u16 srhoff; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 @@ -141,6 +143,7 @@ struct inet6_skb_parm { #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 +#define IP6SKB_SEG6 256 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -282,7 +285,6 @@ struct ipv6_pinfo { __be32 rcv_flowinfo; __u32 dst_cookie; - __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/linux/irq.h b/include/linux/irq.h index c8293c817646..848e1e12c5c6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -524,9 +524,10 @@ struct irq_chip { void (*irq_bus_lock)(struct irq_data *data); void (*irq_bus_sync_unlock)(struct irq_data *data); 
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); - +#endif void (*irq_suspend)(struct irq_data *data); void (*irq_resume)(struct irq_data *data); void (*irq_pm_shutdown)(struct irq_data *data); @@ -606,8 +607,10 @@ struct irqaction; extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); +#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); extern void irq_cpu_offline(void); +#endif extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); @@ -1261,6 +1264,7 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); * top-level IRQ handler. */ extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; +asmlinkage void generic_handle_arch_irq(struct pt_regs *regs); #else #ifndef set_handle_irq #define set_handle_irq(handle_irq) \ diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index ec2a47a81e42..8cd11a223260 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -3,6 +3,7 @@ #define _LINUX_IRQ_WORK_H #include <linux/smp_types.h> +#include <linux/rcuwait.h> /* * An entry can be in one of four states: @@ -16,11 +17,13 @@ struct irq_work { struct __call_single_node node; void (*func)(struct irq_work *); + struct rcuwait irqwait; }; #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \ .node = { .u_flags = (_flags), }, \ .func = (_func), \ + .irqwait = __RCUWAIT_INITIALIZER(irqwait), \ } #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0) @@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work) return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY; } +static inline bool irq_work_is_hard(struct irq_work *work) +{ + return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ; +} + 
bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 67351aac65ef..3a091d0710ae 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -14,8 +14,15 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_irq.h> #include <linux/platform_device.h> +/* Undefined on purpose */ +extern of_irq_init_cb_t typecheck_irq_init_cb; + +#define typecheck_irq_init_cb(fn) \ + (__typecheck(typecheck_irq_init_cb, &fn) ? fn : fn) + /* * This macro must be used by the different irqchip drivers to declare * the association between their DT compatible string and their @@ -23,24 +30,27 @@ * * @name: name that must be unique across all IRQCHIP_DECLARE of the * same file. - * @compstr: compatible string of the irqchip driver + * @compat: compatible string of the irqchip driver * @fn: initialization function */ -#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) +#define IRQCHIP_DECLARE(name, compat, fn) \ + OF_DECLARE_2(irqchip, name, compat, typecheck_irq_init_cb(fn)) extern int platform_irqchip_probe(struct platform_device *pdev); #define IRQCHIP_PLATFORM_DRIVER_BEGIN(drv_name) \ static const struct of_device_id drv_name##_irqchip_match_table[] = { -#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, .data = fn }, +#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \ + .data = typecheck_irq_init_cb(fn), }, #define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \ {}, \ }; \ MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \ -static struct platform_driver drv_name##_driver = { \ - .probe = platform_irqchip_probe, \ +static struct platform_driver drv_name##_driver = { \ + .probe = IS_ENABLED(CONFIG_IRQCHIP) ? 
\ + platform_irqchip_probe : NULL, \ .driver = { \ .name = #drv_name, \ .owner = THIS_MODULE, \ diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 81cbf85f73de..12d91f0dedf9 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -615,7 +615,7 @@ struct rdists { void __iomem *rd_base; struct page *pend_page; phys_addr_t phys_base; - bool lpi_enabled; + u64 flags; cpumask_t *vpe_table_mask; void *vpe_l1_base; } __percpu *rdist; @@ -624,6 +624,7 @@ struct rdists { u64 flags; u32 gicd_typer; u32 gicd_typer2; + int cpuhp_memreserve_state; bool has_vlpis; bool has_rvpeid; bool has_direct_lpi; @@ -632,6 +633,7 @@ struct rdists { struct irq_domain; struct fwnode_handle; +int __init its_lpi_memreserve_init(void); int its_cpu_init(void); int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 59aea39785bf..93d270ca0c56 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -168,14 +168,7 @@ int generic_handle_irq(unsigned int irq); * conversion failed. 
*/ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); - -#ifdef CONFIG_HANDLE_DOMAIN_IRQ -int handle_domain_irq(struct irq_domain *domain, - unsigned int hwirq, struct pt_regs *regs); - -int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, - struct pt_regs *regs); -#endif +int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif /* Test to see if a driver has successfully requested an irq */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9ee238ad29ce..d476405802e9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -64,6 +64,10 @@ struct irq_fwspec { u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; +/* Conversion function from of_phandle_args fields to fwspec */ +void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, struct irq_fwspec *fwspec); + /* * Should several domains have the same device node, but serve * different purposes (for example one domain is for PCI/MSI, and the @@ -127,7 +131,7 @@ struct irq_domain_ops { #endif }; -extern struct irq_domain_ops irq_generic_chip_ops; +extern const struct irq_domain_ops irq_generic_chip_ops; struct irq_domain_chip_generic; diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 600c10da321a..4b140938b03e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -71,14 +71,6 @@ do { \ do { \ __this_cpu_dec(hardirq_context); \ } while (0) -# define lockdep_softirq_enter() \ -do { \ - current->softirq_context++; \ -} while (0) -# define lockdep_softirq_exit() \ -do { \ - current->softirq_context--; \ -} while (0) # define lockdep_hrtimer_enter(__hrtimer) \ ({ \ @@ -140,6 +132,21 @@ do { \ # define lockdep_irq_work_exit(__work) do { } while (0) #endif +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT) +# define lockdep_softirq_enter() \ +do { \ + current->softirq_context++; \ +} while (0) +# define lockdep_softirq_exit() \ +do { 
\ + current->softirq_context--; \ +} while (0) + +#else +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) +#endif + #if defined(CONFIG_IRQSOFF_TRACER) || \ defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index a1d6fc82d7f0..4176c7eca7b5 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -24,25 +24,16 @@ struct cred; struct module; -static inline int is_kernel_inittext(unsigned long addr) -{ - if (addr >= (unsigned long)_sinittext - && addr <= (unsigned long)_einittext) - return 1; - return 0; -} - static inline int is_kernel_text(unsigned long addr) { - if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || - arch_is_kernel_text(addr)) + if (__is_kernel_text(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { - if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) + if (__is_kernel(addr)) return 1; return in_gate_area_no_mm(addr); } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd874a1ee862..4a45562d8893 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -9,6 +9,7 @@ struct kmem_cache; struct page; +struct slab; struct vm_struct; struct task_struct; @@ -89,7 +90,7 @@ static __always_inline bool kasan_enabled(void) return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return kasan_enabled(); } @@ -104,7 +105,7 @@ static inline bool kasan_enabled(void) return IS_ENABLED(CONFIG_KASAN); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return false; } @@ -125,6 +126,11 @@ static __always_inline void kasan_free_pages(struct page *page, #endif /* CONFIG_KASAN_HW_TAGS */ +static inline bool kasan_has_integrated_init(void) +{ + return 
kasan_hw_tags_enabled(); +} + #ifdef CONFIG_KASAN struct kasan_cache { @@ -188,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) return 0; } -void __kasan_poison_slab(struct page *page); -static __always_inline void kasan_poison_slab(struct page *page) +void __kasan_poison_slab(struct slab *slab); +static __always_inline void kasan_poison_slab(struct slab *slab) { if (kasan_enabled()) - __kasan_poison_slab(page); + __kasan_poison_slab(slab); } void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -317,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, slab_flags_t *flags) {} static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } -static inline void kasan_poison_slab(struct page *page) {} +static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, @@ -370,12 +376,14 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); +void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} +static inline void kasan_record_aux_stack_noalloc(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ @@ -434,6 +442,8 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); +void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); + #else /* CONFIG_KASAN_VMALLOC */ static inline int 
kasan_populate_vmalloc(unsigned long start, @@ -451,6 +461,10 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_start, unsigned long free_region_end) {} +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ } + #endif /* CONFIG_KASAN_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ @@ -461,12 +475,12 @@ static inline void kasan_release_vmalloc(unsigned long start, * allocations with real shadow memory. With KASAN vmalloc, the special * case is unnecessary, as the work is handled in the generic case. */ -int kasan_module_alloc(void *addr, size_t size); +int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); void kasan_free_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 9fd0ad80fef6..92f3843d9ebb 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -36,6 +36,36 @@ */ void __kcsan_check_access(const volatile void *ptr, size_t size, int type); +/* + * See definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c. + * Note: The mappings are arbitrary, and do not reflect any real mappings of C11 + * memory orders to the LKMM memory orders and vice-versa! 
+ */ +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_mb __ATOMIC_SEQ_CST +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_wmb __ATOMIC_ACQ_REL +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_rmb __ATOMIC_ACQUIRE +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE_release __ATOMIC_RELEASE + +/** + * __kcsan_mb - full memory barrier instrumentation + */ +void __kcsan_mb(void); + +/** + * __kcsan_wmb - write memory barrier instrumentation + */ +void __kcsan_wmb(void); + +/** + * __kcsan_rmb - read memory barrier instrumentation + */ +void __kcsan_rmb(void); + +/** + * __kcsan_release - release barrier instrumentation + */ +void __kcsan_release(void); + /** * kcsan_disable_current - disable KCSAN for the current context * @@ -99,10 +129,21 @@ void kcsan_set_access_mask(unsigned long mask); /* Scoped access information. */ struct kcsan_scoped_access { - struct list_head list; + union { + struct list_head list; /* scoped_accesses list */ + /* + * Not an entry in scoped_accesses list; stack depth from where + * the access was initialized. + */ + int stack_depth; + }; + + /* Access information. */ const volatile void *ptr; size_t size; int type; + /* Location where scoped access was set up. 
*/ + unsigned long ip; }; /* * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes @@ -148,6 +189,10 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa); static inline void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { } +static inline void __kcsan_mb(void) { } +static inline void __kcsan_wmb(void) { } +static inline void __kcsan_rmb(void) { } +static inline void __kcsan_release(void) { } static inline void kcsan_disable_current(void) { } static inline void kcsan_enable_current(void) { } static inline void kcsan_enable_current_nowarn(void) { } @@ -180,12 +225,47 @@ static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } */ #define __kcsan_disable_current kcsan_disable_current #define __kcsan_enable_current kcsan_enable_current_nowarn -#else +#else /* __SANITIZE_THREAD__ */ static inline void kcsan_check_access(const volatile void *ptr, size_t size, int type) { } static inline void __kcsan_enable_current(void) { } static inline void __kcsan_disable_current(void) { } -#endif +#endif /* __SANITIZE_THREAD__ */ + +#if defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__SANITIZE_THREAD__) +/* + * Normal barrier instrumentation is not done via explicit calls, but by mapping + * to a repurposed __atomic_signal_fence(), which normally does not generate any + * real instructions, but is still intercepted by fsanitize=thread. This means, + * like any other compile-time instrumentation, barrier instrumentation can be + * disabled with the __no_kcsan function attribute. + * + * Also see definition of __tsan_atomic_signal_fence() in kernel/kcsan/core.c. + * + * These are all macros, like <asm/barrier.h>, since some architectures use them + * in non-static inline functions. 
+ */ +#define __KCSAN_BARRIER_TO_SIGNAL_FENCE(name) \ + do { \ + barrier(); \ + __atomic_signal_fence(__KCSAN_BARRIER_TO_SIGNAL_FENCE_##name); \ + barrier(); \ + } while (0) +#define kcsan_mb() __KCSAN_BARRIER_TO_SIGNAL_FENCE(mb) +#define kcsan_wmb() __KCSAN_BARRIER_TO_SIGNAL_FENCE(wmb) +#define kcsan_rmb() __KCSAN_BARRIER_TO_SIGNAL_FENCE(rmb) +#define kcsan_release() __KCSAN_BARRIER_TO_SIGNAL_FENCE(release) +#elif defined(CONFIG_KCSAN_WEAK_MEMORY) && defined(__KCSAN_INSTRUMENT_BARRIERS__) +#define kcsan_mb __kcsan_mb +#define kcsan_wmb __kcsan_wmb +#define kcsan_rmb __kcsan_rmb +#define kcsan_release __kcsan_release +#else /* CONFIG_KCSAN_WEAK_MEMORY && ... */ +#define kcsan_mb() do { } while (0) +#define kcsan_wmb() do { } while (0) +#define kcsan_rmb() do { } while (0) +#define kcsan_release() do { } while (0) +#endif /* CONFIG_KCSAN_WEAK_MEMORY && ... */ /** * __kcsan_check_read - check regular read access for races diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index fc266ecb2a4d..c07c71f5ba4f 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -21,6 +21,7 @@ */ struct kcsan_ctx { int disable_count; /* disable counter */ + int disable_scoped; /* disable scoped access counter */ int atomic_next; /* number of following atomic ops */ /* @@ -48,8 +49,16 @@ struct kcsan_ctx { */ unsigned long access_mask; - /* List of scoped accesses. */ + /* List of scoped accesses; likely to be empty. */ struct list_head scoped_accesses; + +#ifdef CONFIG_KCSAN_WEAK_MEMORY + /* + * Scoped access for modeling access reordering to detect missing memory + * barriers; only keep 1 to keep fast-path complexity manageable. 
+ */ + struct kcsan_scoped_access reorder_access; +#endif }; /** diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2776423a587e..055eb203c00e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -9,6 +9,7 @@ #include <linux/stddef.h> #include <linux/types.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/bitops.h> #include <linux/kstrtox.h> #include <linux/log2.h> @@ -19,6 +20,7 @@ #include <linux/printk.h> #include <linux/build_bug.h> #include <linux/static_call_types.h> +#include <linux/instruction_pointer.h> #include <asm/byteorder.h> #include <uapi/linux/kernel.h> @@ -52,11 +54,6 @@ } \ ) -#define typeof_member(T, m) typeof(((T*)0)->m) - -#define _RET_IP_ (unsigned long)__builtin_return_address(0) -#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) - /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing @@ -88,7 +85,7 @@ struct completion; struct user; -#ifdef CONFIG_PREEMPT_VOLUNTARY +#ifdef CONFIG_PREEMPT_VOLUNTARY_BUILD extern int __cond_resched(void); # define might_resched() __cond_resched() @@ -111,8 +108,8 @@ static __always_inline void might_resched(void) #endif /* CONFIG_PREEMPT_* */ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -extern void ___might_sleep(const char *file, int line, int preempt_offset); -extern void __might_sleep(const char *file, int line, int preempt_offset); +extern void __might_resched(const char *file, int line, unsigned int offsets); +extern void __might_sleep(const char *file, int line); extern void __cant_sleep(const char *file, int line, int preempt_offset); extern void __cant_migrate(const char *file, int line); @@ -129,7 +126,7 @@ extern void __cant_migrate(const char *file, int line); * supposed to. 
*/ # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) + do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) /** * cant_sleep - annotation for functions that cannot sleep * @@ -168,10 +165,9 @@ extern void __cant_migrate(const char *file, int line); */ # define non_block_end() WARN_ON(current->non_block_count-- == 0) #else - static inline void ___might_sleep(const char *file, int line, - int preempt_offset) { } - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } + static inline void __might_resched(const char *file, int line, + unsigned int offsets) { } +static inline void __might_sleep(const char *file, int line) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) # define cant_migrate() do { } while (0) @@ -191,7 +187,6 @@ static inline void might_fault(void) { } #endif void do_exit(long error_code) __noreturn; -void complete_and_exit(struct completion *, long) __noreturn; extern int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); @@ -229,8 +224,6 @@ extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); -extern int init_kernel_text(unsigned long addr); -extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); @@ -248,6 +241,7 @@ extern bool early_boot_irqs_disabled; extern enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, + SYSTEM_FREEING_INITMEM, SYSTEM_RUNNING, SYSTEM_HALT, SYSTEM_POWER_OFF, @@ -483,36 +477,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #define __CONCAT(a, b) a ## b #define CONCATENATE(a, b) __CONCAT(a, b) -/** - * container_of - cast a member of a structure out to the 
containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - void *__mptr = (void *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - ((type *)(__mptr - offsetof(type, member))); }) - -/** - * container_of_safe - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged. - */ -#define container_of_safe(ptr, type, member) ({ \ - void *__mptr = (void *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - IS_ERR_OR_NULL(__mptr) ? 
ERR_CAST(__mptr) : \ - ((type *)(__mptr - offsetof(type, member))); }) - /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 44ae1a7eb9e3..69ae6b278464 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64, enum cpu_usage_stat); extern void account_steal_time(u64); extern void account_idle_time(u64); +extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 1093abf7c28c..861c4f0f8a29 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -6,7 +6,6 @@ #ifndef __LINUX_KERNFS_H #define __LINUX_KERNFS_H -#include <linux/kernel.h> #include <linux/err.h> #include <linux/list.h> #include <linux/mutex.h> @@ -14,14 +13,18 @@ #include <linux/lockdep.h> #include <linux/rbtree.h> #include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/types.h> #include <linux/uidgid.h> #include <linux/wait.h> +#include <linux/rwsem.h> struct file; struct dentry; struct iattr; struct seq_file; struct vm_area_struct; +struct vm_operations_struct; struct super_block; struct file_system_type; struct poll_table_struct; @@ -197,6 +200,7 @@ struct kernfs_root { struct list_head supers; wait_queue_head_t deactivate_waitq; + struct rw_semaphore kernfs_rwsem; }; struct kernfs_open_file { @@ -269,10 +273,6 @@ struct kernfs_ops { struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lockdep_key; -#endif }; /* @@ -568,30 +568,6 @@ kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, priv, NULL); } -static 
inline struct kernfs_node * -kernfs_create_file_ns(struct kernfs_node *parent, const char *name, - umode_t mode, kuid_t uid, kgid_t gid, - loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns) -{ - struct lock_class_key *key = NULL; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - key = (struct lock_class_key *)&ops->lockdep_key; -#endif - return __kernfs_create_file(parent, name, mode, uid, gid, - size, ops, priv, ns, key); -} - -static inline struct kernfs_node * -kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, - loff_t size, const struct kernfs_ops *ops, void *priv) -{ - return kernfs_create_file_ns(parent, name, mode, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, - size, ops, priv, NULL); -} - static inline int kernfs_remove_by_name(struct kernfs_node *parent, const char *name) { diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h deleted file mode 100644 index a27605e2f826..000000000000 --- a/include/linux/keyslot-manager.h +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2019 Google LLC - */ - -#ifndef __LINUX_KEYSLOT_MANAGER_H -#define __LINUX_KEYSLOT_MANAGER_H - -#include <linux/bio.h> -#include <linux/blk-crypto.h> - -struct blk_keyslot_manager; - -/** - * struct blk_ksm_ll_ops - functions to manage keyslots in hardware - * @keyslot_program: Program the specified key into the specified slot in the - * inline encryption hardware. - * @keyslot_evict: Evict key from the specified keyslot in the hardware. - * The key is provided so that e.g. dm layers can evict - * keys from the devices that they map over. - * Returns 0 on success, -errno otherwise. - * - * This structure should be provided by storage device drivers when they set up - * a keyslot manager - this structure holds the function ptrs that the keyslot - * manager will use to manipulate keyslots in the hardware. 
- */ -struct blk_ksm_ll_ops { - int (*keyslot_program)(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key, - unsigned int slot); - int (*keyslot_evict)(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key, - unsigned int slot); -}; - -struct blk_keyslot_manager { - /* - * The struct blk_ksm_ll_ops that this keyslot manager will use - * to perform operations like programming and evicting keys on the - * device - */ - struct blk_ksm_ll_ops ksm_ll_ops; - - /* - * The maximum number of bytes supported for specifying the data unit - * number. - */ - unsigned int max_dun_bytes_supported; - - /* - * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents - * whether a crypto mode and data unit size are supported. The i'th - * bit of crypto_mode_supported[crypto_mode] is set iff a data unit - * size of (1 << i) is supported. We only support data unit sizes - * that are powers of 2. - */ - unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; - - /* Device for runtime power management (NULL if none) */ - struct device *dev; - - /* Here onwards are *private* fields for internal keyslot manager use */ - - unsigned int num_slots; - - /* Protects programming and evicting keys from the device */ - struct rw_semaphore lock; - - /* List of idle slots, with least recently used slot at front */ - wait_queue_head_t idle_slots_wait_queue; - struct list_head idle_slots; - spinlock_t idle_slots_lock; - - /* - * Hash table which maps struct *blk_crypto_key to keyslots, so that we - * can find a key's keyslot in O(1) time rather than O(num_slots). - * Protected by 'lock'. 
- */ - struct hlist_head *slot_hashtable; - unsigned int log_slot_ht_size; - - /* Per-keyslot data */ - struct blk_ksm_keyslot *slots; -}; - -int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots); - -int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, - unsigned int num_slots); - -blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key, - struct blk_ksm_keyslot **slot_ptr); - -unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot); - -void blk_ksm_put_slot(struct blk_ksm_keyslot *slot); - -bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, - const struct blk_crypto_config *cfg); - -int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, - const struct blk_crypto_key *key); - -void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm); - -void blk_ksm_destroy(struct blk_keyslot_manager *ksm); - -void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, - const struct blk_keyslot_manager *child); - -void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm); - -bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, - struct blk_keyslot_manager *ksm_subset); - -void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, - struct blk_keyslot_manager *reference_ksm); - -#endif /* __LINUX_KEYSLOT_MANAGER_H */ diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 3fe6dd8a18c1..4b5e3679a72c 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -14,6 +14,9 @@ #ifdef CONFIG_KFENCE +#include <linux/atomic.h> +#include <linux/static_key.h> + /* * We allocate an even number of pages, as it simplifies calculations to map * address to metadata indices; effectively, the very first page serves as an @@ -22,13 +25,8 @@ #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE) extern char *__kfence_pool; -#ifdef CONFIG_KFENCE_STATIC_KEYS -#include <linux/static_key.h> 
DECLARE_STATIC_KEY_FALSE(kfence_allocation_key); -#else -#include <linux/atomic.h> extern atomic_t kfence_allocation_gate; -#endif /** * is_kfence_address() - check if an address belongs to KFENCE pool @@ -116,13 +114,16 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags); */ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { -#ifdef CONFIG_KFENCE_STATIC_KEYS - if (static_branch_unlikely(&kfence_allocation_key)) +#if defined(CONFIG_KFENCE_STATIC_KEYS) || CONFIG_KFENCE_SAMPLE_INTERVAL == 0 + if (!static_branch_unlikely(&kfence_allocation_key)) + return NULL; #else - if (unlikely(!atomic_read(&kfence_allocation_gate))) + if (!static_branch_likely(&kfence_allocation_key)) + return NULL; #endif - return __kfence_alloc(s, size, flags); - return NULL; + if (likely(atomic_read(&kfence_allocation_gate))) + return NULL; + return __kfence_alloc(s, size, flags); } /** diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ea30529fba08..c7b47399b36a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -19,10 +19,10 @@ #include <linux/list.h> #include <linux/sysfs.h> #include <linux/compiler.h> +#include <linux/container_of.h> #include <linux/spinlock.h> #include <linux/kref.h> #include <linux/kobject_ns.h> -#include <linux/kernel.h> #include <linux/wait.h> #include <linux/atomic.h> #include <linux/workqueue.h> @@ -66,7 +66,7 @@ struct kobject { struct list_head entry; struct kobject *parent; struct kset *kset; - struct kobj_type *ktype; + const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE @@ -90,18 +90,17 @@ static inline const char *kobject_name(const struct kobject *kobj) return kobj->name; } -extern void kobject_init(struct kobject *kobj, struct kobj_type *ktype); +extern void kobject_init(struct kobject *kobj, const struct kobj_type *ktype); extern __printf(3, 4) __must_check int kobject_add(struct 
kobject *kobj, struct kobject *parent, const char *fmt, ...); extern __printf(4, 5) __must_check int kobject_init_and_add(struct kobject *kobj, - struct kobj_type *ktype, struct kobject *parent, + const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); extern void kobject_del(struct kobject *kobj); -extern struct kobject * __must_check kobject_create(void); extern struct kobject * __must_check kobject_create_and_add(const char *name, struct kobject *parent); @@ -118,23 +117,6 @@ extern void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); -/** - * kobject_has_children - Returns whether a kobject has children. - * @kobj: the object to test - * - * This will return whether a kobject has other kobjects as children. - * - * It does NOT account for the presence of attribute files, only sub - * directories. It also assumes there is no concurrent addition or - * removal of such children, and thus relies on external locking. 
- */ -static inline bool kobject_has_children(struct kobject *kobj) -{ - WARN_ON_ONCE(kref_read(&kobj->kref) == 0); - - return kobj->sd && kobj->sd->dir.subdirs; -} - struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; @@ -154,10 +136,9 @@ struct kobj_uevent_env { }; struct kset_uevent_ops { - int (* const filter)(struct kset *kset, struct kobject *kobj); - const char *(* const name)(struct kset *kset, struct kobject *kobj); - int (* const uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); + int (* const filter)(struct kobject *kobj); + const char *(* const name)(struct kobject *kobj); + int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { @@ -218,7 +199,7 @@ static inline void kset_put(struct kset *k) kobject_put(&k->kobj); } -static inline struct kobj_type *get_ktype(struct kobject *kobj) +static inline const struct kobj_type *get_ktype(struct kobject *kobj) { return kobj->ktype; } diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e4f3bfe08757..8c8f7a4d93af 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -3,7 +3,6 @@ #define _LINUX_KPROBES_H /* * Kernel Probes (KProbes) - * include/linux/kprobes.h * * Copyright (C) IBM Corporation, 2002, 2004 * @@ -39,7 +38,7 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 -#else /* CONFIG_KPROBES */ +#else /* !CONFIG_KPROBES */ #include <asm-generic/kprobes.h> typedef int kprobe_opcode_t; struct arch_specific_insn { @@ -105,25 +104,25 @@ struct kprobe { #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ -static inline int kprobe_gone(struct kprobe *p) +static inline bool kprobe_gone(struct kprobe *p) { return p->flags & KPROBE_FLAG_GONE; } /* Is this kprobe disabled ? 
*/ -static inline int kprobe_disabled(struct kprobe *p) +static inline bool kprobe_disabled(struct kprobe *p) { return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE); } /* Is this kprobe really running optimized path ? */ -static inline int kprobe_optimized(struct kprobe *p) +static inline bool kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } /* Is this kprobe uses ftrace ? */ -static inline int kprobe_ftrace(struct kprobe *p) +static inline bool kprobe_ftrace(struct kprobe *p) { return p->flags & KPROBE_FLAG_FTRACE; } @@ -154,6 +153,8 @@ struct kretprobe { struct kretprobe_holder *rph; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { union { struct freelist_node freelist; @@ -181,14 +182,6 @@ struct kprobe_blacklist_entry { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -/* - * For #ifdef avoidance: - */ -static inline int kprobes_built_in(void) -{ - return 1; -} - extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); @@ -197,15 +190,26 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr); + +void __kretprobe_trampoline(void); +/* + * Since some architecture uses structured function pointer, + * use dereference_function_descriptor() to get real function address. 
+ */ +static nokprobe_inline void *kretprobe_trampoline_addr(void) +{ + return dereference_kernel_function_descriptor(__kretprobe_trampoline); +} + /* If the trampoline handler called from a kprobe, use this version */ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer); + void *frame_pointer); static nokprobe_inline unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer) + void *frame_pointer) { unsigned long ret; /* @@ -214,7 +218,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, * be running at this point. */ kprobe_busy_begin(); - ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer); + ret = __kretprobe_trampoline_handler(regs, frame_pointer); kprobe_busy_end(); return ret; @@ -228,7 +232,7 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance return READ_ONCE(ri->rph->rp); } -#else /* CONFIG_KRETPROBES */ +#else /* !CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -239,11 +243,15 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) } #endif /* CONFIG_KRETPROBES */ +/* Markers of '_kprobe_blacklist' section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + extern struct kretprobe_blackpoint kretprobe_blacklist[]; #ifdef CONFIG_KPROBES_SANITY_TEST extern int init_test_probes(void); -#else +#else /* !CONFIG_KPROBES_SANITY_TEST */ static inline int init_test_probes(void) { return 0; @@ -303,7 +311,7 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ #define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page" int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, unsigned long *value, char *type, char *sym); -#else /* __ARCH_WANT_KPROBES_INSN_SLOT */ +#else /* !__ARCH_WANT_KPROBES_INSN_SLOT */ #define 
DEFINE_INSN_CACHE_OPS(__name) \ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ { \ @@ -334,7 +342,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr); + kprobe_opcode_t *addr); extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); @@ -345,18 +353,22 @@ extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); -#endif +#endif /* CONFIG_SYSCTL */ extern void wait_for_kprobe_optimizer(void); -#else +#else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ + #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); -#endif - -int arch_check_ftrace_location(struct kprobe *p); +#else +static inline int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + return -EINVAL; +} +#endif /* CONFIG_KPROBES_ON_FTRACE */ /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); @@ -364,7 +376,7 @@ struct kprobe *get_kprobe(void *addr); /* kprobe_running() will just return the current_kprobe on this CPU */ static inline struct kprobe *kprobe_running(void) { - return (__this_cpu_read(current_kprobe)); + return __this_cpu_read(current_kprobe); } static inline void reset_current_kprobe(void) @@ -382,7 +394,6 @@ int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); -unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct 
kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); @@ -410,10 +421,6 @@ int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); #else /* !CONFIG_KPROBES: */ -static inline int kprobes_built_in(void) -{ - return 0; -} static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { return 0; @@ -428,11 +435,11 @@ static inline struct kprobe *kprobe_running(void) } static inline int register_kprobe(struct kprobe *p) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int register_kprobes(struct kprobe **kps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline void unregister_kprobe(struct kprobe *p) { @@ -442,11 +449,11 @@ static inline void unregister_kprobes(struct kprobe **kps, int num) } static inline int register_kretprobe(struct kretprobe *rp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int register_kretprobes(struct kretprobe **rps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline void unregister_kretprobe(struct kretprobe *rp) { @@ -462,11 +469,11 @@ static inline void kprobe_free_init_mem(void) } static inline int disable_kprobe(struct kprobe *kp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int enable_kprobe(struct kprobe *kp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline bool within_kprobe_blacklist(unsigned long addr) @@ -479,6 +486,7 @@ static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } #endif /* CONFIG_KPROBES */ + static inline int disable_kretprobe(struct kretprobe *rp) { return disable_kprobe(&rp->kp); @@ -493,19 +501,42 @@ static inline bool is_kprobe_insn_slot(unsigned long addr) { return false; } -#endif +#endif /* !CONFIG_KPROBES */ + #ifndef CONFIG_OPTPROBES static inline bool is_kprobe_optinsn_slot(unsigned long addr) { return false; } +#endif /* !CONFIG_OPTPROBES */ + +#ifdef CONFIG_KRETPROBES +static nokprobe_inline bool is_kretprobe_trampoline(unsigned 
long addr) +{ + return (void *)addr == kretprobe_trampoline_addr(); +} + +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur); +#else +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return false; +} + +static nokprobe_inline +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur) +{ + return 0; +} #endif /* Returns true if kprobes handled the fault */ static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs, unsigned int trap) { - if (!kprobes_built_in()) + if (!IS_ENABLED(CONFIG_KPROBES)) return false; if (user_mode(regs)) return false; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 161e8164abcf..a38a5bca1ba5 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); -void ksm_migrate_page(struct page *newpage, struct page *oldpage); +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page, { } -static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) +static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old) { } #endif /* CONFIG_MMU */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 346b0f269161..b6c8aafa8db5 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,7 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); -void set_kthread_struct(struct task_struct *p); +bool set_kthread_struct(struct task_struct *p); void kthread_set_per_cpu(struct task_struct *k, int cpu); bool kthread_is_per_cpu(struct task_struct *k); @@ -56,6 +56,31 @@ bool kthread_is_per_cpu(struct 
task_struct *k); __k; \ }) +/** + * kthread_run_on_cpu - create and wake a cpu bound thread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @cpu: The cpu on which the thread should be bound. + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. + * + * Description: Convenient wrapper for kthread_create_on_cpu() + * followed by wake_up_process(). Returns the kthread or + * ERR_PTR(-ENOMEM). + */ +static inline struct task_struct * +kthread_run_on_cpu(int (*threadfn)(void *data), void *data, + unsigned int cpu, const char *namefmt) +{ + struct task_struct *p; + + p = kthread_create_on_cpu(threadfn, data, cpu, namefmt); + if (!IS_ERR(p)) + wake_up_process(p); + + return p; +} + void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); @@ -70,6 +95,8 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); +void kthread_exit(long result) __noreturn; +void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 120e5e90fa1d..906f899813dc 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -27,9 +27,9 @@ struct kvm_dirty_ring { int index; }; -#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +#ifndef CONFIG_HAVE_KVM_DIRTY_RING /* - * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * If CONFIG_HAVE_KVM_DIRTY_RING not defined, kvm_dirty_ring.o should * not be included as well, so define these nop functions for the arch.
*/ static inline u32 kvm_dirty_ring_get_rsvd_entries(void) @@ -43,11 +43,6 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, return 0; } -static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) -{ - return NULL; -} - static inline int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) { @@ -74,11 +69,10 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) return true; } -#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ +#else /* CONFIG_HAVE_KVM_DIRTY_RING */ u32 kvm_dirty_ring_get_rsvd_entries(void); int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); -struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); /* * called with kvm->slots_lock held, returns the number of @@ -98,6 +92,6 @@ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); -#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ +#endif /* CONFIG_HAVE_KVM_DIRTY_RING */ #endif /* KVM_DIRTY_RING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0f18df7fe874..d89d564f7c19 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,6 +29,10 @@ #include <linux/refcount.h> #include <linux/nospec.h> #include <linux/notifier.h> +#include <linux/hashtable.h> +#include <linux/interval_tree.h> +#include <linux/rbtree.h> +#include <linux/xarray.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -39,8 +43,8 @@ #include <asm/kvm_host.h> #include <linux/kvm_dirty_ring.h> -#ifndef KVM_MAX_VCPU_ID -#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS +#ifndef KVM_MAX_VCPU_IDS +#define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS #endif /* @@ -150,7 +154,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 -#define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT 
| KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQUEST_ARCH_BASE 8 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ @@ -160,8 +165,7 @@ static inline bool is_error_page(struct page *page) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, - struct kvm_vcpu *except, - unsigned long *vcpu_bitmap, cpumask_var_t tmp); + unsigned long *vcpu_bitmap); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except); @@ -311,7 +315,9 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; +#ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; +#endif struct pid __rcu *pid; int sigset_active; sigset_t sigset; @@ -356,11 +362,13 @@ struct kvm_vcpu { struct kvm_dirty_ring dirty_ring; /* - * The index of the most recently used memslot by this vCPU. It's ok - * if this becomes stale due to memslot changes since we always check - * it is a valid slot. + * The most recently used memslot by this vCPU and the slots generation + * for which it is valid. + * No wraparound protection is needed since generations won't overflow in + * thousands of years, even assuming 1M memslot operations per second. */ - int last_used_slot; + struct kvm_memory_slot *last_used_slot; + u64 last_used_slot_gen; }; /* must be called with irqs disabled */ @@ -425,7 +433,26 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) */ #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) +/* + * Since at idle each memslot belongs to two memslot sets it has to contain + * two embedded nodes for each data structure that it forms a part of. + * + * Two memslot sets (one active and one inactive) are necessary so the VM + * continues to run on one memslot set while the other is being modified. 
+ * + * These two memslot sets normally point to the same set of memslots. + * They can, however, be desynchronized when performing a memslot management + * operation by replacing the memslot to be modified by its copy. + * After the operation is complete, both memslot sets once again point to + * the same, common set of memslot data. + * + * The memslots themselves are independent of each other so they can be + * individually added or deleted. + */ struct kvm_memory_slot { + struct hlist_node id_node[2]; + struct interval_tree_node hva_node[2]; + struct rb_node gfn_node[2]; gfn_t base_gfn; unsigned long npages; unsigned long *dirty_bitmap; @@ -436,7 +463,7 @@ struct kvm_memory_slot { u16 as_id; }; -static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; } @@ -470,6 +497,12 @@ struct kvm_hv_sint { u32 sint; }; +struct kvm_xen_evtchn { + u32 port; + u32 vcpu; + u32 priority; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -490,6 +523,7 @@ struct kvm_kernel_irq_routing_entry { } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; + struct kvm_xen_evtchn xen_evtchn; }; struct hlist_node link; }; @@ -520,18 +554,21 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) } #endif -/* - * Note: - * memslots are not sorted by id anymore, please use id_to_memslot() - * to get the memslot by its id. - */ struct kvm_memslots { u64 generation; - /* The mapping table from slot id to the index in memslots[]. */ - short id_to_index[KVM_MEM_SLOTS_NUM]; - atomic_t last_used_slot; - int used_slots; - struct kvm_memory_slot memslots[]; + atomic_long_t last_used_slot; + struct rb_root_cached hva_tree; + struct rb_root gfn_tree; + /* + * The mapping table from slot id to memslot. 
+ * + * 7-bit bucket count matches the size of the old id to index array for + * 512 slots, while giving good performance with this slot count. + * Higher bucket counts bring only small performance improvements but + * always result in higher memory usage (even for lower memslot counts). + */ + DECLARE_HASHTABLE(id_hash, 7); + int node_idx; }; struct kvm { @@ -552,14 +589,22 @@ struct kvm { */ struct mutex slots_arch_lock; struct mm_struct *mm; /* userspace tied to this vm */ + unsigned long nr_memslot_pages; + /* The two memslot sets - active and inactive (per address space) */ + struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2]; + /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; - struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + struct xarray vcpu_array; /* Used to wait for completion of MMU notifiers. */ spinlock_t mn_invalidate_lock; unsigned long mn_active_invalidate_count; struct rcuwait mn_memslots_update_rcuwait; + /* For management / invalidation of gfn_to_pfn_caches */ + spinlock_t gpc_lock; + struct list_head gpc_list; + /* * created_vcpus is protected by kvm->lock, and is incremented * at the beginning of KVM_CREATE_VCPU. online_vcpus is only @@ -618,6 +663,7 @@ struct kvm { unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool vm_bugged; + bool vm_dead; #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; @@ -651,12 +697,19 @@ struct kvm { #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +static inline void kvm_vm_dead(struct kvm *kvm) +{ + kvm->vm_dead = true; + kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD); +} + static inline void kvm_vm_bugged(struct kvm *kvm) { kvm->vm_bugged = true; - kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED); + kvm_vm_dead(kvm); } + #define KVM_BUG(cond, kvm, fmt...) 
\ ({ \ int __ret = (cond); \ @@ -694,19 +747,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); - return kvm->vcpus[i]; + return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - for (idx = 0; \ - idx < atomic_read(&kvm->online_vcpus) && \ - (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ - idx++) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { struct kvm_vcpu *vcpu = NULL; - int i; + unsigned long i; if (id < 0) return NULL; @@ -720,13 +771,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } -#define kvm_for_each_memslot(memslot, slots) \ - for (memslot = &slots->memslots[0]; \ - memslot < slots->memslots + slots->used_slots; memslot++) \ - if (WARN_ON_ONCE(!memslot->npages)) { \ - } else +static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) +{ + return vcpu->vcpu_idx; +} -void kvm_vcpu_destroy(struct kvm_vcpu *vcpu); +void kvm_destroy_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); @@ -786,21 +836,124 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) return __kvm_memslots(vcpu->kvm, as_id); } +static inline bool kvm_memslots_empty(struct kvm_memslots *slots) +{ + return RB_EMPTY_ROOT(&slots->gfn_tree); +} + +#define kvm_for_each_memslot(memslot, bkt, slots) \ + hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \ + if (WARN_ON_ONCE(!memslot->npages)) { \ + } else + static inline struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { - int index = slots->id_to_index[id]; struct kvm_memory_slot *slot; + int idx = slots->node_idx; - if (index < 0) - return NULL; + hash_for_each_possible(slots->id_hash, slot, 
id_node[idx], id) { + if (slot->id == id) + return slot; + } + + return NULL; +} + +/* Iterator used for walking memslots that overlap a gfn range. */ +struct kvm_memslot_iter { + struct kvm_memslots *slots; + struct rb_node *node; + struct kvm_memory_slot *slot; +}; + +static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter) +{ + iter->node = rb_next(iter->node); + if (!iter->node) + return; + + iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]); +} + +static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter, + struct kvm_memslots *slots, + gfn_t start) +{ + int idx = slots->node_idx; + struct rb_node *tmp; + struct kvm_memory_slot *slot; + + iter->slots = slots; + + /* + * Find the so called "upper bound" of a key - the first node that has + * its key strictly greater than the searched one (the start gfn in our case). + */ + iter->node = NULL; + for (tmp = slots->gfn_tree.rb_node; tmp; ) { + slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]); + if (start < slot->base_gfn) { + iter->node = tmp; + tmp = tmp->rb_left; + } else { + tmp = tmp->rb_right; + } + } + + /* + * Find the slot with the lowest gfn that can possibly intersect with + * the range, so we'll ideally have slot start <= range start + */ + if (iter->node) { + /* + * A NULL previous node means that the very first slot + * already has a higher start gfn. + * In this case slot start > range start. + */ + tmp = rb_prev(iter->node); + if (tmp) + iter->node = tmp; + } else { + /* a NULL node below means no slots */ + iter->node = rb_last(&slots->gfn_tree); + } - slot = &slots->memslots[index]; + if (iter->node) { + iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]); - WARN_ON(slot->id != id); - return slot; + /* + * It is possible in the slot start < range start case that the + * found slot ends before or at range start (slot end <= range start) + * and so it does not overlap the requested range. 
+ * + * In such non-overlapping case the next slot (if it exists) will + * already have slot start > range start, otherwise the logic above + * would have found it instead of the current slot. + */ + if (iter->slot->base_gfn + iter->slot->npages <= start) + kvm_memslot_iter_next(iter); + } } +static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end) +{ + if (!iter->node) + return false; + + /* + * If this slot starts beyond or at the end of the range so does + * every next one + */ + return iter->slot->base_gfn < end; +} + +/* Iterate over each memslot at least partially intersecting [start, end) range */ +#define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end) \ + for (kvm_memslot_iter_start(iter, slots, start); \ + kvm_memslot_iter_is_valid(iter, end); \ + kvm_memslot_iter_next(iter)) + /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot @@ -826,11 +979,10 @@ int __kvm_set_memory_region(struct kvm *kvm, void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + struct kvm_memory_slot *new, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); @@ -856,9 +1008,9 @@ void kvm_set_page_accessed(struct page *page); kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); -kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, 
+kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); +kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); +kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable, hva_t *hva); @@ -867,7 +1019,7 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); -void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache); +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); @@ -935,7 +1087,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); @@ -943,12 +1095,8 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); -int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, - struct gfn_to_pfn_cache *cache, bool atomic); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); -int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, - 
struct gfn_to_pfn_cache *cache, bool dirty, bool atomic); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, @@ -963,10 +1111,109 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +/** + * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a + * given guest physical address. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * @vcpu: vCPU to be used for marking pages dirty and to be woken on + * invalidation. + * @guest_uses_pa: indicates that the resulting host physical PFN is used while + * @vcpu is IN_GUEST_MODE so invalidations should wake it. + * @kernel_map: requests a kernel virtual mapping (kmap / memremap). + * @gpa: guest physical address to map. + * @len: sanity check; the range being accessed must fit a single page. + * @dirty: mark the cache dirty immediately. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * This primes a gfn_to_pfn_cache and links it into the @kvm's list for + * invalidations to be processed. Invalidation callbacks to @vcpu using + * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM + * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check() + * to ensure that the cache is valid before accessing the target page. + */ +int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + struct kvm_vcpu *vcpu, bool guest_uses_pa, + bool kernel_map, gpa_t gpa, unsigned long len, + bool dirty); + +/** + * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. 
+ * @gpc: struct gfn_to_pfn_cache object. + * @gpa: current guest physical address to map. + * @len: sanity check; the range being accessed must fit a single page. + * @dirty: mark the cache dirty immediately. + * + * @return: %true if the cache is still valid and the address matches. + * %false if the cache is not valid. + * + * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock + * while calling this function, and then continue to hold the lock until the + * access is complete. + * + * Callers in IN_GUEST_MODE may do so without locking, although they should + * still hold a read lock on kvm->srcu for the memslot checks. + */ +bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + gpa_t gpa, unsigned long len); + +/** + * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * @gpa: updated guest physical address to map. + * @len: sanity check; the range being accessed must fit a single page. + * @dirty: mark the cache dirty immediately. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful + * return from this function does not mean the page can be immediately + * accessed because it may have raced with an invalidation. Callers must + * still lock and check the cache status, as this function does not return + * with the lock still held to permit access. + */ +int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + gpa_t gpa, unsigned long len, bool dirty); + +/** + * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * + * This unmaps the referenced page and marks it dirty, if appropriate. 
The + * cache is left in the invalid state but at least the mapping from GPA to + * userspace HVA will remain cached and can be reused on a subsequent + * refresh. + */ +void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); + +/** + * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache. + * + * @kvm: pointer to kvm instance. + * @gpc: struct gfn_to_pfn_cache object. + * + * This removes a cache from the @kvm's list to be processed on MMU notifier + * invocation. + */ +void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); + void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); -void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_vcpu_halt(struct kvm_vcpu *vcpu); +bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); @@ -1082,10 +1329,17 @@ static inline struct kvm *kvm_arch_alloc_vm(void) { return kzalloc(sizeof(struct kvm), GFP_KERNEL); } +#endif + +static inline void __kvm_arch_free_vm(struct kvm *kvm) +{ + kvfree(kvm); +} +#ifndef __KVM_HAVE_ARCH_VM_FREE static inline void kvm_arch_free_vm(struct kvm *kvm) { - kfree(kvm); + __kvm_arch_free_vm(kvm); } #endif @@ -1142,6 +1396,20 @@ static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) #endif } +/* + * Wake a vCPU if necessary, but don't do any stats/metadata updates. Returns + * true if the vCPU was blocking and was awakened, false otherwise. 
+ */ +static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) +{ + return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); +} + +static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu) +{ + return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu)); +} + #ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED /* * returns true if the virtual interrupt controller is initialized and @@ -1156,6 +1424,16 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm) } #endif +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu); + +void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)); +void kvm_unregister_perf_callbacks(void); +#else +static inline void kvm_register_perf_callbacks(void *ign) {} +static inline void kvm_unregister_perf_callbacks(void) {} +#endif /* CONFIG_GUEST_PERF_EVENTS */ + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); @@ -1164,7 +1442,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); bool kvm_is_reserved_pfn(kvm_pfn_t pfn); bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); -bool kvm_is_transparent_hugepage(kvm_pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; @@ -1195,25 +1472,15 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* - * Returns a pointer to the memslot at slot_index if it contains gfn. + * Returns a pointer to the memslot if it contains gfn. * Otherwise returns NULL. */ static inline struct kvm_memory_slot * -try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn) +try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { - struct kvm_memory_slot *slot; - - if (slot_index < 0 || slot_index >= slots->used_slots) + if (!slot) return NULL; - /* - * slot_index can come from vcpu->last_used_slot which is not kept - * in sync with userspace-controllable memslot deletion. 
So use nospec - * to prevent the CPU from speculating past the end of memslots[]. - */ - slot_index = array_index_nospec(slot_index, slots->used_slots); - slot = &slots->memslots[slot_index]; - if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages) return slot; else @@ -1221,63 +1488,63 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn) } /* - * Returns a pointer to the memslot that contains gfn and records the index of - * the slot in index. Otherwise returns NULL. + * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL. * - * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! + * With "approx" set returns the memslot also when the address falls + * in a hole. In that case one of the memslots bordering the hole is + * returned. */ static inline struct kvm_memory_slot * -search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index) +search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx) { - int start = 0, end = slots->used_slots; - struct kvm_memory_slot *memslots = slots->memslots; struct kvm_memory_slot *slot; - - if (unlikely(!slots->used_slots)) - return NULL; - - while (start < end) { - int slot = start + (end - start) / 2; - - if (gfn >= memslots[slot].base_gfn) - end = slot; - else - start = slot + 1; - } - - slot = try_get_memslot(slots, start, gfn); - if (slot) { - *index = start; - return slot; + struct rb_node *node; + int idx = slots->node_idx; + + slot = NULL; + for (node = slots->gfn_tree.rb_node; node; ) { + slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]); + if (gfn >= slot->base_gfn) { + if (gfn < slot->base_gfn + slot->npages) + return slot; + node = node->rb_right; + } else + node = node->rb_left; } - return NULL; + return approx ? slot : NULL; } -/* - * __gfn_to_memslot() and its descendants are here because it is called from - * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. 
gfn_to_memslot() - * itself isn't here as an inline because that would bloat other code too much. - */ static inline struct kvm_memory_slot * -__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) +____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; - int slot_index = atomic_read(&slots->last_used_slot); - slot = try_get_memslot(slots, slot_index, gfn); + slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot); + slot = try_get_memslot(slot, gfn); if (slot) return slot; - slot = search_memslots(slots, gfn, &slot_index); + slot = search_memslots(slots, gfn, approx); if (slot) { - atomic_set(&slots->last_used_slot, slot_index); + atomic_long_set(&slots->last_used_slot, (unsigned long)slot); return slot; } return NULL; } +/* + * __gfn_to_memslot() and its descendants are here to allow arch code to inline + * the lookups in hot paths. gfn_to_memslot() itself isn't here as an inline + * because that would bloat other code too much. 
+ */ +static inline struct kvm_memory_slot * +__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) +{ + return ____gfn_to_memslot(slots, gfn, false); +} + static inline unsigned long __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { @@ -1453,7 +1720,8 @@ struct _kvm_stats_desc { STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ - HALT_POLL_HIST_COUNT) + HALT_POLL_HIST_COUNT), \ + STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) extern struct dentry *kvm_debugfs_dir; @@ -1765,6 +2033,8 @@ void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); +bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, + struct kvm_kernel_irq_routing_entry *); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2237abb93ccd..dceac12c1ce5 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -19,6 +19,7 @@ struct kvm_memslots; enum kvm_mr_change; #include <linux/types.h> +#include <linux/spinlock_types.h> #include <asm/kvm_types.h> @@ -55,9 +56,19 @@ struct gfn_to_hva_cache { struct gfn_to_pfn_cache { u64 generation; - gfn_t gfn; + gpa_t gpa; + unsigned long uhva; + struct kvm_memory_slot *memslot; + struct kvm_vcpu *vcpu; + struct list_head list; + rwlock_t lock; + void *khva; kvm_pfn_t pfn; + bool active; + bool valid; bool dirty; + bool kernel_map; + bool guest_uses_pa; }; #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE @@ -94,6 +105,7 @@ struct kvm_vcpu_stat_generic { u64 halt_poll_success_hist[HALT_POLL_HIST_COUNT]; u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT]; u64 halt_wait_hist[HALT_POLL_HIST_COUNT]; + u64 blocking; }; #define KVM_STATS_NAME_SIZE 48 diff --git 
a/include/linux/leds.h b/include/linux/leds.h index a0b730be40ad..ba4861ec73d3 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -360,7 +360,7 @@ struct led_trigger { struct led_hw_trigger_type *trigger_type; /* LEDs under control by this trigger (for simple triggers) */ - rwlock_t leddev_list_lock; + spinlock_t leddev_list_lock; struct list_head led_cdevs; /* Link to next registered trigger */ diff --git a/include/linux/libata.h b/include/linux/libata.h index c0c64f03e107..2a8404b26083 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -394,7 +394,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ @@ -427,6 +427,7 @@ enum { ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ + ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ @@ -676,6 +677,18 @@ struct ata_ering { struct ata_ering_entry ring[ATA_ERING_SIZE]; }; +struct ata_cpr { + u8 num; + u8 num_storage_elements; + u64 start_lba; + u64 num_lbas; +}; + +struct ata_cpr_log { + u8 nr_cpr; + struct ata_cpr cpr[]; +}; + struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ @@ -735,6 +748,9 @@ struct ata_device { u32 zac_zones_optimal_nonseq; u32 zac_zones_max_open; + /* Concurrent positioning ranges */ + struct ata_cpr_log *cpr_log; + /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ @@ -1388,7 +1404,7 @@ extern int ata_link_nr_enabled(struct ata_link *link); */ extern const struct ata_port_operations ata_base_port_ops; extern 
const struct ata_port_operations sata_port_ops; -extern struct device_attribute *ata_common_sdev_attrs[]; +extern const struct attribute_group *ata_common_sdev_groups[]; /* * All sht initializers (BASE, PIO, BMDMA, NCQ) must be instantiated @@ -1418,14 +1434,14 @@ extern struct device_attribute *ata_common_sdev_attrs[]; #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ - .sdev_attrs = ata_common_sdev_attrs + .sdev_groups = ata_common_sdev_groups #ifdef CONFIG_SATA_HOST -extern struct device_attribute *ata_ncq_sdev_attrs[]; +extern const struct attribute_group *ata_ncq_sdev_groups[]; #define ATA_NCQ_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ - .sdev_attrs = ata_ncq_sdev_attrs, \ + .sdev_groups = ata_ncq_sdev_groups, \ .change_queue_depth = ata_scsi_change_queue_depth #endif diff --git a/include/linux/list.h b/include/linux/list.h index f2af4b4aa4e9..6636fc07f918 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -2,11 +2,13 @@ #ifndef _LINUX_LIST_H #define _LINUX_LIST_H +#include <linux/container_of.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/poison.h> #include <linux/const.h> -#include <linux/kernel.h> + +#include <asm/barrier.h> /* * Circular doubly linked list implementation. 
diff --git a/include/linux/llist.h b/include/linux/llist.h index 24f207b0190b..85bda2d02d65 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -49,7 +49,9 @@ */ #include <linux/atomic.h> -#include <linux/kernel.h> +#include <linux/container_of.h> +#include <linux/stddef.h> +#include <linux/types.h> struct llist_head { struct llist_node *first; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c4ae6506b8b3..fcef192e5e45 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -303,10 +303,15 @@ void nlmsvc_invalidate_all(void); int nlmsvc_unlock_all_by_sb(struct super_block *sb); int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); +static inline struct file *nlmsvc_file_file(struct nlm_file *file) +{ + return file->f_file[O_RDONLY] ? + file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; +} + static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return locks_inode(file->f_file[O_RDONLY] ? - file->f_file[O_RDONLY] : file->f_file[O_WRONLY]); + return locks_inode(nlmsvc_file_file(file)); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index a98309c0121c..398f70093cd3 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,18 +96,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst *, __be32 *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); -int 
nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); +bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); #endif /* LOCKD_XDR_H */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 5ae766f26e04..9a6b55da8fd6 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,21 +22,20 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) - - -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *); -int 
nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); +bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); extern const struct rpc_version nlm_version4; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 9fe165beb0f9..467b94257105 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -481,23 +481,6 @@ do { \ #endif /* CONFIG_LOCK_STAT */ -#ifdef CONFIG_LOCKDEP - -/* - * On lockdep we dont want the hand-coded irq-enable of - * _raw_*_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \ - LOCK_CONTENDED((_lock), (try), (lock)) - -#else /* CONFIG_LOCKDEP */ - -#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \ - lockfl((_lock), (flags)) - -#endif /* CONFIG_LOCKDEP */ - #ifdef CONFIG_PROVE_LOCKING 
extern void print_irqtrace_events(struct task_struct *curr); #else diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 3e726ace5c62..d22430840b53 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -21,7 +21,7 @@ enum lockdep_wait_type { LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ #ifdef CONFIG_PROVE_RAW_LOCK_NESTING - LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ + LD_WAIT_CONFIG, /* preemptible in PREEMPT_RT, spinlock_t etc.. */ #else LD_WAIT_CONFIG = LD_WAIT_SPIN, #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2adeea44c0d5..a5a724c308d8 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -26,13 +26,13 @@ * #undef LSM_HOOK * }; */ -LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) -LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, - struct task_struct *to, struct file *file) +LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) +LSM_HOOK(int, 0, binder_transaction, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, + const struct cred *to, struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) @@ -78,12 +78,11 @@ LSM_HOOK(int, 0, sb_set_mnt_opts, struct super_block *sb, void *mnt_opts, LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, struct super_block *newsb, unsigned long kern_flags, unsigned long *set_kern_flags) -LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, - int len, void 
**mnt_opts) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, - int mode, const struct qstr *name, void **ctx, u32 *ctxlen) + int mode, const struct qstr *name, const char **xattr_name, + void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) @@ -205,8 +204,7 @@ LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) -LSM_HOOK(void, LSM_RET_VOID, task_getsecid_subj, - struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj, struct task_struct *p, u32 *secid) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) @@ -328,11 +326,11 @@ LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) LSM_HOOK(int, 0, tun_dev_open, void *security) -LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_endpoint *ep, +LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_association *asoc, struct sk_buff *skb) LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) -LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_endpoint *ep, +LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) #endif /* CONFIG_SECURITY_NETWORK */ @@ -402,3 +400,8 @@ LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ + +#ifdef CONFIG_IO_URING 
+LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) +LSM_HOOK(int, 0, uring_sqpoll, void) +#endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 5c4c5c0602cb..3bf5c658bc44 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -180,8 +180,6 @@ * Copy all security options from a given superblock to another * @oldsb old superblock which contain information to clone * @newsb new superblock which needs filled in - * @sb_add_mnt_opt: - * Add one mount @option to @mnt_opts. * @sb_parse_opts_str: * Parse a string of security data filling in the opts structure * @options string containing all mount options known by the LSM @@ -196,6 +194,9 @@ * @dentry dentry to use in calculating the context. * @mode mode used to determine resource type. * @name name of the last path component used to create file + * @xattr_name pointer to place the pointer to security xattr name. + * Caller does not have to free the resulting pointer. It's + * a pointer to a static string. * @ctx pointer to place the pointer to the resulting context in. * @ctxlen point to place the length of the resulting context. * @dentry_create_files_as: @@ -716,11 +717,9 @@ * @p. * @p contains the task_struct for the process. * Return 0 if permission is granted. - * @task_getsecid_subj: - * Retrieve the subjective security identifier of the task_struct in @p - * and return it in @secid. Special care must be taken to ensure that @p - * is the either the "current" task, or the caller has exclusive access - * to @p. + * @current_getsecid_subj: + * Retrieve the subjective security identifier of the current task and + * return it in @secid. * In case of failure, @secid will be set to zero. 
* @task_getsecid_obj: * Retrieve the objective security identifier of the task_struct in @p @@ -1024,9 +1023,9 @@ * Security hooks for SCTP * * @sctp_assoc_request: - * Passes the @ep and @chunk->skb of the association INIT packet to + * Passes the @asoc and @chunk->skb of the association INIT packet to * the security module. - * @ep pointer to sctp endpoint structure. + * @asoc pointer to sctp association structure. * @skb pointer to skbuff of association packet. * Return 0 on success, error on failure. * @sctp_bind_connect: @@ -1044,9 +1043,9 @@ * Called whenever a new socket is created by accept(2) (i.e. a TCP * style socket) or when a socket is 'peeled off' e.g userspace * calls sctp_peeloff(3). - * @ep pointer to current sctp endpoint structure. + * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. - * @sk pointer to new sock structure. + * @newsk pointer to new sock structure. * * Security hooks for Infiniband * @@ -1313,22 +1312,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. 
* @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1557,6 +1556,19 @@ * Read perf_event security info if allowed. * @perf_event_write: * Write perf_event security info if allowed. + * + * Security hooks for io_uring + * + * @uring_override_creds: + * Check if the current task, executing an io_uring operation, is allowed + * to override its credentials with @new. + * + * @new: the new creds to use + * + * @uring_sqpoll: + * Check whether the current task is allowed to spawn an io_uring polling + * thread (IORING_SETUP_SQPOLL). + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 68427e8fadeb..15d03f6532d0 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -18,7 +18,6 @@ struct mdev_device { void *driver_data; struct list_head next; struct mdev_type *type; - struct device *iommu_device; bool active; }; @@ -27,25 +26,6 @@ static inline struct mdev_device *to_mdev_device(struct device *dev) return container_of(dev, struct mdev_device, dev); } -/* - * Called by the parent device driver to set the device which represents - * this mdev in iommu protection scope. By default, the iommu device is - * NULL, that indicates using vendor defined isolation. - * - * @dev: the mediated device that iommu will isolate. - * @iommu_device: a pci device which represents the iommu for @dev. 
- */ -static inline void mdev_set_iommu_device(struct mdev_device *mdev, - struct device *iommu_device) -{ - mdev->iommu_device = iommu_device; -} - -static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev) -{ - return mdev->iommu_device; -} - unsigned int mdev_get_type_group_id(struct mdev_device *mdev); unsigned int mtype_get_type_group_id(struct mdev_type *mtype); struct device *mtype_get_parent_dev(struct mdev_type *mtype); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 5e6dc38f418e..ecac96d52e01 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -7,6 +7,7 @@ #define __LINUX_MDIO_H__ #include <uapi/linux/mdio.h> +#include <linux/bitfield.h> #include <linux/mod_devicetable.h> /* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit @@ -14,6 +15,7 @@ */ #define MII_ADDR_C45 (1<<30) #define MII_DEVADDR_C45_SHIFT 16 +#define MII_DEVADDR_C45_MASK GENMASK(20, 16) #define MII_REGADDR_C45_MASK GENMASK(15, 0) struct gpio_desc; @@ -349,12 +351,48 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); +int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set); + +static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum) +{ + return mdiobus_read(mdiodev->bus, mdiodev->addr, regnum); +} + +static inline int mdiodev_write(struct mdio_device *mdiodev, u32 regnum, + u16 val) +{ + return mdiobus_write(mdiodev->bus, mdiodev->addr, regnum, val); +} + +static inline int mdiodev_modify(struct mdio_device *mdiodev, u32 regnum, + u16 mask, u16 set) +{ + return mdiobus_modify(mdiodev->bus, mdiodev->addr, regnum, mask, set); +} + +static inline int mdiodev_modify_changed(struct mdio_device *mdiodev, + u32 regnum, u16 mask, u16 set) +{ + return mdiobus_modify_changed(mdiodev->bus, mdiodev->addr, 
regnum, + mask, set); +} static inline u32 mdiobus_c45_addr(int devad, u16 regnum) { return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum; } +static inline u16 mdiobus_c45_regad(u32 regnum) +{ + return FIELD_GET(MII_REGADDR_C45_MASK, regnum); +} + +static inline u16 mdiobus_c45_devad(u32 regnum) +{ + return FIELD_GET(MII_DEVADDR_C45_MASK, regnum); +} + static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, u16 regnum) { diff --git a/include/linux/mdio/mdio-mscc-miim.h b/include/linux/mdio/mdio-mscc-miim.h new file mode 100644 index 000000000000..5b4ed2c3cbb9 --- /dev/null +++ b/include/linux/mdio/mdio-mscc-miim.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Driver for the MDIO interface of Microsemi network switches. + * + * Author: Colin Foster <[email protected]> + * Copyright (C) 2021 Innovative Advantage + */ +#ifndef MDIO_MSCC_MIIM_H +#define MDIO_MSCC_MIIM_H + +#include <linux/device.h> +#include <linux/phy.h> +#include <linux/regmap.h> + +int mscc_miim_setup(struct device *device, struct mii_bus **bus, + const char *name, struct regmap *mii_regmap, + int status_offset); + +#endif diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index c6786c12b207..df1fab44ea5c 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -117,4 +117,7 @@ int mei_cldev_enable(struct mei_cl_device *cldev); int mei_cldev_disable(struct mei_cl_device *cldev); bool mei_cldev_enabled(const struct mei_cl_device *cldev); +void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size); +int mei_cldev_dma_unmap(struct mei_cl_device *cldev); + #endif /* _LINUX_MEI_CL_BUS_H */ diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index 5c4a18a91f89..ae4526389261 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -16,10 +16,6 @@ #include <asm/mem_encrypt.h> -#else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */ - -static inline bool 
mem_encrypt_active(void) { return false; } - #endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ #ifdef CONFIG_AMD_MEM_ENCRYPT diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 34de69b3b8ba..50ad19662a32 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _LINUX_MEMBLOCK_H #define _LINUX_MEMBLOCK_H -#ifdef __KERNEL__ /* * Logical memory blocks. @@ -28,17 +27,26 @@ extern unsigned long long max_possible_pfn; /** * enum memblock_flags - definition of memory region attributes * @MEMBLOCK_NONE: no special request - * @MEMBLOCK_HOTPLUG: hotpluggable region + * @MEMBLOCK_HOTPLUG: memory region indicated in the firmware-provided memory + * map during early boot as hot(un)pluggable system RAM (e.g., memory range + * that might get hotunplugged later). With "movable_node" set on the kernel + * commandline, try keeping this memory region hotunpluggable. Does not apply + * to memblocks added ("hotplugged") after early boot. * @MEMBLOCK_MIRROR: mirrored region * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as * reserved in the memory map; refer to memblock_mark_nomap() description * for further details + * @MEMBLOCK_DRIVER_MANAGED: memory region that is always detected and added + * via a driver, and never indicated in the firmware-provided memory map as + * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the + * kernel resource tree. 
*/ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ + MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ }; /** @@ -100,10 +108,11 @@ static inline void memblock_discard(void) {} #endif void memblock_allow_resize(void); -int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); +int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid, + enum memblock_flags flags); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); -int memblock_free(phys_addr_t base, phys_addr_t size); +int memblock_phys_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP int memblock_physmem_add(phys_addr_t base, phys_addr_t size); @@ -118,7 +127,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); -void memblock_free_ptr(void *ptr, size_t size); +void memblock_free(void *ptr, size_t size); void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); @@ -133,7 +142,7 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __memblock_free_late(phys_addr_t base, phys_addr_t size); +void memblock_free_late(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, @@ -208,7 +217,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_HOTPLUG, p_start, p_end, 
NULL) + MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED, \ + p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -219,7 +229,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_HOTPLUG, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED,\ + p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas @@ -249,6 +260,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_driver_managed(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_DRIVER_MANAGED; +} + int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, @@ -372,7 +388,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -#define MEMBLOCK_ALLOC_KASAN 1 +#define MEMBLOCK_ALLOC_NOLEAKTRACE 1 /* We are using top down, so it is safe to use 0 here */ #define MEMBLOCK_LOW_LIMIT 0 @@ -388,8 +404,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); @@ -441,23 +457,6 @@ static inline void *memblock_alloc_node(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } -static inline void memblock_free_early(phys_addr_t 
base, - phys_addr_t size) -{ - memblock_free(base, size); -} - -static inline void memblock_free_early_nid(phys_addr_t base, - phys_addr_t size, int nid) -{ - memblock_free(base, size); -} - -static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) -{ - __memblock_free_late(base, size); -} - /* * Set the allocation direction to bottom-up or top-down. */ @@ -605,6 +604,5 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) } #endif -#endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3096c9a0ee01..b72d75141e12 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -33,6 +33,7 @@ enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, + MEMCG_VMALLOC, MEMCG_NR_STAT, }; @@ -42,6 +43,7 @@ enum memcg_memory_event { MEMCG_MAX, MEMCG_OOM, MEMCG_OOM_KILL, + MEMCG_OOM_GROUP_KILL, MEMCG_SWAP_HIGH, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, @@ -180,12 +182,6 @@ struct mem_cgroup_thresholds { struct mem_cgroup_threshold_ary *spare; }; -enum memcg_kmem_state { - KMEM_NONE, - KMEM_ALLOCATED, - KMEM_ONLINE, -}; - #if defined(CONFIG_SMP) struct memcg_padding { char x[0]; @@ -318,7 +314,6 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; - enum memcg_kmem_state kmem_state; struct obj_cgroup __rcu *objcg; struct list_head objcg_list; /* list of inherited objcgs */ #endif @@ -369,7 +364,7 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) -static inline bool PageMemcgKmem(struct page *page); +static inline bool folio_memcg_kmem(struct folio *folio); /* * After the initialization objcg->memcg is always pointing at @@ -384,89 +379,95 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) } /* - * __page_memcg - get the memory cgroup associated with a non-kmem page - * @page: a pointer to the page struct + * __folio_memcg - Get the memory cgroup associated 
with a non-kmem folio + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * kmem pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * kmem folios. */ -static inline struct mem_cgroup *__page_memcg(struct page *page) +static inline struct mem_cgroup *__folio_memcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * __page_objcg - get the object cgroup associated with a kmem page - * @page: a pointer to the page struct + * __folio_objcg - get the object cgroup associated with a kmem folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the object cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the object cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper object cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * LRU pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * LRU folios. 
*/ -static inline struct obj_cgroup *__page_objcg(struct page *page) +static inline struct obj_cgroup *__folio_objcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * page_memcg - get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg - Get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. * - * For a non-kmem page any of the following ensures page and memcg binding + * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * - * - the page lock + * - the folio lock * - LRU isolation * - lock_page_memcg() * - exclusive reference * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. + * For a kmem folio a caller should hold an rcu read lock to protect memcg + * associated with a kmem folio from being released. 
*/ +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + if (folio_memcg_kmem(folio)) + return obj_cgroup_memcg(__folio_objcg(folio)); + return __folio_memcg(folio); +} + static inline struct mem_cgroup *page_memcg(struct page *page) { - if (PageMemcgKmem(page)) - return obj_cgroup_memcg(__page_objcg(page)); - else - return __page_memcg(page); + return folio_memcg(page_folio(page)); } -/* - * page_memcg_rcu - locklessly get the memory cgroup associated with a page - * @page: a pointer to the page struct +/** + * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. + * + * Return: A pointer to the memory cgroup associated with the folio, + * or NULL. */ -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { - unsigned long memcg_data = READ_ONCE(page->memcg_data); + unsigned long memcg_data = READ_ONCE(folio->memcg_data); - VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { @@ -523,75 +524,33 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) #ifdef CONFIG_MEMCG_KMEM /* - * PageMemcgKmem - check if the page has MemcgKmem flag set - * @page: a pointer to the page struct + * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. + * @folio: Pointer to the folio. * - * Checks if the page has MemcgKmem flag set. 
The caller must ensure that - * the page has an associated memory cgroup. It's not safe to call this function - * against some types of pages, e.g. slab pages. + * Checks if the folio has MemcgKmem flag set. The caller must ensure + * that the folio has an associated memory cgroup. It's not safe to call + * this function against some types of folios, e.g. slab folios. */ -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { - VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); - return page->memcg_data & MEMCG_DATA_KMEM; + VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); + VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); + return folio->memcg_data & MEMCG_DATA_KMEM; } -/* - * page_objcgs - get the object cgroups vector associated with a page - * @page: a pointer to the page struct - * - * Returns a pointer to the object cgroups vector associated with the page, - * or NULL. This function assumes that the page is known to have an - * associated object cgroups vector. It's not safe to call this function - * against pages, which might have an associated memory cgroup: e.g. - * kernel stack pages. - */ -static inline struct obj_cgroup **page_objcgs(struct page *page) -{ - unsigned long memcg_data = READ_ONCE(page->memcg_data); - - VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} - -/* - * page_objcgs_check - get the object cgroups vector associated with a page - * @page: a pointer to the page struct - * - * Returns a pointer to the object cgroups vector associated with the page, - * or NULL. This function is safe to use if the page can be directly associated - * with a memory cgroup. 
- */ -static inline struct obj_cgroup **page_objcgs_check(struct page *page) -{ - unsigned long memcg_data = READ_ONCE(page->memcg_data); - - if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) - return NULL; - - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); - - return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); -} #else -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { return false; } -static inline struct obj_cgroup **page_objcgs(struct page *page) -{ - return NULL; -} +#endif -static inline struct obj_cgroup **page_objcgs_check(struct page *page) +static inline bool PageMemcgKmem(struct page *page) { - return NULL; + return folio_memcg_kmem(page_folio(page)); } -#endif static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { @@ -684,26 +643,47 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) page_counter_read(&memcg->memory); } -int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); + +/** + * mem_cgroup_charge - Charge a newly allocated folio to a cgroup. + * @folio: Folio to charge. + * @mm: mm context of the allocating task. + * @gfp: Reclaim mode. + * + * Try to charge @folio to the memcg that @mm belongs to, reclaiming + * pages according to @gfp if necessary. If @mm is NULL, try to + * charge to the active memcg. + * + * Do not use this for folios allocated for swapin. + * + * Return: 0 on success. Otherwise, an error code is returned. 
+ */ +static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, + gfp_t gfp) { if (mem_cgroup_disabled()) return 0; - return __mem_cgroup_charge(page, mm, gfp_mask); + return __mem_cgroup_charge(folio, mm, gfp); } int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); -void __mem_cgroup_uncharge(struct page *page); -static inline void mem_cgroup_uncharge(struct page *page) +void __mem_cgroup_uncharge(struct folio *folio); + +/** + * mem_cgroup_uncharge - Uncharge a folio. + * @folio: Folio to uncharge. + * + * Uncharge a folio previously charged with mem_cgroup_charge(). + */ +static inline void mem_cgroup_uncharge(struct folio *folio) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge(page); + __mem_cgroup_uncharge(folio); } void __mem_cgroup_uncharge_list(struct list_head *page_list); @@ -714,7 +694,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) __mem_cgroup_uncharge_list(page_list); } -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); +void mem_cgroup_migrate(struct folio *old, struct folio *new); /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node @@ -753,33 +733,33 @@ out: } /** - * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page - * @page: the page + * folio_lruvec - return lruvec for isolating/putting an LRU folio + * @folio: Pointer to the folio. * - * This function relies on page->mem_cgroup being stable. + * This function relies on folio->mem_cgroup being stable. 
*/ -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); - VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); - return mem_cgroup_lruvec(memcg, pgdat); + VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio); + return mem_cgroup_lruvec(memcg, folio_pgdat(folio)); } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); -struct lruvec *lock_page_lruvec(struct page *page); -struct lruvec *lock_page_lruvec_irq(struct page *page); -struct lruvec *lock_page_lruvec_irqsave(struct page *page, +struct lruvec *folio_lruvec_lock(struct folio *folio); +struct lruvec *folio_lruvec_lock_irq(struct folio *folio); +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags); #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio); #else -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } #endif @@ -947,6 +927,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); extern bool cgroup_memory_noswap; #endif +void folio_memcg_lock(struct folio *folio); +void folio_memcg_unlock(struct folio *folio); void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); @@ -963,6 +945,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, local_irq_restore(flags); } +static inline void mod_memcg_page_state(struct page *page, + int idx, int val) +{ + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = page_memcg(page); + if (memcg) + mod_memcg_state(memcg, idx, 
val); + rcu_read_unlock(); +} + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return READ_ONCE(memcg->vmstats.state[idx]); @@ -1115,12 +1112,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_MAX 0 +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; } -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { WARN_ON_ONCE(!rcu_read_lock_held()); return NULL; @@ -1131,6 +1133,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline bool folio_memcg_kmem(struct folio *folio) +{ + return false; +} + static inline bool PageMemcgKmem(struct page *page) { return false; @@ -1179,8 +1186,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) return false; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct folio *folio, + struct mm_struct *mm, gfp_t gfp) { return 0; } @@ -1195,7 +1202,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) { } -static inline void mem_cgroup_uncharge(struct page *page) +static inline void mem_cgroup_uncharge(struct folio *folio) { } @@ -1203,7 +1210,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_migrate(struct page *old, struct page *new) +static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } @@ -1213,14 +1220,14 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, return &pgdat->__lruvec; } -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = 
page_pgdat(page); - + struct pglist_data *pgdat = folio_pgdat(folio); return &pgdat->__lruvec; } -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } @@ -1250,26 +1257,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } -static inline struct lruvec *lock_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irq(struct page *page) +static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irq(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flagsp) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); return &pgdat->__lruvec; @@ -1356,6 +1363,14 @@ static inline void unlock_page_memcg(struct page *page) { } +static inline void folio_memcg_lock(struct folio *folio) +{ +} + +static inline void folio_memcg_unlock(struct folio *folio) +{ +} + static inline void mem_cgroup_handle_over_high(void) { } @@ -1400,6 +1415,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, { } +static inline void mod_memcg_page_state(struct page *page, + int idx, int val) +{ +} + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return 0; @@ -1517,38 +1537,39 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, } /* Test requires a stable page->memcg 
binding, see page_memcg() */ -static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) +static inline bool folio_matches_lruvec(struct folio *folio, + struct lruvec *lruvec) { - return lruvec_pgdat(lruvec) == page_pgdat(page) && - lruvec_memcg(lruvec) == page_memcg(page); + return lruvec_pgdat(lruvec) == folio_pgdat(folio) && + lruvec_memcg(lruvec) == folio_memcg(folio); } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irq(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, struct lruvec *locked_lruvec) { if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); } - return lock_page_lruvec_irq(page); + return folio_lruvec_lock_irq(folio); } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, struct lruvec *locked_lruvec, unsigned long *flags) { if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irqrestore(locked_lruvec, *flags); } - return lock_page_lruvec_irqsave(page, flags); + return folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -1558,17 +1579,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback); -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { if 
(mem_cgroup_disabled()) return; - if (unlikely(&page_memcg(page)->css != wb->memcg_css)) - mem_cgroup_track_foreign_dirty_slowpath(page, wb); + if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } void mem_cgroup_flush_foreign(struct bdi_writeback *wb); @@ -1588,7 +1609,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, { } -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { } @@ -1613,7 +1634,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) return true; do { - if (time_before(jiffies, memcg->socket_pressure)) + if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) return true; } while ((memcg = parent_mem_cgroup(memcg))); return false; diff --git a/include/linux/memory.h b/include/linux/memory.h index 182c606adb06..88eb587b5143 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -96,7 +96,6 @@ struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; - int status_change_nid_high; int status_change_nid; }; @@ -110,7 +109,7 @@ struct mem_section; #define SLAB_CALLBACK_PRI 1 #define IPC_CALLBACK_PRI 10 -#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifndef CONFIG_MEMORY_HOTPLUG static inline void memory_dev_init(void) { return; @@ -126,7 +125,14 @@ static inline int memory_notify(unsigned long val, void *v) { return 0; } -#else +static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) +{ + return 0; +} +/* These aren't inline functions due to a GCC bug. 
*/ +#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) +#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) +#else /* CONFIG_MEMORY_HOTPLUG */ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, @@ -140,7 +146,6 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func); -#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) extern int memory_group_register_static(int nid, unsigned long max_pages); extern int memory_group_register_dynamic(int nid, unsigned long unit_pages); @@ -149,9 +154,6 @@ struct memory_group *memory_group_find_by_id(int mgid); typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, struct memory_group *excluded, void *arg); -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ - -#ifdef CONFIG_MEMORY_HOTPLUG #define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri };\ @@ -159,15 +161,7 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, }) #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) -#else -static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) -{ - return 0; -} -/* These aren't inline functions due to a GCC bug. */ -#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) -#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) -#endif +#endif /* CONFIG_MEMORY_HOTPLUG */ /* * Kernel text modification mutex, used for code patching. 
Users of this lock diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e5a867c950b2..be48e003a518 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -98,9 +98,6 @@ static inline void zone_seqlock_init(struct zone *zone) { seqlock_init(&zone->span_seqlock); } -extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); -extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); -extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); extern void adjust_present_page_count(struct page *page, struct memory_group *group, long nr_pages); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 4091692bed8c..668389b4b53d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -8,7 +8,6 @@ #include <linux/sched.h> #include <linux/mmzone.h> -#include <linux/dax.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> @@ -47,6 +46,7 @@ struct mempolicy { unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/perfer */ + int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ @@ -184,8 +184,6 @@ extern bool vma_migratable(struct vm_area_struct *vma); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); -extern bool numa_demotion_enabled; - static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return (pol->mode == MPOL_PREFERRED_MANY); @@ -301,8 +299,6 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp) return NULL; } -#define numa_demotion_enabled false - static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 
c0e9d35889e8..1fafcc38acba 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -73,16 +73,6 @@ struct dev_pagemap_ops { void (*page_free)(struct page *page); /* - * Transition the refcount in struct dev_pagemap to the dead state. - */ - void (*kill)(struct dev_pagemap *pgmap); - - /* - * Wait for refcount in struct dev_pagemap to be idle and reap it. - */ - void (*cleanup)(struct dev_pagemap *pgmap); - - /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. */ @@ -95,10 +85,14 @@ struct dev_pagemap_ops { * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping - * @internal_ref: internal reference if @ref is not provided by the caller - * @done: completion for @internal_ref + * @done: completion for @ref * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior + * @vmemmap_shift: structural definition of how the vmemmap page metadata + * is populated, specifically the metadata page order. + * A zero value (default) uses base pages as the vmemmap metadata + * representation. A bigger value will set up compound struct pages + * of the requested order value. * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. 
Used by various helpers to make sure that no @@ -109,11 +103,11 @@ struct dev_pagemap_ops { */ struct dev_pagemap { struct vmem_altmap altmap; - struct percpu_ref *ref; - struct percpu_ref internal_ref; + struct percpu_ref ref; struct completion done; enum memory_type type; unsigned int flags; + unsigned long vmemmap_shift; const struct dev_pagemap_ops *ops; void *owner; int nr_range; @@ -130,6 +124,11 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) return NULL; } +static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) +{ + return 1 << pgmap->vmemmap_shift; +} + #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); @@ -191,7 +190,7 @@ static inline unsigned long memremap_compat_align(void) static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) - percpu_ref_put(pgmap->ref); + percpu_ref_put(&pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */ diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index fa7a43f02f27..8db52324f416 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -36,6 +36,7 @@ enum da9063_variant_codes { PMIC_DA9063_BB = 0x5, PMIC_DA9063_CA = 0x6, PMIC_DA9063_DA = 0x7, + PMIC_DA9063_EA = 0x8, }; /* Interrupts */ diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h deleted file mode 100644 index e5b8dbf828b6..000000000000 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Header file for device driver Hi6421 PMIC - * - * Copyright (c) 2013 Linaro Ltd. - * Copyright (C) 2011 Hisilicon. 
- * Copyright (c) 2020-2021 Huawei Technologies Co., Ltd - * - * Guodong Xu <[email protected]> - */ - -#ifndef __HISI_PMIC_H -#define __HISI_PMIC_H - -#include <linux/irqdomain.h> -#include <linux/regmap.h> - -struct hi6421_spmi_pmic { - struct resource *res; - struct device *dev; - void __iomem *regs; - struct regmap *regmap; -}; - -#endif /* __HISI_PMIC_H */ diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h index 92d763230bdf..a18c1539a152 100644 --- a/include/linux/mfd/idt8a340_reg.h +++ b/include/linux/mfd/idt8a340_reg.h @@ -506,6 +506,10 @@ #define STATE_MODE_SHIFT (0) #define STATE_MODE_MASK (0x7) +/* Bit definitions for the DPLL_MANU_REF_CFG register */ +#define MANUAL_REFERENCE_SHIFT (0) +#define MANUAL_REFERENCE_MASK (0x1f) + /* Bit definitions for the GPIO_CFG_GBL register */ #define SUPPLY_MODE_SHIFT (0) #define SUPPLY_MODE_MASK (0x3) @@ -654,7 +658,7 @@ /* Values of DPLL_N.DPLL_MODE.PLL_MODE */ enum pll_mode { PLL_MODE_MIN = 0, - PLL_MODE_NORMAL = PLL_MODE_MIN, + PLL_MODE_PLL = PLL_MODE_MIN, PLL_MODE_WRITE_PHASE = 1, PLL_MODE_WRITE_FREQUENCY = 2, PLL_MODE_GPIO_INC_DEC = 3, @@ -664,6 +668,31 @@ enum pll_mode { PLL_MODE_MAX = PLL_MODE_DISABLED, }; +/* Values of DPLL_CTRL_n.DPLL_MANU_REF_CFG.MANUAL_REFERENCE */ +enum manual_reference { + MANU_REF_MIN = 0, + MANU_REF_CLK0 = MANU_REF_MIN, + MANU_REF_CLK1, + MANU_REF_CLK2, + MANU_REF_CLK3, + MANU_REF_CLK4, + MANU_REF_CLK5, + MANU_REF_CLK6, + MANU_REF_CLK7, + MANU_REF_CLK8, + MANU_REF_CLK9, + MANU_REF_CLK10, + MANU_REF_CLK11, + MANU_REF_CLK12, + MANU_REF_CLK13, + MANU_REF_CLK14, + MANU_REF_CLK15, + MANU_REF_WRITE_PHASE, + MANU_REF_WRITE_FREQUENCY, + MANU_REF_XO_DPLL, + MANU_REF_MAX = MANU_REF_XO_DPLL, +}; + enum hw_tod_write_trig_sel { HW_TOD_WR_TRIG_SEL_MIN = 0, HW_TOD_WR_TRIG_SEL_MSB = HW_TOD_WR_TRIG_SEL_MIN, diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 833e578e051e..b1482b3cf353 100644 --- 
a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -133,35 +133,35 @@ enum max77686_pmic_reg { /* Reserved: 0x7A-0x7D */ MAX77686_REG_BBAT_CHG = 0x7E, - MAX77686_REG_32KHZ = 0x7F, + MAX77686_REG_32KHZ = 0x7F, MAX77686_REG_PMIC_END = 0x80, }; enum max77686_rtc_reg { - MAX77686_RTC_INT = 0x00, - MAX77686_RTC_INTM = 0x01, + MAX77686_RTC_INT = 0x00, + MAX77686_RTC_INTM = 0x01, MAX77686_RTC_CONTROLM = 0x02, MAX77686_RTC_CONTROL = 0x03, MAX77686_RTC_UPDATE0 = 0x04, /* Reserved: 0x5 */ MAX77686_WTSR_SMPL_CNTL = 0x06, - MAX77686_RTC_SEC = 0x07, - MAX77686_RTC_MIN = 0x08, - MAX77686_RTC_HOUR = 0x09, + MAX77686_RTC_SEC = 0x07, + MAX77686_RTC_MIN = 0x08, + MAX77686_RTC_HOUR = 0x09, MAX77686_RTC_WEEKDAY = 0x0A, - MAX77686_RTC_MONTH = 0x0B, - MAX77686_RTC_YEAR = 0x0C, - MAX77686_RTC_DATE = 0x0D, - MAX77686_ALARM1_SEC = 0x0E, - MAX77686_ALARM1_MIN = 0x0F, + MAX77686_RTC_MONTH = 0x0B, + MAX77686_RTC_YEAR = 0x0C, + MAX77686_RTC_DATE = 0x0D, + MAX77686_ALARM1_SEC = 0x0E, + MAX77686_ALARM1_MIN = 0x0F, MAX77686_ALARM1_HOUR = 0x10, MAX77686_ALARM1_WEEKDAY = 0x11, MAX77686_ALARM1_MONTH = 0x12, MAX77686_ALARM1_YEAR = 0x13, MAX77686_ALARM1_DATE = 0x14, - MAX77686_ALARM2_SEC = 0x15, - MAX77686_ALARM2_MIN = 0x16, + MAX77686_ALARM2_SEC = 0x15, + MAX77686_ALARM2_MIN = 0x16, MAX77686_ALARM2_HOUR = 0x17, MAX77686_ALARM2_WEEKDAY = 0x18, MAX77686_ALARM2_MONTH = 0x19, diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h index 26ab3b8eb612..cc6f07bfa2b3 100644 --- a/include/linux/mfd/ntxec.h +++ b/include/linux/mfd/ntxec.h @@ -26,7 +26,7 @@ struct ntxec { * This convenience function converts an 8-bit value to 16-bit for use in the * second kind of register. 
*/ -static inline __be16 ntxec_reg8(u8 value) +static inline u16 ntxec_reg8(u8 value) { return value << 8; } diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h deleted file mode 100644 index 4a5966475a35..000000000000 --- a/include/linux/mfd/rohm-bd70528.h +++ /dev/null @@ -1,389 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Copyright (C) 2018 ROHM Semiconductors */ - -#ifndef __LINUX_MFD_BD70528_H__ -#define __LINUX_MFD_BD70528_H__ - -#include <linux/bits.h> -#include <linux/device.h> -#include <linux/mfd/rohm-generic.h> -#include <linux/mfd/rohm-shared.h> -#include <linux/regmap.h> - -enum { - BD70528_BUCK1, - BD70528_BUCK2, - BD70528_BUCK3, - BD70528_LDO1, - BD70528_LDO2, - BD70528_LDO3, - BD70528_LED1, - BD70528_LED2, -}; - -struct bd70528_data { - struct rohm_regmap_dev chip; - struct mutex rtc_timer_lock; -}; - -#define BD70528_BUCK_VOLTS 0x10 -#define BD70528_LDO_VOLTS 0x20 - -#define BD70528_REG_BUCK1_EN 0x0F -#define BD70528_REG_BUCK1_VOLT 0x15 -#define BD70528_REG_BUCK2_EN 0x10 -#define BD70528_REG_BUCK2_VOLT 0x16 -#define BD70528_REG_BUCK3_EN 0x11 -#define BD70528_REG_BUCK3_VOLT 0x17 -#define BD70528_REG_LDO1_EN 0x1b -#define BD70528_REG_LDO1_VOLT 0x1e -#define BD70528_REG_LDO2_EN 0x1c -#define BD70528_REG_LDO2_VOLT 0x1f -#define BD70528_REG_LDO3_EN 0x1d -#define BD70528_REG_LDO3_VOLT 0x20 -#define BD70528_REG_LED_CTRL 0x2b -#define BD70528_REG_LED_VOLT 0x29 -#define BD70528_REG_LED_EN 0x2a - -/* main irq registers */ -#define BD70528_REG_INT_MAIN 0x7E -#define BD70528_REG_INT_MAIN_MASK 0x74 - -/* 'sub irq' registers */ -#define BD70528_REG_INT_SHDN 0x7F -#define BD70528_REG_INT_PWR_FLT 0x80 -#define BD70528_REG_INT_VR_FLT 0x81 -#define BD70528_REG_INT_MISC 0x82 -#define BD70528_REG_INT_BAT1 0x83 -#define BD70528_REG_INT_BAT2 0x84 -#define BD70528_REG_INT_RTC 0x85 -#define BD70528_REG_INT_GPIO 0x86 -#define BD70528_REG_INT_OP_FAIL 0x87 - -#define BD70528_REG_INT_SHDN_MASK 0x75 -#define 
BD70528_REG_INT_PWR_FLT_MASK 0x76 -#define BD70528_REG_INT_VR_FLT_MASK 0x77 -#define BD70528_REG_INT_MISC_MASK 0x78 -#define BD70528_REG_INT_BAT1_MASK 0x79 -#define BD70528_REG_INT_BAT2_MASK 0x7a -#define BD70528_REG_INT_RTC_MASK 0x7b -#define BD70528_REG_INT_GPIO_MASK 0x7c -#define BD70528_REG_INT_OP_FAIL_MASK 0x7d - -/* Reset related 'magic' registers */ -#define BD70528_REG_SHIPMODE 0x03 -#define BD70528_REG_HWRESET 0x04 -#define BD70528_REG_WARMRESET 0x05 -#define BD70528_REG_STANDBY 0x06 - -/* GPIO registers */ -#define BD70528_REG_GPIO_STATE 0x8F - -#define BD70528_REG_GPIO1_IN 0x4d -#define BD70528_REG_GPIO2_IN 0x4f -#define BD70528_REG_GPIO3_IN 0x51 -#define BD70528_REG_GPIO4_IN 0x53 -#define BD70528_REG_GPIO1_OUT 0x4e -#define BD70528_REG_GPIO2_OUT 0x50 -#define BD70528_REG_GPIO3_OUT 0x52 -#define BD70528_REG_GPIO4_OUT 0x54 - -/* RTC */ - -#define BD70528_REG_RTC_COUNT_H 0x2d -#define BD70528_REG_RTC_COUNT_L 0x2e -#define BD70528_REG_RTC_SEC 0x2f -#define BD70528_REG_RTC_MINUTE 0x30 -#define BD70528_REG_RTC_HOUR 0x31 -#define BD70528_REG_RTC_WEEK 0x32 -#define BD70528_REG_RTC_DAY 0x33 -#define BD70528_REG_RTC_MONTH 0x34 -#define BD70528_REG_RTC_YEAR 0x35 - -#define BD70528_REG_RTC_ALM_SEC 0x36 -#define BD70528_REG_RTC_ALM_START BD70528_REG_RTC_ALM_SEC -#define BD70528_REG_RTC_ALM_MINUTE 0x37 -#define BD70528_REG_RTC_ALM_HOUR 0x38 -#define BD70528_REG_RTC_ALM_WEEK 0x39 -#define BD70528_REG_RTC_ALM_DAY 0x3a -#define BD70528_REG_RTC_ALM_MONTH 0x3b -#define BD70528_REG_RTC_ALM_YEAR 0x3c -#define BD70528_REG_RTC_ALM_MASK 0x3d -#define BD70528_REG_RTC_ALM_REPEAT 0x3e -#define BD70528_REG_RTC_START BD70528_REG_RTC_SEC - -#define BD70528_REG_RTC_WAKE_SEC 0x43 -#define BD70528_REG_RTC_WAKE_START BD70528_REG_RTC_WAKE_SEC -#define BD70528_REG_RTC_WAKE_MIN 0x44 -#define BD70528_REG_RTC_WAKE_HOUR 0x45 -#define BD70528_REG_RTC_WAKE_CTRL 0x46 - -#define BD70528_REG_ELAPSED_TIMER_EN 0x42 -#define BD70528_REG_WAKE_EN 0x46 - -/* WDT registers */ -#define 
BD70528_REG_WDT_CTRL 0x4A -#define BD70528_REG_WDT_HOUR 0x49 -#define BD70528_REG_WDT_MINUTE 0x48 -#define BD70528_REG_WDT_SEC 0x47 - -/* Charger / Battery */ -#define BD70528_REG_CHG_CURR_STAT 0x59 -#define BD70528_REG_CHG_BAT_STAT 0x57 -#define BD70528_REG_CHG_BAT_TEMP 0x58 -#define BD70528_REG_CHG_IN_STAT 0x56 -#define BD70528_REG_CHG_DCIN_ILIM 0x5d -#define BD70528_REG_CHG_CHG_CURR_WARM 0x61 -#define BD70528_REG_CHG_CHG_CURR_COLD 0x62 - -/* Masks for main IRQ register bits */ -enum { - BD70528_INT_SHDN, -#define BD70528_INT_SHDN_MASK BIT(BD70528_INT_SHDN) - BD70528_INT_PWR_FLT, -#define BD70528_INT_PWR_FLT_MASK BIT(BD70528_INT_PWR_FLT) - BD70528_INT_VR_FLT, -#define BD70528_INT_VR_FLT_MASK BIT(BD70528_INT_VR_FLT) - BD70528_INT_MISC, -#define BD70528_INT_MISC_MASK BIT(BD70528_INT_MISC) - BD70528_INT_BAT1, -#define BD70528_INT_BAT1_MASK BIT(BD70528_INT_BAT1) - BD70528_INT_RTC, -#define BD70528_INT_RTC_MASK BIT(BD70528_INT_RTC) - BD70528_INT_GPIO, -#define BD70528_INT_GPIO_MASK BIT(BD70528_INT_GPIO) - BD70528_INT_OP_FAIL, -#define BD70528_INT_OP_FAIL_MASK BIT(BD70528_INT_OP_FAIL) -}; - -/* IRQs */ -enum { - /* Shutdown register IRQs */ - BD70528_INT_LONGPUSH, - BD70528_INT_WDT, - BD70528_INT_HWRESET, - BD70528_INT_RSTB_FAULT, - BD70528_INT_VBAT_UVLO, - BD70528_INT_TSD, - BD70528_INT_RSTIN, - /* Power failure register IRQs */ - BD70528_INT_BUCK1_FAULT, - BD70528_INT_BUCK2_FAULT, - BD70528_INT_BUCK3_FAULT, - BD70528_INT_LDO1_FAULT, - BD70528_INT_LDO2_FAULT, - BD70528_INT_LDO3_FAULT, - BD70528_INT_LED1_FAULT, - BD70528_INT_LED2_FAULT, - /* VR FAULT register IRQs */ - BD70528_INT_BUCK1_OCP, - BD70528_INT_BUCK2_OCP, - BD70528_INT_BUCK3_OCP, - BD70528_INT_LED1_OCP, - BD70528_INT_LED2_OCP, - BD70528_INT_BUCK1_FULLON, - BD70528_INT_BUCK2_FULLON, - /* PMU register interrupts */ - BD70528_INT_SHORTPUSH, - BD70528_INT_AUTO_WAKEUP, - BD70528_INT_STATE_CHANGE, - /* Charger 1 register IRQs */ - BD70528_INT_BAT_OV_RES, - BD70528_INT_BAT_OV_DET, - BD70528_INT_DBAT_DET, - 
BD70528_INT_BATTSD_COLD_RES, - BD70528_INT_BATTSD_COLD_DET, - BD70528_INT_BATTSD_HOT_RES, - BD70528_INT_BATTSD_HOT_DET, - BD70528_INT_CHG_TSD, - /* Charger 2 register IRQs */ - BD70528_INT_BAT_RMV, - BD70528_INT_BAT_DET, - BD70528_INT_DCIN2_OV_RES, - BD70528_INT_DCIN2_OV_DET, - BD70528_INT_DCIN2_RMV, - BD70528_INT_DCIN2_DET, - BD70528_INT_DCIN1_RMV, - BD70528_INT_DCIN1_DET, - /* RTC register IRQs */ - BD70528_INT_RTC_ALARM, - BD70528_INT_ELPS_TIM, - /* GPIO register IRQs */ - BD70528_INT_GPIO0, - BD70528_INT_GPIO1, - BD70528_INT_GPIO2, - BD70528_INT_GPIO3, - /* Invalid operation register IRQs */ - BD70528_INT_BUCK1_DVS_OPFAIL, - BD70528_INT_BUCK2_DVS_OPFAIL, - BD70528_INT_BUCK3_DVS_OPFAIL, - BD70528_INT_LED1_VOLT_OPFAIL, - BD70528_INT_LED2_VOLT_OPFAIL, -}; - -/* Masks */ -#define BD70528_INT_LONGPUSH_MASK 0x1 -#define BD70528_INT_WDT_MASK 0x2 -#define BD70528_INT_HWRESET_MASK 0x4 -#define BD70528_INT_RSTB_FAULT_MASK 0x8 -#define BD70528_INT_VBAT_UVLO_MASK 0x10 -#define BD70528_INT_TSD_MASK 0x20 -#define BD70528_INT_RSTIN_MASK 0x40 - -#define BD70528_INT_BUCK1_FAULT_MASK 0x1 -#define BD70528_INT_BUCK2_FAULT_MASK 0x2 -#define BD70528_INT_BUCK3_FAULT_MASK 0x4 -#define BD70528_INT_LDO1_FAULT_MASK 0x8 -#define BD70528_INT_LDO2_FAULT_MASK 0x10 -#define BD70528_INT_LDO3_FAULT_MASK 0x20 -#define BD70528_INT_LED1_FAULT_MASK 0x40 -#define BD70528_INT_LED2_FAULT_MASK 0x80 - -#define BD70528_INT_BUCK1_OCP_MASK 0x1 -#define BD70528_INT_BUCK2_OCP_MASK 0x2 -#define BD70528_INT_BUCK3_OCP_MASK 0x4 -#define BD70528_INT_LED1_OCP_MASK 0x8 -#define BD70528_INT_LED2_OCP_MASK 0x10 -#define BD70528_INT_BUCK1_FULLON_MASK 0x20 -#define BD70528_INT_BUCK2_FULLON_MASK 0x40 - -#define BD70528_INT_SHORTPUSH_MASK 0x1 -#define BD70528_INT_AUTO_WAKEUP_MASK 0x2 -#define BD70528_INT_STATE_CHANGE_MASK 0x10 - -#define BD70528_INT_BAT_OV_RES_MASK 0x1 -#define BD70528_INT_BAT_OV_DET_MASK 0x2 -#define BD70528_INT_DBAT_DET_MASK 0x4 -#define BD70528_INT_BATTSD_COLD_RES_MASK 0x8 -#define 
BD70528_INT_BATTSD_COLD_DET_MASK 0x10 -#define BD70528_INT_BATTSD_HOT_RES_MASK 0x20 -#define BD70528_INT_BATTSD_HOT_DET_MASK 0x40 -#define BD70528_INT_CHG_TSD_MASK 0x80 - -#define BD70528_INT_BAT_RMV_MASK 0x1 -#define BD70528_INT_BAT_DET_MASK 0x2 -#define BD70528_INT_DCIN2_OV_RES_MASK 0x4 -#define BD70528_INT_DCIN2_OV_DET_MASK 0x8 -#define BD70528_INT_DCIN2_RMV_MASK 0x10 -#define BD70528_INT_DCIN2_DET_MASK 0x20 -#define BD70528_INT_DCIN1_RMV_MASK 0x40 -#define BD70528_INT_DCIN1_DET_MASK 0x80 - -#define BD70528_INT_RTC_ALARM_MASK 0x1 -#define BD70528_INT_ELPS_TIM_MASK 0x2 - -#define BD70528_INT_GPIO0_MASK 0x1 -#define BD70528_INT_GPIO1_MASK 0x2 -#define BD70528_INT_GPIO2_MASK 0x4 -#define BD70528_INT_GPIO3_MASK 0x8 - -#define BD70528_INT_BUCK1_DVS_OPFAIL_MASK 0x1 -#define BD70528_INT_BUCK2_DVS_OPFAIL_MASK 0x2 -#define BD70528_INT_BUCK3_DVS_OPFAIL_MASK 0x4 -#define BD70528_INT_LED1_VOLT_OPFAIL_MASK 0x10 -#define BD70528_INT_LED2_VOLT_OPFAIL_MASK 0x20 - -#define BD70528_DEBOUNCE_MASK 0x3 - -#define BD70528_DEBOUNCE_DISABLE 0 -#define BD70528_DEBOUNCE_15MS 1 -#define BD70528_DEBOUNCE_30MS 2 -#define BD70528_DEBOUNCE_50MS 3 - -#define BD70528_GPIO_DRIVE_MASK 0x2 -#define BD70528_GPIO_PUSH_PULL 0x0 -#define BD70528_GPIO_OPEN_DRAIN 0x2 - -#define BD70528_GPIO_OUT_EN_MASK 0x80 -#define BD70528_GPIO_OUT_ENABLE 0x80 -#define BD70528_GPIO_OUT_DISABLE 0x0 - -#define BD70528_GPIO_OUT_HI 0x1 -#define BD70528_GPIO_OUT_LO 0x0 -#define BD70528_GPIO_OUT_MASK 0x1 - -#define BD70528_GPIO_IN_STATE_BASE 1 - -/* RTC masks to mask out reserved bits */ - -#define BD70528_MASK_ELAPSED_TIMER_EN 0x1 -/* Mask second, min and hour fields - * HW would support ALM irq for over 24h - * (by setting day, month and year too) - * but as we wish to keep this same as for - * wake-up we limit ALM to 24H and only - * unmask sec, min and hour - */ -#define BD70528_MASK_WAKE_EN 0x1 - -/* WDT masks */ -#define BD70528_MASK_WDT_EN 0x1 -#define BD70528_MASK_WDT_HOUR 0x1 -#define BD70528_MASK_WDT_MINUTE 0x7f 
-#define BD70528_MASK_WDT_SEC 0x7f - -#define BD70528_WDT_STATE_BIT 0x1 -#define BD70528_ELAPSED_STATE_BIT 0x2 -#define BD70528_WAKE_STATE_BIT 0x4 - -/* Charger masks */ -#define BD70528_MASK_CHG_STAT 0x7f -#define BD70528_MASK_CHG_BAT_TIMER 0x20 -#define BD70528_MASK_CHG_BAT_OVERVOLT 0x10 -#define BD70528_MASK_CHG_BAT_DETECT 0x1 -#define BD70528_MASK_CHG_DCIN1_UVLO 0x1 -#define BD70528_MASK_CHG_DCIN_ILIM 0x3f -#define BD70528_MASK_CHG_CHG_CURR 0x1f -#define BD70528_MASK_CHG_TRICKLE_CURR 0x10 - -/* - * Note, external battery register is the lonely rider at - * address 0xc5. See how to stuff that in the regmap - */ -#define BD70528_MAX_REGISTER 0x94 - -/* Buck control masks */ -#define BD70528_MASK_RUN_EN 0x4 -#define BD70528_MASK_STBY_EN 0x2 -#define BD70528_MASK_IDLE_EN 0x1 -#define BD70528_MASK_LED1_EN 0x1 -#define BD70528_MASK_LED2_EN 0x10 - -#define BD70528_MASK_BUCK_VOLT 0xf -#define BD70528_MASK_LDO_VOLT 0x1f -#define BD70528_MASK_LED1_VOLT 0x1 -#define BD70528_MASK_LED2_VOLT 0x10 - -/* Misc irq masks */ -#define BD70528_INT_MASK_SHORT_PUSH 1 -#define BD70528_INT_MASK_AUTO_WAKE 2 -#define BD70528_INT_MASK_POWER_STATE 4 - -#define BD70528_MASK_BUCK_RAMP 0x10 -#define BD70528_SIFT_BUCK_RAMP 4 - -#if IS_ENABLED(CONFIG_BD70528_WATCHDOG) - -int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, int *old_state); -void bd70528_wdt_lock(struct rohm_regmap_dev *data); -void bd70528_wdt_unlock(struct rohm_regmap_dev *data); - -#else /* CONFIG_BD70528_WATCHDOG */ - -static inline int bd70528_wdt_set(struct rohm_regmap_dev *data, int enable, - int *old_state) -{ - return 0; -} - -static inline void bd70528_wdt_lock(struct rohm_regmap_dev *data) -{ -} - -static inline void bd70528_wdt_unlock(struct rohm_regmap_dev *data) -{ -} - -#endif /* CONFIG_BD70528_WATCHDOG */ - -#endif /* __LINUX_MFD_BD70528_H__ */ diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 35b392a0d73a..4eeb22876bad 100644 --- a/include/linux/mfd/rohm-generic.h 
+++ b/include/linux/mfd/rohm-generic.h @@ -12,7 +12,6 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD9573, ROHM_CHIP_TYPE_BD9574, ROHM_CHIP_TYPE_BD9576, - ROHM_CHIP_TYPE_BD70528, ROHM_CHIP_TYPE_BD71815, ROHM_CHIP_TYPE_BD71828, ROHM_CHIP_TYPE_BD71837, @@ -80,14 +79,8 @@ int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, const struct regulator_desc *desc, struct regmap *regmap); -#else -static inline int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, - struct device_node *np, - const struct regulator_desc *desc, - struct regmap *regmap) -{ - return 0; -} +int rohm_regulator_set_voltage_sel_restricted(struct regulator_dev *rdev, + unsigned int sel); #endif #endif diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h index 90b20550c1c8..06d3f11dc3c9 100644 --- a/include/linux/mfd/stm32-lptimer.h +++ b/include/linux/mfd/stm32-lptimer.h @@ -45,6 +45,11 @@ #define STM32_LPTIM_PRESC GENMASK(11, 9) #define STM32_LPTIM_CKPOL GENMASK(2, 1) +/* STM32_LPTIM_CKPOL */ +#define STM32_LPTIM_CKPOL_RISING_EDGE 0 +#define STM32_LPTIM_CKPOL_FALLING_EDGE 1 +#define STM32_LPTIM_CKPOL_BOTH_EDGES 2 + /* STM32_LPTIM_ARR */ #define STM32_LPTIM_MAX_ARR 0xFFFF diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index f8db83aedb2b..5f5c43fd69dd 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -82,6 +82,10 @@ #define MAX_TIM_ICPSC 0x3 #define TIM_CR2_MMS_SHIFT 4 #define TIM_CR2_MMS2_SHIFT 20 +#define TIM_SMCR_SMS_SLAVE_MODE_DISABLED 0 /* counts on internal clock when CEN=1 */ +#define TIM_SMCR_SMS_ENCODER_MODE_1 1 /* counts TI1FP1 edges, depending on TI2FP2 level */ +#define TIM_SMCR_SMS_ENCODER_MODE_2 2 /* counts TI2FP2 edges, depending on TI1FP1 level */ +#define TIM_SMCR_SMS_ENCODER_MODE_3 3 /* counts on both TI1FP1 and TI2FP2 edges */ #define TIM_SMCR_TS_SHIFT 4 #define TIM_BDTR_BKF_MASK 0xF #define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4) diff --git 
a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index ffc091b77633..4063b0614d90 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -1,22 +1,16 @@ -#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H -#define __LINUX_TI_AM335X_TSCADC_MFD_H - +/* SPDX-License-Identifier: GPL-2.0-only */ /* * TI Touch Screen / ADC MFD driver * * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ +#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H +#define __LINUX_TI_AM335X_TSCADC_MFD_H + +#include <linux/bitfield.h> #include <linux/mfd/core.h> +#include <linux/units.h> #define REG_RAWIRQSTATUS 0x024 #define REG_IRQSTATUS 0x028 @@ -46,13 +40,6 @@ /* IRQ wakeup enable */ #define IRQWKUP_ENB BIT(0) -/* Step Enable */ -#define STEPENB_MASK (0x1FFFF << 0) -#define STEPENB(val) ((val) << 0) -#define ENB(val) (1 << (val)) -#define STPENB_STEPENB STEPENB(0x1FFFF) -#define STPENB_STEPENB_TC STEPENB(0x1FFF) - /* IRQ enable */ #define IRQENB_HW_PEN BIT(0) #define IRQENB_EOS BIT(1) @@ -65,12 +52,10 @@ #define IRQENB_PENUP BIT(9) /* Step Configuration */ -#define STEPCONFIG_MODE_MASK (3 << 0) -#define STEPCONFIG_MODE(val) ((val) << 0) +#define STEPCONFIG_MODE(val) FIELD_PREP(GENMASK(1, 0), (val)) #define STEPCONFIG_MODE_SWCNT STEPCONFIG_MODE(1) #define STEPCONFIG_MODE_HWSYNC STEPCONFIG_MODE(2) -#define STEPCONFIG_AVG_MASK (7 << 2) -#define STEPCONFIG_AVG(val) ((val) << 2) +#define STEPCONFIG_AVG(val) FIELD_PREP(GENMASK(4, 2), (val)) #define STEPCONFIG_AVG_16 STEPCONFIG_AVG(4) 
#define STEPCONFIG_XPP BIT(5) #define STEPCONFIG_XNN BIT(6) @@ -78,70 +63,67 @@ #define STEPCONFIG_YNN BIT(8) #define STEPCONFIG_XNP BIT(9) #define STEPCONFIG_YPN BIT(10) -#define STEPCONFIG_RFP(val) ((val) << 12) -#define STEPCONFIG_RFP_VREFP (0x3 << 12) -#define STEPCONFIG_INM_MASK (0xF << 15) -#define STEPCONFIG_INM(val) ((val) << 15) +#define STEPCONFIG_RFP(val) FIELD_PREP(GENMASK(13, 12), (val)) +#define STEPCONFIG_RFP_VREFP STEPCONFIG_RFP(3) +#define STEPCONFIG_INM(val) FIELD_PREP(GENMASK(18, 15), (val)) #define STEPCONFIG_INM_ADCREFM STEPCONFIG_INM(8) -#define STEPCONFIG_INP_MASK (0xF << 19) -#define STEPCONFIG_INP(val) ((val) << 19) +#define STEPCONFIG_INP(val) FIELD_PREP(GENMASK(22, 19), (val)) #define STEPCONFIG_INP_AN4 STEPCONFIG_INP(4) #define STEPCONFIG_INP_ADCREFM STEPCONFIG_INP(8) #define STEPCONFIG_FIFO1 BIT(26) -#define STEPCONFIG_RFM(val) ((val) << 23) -#define STEPCONFIG_RFM_VREFN (0x3 << 23) +#define STEPCONFIG_RFM(val) FIELD_PREP(GENMASK(24, 23), (val)) +#define STEPCONFIG_RFM_VREFN STEPCONFIG_RFM(3) /* Delay register */ -#define STEPDELAY_OPEN_MASK (0x3FFFF << 0) -#define STEPDELAY_OPEN(val) ((val) << 0) +#define STEPDELAY_OPEN(val) FIELD_PREP(GENMASK(17, 0), (val)) #define STEPCONFIG_OPENDLY STEPDELAY_OPEN(0x098) -#define STEPDELAY_SAMPLE_MASK (0xFF << 24) -#define STEPDELAY_SAMPLE(val) ((val) << 24) +#define STEPCONFIG_MAX_OPENDLY GENMASK(17, 0) +#define STEPDELAY_SAMPLE(val) FIELD_PREP(GENMASK(31, 24), (val)) #define STEPCONFIG_SAMPLEDLY STEPDELAY_SAMPLE(0) +#define STEPCONFIG_MAX_SAMPLE GENMASK(7, 0) /* Charge Config */ -#define STEPCHARGE_RFP_MASK (7 << 12) -#define STEPCHARGE_RFP(val) ((val) << 12) +#define STEPCHARGE_RFP(val) FIELD_PREP(GENMASK(14, 12), (val)) #define STEPCHARGE_RFP_XPUL STEPCHARGE_RFP(1) -#define STEPCHARGE_INM_MASK (0xF << 15) -#define STEPCHARGE_INM(val) ((val) << 15) +#define STEPCHARGE_INM(val) FIELD_PREP(GENMASK(18, 15), (val)) #define STEPCHARGE_INM_AN1 STEPCHARGE_INM(1) -#define STEPCHARGE_INP_MASK (0xF << 19) 
-#define STEPCHARGE_INP(val) ((val) << 19) -#define STEPCHARGE_RFM_MASK (3 << 23) -#define STEPCHARGE_RFM(val) ((val) << 23) +#define STEPCHARGE_INP(val) FIELD_PREP(GENMASK(22, 19), (val)) +#define STEPCHARGE_RFM(val) FIELD_PREP(GENMASK(24, 23), (val)) #define STEPCHARGE_RFM_XNUR STEPCHARGE_RFM(1) /* Charge delay */ -#define CHARGEDLY_OPEN_MASK (0x3FFFF << 0) -#define CHARGEDLY_OPEN(val) ((val) << 0) +#define CHARGEDLY_OPEN(val) FIELD_PREP(GENMASK(17, 0), (val)) #define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(0x400) /* Control register */ -#define CNTRLREG_TSCSSENB BIT(0) +#define CNTRLREG_SSENB BIT(0) #define CNTRLREG_STEPID BIT(1) -#define CNTRLREG_STEPCONFIGWRT BIT(2) +#define CNTRLREG_TSC_STEPCONFIGWRT BIT(2) #define CNTRLREG_POWERDOWN BIT(4) -#define CNTRLREG_AFE_CTRL_MASK (3 << 5) -#define CNTRLREG_AFE_CTRL(val) ((val) << 5) -#define CNTRLREG_4WIRE CNTRLREG_AFE_CTRL(1) -#define CNTRLREG_5WIRE CNTRLREG_AFE_CTRL(2) -#define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) -#define CNTRLREG_TSCENB BIT(7) +#define CNTRLREG_TSC_AFE_CTRL(val) FIELD_PREP(GENMASK(6, 5), (val)) +#define CNTRLREG_TSC_4WIRE CNTRLREG_TSC_AFE_CTRL(1) +#define CNTRLREG_TSC_5WIRE CNTRLREG_TSC_AFE_CTRL(2) +#define CNTRLREG_TSC_ENB BIT(7) + +/*Control registers bitfields for MAGADC IP */ +#define CNTRLREG_MAGADCENB BIT(0) +#define CNTRLREG_MAG_PREAMP_PWRDOWN BIT(5) +#define CNTRLREG_MAG_PREAMP_BYPASS BIT(6) /* FIFO READ Register */ -#define FIFOREAD_DATA_MASK (0xfff << 0) -#define FIFOREAD_CHNLID_MASK (0xf << 16) +#define FIFOREAD_DATA_MASK GENMASK(11, 0) +#define FIFOREAD_CHNLID_MASK GENMASK(19, 16) /* DMA ENABLE/CLEAR Register */ #define DMA_FIFO0 BIT(0) #define DMA_FIFO1 BIT(1) /* Sequencer Status */ -#define SEQ_STATUS BIT(5) +#define SEQ_STATUS BIT(5) #define CHARGE_STEP 0x11 -#define ADC_CLK 3000000 +#define TSC_ADC_CLK (3 * HZ_PER_MHZ) +#define MAG_ADC_CLK (13 * HZ_PER_MHZ) #define TOTAL_STEPS 16 #define TOTAL_CHANNELS 8 #define FIFO1_THRESHOLD 19 @@ -158,21 +140,27 @@ * * max processing time: 266431 * 
308ns = 83ms(approx) */ -#define IDLE_TIMEOUT 83 /* milliseconds */ +#define IDLE_TIMEOUT_MS 83 /* milliseconds */ #define TSCADC_CELLS 2 +struct ti_tscadc_data { + char *adc_feature_name; + char *adc_feature_compatible; + char *secondary_feature_name; + char *secondary_feature_compatible; + unsigned int target_clk_rate; +}; + struct ti_tscadc_dev { struct device *dev; struct regmap *regmap; void __iomem *tscadc_base; phys_addr_t tscadc_phys_base; + const struct ti_tscadc_data *data; int irq; - int used_cells; /* 1-2 */ - int tsc_wires; - int tsc_cell; /* -1 if not used */ - int adc_cell; /* -1 if not used */ struct mfd_cell cells[TSCADC_CELLS]; + u32 ctrl; u32 reg_se_cache; bool adc_waiting; bool adc_in_use; @@ -194,6 +182,12 @@ static inline struct ti_tscadc_dev *ti_tscadc_dev_get(struct platform_device *p) return *tscadc_dev; } +static inline bool ti_adc_with_touchscreen(struct ti_tscadc_dev *tscadc) +{ + return of_device_is_compatible(tscadc->dev->of_node, + "ti,am3359-tscadc"); +} + void am335x_tsc_se_set_cache(struct ti_tscadc_dev *tsadc, u32 val); void am335x_tsc_se_set_once(struct ti_tscadc_dev *tsadc, u32 val); void am335x_tsc_se_clr(struct ti_tscadc_dev *tsadc, u32 val); diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h index 7943e413deae..8a61386cb8c1 100644 --- a/include/linux/mfd/tps65912.h +++ b/include/linux/mfd/tps65912.h @@ -322,6 +322,6 @@ struct tps65912 { extern const struct regmap_config tps65912_regmap_config; int tps65912_device_init(struct tps65912 *tps); -int tps65912_device_exit(struct tps65912 *tps); +void tps65912_device_exit(struct tps65912 *tps); #endif /* __LINUX_MFD_TPS65912_H */ diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h index ffe81127d91c..7807fa329db0 100644 --- a/include/linux/mfd/tps68470.h +++ b/include/linux/mfd/tps68470.h @@ -75,6 +75,17 @@ #define TPS68470_CLKCFG1_MODE_A_MASK GENMASK(1, 0) #define TPS68470_CLKCFG1_MODE_B_MASK GENMASK(3, 2) +#define 
TPS68470_CLKCFG2_DRV_STR_2MA 0x05 +#define TPS68470_PLL_OUTPUT_ENABLE 0x02 +#define TPS68470_CLK_SRC_XTAL BIT(0) +#define TPS68470_PLLSWR_DEFAULT GENMASK(1, 0) +#define TPS68470_OSC_EXT_CAP_DEFAULT 0x05 + +#define TPS68470_OUTPUT_A_SHIFT 0x00 +#define TPS68470_OUTPUT_B_SHIFT 0x02 +#define TPS68470_CLK_SRC_SHIFT GENMASK(2, 0) +#define TPS68470_OSC_EXT_CAP_SHIFT BIT(2) + #define TPS68470_GPIO_CTL_REG_A(x) (TPS68470_REG_GPCTL0A + (x) * 2) #define TPS68470_GPIO_CTL_REG_B(x) (TPS68470_REG_GPCTL0B + (x) * 2) #define TPS68470_GPIO_MODE_MASK GENMASK(1, 0) diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h deleted file mode 100644 index 2c75c9c9318f..000000000000 --- a/include/linux/mfd/tps80031.h +++ /dev/null @@ -1,637 +0,0 @@ -/* - * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver. - * - * Copyright (c) 2012, NVIDIA Corporation. - * - * Author: Laxman Dewangan <[email protected]> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, - * whether express or implied; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307, USA - */ - -#ifndef __LINUX_MFD_TPS80031_H -#define __LINUX_MFD_TPS80031_H - -#include <linux/device.h> -#include <linux/regmap.h> - -/* Pull-ups/Pull-downs */ -#define TPS80031_CFG_INPUT_PUPD1 0xF0 -#define TPS80031_CFG_INPUT_PUPD2 0xF1 -#define TPS80031_CFG_INPUT_PUPD3 0xF2 -#define TPS80031_CFG_INPUT_PUPD4 0xF3 -#define TPS80031_CFG_LDO_PD1 0xF4 -#define TPS80031_CFG_LDO_PD2 0xF5 -#define TPS80031_CFG_SMPS_PD 0xF6 - -/* Real Time Clock */ -#define TPS80031_SECONDS_REG 0x00 -#define TPS80031_MINUTES_REG 0x01 -#define TPS80031_HOURS_REG 0x02 -#define TPS80031_DAYS_REG 0x03 -#define TPS80031_MONTHS_REG 0x04 -#define TPS80031_YEARS_REG 0x05 -#define TPS80031_WEEKS_REG 0x06 -#define TPS80031_ALARM_SECONDS_REG 0x08 -#define TPS80031_ALARM_MINUTES_REG 0x09 -#define TPS80031_ALARM_HOURS_REG 0x0A -#define TPS80031_ALARM_DAYS_REG 0x0B -#define TPS80031_ALARM_MONTHS_REG 0x0C -#define TPS80031_ALARM_YEARS_REG 0x0D -#define TPS80031_RTC_CTRL_REG 0x10 -#define TPS80031_RTC_STATUS_REG 0x11 -#define TPS80031_RTC_INTERRUPTS_REG 0x12 -#define TPS80031_RTC_COMP_LSB_REG 0x13 -#define TPS80031_RTC_COMP_MSB_REG 0x14 -#define TPS80031_RTC_RESET_STATUS_REG 0x16 - -/*PMC Master Module */ -#define TPS80031_PHOENIX_START_CONDITION 0x1F -#define TPS80031_PHOENIX_MSK_TRANSITION 0x20 -#define TPS80031_STS_HW_CONDITIONS 0x21 -#define TPS80031_PHOENIX_LAST_TURNOFF_STS 0x22 -#define TPS80031_VSYSMIN_LO_THRESHOLD 0x23 -#define TPS80031_VSYSMIN_HI_THRESHOLD 0x24 -#define TPS80031_PHOENIX_DEV_ON 0x25 -#define TPS80031_STS_PWR_GRP_STATE 0x27 -#define TPS80031_PH_CFG_VSYSLOW 0x28 -#define TPS80031_PH_STS_BOOT 0x29 -#define TPS80031_PHOENIX_SENS_TRANSITION 0x2A -#define TPS80031_PHOENIX_SEQ_CFG 0x2B -#define TPS80031_PRIMARY_WATCHDOG_CFG 0X2C -#define TPS80031_KEY_PRESS_DUR_CFG 0X2D 
-#define TPS80031_SMPS_LDO_SHORT_STS 0x2E - -/* PMC Slave Module - Broadcast */ -#define TPS80031_BROADCAST_ADDR_ALL 0x31 -#define TPS80031_BROADCAST_ADDR_REF 0x32 -#define TPS80031_BROADCAST_ADDR_PROV 0x33 -#define TPS80031_BROADCAST_ADDR_CLK_RST 0x34 - -/* PMC Slave Module SMPS Regulators */ -#define TPS80031_SMPS4_CFG_TRANS 0x41 -#define TPS80031_SMPS4_CFG_STATE 0x42 -#define TPS80031_SMPS4_CFG_VOLTAGE 0x44 -#define TPS80031_VIO_CFG_TRANS 0x47 -#define TPS80031_VIO_CFG_STATE 0x48 -#define TPS80031_VIO_CFG_FORCE 0x49 -#define TPS80031_VIO_CFG_VOLTAGE 0x4A -#define TPS80031_VIO_CFG_STEP 0x48 -#define TPS80031_SMPS1_CFG_TRANS 0x53 -#define TPS80031_SMPS1_CFG_STATE 0x54 -#define TPS80031_SMPS1_CFG_FORCE 0x55 -#define TPS80031_SMPS1_CFG_VOLTAGE 0x56 -#define TPS80031_SMPS1_CFG_STEP 0x57 -#define TPS80031_SMPS2_CFG_TRANS 0x59 -#define TPS80031_SMPS2_CFG_STATE 0x5A -#define TPS80031_SMPS2_CFG_FORCE 0x5B -#define TPS80031_SMPS2_CFG_VOLTAGE 0x5C -#define TPS80031_SMPS2_CFG_STEP 0x5D -#define TPS80031_SMPS3_CFG_TRANS 0x65 -#define TPS80031_SMPS3_CFG_STATE 0x66 -#define TPS80031_SMPS3_CFG_VOLTAGE 0x68 - -/* PMC Slave Module LDO Regulators */ -#define TPS80031_VANA_CFG_TRANS 0x81 -#define TPS80031_VANA_CFG_STATE 0x82 -#define TPS80031_VANA_CFG_VOLTAGE 0x83 -#define TPS80031_LDO2_CFG_TRANS 0x85 -#define TPS80031_LDO2_CFG_STATE 0x86 -#define TPS80031_LDO2_CFG_VOLTAGE 0x87 -#define TPS80031_LDO4_CFG_TRANS 0x89 -#define TPS80031_LDO4_CFG_STATE 0x8A -#define TPS80031_LDO4_CFG_VOLTAGE 0x8B -#define TPS80031_LDO3_CFG_TRANS 0x8D -#define TPS80031_LDO3_CFG_STATE 0x8E -#define TPS80031_LDO3_CFG_VOLTAGE 0x8F -#define TPS80031_LDO6_CFG_TRANS 0x91 -#define TPS80031_LDO6_CFG_STATE 0x92 -#define TPS80031_LDO6_CFG_VOLTAGE 0x93 -#define TPS80031_LDOLN_CFG_TRANS 0x95 -#define TPS80031_LDOLN_CFG_STATE 0x96 -#define TPS80031_LDOLN_CFG_VOLTAGE 0x97 -#define TPS80031_LDO5_CFG_TRANS 0x99 -#define TPS80031_LDO5_CFG_STATE 0x9A -#define TPS80031_LDO5_CFG_VOLTAGE 0x9B -#define TPS80031_LDO1_CFG_TRANS 
0x9D -#define TPS80031_LDO1_CFG_STATE 0x9E -#define TPS80031_LDO1_CFG_VOLTAGE 0x9F -#define TPS80031_LDOUSB_CFG_TRANS 0xA1 -#define TPS80031_LDOUSB_CFG_STATE 0xA2 -#define TPS80031_LDOUSB_CFG_VOLTAGE 0xA3 -#define TPS80031_LDO7_CFG_TRANS 0xA5 -#define TPS80031_LDO7_CFG_STATE 0xA6 -#define TPS80031_LDO7_CFG_VOLTAGE 0xA7 - -/* PMC Slave Module External Control */ -#define TPS80031_REGEN1_CFG_TRANS 0xAE -#define TPS80031_REGEN1_CFG_STATE 0xAF -#define TPS80031_REGEN2_CFG_TRANS 0xB1 -#define TPS80031_REGEN2_CFG_STATE 0xB2 -#define TPS80031_SYSEN_CFG_TRANS 0xB4 -#define TPS80031_SYSEN_CFG_STATE 0xB5 - -/* PMC Slave Module Internal Control */ -#define TPS80031_NRESPWRON_CFG_TRANS 0xB7 -#define TPS80031_NRESPWRON_CFG_STATE 0xB8 -#define TPS80031_CLK32KAO_CFG_TRANS 0xBA -#define TPS80031_CLK32KAO_CFG_STATE 0xBB -#define TPS80031_CLK32KG_CFG_TRANS 0xBD -#define TPS80031_CLK32KG_CFG_STATE 0xBE -#define TPS80031_CLK32KAUDIO_CFG_TRANS 0xC0 -#define TPS80031_CLK32KAUDIO_CFG_STATE 0xC1 -#define TPS80031_VRTC_CFG_TRANS 0xC3 -#define TPS80031_VRTC_CFG_STATE 0xC4 -#define TPS80031_BIAS_CFG_TRANS 0xC6 -#define TPS80031_BIAS_CFG_STATE 0xC7 -#define TPS80031_VSYSMIN_HI_CFG_TRANS 0xC9 -#define TPS80031_VSYSMIN_HI_CFG_STATE 0xCA -#define TPS80031_RC6MHZ_CFG_TRANS 0xCC -#define TPS80031_RC6MHZ_CFG_STATE 0xCD -#define TPS80031_TMP_CFG_TRANS 0xCF -#define TPS80031_TMP_CFG_STATE 0xD0 - -/* PMC Slave Module resources assignment */ -#define TPS80031_PREQ1_RES_ASS_A 0xD7 -#define TPS80031_PREQ1_RES_ASS_B 0xD8 -#define TPS80031_PREQ1_RES_ASS_C 0xD9 -#define TPS80031_PREQ2_RES_ASS_A 0xDA -#define TPS80031_PREQ2_RES_ASS_B 0xDB -#define TPS80031_PREQ2_RES_ASS_C 0xDC -#define TPS80031_PREQ3_RES_ASS_A 0xDD -#define TPS80031_PREQ3_RES_ASS_B 0xDE -#define TPS80031_PREQ3_RES_ASS_C 0xDF - -/* PMC Slave Module Miscellaneous */ -#define TPS80031_SMPS_OFFSET 0xE0 -#define TPS80031_SMPS_MULT 0xE3 -#define TPS80031_MISC1 0xE4 -#define TPS80031_MISC2 0xE5 -#define TPS80031_BBSPOR_CFG 0xE6 -#define 
TPS80031_TMP_CFG 0xE7 - -/* Battery Charging Controller and Indicator LED */ -#define TPS80031_CONTROLLER_CTRL2 0xDA -#define TPS80031_CONTROLLER_VSEL_COMP 0xDB -#define TPS80031_CHARGERUSB_VSYSREG 0xDC -#define TPS80031_CHARGERUSB_VICHRG_PC 0xDD -#define TPS80031_LINEAR_CHRG_STS 0xDE -#define TPS80031_CONTROLLER_INT_MASK 0xE0 -#define TPS80031_CONTROLLER_CTRL1 0xE1 -#define TPS80031_CONTROLLER_WDG 0xE2 -#define TPS80031_CONTROLLER_STAT1 0xE3 -#define TPS80031_CHARGERUSB_INT_STATUS 0xE4 -#define TPS80031_CHARGERUSB_INT_MASK 0xE5 -#define TPS80031_CHARGERUSB_STATUS_INT1 0xE6 -#define TPS80031_CHARGERUSB_STATUS_INT2 0xE7 -#define TPS80031_CHARGERUSB_CTRL1 0xE8 -#define TPS80031_CHARGERUSB_CTRL2 0xE9 -#define TPS80031_CHARGERUSB_CTRL3 0xEA -#define TPS80031_CHARGERUSB_STAT1 0xEB -#define TPS80031_CHARGERUSB_VOREG 0xEC -#define TPS80031_CHARGERUSB_VICHRG 0xED -#define TPS80031_CHARGERUSB_CINLIMIT 0xEE -#define TPS80031_CHARGERUSB_CTRLLIMIT1 0xEF -#define TPS80031_CHARGERUSB_CTRLLIMIT2 0xF0 -#define TPS80031_LED_PWM_CTRL1 0xF4 -#define TPS80031_LED_PWM_CTRL2 0xF5 - -/* USB On-The-Go */ -#define TPS80031_BACKUP_REG 0xFA -#define TPS80031_USB_VENDOR_ID_LSB 0x00 -#define TPS80031_USB_VENDOR_ID_MSB 0x01 -#define TPS80031_USB_PRODUCT_ID_LSB 0x02 -#define TPS80031_USB_PRODUCT_ID_MSB 0x03 -#define TPS80031_USB_VBUS_CTRL_SET 0x04 -#define TPS80031_USB_VBUS_CTRL_CLR 0x05 -#define TPS80031_USB_ID_CTRL_SET 0x06 -#define TPS80031_USB_ID_CTRL_CLR 0x07 -#define TPS80031_USB_VBUS_INT_SRC 0x08 -#define TPS80031_USB_VBUS_INT_LATCH_SET 0x09 -#define TPS80031_USB_VBUS_INT_LATCH_CLR 0x0A -#define TPS80031_USB_VBUS_INT_EN_LO_SET 0x0B -#define TPS80031_USB_VBUS_INT_EN_LO_CLR 0x0C -#define TPS80031_USB_VBUS_INT_EN_HI_SET 0x0D -#define TPS80031_USB_VBUS_INT_EN_HI_CLR 0x0E -#define TPS80031_USB_ID_INT_SRC 0x0F -#define TPS80031_USB_ID_INT_LATCH_SET 0x10 -#define TPS80031_USB_ID_INT_LATCH_CLR 0x11 -#define TPS80031_USB_ID_INT_EN_LO_SET 0x12 -#define TPS80031_USB_ID_INT_EN_LO_CLR 0x13 -#define 
TPS80031_USB_ID_INT_EN_HI_SET 0x14 -#define TPS80031_USB_ID_INT_EN_HI_CLR 0x15 -#define TPS80031_USB_OTG_ADP_CTRL 0x16 -#define TPS80031_USB_OTG_ADP_HIGH 0x17 -#define TPS80031_USB_OTG_ADP_LOW 0x18 -#define TPS80031_USB_OTG_ADP_RISE 0x19 -#define TPS80031_USB_OTG_REVISION 0x1A - -/* Gas Gauge */ -#define TPS80031_FG_REG_00 0xC0 -#define TPS80031_FG_REG_01 0xC1 -#define TPS80031_FG_REG_02 0xC2 -#define TPS80031_FG_REG_03 0xC3 -#define TPS80031_FG_REG_04 0xC4 -#define TPS80031_FG_REG_05 0xC5 -#define TPS80031_FG_REG_06 0xC6 -#define TPS80031_FG_REG_07 0xC7 -#define TPS80031_FG_REG_08 0xC8 -#define TPS80031_FG_REG_09 0xC9 -#define TPS80031_FG_REG_10 0xCA -#define TPS80031_FG_REG_11 0xCB - -/* General Purpose ADC */ -#define TPS80031_GPADC_CTRL 0x2E -#define TPS80031_GPADC_CTRL2 0x2F -#define TPS80031_RTSELECT_LSB 0x32 -#define TPS80031_RTSELECT_ISB 0x33 -#define TPS80031_RTSELECT_MSB 0x34 -#define TPS80031_GPSELECT_ISB 0x35 -#define TPS80031_CTRL_P1 0x36 -#define TPS80031_RTCH0_LSB 0x37 -#define TPS80031_RTCH0_MSB 0x38 -#define TPS80031_RTCH1_LSB 0x39 -#define TPS80031_RTCH1_MSB 0x3A -#define TPS80031_GPCH0_LSB 0x3B -#define TPS80031_GPCH0_MSB 0x3C - -/* SIM, MMC and Battery Detection */ -#define TPS80031_SIMDEBOUNCING 0xEB -#define TPS80031_SIMCTRL 0xEC -#define TPS80031_MMCDEBOUNCING 0xED -#define TPS80031_MMCCTRL 0xEE -#define TPS80031_BATDEBOUNCING 0xEF - -/* Vibrator Driver and PWMs */ -#define TPS80031_VIBCTRL 0x9B -#define TPS80031_VIBMODE 0x9C -#define TPS80031_PWM1ON 0xBA -#define TPS80031_PWM1OFF 0xBB -#define TPS80031_PWM2ON 0xBD -#define TPS80031_PWM2OFF 0xBE - -/* Control Interface */ -#define TPS80031_INT_STS_A 0xD0 -#define TPS80031_INT_STS_B 0xD1 -#define TPS80031_INT_STS_C 0xD2 -#define TPS80031_INT_MSK_LINE_A 0xD3 -#define TPS80031_INT_MSK_LINE_B 0xD4 -#define TPS80031_INT_MSK_LINE_C 0xD5 -#define TPS80031_INT_MSK_STS_A 0xD6 -#define TPS80031_INT_MSK_STS_B 0xD7 -#define TPS80031_INT_MSK_STS_C 0xD8 -#define TPS80031_TOGGLE1 0x90 -#define 
TPS80031_TOGGLE2 0x91 -#define TPS80031_TOGGLE3 0x92 -#define TPS80031_PWDNSTATUS1 0x93 -#define TPS80031_PWDNSTATUS2 0x94 -#define TPS80031_VALIDITY0 0x17 -#define TPS80031_VALIDITY1 0x18 -#define TPS80031_VALIDITY2 0x19 -#define TPS80031_VALIDITY3 0x1A -#define TPS80031_VALIDITY4 0x1B -#define TPS80031_VALIDITY5 0x1C -#define TPS80031_VALIDITY6 0x1D -#define TPS80031_VALIDITY7 0x1E - -/* Version number related register */ -#define TPS80031_JTAGVERNUM 0x87 -#define TPS80031_EPROM_REV 0xDF - -/* GPADC Trimming Bits. */ -#define TPS80031_GPADC_TRIM0 0xCC -#define TPS80031_GPADC_TRIM1 0xCD -#define TPS80031_GPADC_TRIM2 0xCE -#define TPS80031_GPADC_TRIM3 0xCF -#define TPS80031_GPADC_TRIM4 0xD0 -#define TPS80031_GPADC_TRIM5 0xD1 -#define TPS80031_GPADC_TRIM6 0xD2 -#define TPS80031_GPADC_TRIM7 0xD3 -#define TPS80031_GPADC_TRIM8 0xD4 -#define TPS80031_GPADC_TRIM9 0xD5 -#define TPS80031_GPADC_TRIM10 0xD6 -#define TPS80031_GPADC_TRIM11 0xD7 -#define TPS80031_GPADC_TRIM12 0xD8 -#define TPS80031_GPADC_TRIM13 0xD9 -#define TPS80031_GPADC_TRIM14 0xDA -#define TPS80031_GPADC_TRIM15 0xDB -#define TPS80031_GPADC_TRIM16 0xDC -#define TPS80031_GPADC_TRIM17 0xDD -#define TPS80031_GPADC_TRIM18 0xDE - -/* TPS80031_CONTROLLER_STAT1 bit fields */ -#define TPS80031_CONTROLLER_STAT1_BAT_TEMP 0 -#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED 1 -#define TPS80031_CONTROLLER_STAT1_VBUS_DET 2 -#define TPS80031_CONTROLLER_STAT1_VAC_DET 3 -#define TPS80031_CONTROLLER_STAT1_FAULT_WDG 4 -#define TPS80031_CONTROLLER_STAT1_LINCH_GATED 6 -/* TPS80031_CONTROLLER_INT_MASK bit filed */ -#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET 0 -#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET 1 -#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP 2 -#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG 3 -#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED 4 -#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED 5 - -#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK 0x3F - -/* TPS80031_PHOENIX_DEV_ON bit field */ -#define TPS80031_DEVOFF 
0x1 - -#define TPS80031_EXT_CONTROL_CFG_TRANS 0 -#define TPS80031_EXT_CONTROL_CFG_STATE 1 - -/* State register field */ -#define TPS80031_STATE_OFF 0x00 -#define TPS80031_STATE_ON 0x01 -#define TPS80031_STATE_MASK 0x03 - -/* Trans register field */ -#define TPS80031_TRANS_ACTIVE_OFF 0x00 -#define TPS80031_TRANS_ACTIVE_ON 0x01 -#define TPS80031_TRANS_ACTIVE_MASK 0x03 -#define TPS80031_TRANS_SLEEP_OFF 0x00 -#define TPS80031_TRANS_SLEEP_ON 0x04 -#define TPS80031_TRANS_SLEEP_MASK 0x0C -#define TPS80031_TRANS_OFF_OFF 0x00 -#define TPS80031_TRANS_OFF_ACTIVE 0x10 -#define TPS80031_TRANS_OFF_MASK 0x30 - -#define TPS80031_EXT_PWR_REQ (TPS80031_PWR_REQ_INPUT_PREQ1 | \ - TPS80031_PWR_REQ_INPUT_PREQ2 | \ - TPS80031_PWR_REQ_INPUT_PREQ3) - -/* TPS80031_BBSPOR_CFG bit field */ -#define TPS80031_BBSPOR_CHG_EN 0x8 -#define TPS80031_MAX_REGISTER 0xFF - -struct i2c_client; - -/* Supported chips */ -enum chips { - TPS80031 = 0x00000001, - TPS80032 = 0x00000002, -}; - -enum { - TPS80031_INT_PWRON, - TPS80031_INT_RPWRON, - TPS80031_INT_SYS_VLOW, - TPS80031_INT_RTC_ALARM, - TPS80031_INT_RTC_PERIOD, - TPS80031_INT_HOT_DIE, - TPS80031_INT_VXX_SHORT, - TPS80031_INT_SPDURATION, - TPS80031_INT_WATCHDOG, - TPS80031_INT_BAT, - TPS80031_INT_SIM, - TPS80031_INT_MMC, - TPS80031_INT_RES, - TPS80031_INT_GPADC_RT, - TPS80031_INT_GPADC_SW2_EOC, - TPS80031_INT_CC_AUTOCAL, - TPS80031_INT_ID_WKUP, - TPS80031_INT_VBUSS_WKUP, - TPS80031_INT_ID, - TPS80031_INT_VBUS, - TPS80031_INT_CHRG_CTRL, - TPS80031_INT_EXT_CHRG, - TPS80031_INT_INT_CHRG, - TPS80031_INT_RES2, - TPS80031_INT_BAT_TEMP_OVRANGE, - TPS80031_INT_BAT_REMOVED, - TPS80031_INT_VBUS_DET, - TPS80031_INT_VAC_DET, - TPS80031_INT_FAULT_WDG, - TPS80031_INT_LINCH_GATED, - - /* Last interrupt id to get the end number */ - TPS80031_INT_NR, -}; - -/* TPS80031 Slave IDs */ -#define TPS80031_NUM_SLAVES 4 -#define TPS80031_SLAVE_ID0 0 -#define TPS80031_SLAVE_ID1 1 -#define TPS80031_SLAVE_ID2 2 -#define TPS80031_SLAVE_ID3 3 - -/* TPS80031 I2C addresses */ 
-#define TPS80031_I2C_ID0_ADDR 0x12 -#define TPS80031_I2C_ID1_ADDR 0x48 -#define TPS80031_I2C_ID2_ADDR 0x49 -#define TPS80031_I2C_ID3_ADDR 0x4A - -enum { - TPS80031_REGULATOR_VIO, - TPS80031_REGULATOR_SMPS1, - TPS80031_REGULATOR_SMPS2, - TPS80031_REGULATOR_SMPS3, - TPS80031_REGULATOR_SMPS4, - TPS80031_REGULATOR_VANA, - TPS80031_REGULATOR_LDO1, - TPS80031_REGULATOR_LDO2, - TPS80031_REGULATOR_LDO3, - TPS80031_REGULATOR_LDO4, - TPS80031_REGULATOR_LDO5, - TPS80031_REGULATOR_LDO6, - TPS80031_REGULATOR_LDO7, - TPS80031_REGULATOR_LDOLN, - TPS80031_REGULATOR_LDOUSB, - TPS80031_REGULATOR_VBUS, - TPS80031_REGULATOR_REGEN1, - TPS80031_REGULATOR_REGEN2, - TPS80031_REGULATOR_SYSEN, - TPS80031_REGULATOR_MAX, -}; - -/* Different configurations for the rails */ -enum { - /* USBLDO input selection */ - TPS80031_USBLDO_INPUT_VSYS = 0x00000001, - TPS80031_USBLDO_INPUT_PMID = 0x00000002, - - /* LDO3 output mode */ - TPS80031_LDO3_OUTPUT_VIB = 0x00000004, - - /* VBUS configuration */ - TPS80031_VBUS_DISCHRG_EN_PDN = 0x00000004, - TPS80031_VBUS_SW_ONLY = 0x00000008, - TPS80031_VBUS_SW_N_ID = 0x00000010, -}; - -/* External controls requests */ -enum tps80031_ext_control { - TPS80031_PWR_REQ_INPUT_NONE = 0x00000000, - TPS80031_PWR_REQ_INPUT_PREQ1 = 0x00000001, - TPS80031_PWR_REQ_INPUT_PREQ2 = 0x00000002, - TPS80031_PWR_REQ_INPUT_PREQ3 = 0x00000004, - TPS80031_PWR_OFF_ON_SLEEP = 0x00000008, - TPS80031_PWR_ON_ON_SLEEP = 0x00000010, -}; - -enum tps80031_pupd_pins { - TPS80031_PREQ1 = 0, - TPS80031_PREQ2A, - TPS80031_PREQ2B, - TPS80031_PREQ2C, - TPS80031_PREQ3, - TPS80031_NRES_WARM, - TPS80031_PWM_FORCE, - TPS80031_CHRG_EXT_CHRG_STATZ, - TPS80031_SIM, - TPS80031_MMC, - TPS80031_GPADC_START, - TPS80031_DVSI2C_SCL, - TPS80031_DVSI2C_SDA, - TPS80031_CTLI2C_SCL, - TPS80031_CTLI2C_SDA, -}; - -enum tps80031_pupd_settings { - TPS80031_PUPD_NORMAL, - TPS80031_PUPD_PULLDOWN, - TPS80031_PUPD_PULLUP, -}; - -struct tps80031 { - struct device *dev; - unsigned long chip_info; - int es_version; - struct 
i2c_client *clients[TPS80031_NUM_SLAVES]; - struct regmap *regmap[TPS80031_NUM_SLAVES]; - struct regmap_irq_chip_data *irq_data; -}; - -struct tps80031_pupd_init_data { - int input_pin; - int setting; -}; - -/* - * struct tps80031_regulator_platform_data - tps80031 regulator platform data. - * - * @reg_init_data: The regulator init data. - * @ext_ctrl_flag: External control flag for sleep/power request control. - * @config_flags: Configuration flag to configure the rails. - * It should be ORed of config enums. - */ - -struct tps80031_regulator_platform_data { - struct regulator_init_data *reg_init_data; - unsigned int ext_ctrl_flag; - unsigned int config_flags; -}; - -struct tps80031_platform_data { - int irq_base; - bool use_power_off; - struct tps80031_pupd_init_data *pupd_init_data; - int pupd_init_data_size; - struct tps80031_regulator_platform_data - *regulator_pdata[TPS80031_REGULATOR_MAX]; -}; - -static inline int tps80031_write(struct device *dev, int sid, - int reg, uint8_t val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_write(tps80031->regmap[sid], reg, val); -} - -static inline int tps80031_writes(struct device *dev, int sid, int reg, - int len, uint8_t *val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_bulk_write(tps80031->regmap[sid], reg, val, len); -} - -static inline int tps80031_read(struct device *dev, int sid, - int reg, uint8_t *val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - unsigned int ival; - int ret; - - ret = regmap_read(tps80031->regmap[sid], reg, &ival); - if (ret < 0) { - dev_err(dev, "failed reading from reg 0x%02x\n", reg); - return ret; - } - - *val = ival; - return ret; -} - -static inline int tps80031_reads(struct device *dev, int sid, - int reg, int len, uint8_t *val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_bulk_read(tps80031->regmap[sid], reg, val, len); -} - -static inline int tps80031_set_bits(struct device *dev, int sid, - 
int reg, uint8_t bit_mask) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_update_bits(tps80031->regmap[sid], reg, - bit_mask, bit_mask); -} - -static inline int tps80031_clr_bits(struct device *dev, int sid, - int reg, uint8_t bit_mask) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0); -} - -static inline int tps80031_update(struct device *dev, int sid, - int reg, uint8_t val, uint8_t mask) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_update_bits(tps80031->regmap[sid], reg, mask, val); -} - -static inline unsigned long tps80031_get_chip_info(struct device *dev) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return tps80031->chip_info; -} - -static inline int tps80031_get_pmu_version(struct device *dev) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return tps80031->es_version; -} - -static inline int tps80031_irq_get_virq(struct device *dev, int irq) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_irq_get_virq(tps80031->irq_data, irq); -} - -extern int tps80031_ext_power_req_config(struct device *dev, - unsigned long ext_ctrl_flag, int preq_bit, - int state_reg_add, int trans_reg_add); -#endif /*__LINUX_MFD_TPS80031_H */ diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 723985879035..a5441ad33c74 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -664,6 +664,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); int mhi_pm_resume(struct mhi_controller *mhi_cntrl); /** + * mhi_pm_resume_force - Force resume MHI from suspended state + * @mhi_cntrl: MHI controller + * + * Resume the device irrespective of its MHI state. As per the MHI spec, devices + * have to be in M3 state during resume. But some devices seem to be in a + * different MHI state other than M3 but they continue working fine if allowed. + * This API is intended to be used for such devices.
+ * + * Return: 0 if the resume succeeds, a negative error code otherwise + */ +int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl); + +/** * mhi_download_rddm_image - Download ramdump image from device for * debugging purpose. * @mhi_cntrl: MHI controller @@ -717,16 +730,27 @@ void mhi_device_put(struct mhi_device *mhi_dev); /** * mhi_prepare_for_transfer - Setup UL and DL channels for data transfer. - * Allocate and initialize the channel context and - * also issue the START channel command to both - * channels. Channels can be started only if both - * host and device execution environments match and - * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * + * Allocate and initialize the channel context and also issue the START channel + * command to both channels. Channels can be started only if both host and + * device execution environments match and channels are in a DISABLED state. */ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); /** + * mhi_prepare_for_transfer_autoqueue - Setup UL and DL channels with auto queue + * buffers for DL traffic + * @mhi_dev: Device associated with the channels + * + * Allocate and initialize the channel context and also issue the START channel + * command to both channels. Channels can be started only if both host and + * device execution environments match and channels are in a DISABLED state. + * The MHI core will automatically allocate and queue buffers for the DL traffic. + */ +int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev); + +/** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. 
* Issue the RESET channel command and let the * device clean-up the context so no incoming diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 3d43c60b49fa..1f7c33b2f5a3 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -28,6 +28,7 @@ #define PHY_ID_KSZ9031 0x00221620 #define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_LAN8814 0x00221660 +#define PHY_ID_LAN8804 0x00221670 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c8077e936691..4850cc5bf813 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,24 +19,7 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 -/* - * Keep sync with: - * - macro MIGRATE_REASON in include/trace/events/migrate.h - * - migrate_reason_names[MR_TYPES] in mm/debug.c - */ -enum migrate_reason { - MR_COMPACTION, - MR_MEMORY_FAILURE, - MR_MEMORY_HOTPLUG, - MR_SYSCALL, /* also applies to cpusets */ - MR_MEMPOLICY_MBIND, - MR_NUMA_MISPLACED, - MR_CONTIG_RANGE, - MR_LONGTERM_PIN, - MR_DEMOTION, - MR_TYPES -}; - +/* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION @@ -57,6 +40,12 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +void folio_migrate_flags(struct folio *newfolio, struct folio *folio); +void folio_migrate_copy(struct folio *newfolio, struct folio *folio); +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count); + +extern bool numa_demotion_enabled; #else static inline void putback_movable_pages(struct list_head *l) {} @@ -82,6 +71,8 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } + +#define 
numa_demotion_enabled false #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION @@ -119,7 +110,6 @@ static inline int migrate_misplaced_page(struct page *page, */ #define MIGRATE_PFN_VALID (1UL << 0) #define MIGRATE_PFN_MIGRATE (1UL << 1) -#define MIGRATE_PFN_LOCKED (1UL << 2) #define MIGRATE_PFN_WRITE (1UL << 3) #define MIGRATE_PFN_SHIFT 6 diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index 883c99249033..f37cc03f9369 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -19,4 +19,17 @@ enum migrate_mode { MIGRATE_SYNC_NO_COPY, }; +enum migrate_reason { + MR_COMPACTION, + MR_MEMORY_FAILURE, + MR_MEMORY_HOTPLUG, + MR_SYSCALL, /* also applies to cpusets */ + MR_MEMPOLICY_MBIND, + MR_NUMA_MISPLACED, + MR_CONTIG_RANGE, + MR_LONGTERM_PIN, + MR_DEMOTION, + MR_TYPES +}; + #endif /* MIGRATE_MODE_H_INCLUDED */ diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index da2367e2ac1e..c238207d1615 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -36,7 +36,7 @@ struct misc_cg; struct misc_res { unsigned long max; atomic_long_t usage; - bool failed; + atomic_long_t events; }; /** @@ -46,6 +46,10 @@ struct misc_res { */ struct misc_cg { struct cgroup_subsys_state css; + + /* misc.events */ + struct cgroup_file events_file; + struct misc_res res[MISC_CG_RES_TYPES]; }; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 30bb59fe970c..6646634a0b9d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1436,7 +1436,7 @@ int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); -int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr, +int mlx4_tunnel_steer_add(struct mlx4_dev *dev, const unsigned char *addr, int port, int qpn, u16 prio, u64 *reg_id); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, 
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index a858bcb6220b..1834c8fad12e 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -92,26 +92,4 @@ void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port); -static inline u64 mlx4_mac_to_u64(u8 *addr) -{ - u64 mac = 0; - int i; - - for (i = 0; i < ETH_ALEN; i++) { - mac <<= 8; - mac |= addr[i]; - } - return mac; -} - -static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) -{ - int i; - - for (i = ETH_ALEN; i > 0; i--) { - addr[i - 1] = mac & 0xFF; - mac >>= 8; - } -} - #endif /* MLX4_DRIVER_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 66eaf0aa7f69..604b85dd770a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,6 +290,7 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; +#define MLX5_UMR_KLM_ALIGNMENT 4 #define MLX5_UMR_MTT_ALIGNMENT 0x40 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT @@ -541,19 +542,21 @@ struct mlx5_cmd_layout { u8 status_own; }; -enum mlx5_fatal_assert_bit_offsets { - MLX5_RFR_OFFSET = 31, +enum mlx5_rfr_severity_bit_offsets { + MLX5_RFR_BIT_OFFSET = 0x7, }; struct health_buffer { - __be32 assert_var[5]; - __be32 rsvd0[3]; + __be32 assert_var[6]; + __be32 rsvd0[2]; __be32 assert_exit_ptr; __be32 assert_callra; - __be32 rsvd1[2]; + __be32 rsvd1[1]; + __be32 time; __be32 fw_ver; __be32 hw_id; - __be32 rfr; + u8 rfr_severity; + u8 rsvd2[3]; u8 irisc_index; u8 synd; __be16 ext_synd; @@ -577,7 +580,9 @@ struct mlx5_init_seg { __be32 rsvd1[120]; __be32 initializing; struct health_buffer health; - __be32 rsvd2[880]; + __be32 rsvd2[878]; + __be32 cmd_exec_to; + __be32 cmd_q_init_to; __be32 internal_timer_h; __be32 internal_timer_l; __be32 rsvd3[2]; @@ -795,10 +800,23 @@ struct mlx5_cqe64 { u8 tls_outer_l3_tunneled; u8 rsvd0; __be16 
wqe_id; - u8 lro_tcppsh_abort_dupack; - u8 lro_min_ttl; - __be16 lro_tcp_win; - __be32 lro_ack_seq_num; + union { + struct { + u8 tcppsh_abort_dupack; + u8 min_ttl; + __be16 tcp_win; + __be32 ack_seq_num; + } lro; + struct { + u8 reserved0:1; + u8 match:1; + u8 flush:1; + u8 reserved3:5; + u8 header_size; + __be16 header_entry_index; + __be32 data_offset; + } shampo; + }; __be32 rss_hash_result; u8 rss_hash_type; u8 ml_path; @@ -868,7 +886,7 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { - return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; + return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; } static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) @@ -1099,6 +1117,7 @@ enum { MLX5_MATCH_MISC_PARAMETERS_2 = 1 << 3, MLX5_MATCH_MISC_PARAMETERS_3 = 1 << 4, MLX5_MATCH_MISC_PARAMETERS_4 = 1 << 5, + MLX5_MATCH_MISC_PARAMETERS_5 = 1 << 6, }; enum { @@ -1182,7 +1201,9 @@ enum mlx5_cap_type { MLX5_CAP_VDPA_EMULATION = 0x13, MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, + MLX5_CAP_DEV_SHAMPO = 0x1d, MLX5_CAP_GENERAL_2 = 0x20, + MLX5_CAP_PORT_SELECTION = 0x25, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1340,6 +1361,20 @@ enum mlx5_qcam_feature_groups { MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap) +#define MLX5_CAP_PORT_SELECTION(mdev, cap) \ + MLX5_GET(port_selection_cap, \ + mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, cap) + +#define MLX5_CAP_PORT_SELECTION_MAX(mdev, cap) \ + MLX5_GET(port_selection_cap, \ + mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->max, cap) + +#define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ + MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) + +#define MLX5_CAP_FLOWTABLE_PORT_SELECTION_MAX(mdev, cap) \ + MLX5_CAP_PORT_SELECTION_MAX(mdev, flow_table_properties_port_selection.cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) @@ -1412,6 +1447,9 @@ enum mlx5_qcam_feature_groups { #define 
MLX5_CAP_IPSEC(mdev, cap)\ MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap) +#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\ + MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, @@ -1456,6 +1494,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } +#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2 +#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f17d2101af7a..78655d8d13a7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -59,15 +59,13 @@ #define MLX5_ADEV_NAME "mlx5_core" +#define MLX5_IRQ_EQ_CTRL (U8_MAX) + enum { MLX5_BOARD_ID_LEN = 64, }; enum { - /* one minute for the sake of bringup. Generally, commands must always - * complete and we may need to increase this timeout value - */ - MLX5_CMD_TIMEOUT_MSEC = 60 * 1000, MLX5_CMD_WQ_MAX_NAME = 32, }; @@ -136,6 +134,7 @@ enum { MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, + MLX5_REG_MRTC = 0x902d, MLX5_REG_MTRC_CAP = 0x9040, MLX5_REG_MTRC_CONF = 0x9041, MLX5_REG_MTRC_STDB = 0x9042, @@ -154,6 +153,7 @@ enum { MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, + MLX5_REG_DTOR = 0xC00E, }; enum mlx5_qpts_trust_state { @@ -357,22 +357,6 @@ struct mlx5_core_sig_ctx { u32 sigerr_count; }; -enum { - MLX5_MKEY_MR = 1, - MLX5_MKEY_MW, - MLX5_MKEY_INDIRECT_DEVX, -}; - -struct mlx5_core_mkey { - u64 iova; - u64 size; - u32 key; - u32 pd; - u32 type; - struct wait_queue_head wait; - refcount_t usecount; -}; - #define MLX5_24BIT_MASK ((1 << 24) - 1) enum mlx5_res_type { @@ -441,6 +425,7 @@ struct mlx5_core_health { struct work_struct report_work; struct devlink_health_reporter *fw_reporter; struct devlink_health_reporter 
*fw_fatal_reporter; + struct delayed_work update_fw_log_ts_work; }; struct mlx5_qp_table { @@ -493,6 +478,10 @@ struct mlx5_fc_stats { unsigned long next_query; unsigned long sampling_interval; /* jiffies */ u32 *bulk_query_out; + int bulk_query_len; + size_t num_counters; + bool bulk_query_alloc_failed; + unsigned long next_bulk_query_alloc; struct mlx5_fc_pool fc_pool; }; @@ -653,7 +642,7 @@ struct mlx5e_resources { struct mlx5e_hw_objs { u32 pdn; struct mlx5_td td; - struct mlx5_core_mkey mkey; + u32 mkey; struct mlx5_sq_bfreg bfreg; } hw_objs; struct devlink_port dl_port; @@ -752,6 +741,7 @@ struct mlx5_core_dev { u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; u8 embedded_cpu; } caps; + struct mlx5_timeouts *timeouts; u64 sys_image_guid; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; @@ -1005,8 +995,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); -int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); -int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); void mlx5_health_flush(struct mlx5_core_dev *dev); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); @@ -1024,13 +1012,11 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *out, int outlen); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, + int inlen); +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); +int 
mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out, + int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); @@ -1242,6 +1228,16 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) return MLX5_CAP_GEN(dev, native_port_num); } +static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev) +{ + int idx = MLX5_CAP_GEN(dev, native_port_num); + + if (idx >= 1 && idx <= MLX5_MAX_PORTS) + return idx - 1; + else + return PCI_FUNC(dev->pdev->devfn); +} + enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; @@ -1250,11 +1246,12 @@ static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; + int err; - devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); - return val.vbool; + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + return err ? 
MLX5_CAP_GEN(dev, roce) : val.vbool; } #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index cea6ecb4b73e..3705a382276b 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,19 +4,18 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -#define MLX5_IRQ_VEC_COMP_BASE 1 #define MLX5_NUM_CMD_EQE (32) #define MLX5_NUM_ASYNC_EQE (0x1000) #define MLX5_NUM_SPARE_EQE (0x80) struct mlx5_eq; +struct mlx5_irq; struct mlx5_core_dev; struct mlx5_eq_param { - u8 irq_index; int nent; u64 mask[4]; - cpumask_var_t affinity; + struct mlx5_irq *irq; }; struct mlx5_eq * diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 4ab5c1fc1270..8b18fe9771f9 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -130,19 +130,28 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, #define ESW_TUN_OPTS_MASK GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, ESW_TUN_OPTS_OFFSET) #define ESW_TUN_MASK GENMASK(31 - ESW_RESERVED_BITS, ESW_TUN_OFFSET) #define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */ +#define ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT /* 0x7FF is a reserved mapping */ #define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT GENMASK(ESW_TUN_OPTS_BITS - 1, 0) #define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \ ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT) #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK - -u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); +/* 0x7FE is a reserved mapping for bridge ingress push vlan mark */ +#define ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN (ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT - 1) +#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN ((ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN << \ + ESW_TUN_OPTS_BITS) | \ + ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN) +#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK \ + GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \ + 
ESW_TUN_OPTS_OFFSET + 1) + +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) +static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { return MLX5_ESWITCH_NONE; } diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0106c67e8ccb..b1aad14689e3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -73,6 +73,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, + MLX5_FLOW_NAMESPACE_FDB_BYPASS, MLX5_FLOW_NAMESPACE_FDB, MLX5_FLOW_NAMESPACE_ESW_EGRESS, MLX5_FLOW_NAMESPACE_ESW_INGRESS, @@ -83,6 +84,9 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_RX, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL, MLX5_FLOW_NAMESPACE_RDMA_TX, + MLX5_FLOW_NAMESPACE_PORT_SEL, + MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS, + MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS, }; enum { @@ -97,6 +101,7 @@ enum { struct mlx5_pkt_reformat; struct mlx5_modify_hdr; +struct mlx5_flow_definer; struct mlx5_flow_table; struct mlx5_flow_group; struct mlx5_flow_namespace; @@ -241,6 +246,10 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_flow_destination *old_dest); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); + +/* As mlx5_fc_create() but doesn't queue stats refresh thread. 
*/ +struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging); + void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, @@ -257,6 +266,13 @@ struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, void *modify_actions); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, struct mlx5_modify_hdr *modify_hdr); +struct mlx5_flow_definer * +mlx5_create_match_definer(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type ns_type, u16 format_id, + u32 *match_mask); +void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, + struct mlx5_flow_definer *definer); +int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer); struct mlx5_pkt_reformat_params { int type; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 993204a6c1a1..598ac3bcc901 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -94,6 +94,7 @@ enum { enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, + MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, MLX5_OBJ_TYPE_PSV = 0xff03, @@ -342,7 +343,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_geneve_oam[0x1]; u8 outer_geneve_protocol_type[0x1]; u8 outer_geneve_opt_len[0x1]; - u8 reserved_at_1e[0x1]; + u8 source_vhca_port[0x1]; u8 source_eswitch_port[0x1]; u8 inner_dmac[0x1]; @@ -371,7 +372,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 reserved_at_37[0x9]; u8 geneve_tlv_option_0_data[0x1]; - u8 reserved_at_41[0x4]; + u8 geneve_tlv_option_0_exist[0x1]; + u8 reserved_at_42[0x3]; u8 outer_first_mpls_over_udp[0x4]; u8 outer_first_mpls_over_gre[0x4]; u8 inner_first_mpls[0x4]; @@ -393,6 +395,14 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 metadata_reg_c_0[0x1]; }; +struct mlx5_ifc_flow_table_fields_supported_2_bits { + u8 
reserved_at_0[0xe]; + u8 bth_opcode[0x1]; + u8 reserved_at_f[0x11]; + + u8 reserved_at_20[0x60]; +}; + struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; u8 reserved_at_1[0x1]; @@ -539,10 +549,11 @@ struct mlx5_ifc_fte_match_set_misc_bits { union mlx5_ifc_gre_key_bits gre_key; u8 vxlan_vni[0x18]; - u8 reserved_at_b8[0x8]; + u8 bth_opcode[0x8]; u8 geneve_vni[0x18]; - u8 reserved_at_d8[0x7]; + u8 reserved_at_d8[0x6]; + u8 geneve_tlv_option_0_exist[0x1]; u8 geneve_oam[0x1]; u8 reserved_at_e0[0xc]; @@ -661,6 +672,26 @@ struct mlx5_ifc_fte_match_set_misc4_bits { u8 reserved_at_100[0x100]; }; +struct mlx5_ifc_fte_match_set_misc5_bits { + u8 macsec_tag_0[0x20]; + + u8 macsec_tag_1[0x20]; + + u8 macsec_tag_2[0x20]; + + u8 macsec_tag_3[0x20]; + + u8 tunnel_header_0[0x20]; + + u8 tunnel_header_1[0x20]; + + u8 tunnel_header_2[0x20]; + + u8 tunnel_header_3[0x20]; + + u8 reserved_at_100[0x100]; +}; + struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; @@ -756,7 +787,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; - u8 reserved_at_e00[0x1200]; + u8 reserved_at_e00[0x700]; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive_rdma; + + u8 reserved_at_1580[0x280]; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_transmit_rdma; + + u8 reserved_at_1880[0x780]; u8 sw_steering_nic_rx_action_drop_icm_address[0x40]; @@ -767,6 +806,18 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_at_20c0[0x5f40]; }; +struct mlx5_ifc_port_selection_cap_bits { + u8 reserved_at_0[0x10]; + u8 port_select_flow_table[0x1]; + u8 reserved_at_11[0xf]; + + u8 reserved_at_20[0x1e0]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_port_selection; + + u8 reserved_at_400[0x7c00]; +}; + enum { MLX5_FDB_TO_VPORT_REG_C_0 = 0x01, MLX5_FDB_TO_VPORT_REG_C_1 = 0x02, @@ -782,7 +833,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 
fdb_to_vport_reg_c_id[0x8]; u8 reserved_at_8[0xd]; u8 fdb_modify_header_fwd_to_table[0x1]; - u8 reserved_at_16[0x1]; + u8 fdb_ipv4_ttl_modify[0x1]; u8 flow_source[0x1]; u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; @@ -1262,7 +1313,7 @@ enum { enum { MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED = 1 << 4, - mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, + MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, @@ -1306,7 +1357,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_resource_manager[0x1]; u8 hca_cap_2[0x1]; - u8 reserved_at_21[0x2]; + u8 reserved_at_21[0x1]; + u8 dtor[0x1]; u8 event_on_vhca_state_teardown_request[0x1]; u8 event_on_vhca_state_in_use[0x1]; u8 event_on_vhca_state_active[0x1]; @@ -1336,7 +1388,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_b0[0x1]; u8 uplink_follow[0x1]; u8 ts_cqe_to_dest_cqn[0x1]; - u8 reserved_at_b3[0xd]; + u8 reserved_at_b3[0x7]; + u8 shampo[0x1]; + u8 reserved_at_bb[0x5]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -1514,7 +1568,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uar_4k[0x1]; u8 reserved_at_241[0x9]; u8 uar_sz[0x6]; - u8 reserved_at_248[0x2]; + u8 port_selection_cap[0x1]; + u8 reserved_at_248[0x1]; u8 umem_uid_0[0x1]; u8 reserved_at_250[0x5]; u8 log_pg_sz[0x8]; @@ -1587,7 +1642,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_tis_per_sq[0x5]; u8 ext_stride_num_range[0x1]; - u8 reserved_at_3a1[0x2]; + u8 roce_rw_supported[0x1]; + u8 log_max_current_uc_list_wr_supported[0x1]; u8 log_max_stride_sz_rq[0x5]; u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; @@ -1716,7 +1772,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 flex_parser_id_outer_first_mpls_over_gre[0x4]; u8 flex_parser_id_outer_first_mpls_over_udp_label[0x4]; - u8 reserved_at_6e0[0x10]; + u8 max_num_match_definer[0x10]; u8 sf_base_id[0x10]; u8 
flex_parser_id_gtpu_dw_2[0x4]; @@ -1731,7 +1787,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_760[0x20]; u8 vhca_tunnel_commands[0x40]; - u8 reserved_at_7c0[0x40]; + u8 match_definer_format_supported[0x40]; }; struct mlx5_ifc_cmd_hca_cap_2_bits { @@ -1750,6 +1806,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, + MLX5_FLOW_DESTINATION_TYPE_UPLINK = 0x8, MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, @@ -1804,7 +1861,9 @@ struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_misc4_bits misc_parameters_4; - u8 reserved_at_c00[0x400]; + struct mlx5_ifc_fte_match_set_misc5_bits misc_parameters_5; + + u8 reserved_at_e00[0x200]; }; enum { @@ -1876,7 +1935,21 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_139[0x4]; u8 log_wqe_stride_size[0x3]; - u8 reserved_at_140[0x4c0]; + u8 reserved_at_140[0x80]; + + u8 headers_mkey[0x20]; + + u8 shampo_enable[0x1]; + u8 reserved_at_1e1[0x4]; + u8 log_reservation_size[0x3]; + u8 reserved_at_1e8[0x5]; + u8 log_max_num_of_packets_per_reservation[0x3]; + u8 reserved_at_1f0[0x6]; + u8 log_headers_entry_size[0x2]; + u8 reserved_at_1f8[0x4]; + u8 log_headers_buffer_entry_num[0x4]; + + u8 reserved_at_200[0x400]; struct mlx5_ifc_cmd_pas_bits pas[]; }; @@ -2807,6 +2880,40 @@ struct mlx5_ifc_dropped_packet_logged_bits { u8 reserved_at_0[0xe0]; }; +struct mlx5_ifc_default_timeout_bits { + u8 to_multiplier[0x3]; + u8 reserved_at_3[0x9]; + u8 to_value[0x14]; +}; + +struct mlx5_ifc_dtor_reg_bits { + u8 reserved_at_0[0x20]; + + struct mlx5_ifc_default_timeout_bits pcie_toggle_to; + + u8 reserved_at_40[0x60]; + + struct mlx5_ifc_default_timeout_bits health_poll_to; + + struct mlx5_ifc_default_timeout_bits full_crdump_to; + + struct mlx5_ifc_default_timeout_bits fw_reset_to; + + struct mlx5_ifc_default_timeout_bits flush_on_err_to; + + struct mlx5_ifc_default_timeout_bits 
pci_sync_update_to; + + struct mlx5_ifc_default_timeout_bits tear_down_to; + + struct mlx5_ifc_default_timeout_bits fsm_reactivate_to; + + struct mlx5_ifc_default_timeout_bits reclaim_pages_to; + + struct mlx5_ifc_default_timeout_bits reclaim_vfs_pages_to; + + u8 reserved_at_1c0[0x40]; +}; + enum { MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, @@ -3118,6 +3225,20 @@ struct mlx5_ifc_roce_addr_layout_bits { u8 reserved_at_e0[0x20]; }; +struct mlx5_ifc_shampo_cap_bits { + u8 reserved_at_0[0x3]; + u8 shampo_log_max_reservation_size[0x5]; + u8 reserved_at_8[0x3]; + u8 shampo_log_min_reservation_size[0x5]; + u8 shampo_min_mss_size[0x10]; + + u8 reserved_at_20[0x3]; + u8 shampo_max_log_headers_entry_size[0x5]; + u8 reserved_at_28[0x18]; + + u8 reserved_at_40[0x7c0]; +}; + union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2; @@ -3128,6 +3249,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; + struct mlx5_ifc_port_selection_cap_bits port_selection_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; struct mlx5_ifc_debug_cap_bits debug_cap; @@ -3135,6 +3257,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; + struct mlx5_ifc_shampo_cap_bits shampo_cap; u8 reserved_at_0[0x8000]; }; @@ -3309,8 +3432,9 @@ enum { }; enum { - MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, - MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, + MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0), + MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1), + MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO = BIT(2), }; enum { @@ -3335,7 +3459,7 @@ struct 
mlx5_ifc_tirc_bits { u8 reserved_at_80[0x4]; u8 lro_timeout_period_usecs[0x10]; - u8 lro_enable_mask[0x4]; + u8 packet_merge_mask[0x4]; u8 lro_max_ip_payload_size[0x8]; u8 reserved_at_a0[0x40]; @@ -3517,6 +3641,18 @@ enum { MLX5_RQC_STATE_ERR = 0x3, }; +enum { + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_BYTE = 0x0, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE = 0x1, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_PAGE = 0x2, +}; + +enum { + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_NO_MATCH = 0x0, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED = 0x1, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_FIVE_TUPLE = 0x2, +}; + struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; u8 delay_drop_en[0x1]; @@ -3549,7 +3685,13 @@ struct mlx5_ifc_rqc_bits { u8 reserved_at_c0[0x10]; u8 hairpin_peer_vhca[0x10]; - u8 reserved_at_e0[0xa0]; + u8 reserved_at_e0[0x46]; + u8 shampo_no_match_alignment_granularity[0x2]; + u8 reserved_at_128[0x6]; + u8 shampo_match_criteria_type[0x2]; + u8 reservation_timeout[0x10]; + + u8 reserved_at_140[0x40]; struct mlx5_ifc_wq_bits wq; }; @@ -4097,13 +4239,19 @@ struct mlx5_ifc_health_buffer_bits { u8 assert_callra[0x20]; - u8 reserved_at_140[0x40]; + u8 reserved_at_140[0x20]; + + u8 time[0x20]; u8 fw_version[0x20]; u8 hw_id[0x20]; - u8 reserved_at_1c0[0x20]; + u8 rfr[0x1]; + u8 reserved_at_1c1[0x3]; + u8 valid[0x1]; + u8 severity[0x3]; + u8 reserved_at_1c8[0x18]; u8 irisc_index[0x8]; u8 synd[0x8]; @@ -5616,6 +5764,236 @@ struct mlx5_ifc_query_fte_in_bits { u8 reserved_at_120[0xe0]; }; +struct mlx5_ifc_match_definer_format_0_bits { + u8 reserved_at_0[0x100]; + + u8 metadata_reg_c_0[0x20]; + + u8 metadata_reg_c_1[0x20]; + + u8 outer_dmac_47_16[0x20]; + + u8 outer_dmac_15_0[0x10]; + u8 outer_ethertype[0x10]; + + u8 reserved_at_180[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 outer_ip_frag[0x1]; + u8 outer_qp_type[0x2]; + u8 outer_encap_type[0x2]; + u8 port_number[0x2]; + u8 outer_l3_type[0x2]; + u8 outer_l4_type[0x2]; + u8 outer_first_vlan_type[0x2]; 
+ u8 outer_first_vlan_prio[0x3]; + u8 outer_first_vlan_cfi[0x1]; + u8 outer_first_vlan_vid[0xc]; + + u8 outer_l4_type_ext[0x4]; + u8 reserved_at_1a4[0x2]; + u8 outer_ipsec_layer[0x2]; + u8 outer_l2_type[0x2]; + u8 force_lb[0x1]; + u8 outer_l2_ok[0x1]; + u8 outer_l3_ok[0x1]; + u8 outer_l4_ok[0x1]; + u8 outer_second_vlan_type[0x2]; + u8 outer_second_vlan_prio[0x3]; + u8 outer_second_vlan_cfi[0x1]; + u8 outer_second_vlan_vid[0xc]; + + u8 outer_smac_47_16[0x20]; + + u8 outer_smac_15_0[0x10]; + u8 inner_ipv4_checksum_ok[0x1]; + u8 inner_l4_checksum_ok[0x1]; + u8 outer_ipv4_checksum_ok[0x1]; + u8 outer_l4_checksum_ok[0x1]; + u8 inner_l3_ok[0x1]; + u8 inner_l4_ok[0x1]; + u8 outer_l3_ok_duplicate[0x1]; + u8 outer_l4_ok_duplicate[0x1]; + u8 outer_tcp_cwr[0x1]; + u8 outer_tcp_ece[0x1]; + u8 outer_tcp_urg[0x1]; + u8 outer_tcp_ack[0x1]; + u8 outer_tcp_psh[0x1]; + u8 outer_tcp_rst[0x1]; + u8 outer_tcp_syn[0x1]; + u8 outer_tcp_fin[0x1]; +}; + +struct mlx5_ifc_match_definer_format_22_bits { + u8 reserved_at_0[0x100]; + + u8 outer_ip_src_addr[0x20]; + + u8 outer_ip_dest_addr[0x20]; + + u8 outer_l4_sport[0x10]; + u8 outer_l4_dport[0x10]; + + u8 reserved_at_160[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 outer_ip_frag[0x1]; + u8 outer_qp_type[0x2]; + u8 outer_encap_type[0x2]; + u8 port_number[0x2]; + u8 outer_l3_type[0x2]; + u8 outer_l4_type[0x2]; + u8 outer_first_vlan_type[0x2]; + u8 outer_first_vlan_prio[0x3]; + u8 outer_first_vlan_cfi[0x1]; + u8 outer_first_vlan_vid[0xc]; + + u8 metadata_reg_c_0[0x20]; + + u8 outer_dmac_47_16[0x20]; + + u8 outer_smac_47_16[0x20]; + + u8 outer_smac_15_0[0x10]; + u8 outer_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_format_23_bits { + u8 reserved_at_0[0x100]; + + u8 inner_ip_src_addr[0x20]; + + u8 inner_ip_dest_addr[0x20]; + + u8 inner_l4_sport[0x10]; + u8 inner_l4_dport[0x10]; + + u8 reserved_at_160[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 inner_ip_frag[0x1]; + u8 inner_qp_type[0x2]; + u8 
inner_encap_type[0x2]; + u8 port_number[0x2]; + u8 inner_l3_type[0x2]; + u8 inner_l4_type[0x2]; + u8 inner_first_vlan_type[0x2]; + u8 inner_first_vlan_prio[0x3]; + u8 inner_first_vlan_cfi[0x1]; + u8 inner_first_vlan_vid[0xc]; + + u8 tunnel_header_0[0x20]; + + u8 inner_dmac_47_16[0x20]; + + u8 inner_smac_47_16[0x20]; + + u8 inner_smac_15_0[0x10]; + u8 inner_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_format_29_bits { + u8 reserved_at_0[0xc0]; + + u8 outer_ip_dest_addr[0x80]; + + u8 outer_ip_src_addr[0x80]; + + u8 outer_l4_sport[0x10]; + u8 outer_l4_dport[0x10]; + + u8 reserved_at_1e0[0x20]; +}; + +struct mlx5_ifc_match_definer_format_30_bits { + u8 reserved_at_0[0xa0]; + + u8 outer_ip_dest_addr[0x80]; + + u8 outer_ip_src_addr[0x80]; + + u8 outer_dmac_47_16[0x20]; + + u8 outer_smac_47_16[0x20]; + + u8 outer_smac_15_0[0x10]; + u8 outer_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_format_31_bits { + u8 reserved_at_0[0xc0]; + + u8 inner_ip_dest_addr[0x80]; + + u8 inner_ip_src_addr[0x80]; + + u8 inner_l4_sport[0x10]; + u8 inner_l4_dport[0x10]; + + u8 reserved_at_1e0[0x20]; +}; + +struct mlx5_ifc_match_definer_format_32_bits { + u8 reserved_at_0[0xa0]; + + u8 inner_ip_dest_addr[0x80]; + + u8 inner_ip_src_addr[0x80]; + + u8 inner_dmac_47_16[0x20]; + + u8 inner_smac_47_16[0x20]; + + u8 inner_smac_15_0[0x10]; + u8 inner_dmac_15_0[0x10]; +}; + +struct mlx5_ifc_match_definer_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x40]; + + u8 reserved_at_80[0x10]; + u8 format_id[0x10]; + + u8 reserved_at_a0[0x160]; + + u8 match_mask[16][0x20]; +}; + +struct mlx5_ifc_general_obj_in_cmd_hdr_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 vhca_tunnel_id[0x10]; + u8 obj_type[0x10]; + + u8 obj_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_general_obj_out_cmd_hdr_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 obj_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_create_match_definer_in_bits { + 
struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_match_definer_bits obj_context; +}; + +struct mlx5_ifc_create_match_definer_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -5623,6 +6001,7 @@ enum { MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_4 = 0x5, + MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_5 = 0x6, }; struct mlx5_ifc_query_flow_group_out_bits { @@ -6369,7 +6748,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits { u8 reserved_at_3c[0x1]; u8 hash[0x1]; u8 reserved_at_3e[0x1]; - u8 lro[0x1]; + u8 packet_merge[0x1]; }; struct mlx5_ifc_modify_tir_out_bits { @@ -7569,7 +7948,7 @@ struct mlx5_ifc_dealloc_uar_out_bits { struct mlx5_ifc_dealloc_uar_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -8090,6 +8469,11 @@ struct mlx5_ifc_create_flow_group_out_bits { }; enum { + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_TCAM_SUBTABLE = 0x0, + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT = 0x1, +}; + +enum { MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, @@ -8110,7 +8494,9 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_60[0x20]; u8 table_type[0x8]; - u8 reserved_at_88[0x18]; + u8 reserved_at_88[0x4]; + u8 group_type[0x4]; + u8 reserved_at_90[0x10]; u8 reserved_at_a0[0x8]; u8 table_id[0x18]; @@ -8125,7 +8511,10 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 end_flow_index[0x20]; - u8 reserved_at_140[0xa0]; + u8 reserved_at_140[0x10]; + u8 
match_definer_id[0x10]; + + u8 reserved_at_160[0x80]; u8 reserved_at_1e0[0x18]; u8 match_criteria_enable[0x8]; @@ -8416,7 +8805,7 @@ struct mlx5_ifc_alloc_uar_out_bits { struct mlx5_ifc_alloc_uar_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -9334,7 +9723,10 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_84_to_68[0x11]; u8 tracer_registers[0x4]; - u8 regs_63_to_32[0x20]; + u8 regs_63_to_46[0x12]; + u8 mrtc[0x1]; + u8 regs_44_to_32[0xd]; + u8 regs_31_to_0[0x20]; }; @@ -10060,6 +10452,17 @@ struct mlx5_ifc_pddr_reg_bits { union mlx5_ifc_pddr_reg_page_data_auto_bits page_data; }; +struct mlx5_ifc_mrtc_reg_bits { + u8 time_synced[0x1]; + u8 reserved_at_1[0x1f]; + + u8 reserved_at_20[0x20]; + + u8 time_h[0x20]; + + u8 time_l[0x20]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -10121,6 +10524,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mirc_reg_bits mirc_reg; struct mlx5_ifc_mfrl_reg_bits mfrl_reg; struct mlx5_ifc_mtutc_reg_bits mtutc_reg; + struct mlx5_ifc_mrtc_reg_bits mrtc_reg; u8 reserved_at_0[0x60e0]; }; @@ -10398,9 +10802,16 @@ struct mlx5_ifc_dcbx_param_bits { u8 reserved_at_a0[0x160]; }; +enum { + MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY = 0, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT, +}; + struct mlx5_ifc_lagc_bits { u8 fdb_selection_mode[0x1]; - u8 reserved_at_1[0x1c]; + u8 reserved_at_1[0x14]; + u8 port_select_mode[0x3]; + u8 reserved_at_18[0x5]; u8 lag_state[0x3]; u8 reserved_at_20[0x14]; @@ -10614,29 +11025,6 @@ struct mlx5_ifc_dealloc_memic_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_general_obj_in_cmd_hdr_bits { - u8 opcode[0x10]; - u8 uid[0x10]; - - u8 vhca_tunnel_id[0x10]; - u8 obj_type[0x10]; - - u8 obj_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_general_obj_out_cmd_hdr_bits 
{ - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 obj_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - struct mlx5_ifc_umem_bits { u8 reserved_at_0[0x80]; diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f..aa47705191bc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,10 +36,7 @@ struct mempolicy; struct anon_vma; struct anon_vma_chain; -struct file_ra_state; struct user_struct; -struct writeback_control; -struct bdi_writeback; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -216,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -/* - * Any attempt to mark this function as static leads to build failure - * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() - * is referred to by BPF code. This must be visible for error injection. - */ -int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp, void **shadowp); #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) @@ -434,51 +424,6 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; -/** - * enum fault_flag - Fault flag definitions. - * @FAULT_FLAG_WRITE: Fault was a write fault. - * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. - * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. - * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. - * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. - * @FAULT_FLAG_TRIED: The fault has been tried once. - * @FAULT_FLAG_USER: The fault originated in userspace. - * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. - * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. 
- * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. - * - * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify - * whether we would allow page faults to retry by specifying these two - * fault flags correctly. Currently there can be three legal combinations: - * - * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and - * this is the first try - * - * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and - * we've already tried at least once - * - * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry - * - * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never - * be used. Note that page faults can be allowed to retry for multiple times, - * in which case we'll have an initial fault with flags (a) then later on - * continuous faults with flags (b). We should always try to detect pending - * signals before a retry to make sure the continuous page faults can still be - * interrupted if necessary. - */ -enum fault_flag { - FAULT_FLAG_WRITE = 1 << 0, - FAULT_FLAG_MKWRITE = 1 << 1, - FAULT_FLAG_ALLOW_RETRY = 1 << 2, - FAULT_FLAG_RETRY_NOWAIT = 1 << 3, - FAULT_FLAG_KILLABLE = 1 << 4, - FAULT_FLAG_TRIED = 1 << 5, - FAULT_FLAG_USER = 1 << 6, - FAULT_FLAG_REMOTE = 1 << 7, - FAULT_FLAG_INSTRUCTION = 1 << 8, - FAULT_FLAG_INTERRUPTIBLE = 1 << 9, -}; - /* * The default fault flags that should be used by most of the * arch-specific page fault handlers. @@ -587,6 +532,10 @@ enum page_entry_size { */ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); + /** + * @close: Called when the VMA is being removed from the MM. + * Context: User context. May sleep. Caller holds mmap_lock. 
+ */ void (*close)(struct vm_area_struct * area); /* Called any time before splitting to check if it's allowed */ int (*may_split)(struct vm_area_struct *area, unsigned long addr); @@ -724,6 +673,27 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +static inline unsigned int compound_order(struct page *page) +{ + if (!PageHead(page)) + return 0; + return page[1].compound_order; +} + +/** + * folio_order - The allocation order of a folio. + * @folio: The folio. + * + * A folio is composed of 2^order pages. See get_order() for the definition + * of order. + * + * Return: The order of the folio. + */ +static inline unsigned int folio_order(struct folio *folio) +{ + return compound_order(&folio->page); +} + #include <linux/huge_mm.h> /* @@ -748,13 +718,18 @@ static inline int put_page_testzero(struct page *page) return page_ref_dec_and_test(page); } +static inline int folio_put_testzero(struct folio *folio) +{ + return put_page_testzero(&folio->page); +} + /* * Try to grab a ref unless the page has a refcount of zero, return false if * that is the case. * This can be called when MMU is off so it must not access * any of the virtual mappings. 
*/ -static inline int get_page_unless_zero(struct page *page) +static inline bool get_page_unless_zero(struct page *page) { return page_ref_add_unless(page, 1, 0); } @@ -799,40 +774,6 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -extern void *kvmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kvmalloc(size_t size, gfp_t flags) -{ - return kvmalloc_node(size, flags, NUMA_NO_NODE); -} -static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) -{ - return kvmalloc_node(size, flags | __GFP_ZERO, node); -} -static inline void *kvzalloc(size_t size, gfp_t flags) -{ - return kvmalloc(size, flags | __GFP_ZERO); -} - -static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(n, size, &bytes))) - return NULL; - - return kvmalloc(bytes, flags); -} - -static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) -{ - return kvmalloc_array(n, size, flags | __GFP_ZERO); -} - -extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, - gfp_t flags); -extern void kvfree(const void *addr); -extern void kvfree_sensitive(const void *addr, size_t len); - static inline int head_compound_mapcount(struct page *head) { return atomic_read(compound_mapcount_ptr(head)) + 1; @@ -879,19 +820,15 @@ static inline int page_mapcount(struct page *page) #ifdef CONFIG_TRANSPARENT_HUGEPAGE int total_mapcount(struct page *page); -int page_trans_huge_mapcount(struct page *page, int *total_mapcount); +int page_trans_huge_mapcount(struct page *page); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } -static inline int page_trans_huge_mapcount(struct page *page, - int *total_mapcount) +static inline int page_trans_huge_mapcount(struct page *page) { - int mapcount = page_mapcount(page); - if (total_mapcount) - *total_mapcount = mapcount; - return mapcount; + return page_mapcount(page); } #endif @@ -902,12 +839,21 @@ static 
inline struct page *virt_to_head_page(const void *x) return compound_head(page); } +static inline struct folio *virt_to_folio(const void *x) +{ + struct page *page = virt_to_page(x); + + return page_folio(page); +} + void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); -void copy_huge_page(struct page *dst, struct page *src); +void folio_copy(struct folio *dst, struct folio *src); + +unsigned long nr_free_buffer_pages(void); /* * Compound pages have a destructor function. Provide a @@ -943,13 +889,6 @@ static inline void destroy_compound_page(struct page *page) compound_page_dtors[page[1].compound_dtor](page); } -static inline unsigned int compound_order(struct page *page) -{ - if (!PageHead(page)) - return 0; - return page[1].compound_order; -} - static inline bool hpage_pincount_available(struct page *page) { /* @@ -1131,6 +1070,11 @@ static inline enum zone_type page_zonenum(const struct page *page) return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } +static inline enum zone_type folio_zonenum(const struct folio *folio) +{ + return page_zonenum(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { @@ -1200,18 +1144,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } /* 127: arbitrary random number, small enough to assemble well */ -#define page_ref_zero_or_close_to_overflow(page) \ - ((unsigned int) page_ref_count(page) + 127u <= 127u) +#define folio_ref_zero_or_close_to_overflow(folio) \ + ((unsigned int) folio_ref_count(folio) + 127u <= 127u) + +/** + * folio_get - Increment the reference count on a folio. + * @folio: The folio. + * + * Context: May be called in any context, as long as you know that + * you have a refcount on the folio. If you do not already have one, + * folio_try_get() may be the right interface for you to use. 
+ */ +static inline void folio_get(struct folio *folio) +{ + VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); + folio_ref_inc(folio); +} static inline void get_page(struct page *page) { - page = compound_head(page); - /* - * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_refcount. - */ - VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); - page_ref_inc(page); + folio_get(page_folio(page)); } bool __must_check try_grab_page(struct page *page, unsigned int flags); @@ -1228,9 +1180,28 @@ static inline __must_check bool try_get_page(struct page *page) return true; } +/** + * folio_put - Decrement the reference count on a folio. + * @folio: The folio. + * + * If the folio's reference count reaches zero, the memory will be + * released back to the page allocator and may be used by another + * allocation immediately. Do not access the memory or the struct folio + * after calling folio_put() unless you can be sure that it wasn't the + * last reference. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folio_put(struct folio *folio) +{ + if (folio_put_testzero(folio)) + __put_page(&folio->page); +} + static inline void put_page(struct page *page) { - page = compound_head(page); + struct folio *folio = page_folio(page); /* * For devmap managed pages we need to catch refcount transition from @@ -1238,13 +1209,12 @@ static inline void put_page(struct page *page) * need to inform the device driver through callback. See * include/linux/memremap.h and HMM for details. 
*/ - if (page_is_devmap_managed(page)) { - put_devmap_managed_page(page); + if (page_is_devmap_managed(&folio->page)) { + put_devmap_managed_page(&folio->page); return; } - if (put_page_testzero(page)) - __put_page(page); + folio_put(folio); } /* @@ -1379,6 +1349,11 @@ static inline int page_to_nid(const struct page *page) } #endif +static inline int folio_nid(const struct folio *folio) +{ + return page_to_nid(&folio->page); +} + #ifdef CONFIG_NUMA_BALANCING static inline int cpu_pid_to_cpupid(int cpu, int pid) { @@ -1546,6 +1521,16 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } +static inline struct zone *folio_zone(const struct folio *folio) +{ + return page_zone(&folio->page); +} + +static inline pg_data_t *folio_pgdat(const struct folio *folio) +{ + return page_pgdat(&folio->page); +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { @@ -1559,6 +1544,20 @@ static inline unsigned long page_to_section(const struct page *page) } #endif +/** + * folio_pfn - Return the Page Frame Number of a folio. + * @folio: The folio. + * + * A folio may contain multiple pages. The pages have consecutive + * Page Frame Numbers. + * + * Return: The Page Frame Number of the first page in the folio. + */ +static inline unsigned long folio_pfn(struct folio *folio) +{ + return page_to_pfn(&folio->page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) @@ -1595,6 +1594,89 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +/** + * folio_nr_pages - The number of pages in the folio. + * @folio: The folio. + * + * Return: A positive power of two. + */ +static inline long folio_nr_pages(struct folio *folio) +{ + return compound_nr(&folio->page); +} + +/** + * folio_next - Move to the next physical folio. 
+ * @folio: The folio we're currently operating on. + * + * If you have physically contiguous memory which may span more than + * one folio (eg a &struct bio_vec), use this function to move from one + * folio to the next. Do not use it if the memory is only virtually + * contiguous as the folios are almost certainly not adjacent to each + * other. This is the folio equivalent to writing ``page++``. + * + * Context: We assume that the folios are refcounted and/or locked at a + * higher level and do not adjust the reference counts. + * Return: The next struct folio. + */ +static inline struct folio *folio_next(struct folio *folio) +{ + return (struct folio *)folio_page(folio, folio_nr_pages(folio)); +} + +/** + * folio_shift - The size of the memory described by this folio. + * @folio: The folio. + * + * A folio represents a number of bytes which is a power-of-two in size. + * This function tells you which power-of-two the folio is. See also + * folio_size() and folio_order(). + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The base-2 logarithm of the size of this folio. + */ +static inline unsigned int folio_shift(struct folio *folio) +{ + return PAGE_SHIFT + folio_order(folio); +} + +/** + * folio_size - The number of bytes in a folio. + * @folio: The folio. + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The number of bytes in this folio. 
+ */ +static inline size_t folio_size(struct folio *folio) +{ + return PAGE_SIZE << folio_order(folio); +} + +#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +static inline int arch_make_page_accessible(struct page *page) +{ + return 0; +} +#endif + +#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE +static inline int arch_make_folio_accessible(struct folio *folio) +{ + int ret; + long i, nr = folio_nr_pages(folio); + + for (i = 0; i < nr; i++) { + ret = arch_make_page_accessible(folio_page(folio, i)); + if (ret) + break; + } + + return ret; +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ @@ -1633,21 +1715,13 @@ void page_address_init(void); #define page_address_init() do { } while(0) #endif -extern void *page_rmapping(struct page *page); -extern struct anon_vma *page_anon_vma(struct page *page); -extern struct address_space *page_mapping(struct page *page); - -extern struct address_space *__page_file_mapping(struct page *); - -static inline -struct address_space *page_file_mapping(struct page *page) +static inline void *folio_address(const struct folio *folio) { - if (unlikely(PageSwapCache(page))) - return __page_file_mapping(page); - - return page->mapping; + return page_address(&folio->page); } +extern void *page_rmapping(struct page *page); +extern struct anon_vma *page_anon_vma(struct page *page); extern pgoff_t __page_file_index(struct page *page); /* @@ -1662,7 +1736,7 @@ static inline pgoff_t page_index(struct page *page) } bool page_mapped(struct page *page); -struct address_space *page_mapping(struct page *page); +bool folio_mapped(struct folio *folio); /* * Return true only if the page has been allocated with @@ -1700,6 +1774,7 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) #define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) +#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* * Flags passed to show_mem() and show_free_areas() to 
suppress output in @@ -1717,16 +1792,6 @@ static inline bool can_do_mlock(void) { return false; } extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct address_space *check_mapping; /* Check page->mapping if set */ - pgoff_t first_index; /* Lowest page->index to unmap */ - pgoff_t last_index; /* Highest page->index to unmap */ - struct page *single_page; /* Locked page to be unmapped */ -}; - struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -1761,7 +1826,6 @@ extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); -int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); @@ -1772,7 +1836,6 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); -void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@ -1793,7 +1856,6 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, BUG(); return -EFAULT; } -static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct 
address_space *mapping, @@ -1850,24 +1912,12 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); -extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); -int redirty_page_for_writepage(struct writeback_control *wbc, - struct page *page); -void account_page_cleaned(struct page *page, struct address_space *mapping, - struct bdi_writeback *wb); -int set_page_dirty(struct page *page); +bool folio_mark_dirty(struct folio *folio); +bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -void __cancel_dirty_page(struct page *page); -static inline void cancel_dirty_page(struct page *page) -{ - /* Avoid atomic ops, locking, etc. when not actually needed. */ - if (PageDirty(page)) - __cancel_dirty_page(page); -} -int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); @@ -2447,7 +2497,7 @@ static inline unsigned long get_num_physpages(void) * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, * max_highmem_pfn}; * for_each_valid_physical_page_range() - * memblock_add_node(base, size, nid) + * memblock_add_node(base, size, nid, MEMBLOCK_NONE) * free_area_init(max_zone_pfns); */ void free_area_init(unsigned long *max_zone_pfn); @@ -2475,6 +2525,7 @@ extern void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); +extern void calculate_min_free_kbytes(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); @@ -2548,7 +2599,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct 
vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx); + struct mempolicy *, struct vm_userfaultfd_ctx, const char *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); @@ -2659,10 +2710,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); -/* mm/page-writeback.c */ -int __must_check write_one_page(struct page *page); -void task_dirty_inc(struct task_struct *tsk); - extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); @@ -2851,7 +2898,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO * and return without waiting upon it */ -#define FOLL_POPULATE 0x40 /* fault in page */ +#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */ +#define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ @@ -3060,7 +3108,6 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, #endif void drop_slab(void); -void drop_slab_node(int nid); #ifndef CONFIG_MMU #define randomize_va_space 0 @@ -3113,6 +3160,7 @@ enum mf_flags { MF_ACTION_REQUIRED = 1 << 1, MF_MUST_KILL = 1 << 2, MF_SOFT_OFFLINE = 1 << 3, + MF_UNPOISON = 1 << 4, }; extern int memory_failure(unsigned long pfn, int flags); extern void 
memory_failure_queue(unsigned long pfn, int flags); @@ -3124,6 +3172,19 @@ extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); +#ifndef arch_memory_failure +static inline int arch_memory_failure(unsigned long pfn, int flags) +{ + return -ENXIO; +} +#endif + +#ifndef arch_is_platform_page +static inline bool arch_is_platform_page(u64 paddr) +{ + return false; +} +#endif /* * Error handlers for various types of pages. @@ -3140,7 +3201,6 @@ enum mf_action_page_type { MF_MSG_KERNEL_HIGH_ORDER, MF_MSG_SLAB, MF_MSG_DIFFERENT_COMPOUND, - MF_MSG_POISONED_HUGE, MF_MSG_HUGE, MF_MSG_FREE_HUGE, MF_MSG_NON_PMD_HUGE, @@ -3155,7 +3215,6 @@ enum mf_action_page_type { MF_MSG_CLEAN_LRU, MF_MSG_TRUNCATED_LRU, MF_MSG_BUDDY, - MF_MSG_BUDDY_2ND, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, MF_MSG_UNKNOWN, @@ -3284,5 +3343,16 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) return 0; } +#ifdef CONFIG_ANON_VMA_NAME +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, const char *name); +#else +static inline int +madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, const char *name) { + return 0; +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355ea1ee32bd..b725839dfe71 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -2,31 +2,39 @@ #ifndef LINUX_MM_INLINE_H #define LINUX_MM_INLINE_H +#include <linux/atomic.h> #include <linux/huge_mm.h> #include <linux/swap.h> +#include <linux/string.h> /** - * page_is_file_lru - should the page be on a file LRU or anon LRU? - * @page: the page to test - * - * Returns 1 if @page is a regular filesystem backed page cache page or a lazily - * freed anonymous page (e.g. via MADV_FREE). 
Returns 0 if @page is a normal - * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by - * functions that manipulate the LRU lists, to sort a page onto the right LRU - * list. + * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? + * @folio: The folio to test. * * We would like to get this info without a page flag, but the state - * needs to survive until the page is last deleted from the LRU, which + * needs to survive until the folio is last deleted from the LRU, which * could be as far down as __page_cache_release. + * + * Return: An integer (not a boolean!) used to sort a folio onto the + * right LRU list and to account folios correctly. + * 1 if @folio is a regular filesystem backed page cache folio + * or a lazily freed anonymous folio (e.g. via MADV_FREE). + * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise + * ram or swap backed folio. */ +static inline int folio_is_file_lru(struct folio *folio) +{ + return !folio_test_swapbacked(folio); +} + static inline int page_is_file_lru(struct page *page) { - return !PageSwapBacked(page); + return folio_is_file_lru(page_folio(page)); } static __always_inline void update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, - int nr_pages) + long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -39,69 +47,228 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, } /** - * __clear_page_lru_flags - clear page lru flags before releasing a page - * @page: the page that was on lru and now has a zero reference + * __folio_clear_lru_flags - Clear page lru flags before releasing a page. + * @folio: The folio that was on lru and now has a zero reference. 
*/ -static __always_inline void __clear_page_lru_flags(struct page *page) +static __always_inline void __folio_clear_lru_flags(struct folio *folio) { - VM_BUG_ON_PAGE(!PageLRU(page), page); + VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio); - __ClearPageLRU(page); + __folio_clear_lru(folio); /* this shouldn't happen, so leave the flags to bad_page() */ - if (PageActive(page) && PageUnevictable(page)) + if (folio_test_active(folio) && folio_test_unevictable(folio)) return; - __ClearPageActive(page); - __ClearPageUnevictable(page); + __folio_clear_active(folio); + __folio_clear_unevictable(folio); +} + +static __always_inline void __clear_page_lru_flags(struct page *page) +{ + __folio_clear_lru_flags(page_folio(page)); } /** - * page_lru - which LRU list should a page be on? - * @page: the page to test + * folio_lru_list - Which LRU list should a folio be on? + * @folio: The folio to test. * - * Returns the LRU list a page should be on, as an index + * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ -static __always_inline enum lru_list page_lru(struct page *page) +static __always_inline enum lru_list folio_lru_list(struct folio *folio) { enum lru_list lru; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); - if (PageUnevictable(page)) + if (folio_test_unevictable(folio)) return LRU_UNEVICTABLE; - lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; - if (PageActive(page)) + lru = folio_is_file_lru(folio) ? 
LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; + if (folio_test_active(folio)) lru += LRU_ACTIVE; return lru; } +static __always_inline +void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add(&folio->lru, &lruvec->lists[lru]); +} + static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio(lruvec, page_folio(page)); +} + +static __always_inline +void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add(&page->lru, &lruvec->lists[lru]); + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add_tail(&folio->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio_tail(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add_tail(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) +{ + list_del(&folio->lru); + update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), + -folio_nr_pages(folio)); } static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec) { - list_del(&page->lru); - update_lru_size(lruvec, page_lru(page), page_zonenum(page), - -thp_nr_pages(page)); + lruvec_del_folio(lruvec, page_folio(page)); } + +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling vma_anon_name() and while using + * the returned pointer. 
+ */ +extern const char *vma_anon_name(struct vm_area_struct *vma); + +/* + * mmap_lock should be read-locked for orig_vma->vm_mm. + * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be + * isolated. + */ +extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma); + +/* + * mmap_lock should be write-locked or vma should have been isolated under + * write-locked mmap_lock protection. + */ +extern void free_vma_anon_name(struct vm_area_struct *vma); + +/* mmap_lock should be read-locked */ +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + const char *vma_name = vma_anon_name(vma); + + /* either both NULL, or pointers to same string */ + if (vma_name == name) + return true; + + return name && vma_name && !strcmp(name, vma_name); +} +#else /* CONFIG_ANON_VMA_NAME */ +static inline const char *vma_anon_name(struct vm_area_struct *vma) +{ + return NULL; +} +static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_vma_anon_name(struct vm_area_struct *vma) {} +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + return true; +} +#endif /* CONFIG_ANON_VMA_NAME */ + +static inline void init_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_set(&mm->tlb_flush_pending, 0); +} + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_inc(&mm->tlb_flush_pending); + /* + * The only time this value is relevant is when there are indeed pages + * to flush. And we'll only flush pages after changing them, which + * requires the PTL. + * + * So the ordering here is: + * + * atomic_inc(&mm->tlb_flush_pending); + * spin_lock(&ptl); + * ... + * set_pte_at(); + * spin_unlock(&ptl); + * + * spin_lock(&ptl) + * mm_tlb_flush_pending(); + * .... 
+ * spin_unlock(&ptl); + * + * flush_tlb_range(); + * atomic_dec(&mm->tlb_flush_pending); + * + * Where the increment if constrained by the PTL unlock, it thus + * ensures that the increment is visible if the PTE modification is + * visible. After all, if there is no PTE modification, nobody cares + * about TLB flushes either. + * + * This very much relies on users (mm_tlb_flush_pending() and + * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and + * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc + * locks (PPC) the unlock of one doesn't order against the lock of + * another PTL. + * + * The decrement is ordered by the flush_tlb_range(), such that + * mm_tlb_flush_pending() will not return false unless all flushes have + * completed. + */ +} + +static inline void dec_tlb_flush_pending(struct mm_struct *mm) +{ + /* + * See inc_tlb_flush_pending(). + * + * This cannot be smp_mb__before_atomic() because smp_mb() simply does + * not order against TLB invalidate completion, which is what we need. + * + * Therefore we must rely on tlb_flush_*() to guarantee order. + */ + atomic_dec(&mm->tlb_flush_pending); +} + +static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +{ + /* + * Must be called after having acquired the PTL; orders against that + * PTLs release and therefore ensures that if we observe the modified + * PTE we must also observe the increment from inc_tlb_flush_pending(). + * + * That is, it only guarantees to return true if there is a flush + * pending for _this_ PTL. + */ + return atomic_read(&mm->tlb_flush_pending); +} + +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + /* + * Similar to mm_tlb_flush_pending(), we must have acquired the PTL + * for which there is a TLB flush pending in order to guarantee + * we've seen both that PTE modification and the increment. 
+ * + * (no requirement on actually still holding the PTL, that is irrelevant) + */ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f8ee09c711f..3764c1b51b02 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -5,6 +5,7 @@ #include <linux/mm_types_task.h> #include <linux/auxvec.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> @@ -12,6 +13,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> +#include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> @@ -55,11 +57,11 @@ struct mem_cgroup; * in each subpage, but you may need to restore some of their values * afterwards. * - * SLUB uses cmpxchg_double() to atomically update its freelist and - * counters. That requires that freelist & counters be adjacent and - * double-word aligned. We align all struct pages to double-word - * boundaries, and ensure that 'freelist' is aligned within the - * struct. + * SLUB uses cmpxchg_double() to atomically update its freelist and counters. + * That requires that freelist & counters in struct slab be adjacent and + * double-word aligned. Because struct slab currently just reinterprets the + * bits of struct page, we align all struct pages to double-word boundaries, + * and ensure that 'freelist' is aligned within struct slab. */ #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) @@ -124,10 +126,8 @@ struct page { struct page *next; #ifdef CONFIG_64BIT int pages; /* Nr of pages left */ - int pobjects; /* Approximate count */ #else short int pages; - short int pobjects; #endif }; }; @@ -239,6 +239,72 @@ struct page { #endif } _struct_page_alignment; +/** + * struct folio - Represents a contiguous set of bytes. + * @flags: Identical to the page flags. 
+ * @lru: Least Recently Used list; tracks how recently this folio was used. + * @mapping: The file this page belongs to, or refers to the anon_vma for + * anonymous memory. + * @index: Offset within the file, in units of pages. For anonymous memory, + * this is the index from the beginning of the mmap. + * @private: Filesystem per-folio data (see folio_attach_private()). + * Used for swp_entry_t if folio_test_swapcache(). + * @_mapcount: Do not access this member directly. Use folio_mapcount() to + * find out how many times this folio is mapped by userspace. + * @_refcount: Do not access this member directly. Use folio_ref_count() + * to find how many references there are to this folio. + * @memcg_data: Memory Control Group data. + * + * A folio is a physically, virtually and logically contiguous set + * of bytes. It is a power-of-two in size, and it is aligned to that + * same power-of-two. It is at least as large as %PAGE_SIZE. If it is + * in the page cache, it is at a file offset which is a multiple of that + * power-of-two. It may be mapped into userspace at an address which is + * at an arbitrary page offset, but its kernel virtual address is aligned + * to its size. 
+ */ +struct folio { + /* private: don't document the anon union */ + union { + struct { + /* public: */ + unsigned long flags; + struct list_head lru; + struct address_space *mapping; + pgoff_t index; + void *private; + atomic_t _mapcount; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; +#endif + /* private: the union with struct page is transitional */ + }; + struct page page; + }; +}; + +static_assert(sizeof(struct page) == sizeof(struct folio)); +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) +FOLIO_MATCH(flags, flags); +FOLIO_MATCH(lru, lru); +FOLIO_MATCH(compound_head, lru); +FOLIO_MATCH(index, index); +FOLIO_MATCH(private, private); +FOLIO_MATCH(_mapcount, _mapcount); +FOLIO_MATCH(_refcount, _refcount); +#ifdef CONFIG_MEMCG +FOLIO_MATCH(memcg_data, memcg_data); +#endif +#undef FOLIO_MATCH + +static inline atomic_t *folio_mapcount_ptr(struct folio *folio) +{ + struct page *tail = &folio->page + 1; + return &tail->compound_mapcount; +} + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; @@ -257,6 +323,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +/* + * page_private can be used on tail pages. However, PagePrivate is only + * checked by the VM on the head page. 
So page_private on the tail pages + * should be used for data that's ancillary to the head page (eg attaching + * buffer heads to tail pages after attaching buffer heads to the head page) + */ #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) @@ -264,6 +336,11 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } +static inline void *folio_get_private(struct folio *folio) +{ + return folio->private; +} + struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) @@ -310,6 +387,12 @@ struct vm_userfaultfd_ctx { struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ +struct anon_vma_name { + struct kref kref; + /* The name needs to be at the end because it is dynamically sized. */ + char name[]; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -350,11 +433,19 @@ struct vm_area_struct { /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. + * + * For private anonymous mappings, a pointer to a null terminated string + * containing the name given to the vma, or NULL if unnamed. */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; + + union { + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; + /* Serialized by mmap_sem. 
*/ + struct anon_vma_name *anon_name; + }; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma @@ -387,17 +478,6 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; -struct core_thread { - struct task_struct *task; - struct core_thread *next; -}; - -struct core_state { - atomic_t nr_threads; - struct core_thread dumper; - struct completion startup; -}; - struct kioctx_table; struct mm_struct { struct { @@ -518,8 +598,6 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access */ - struct core_state *core_state; /* coredumping support */ - #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; @@ -569,9 +647,12 @@ struct mm_struct { atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ - bool tlb_flush_batched; + atomic_t tlb_flush_batched; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_PREEMPT_RT + struct rcu_head delayed_drop; +#endif #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif @@ -611,90 +692,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_finish_mmu(struct mmu_gather *tlb); -static inline void init_tlb_flush_pending(struct mm_struct *mm) -{ - atomic_set(&mm->tlb_flush_pending, 0); -} - -static inline void inc_tlb_flush_pending(struct mm_struct *mm) -{ - atomic_inc(&mm->tlb_flush_pending); - /* - * The only time this value is relevant is when there are indeed pages - * to flush. And we'll only flush pages after changing them, which - * requires the PTL. - * - * So the ordering here is: - * - * atomic_inc(&mm->tlb_flush_pending); - * spin_lock(&ptl); - * ... - * set_pte_at(); - * spin_unlock(&ptl); - * - * spin_lock(&ptl) - * mm_tlb_flush_pending(); - * .... 
- * spin_unlock(&ptl); - * - * flush_tlb_range(); - * atomic_dec(&mm->tlb_flush_pending); - * - * Where the increment if constrained by the PTL unlock, it thus - * ensures that the increment is visible if the PTE modification is - * visible. After all, if there is no PTE modification, nobody cares - * about TLB flushes either. - * - * This very much relies on users (mm_tlb_flush_pending() and - * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and - * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc - * locks (PPC) the unlock of one doesn't order against the lock of - * another PTL. - * - * The decrement is ordered by the flush_tlb_range(), such that - * mm_tlb_flush_pending() will not return false unless all flushes have - * completed. - */ -} - -static inline void dec_tlb_flush_pending(struct mm_struct *mm) -{ - /* - * See inc_tlb_flush_pending(). - * - * This cannot be smp_mb__before_atomic() because smp_mb() simply does - * not order against TLB invalidate completion, which is what we need. - * - * Therefore we must rely on tlb_flush_*() to guarantee order. - */ - atomic_dec(&mm->tlb_flush_pending); -} - -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) -{ - /* - * Must be called after having acquired the PTL; orders against that - * PTLs release and therefore ensures that if we observe the modified - * PTE we must also observe the increment from inc_tlb_flush_pending(). - * - * That is, it only guarantees to return true if there is a flush - * pending for _this_ PTL. - */ - return atomic_read(&mm->tlb_flush_pending); -} - -static inline bool mm_tlb_flush_nested(struct mm_struct *mm) -{ - /* - * Similar to mm_tlb_flush_pending(), we must have acquired the PTL - * for which there is a TLB flush pending in order to guarantee - * we've seen both that PTE modification and the increment. 
- * - * (no requirement on actually still holding the PTL, that is irrelevant) - */ - return atomic_read(&mm->tlb_flush_pending) > 1; -} - struct vm_fault; /** @@ -809,4 +806,49 @@ typedef struct { unsigned long val; } swp_entry_t; +/** + * enum fault_flag - Fault flag definitions. + * @FAULT_FLAG_WRITE: Fault was a write fault. + * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. + * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. + * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. + * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. + * @FAULT_FLAG_TRIED: The fault has been tried once. + * @FAULT_FLAG_USER: The fault originated in userspace. + * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. + * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. + * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + * + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify + * whether we would allow page faults to retry by specifying these two + * fault flags correctly. Currently there can be three legal combinations: + * + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and + * this is the first try + * + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and + * we've already tried at least once + * + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry + * + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never + * be used. Note that page faults can be allowed to retry for multiple times, + * in which case we'll have an initial fault with flags (a) then later on + * continuous faults with flags (b). We should always try to detect pending + * signals before a retry to make sure the continuous page faults can still be + * interrupted if necessary. 
+ */ +enum fault_flag { + FAULT_FLAG_WRITE = 1 << 0, + FAULT_FLAG_MKWRITE = 1 << 1, + FAULT_FLAG_ALLOW_RETRY = 1 << 2, + FAULT_FLAG_RETRY_NOWAIT = 1 << 3, + FAULT_FLAG_KILLABLE = 1 << 4, + FAULT_FLAG_TRIED = 1 << 5, + FAULT_FLAG_USER = 1 << 6, + FAULT_FLAG_REMOTE = 1 << 7, + FAULT_FLAG_INSTRUCTION = 1 << 8, + FAULT_FLAG_INTERRUPTIBLE = 1 << 9, +}; + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0c0c9a0fdf57..7afb57cab00b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -15,7 +15,7 @@ #include <linux/mmc/card.h> #include <linux/mmc/pm.h> #include <linux/dma-direction.h> -#include <linux/keyslot-manager.h> +#include <linux/blk-crypto-profile.h> struct mmc_ios { unsigned int clock; /* clock rate */ @@ -162,6 +162,9 @@ struct mmc_host_ops { /* Prepare HS400 target operating frequency depending host driver */ int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios); + /* Execute HS400 tuning depending host driver */ + int (*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card); + /* Prepare switch to DDR during the HS400 init sequence */ int (*hs400_prepare_ddr)(struct mmc_host *host); @@ -492,7 +495,7 @@ struct mmc_host { /* Inline encryption support */ #ifdef CONFIG_MMC_CRYPTO - struct blk_keyslot_manager ksm; + struct blk_crypto_profile crypto_profile; #endif /* Host Software Queue support */ @@ -634,5 +637,6 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data) int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); +int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); #endif /* LINUX_MMC_HOST_H */ diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h deleted file mode 100644 index 1d42872d22f3..000000000000 --- a/include/linux/mmc/sdhci-pci-data.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ 
-#ifndef LINUX_MMC_SDHCI_PCI_DATA_H -#define LINUX_MMC_SDHCI_PCI_DATA_H - -struct pci_dev; - -struct sdhci_pci_data { - struct pci_dev *pdev; - int slotno; - int rst_n_gpio; /* Set to -EINVAL if unused */ - int cd_gpio; /* Set to -EINVAL if unused */ - int (*setup)(struct sdhci_pci_data *data); - void (*cleanup)(struct sdhci_pci_data *data); -}; - -extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, - int slotno); -#endif diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index a85c9f0bd470..53f0efa0bccf 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -105,6 +105,7 @@ #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 #define SDIO_DEVICE_ID_MEDIATEK_MT7668 0x7668 +#define SDIO_DEVICE_ID_MEDIATEK_MT7961 0x7961 #define SDIO_VENDOR_ID_MICROCHIP_WILC 0x0296 #define SDIO_DEVICE_ID_MICROCHIP_WILC1000 0x5347 diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 1935d4c72d10..d7285f8148a3 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_BUG_ON_FOLIO(cond, folio) \ + do { \ + if (unlikely(cond)) { \ + dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\ + BUG(); \ + } \ + } while (0) #define VM_BUG_ON_VMA(cond, vma) \ do { \ if (unlikely(cond)) { \ @@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) @@ -55,11 +73,13 @@ void dump_mm(const struct 
mm_struct *mm); #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) +#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond) #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond) #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6a1d79d84675..aed44e9b5d89 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -199,6 +199,7 @@ enum node_stat_item { NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_THROTTLED_WRITTEN, /* NR_WRITTEN while reclaim throttled */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ @@ -272,6 +273,14 @@ enum lru_list { NR_LRU_LISTS }; +enum vmscan_throttle_state { + VMSCAN_THROTTLE_WRITEBACK, + VMSCAN_THROTTLE_ISOLATED, + VMSCAN_THROTTLE_NOPROGRESS, + VMSCAN_THROTTLE_CONGESTED, + NR_VMSCAN_THROTTLE, +}; + #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) @@ -841,6 +850,13 @@ typedef struct pglist_data { int node_id; wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; + + /* workqueues for throttling reclaim for different reasons. 
*/ + wait_queue_head_t reclaim_wait[NR_VMSCAN_THROTTLE]; + + atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ + unsigned long nr_reclaim_start; /* nr pages written while throttled + * when throttling started. */ struct task_struct *kswapd; /* Protected by mem_hotplug_begin/end() */ int kswapd_order; @@ -1031,6 +1047,15 @@ static inline int is_highmem_idx(enum zone_type idx) #endif } +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void); +#else +static inline bool has_managed_dma(void) +{ + return false; +} +#endif + /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references @@ -1220,6 +1245,28 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #define for_each_zone_zonelist(zone, z, zlist, highidx) \ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) +/* Whether the 'nodes' are all movable nodes */ +static inline bool movable_only_nodes(nodemask_t *nodes) +{ + struct zonelist *zonelist; + struct zoneref *z; + int nid; + + if (nodes_empty(*nodes)) + return false; + + /* + * We can chose arbitrary node from the nodemask to get a + * zonelist as they are interlinked. We just need to find + * at least one zone that can satisfy kernel allocations. + */ + nid = first_node(*nodes); + zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; + z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); + return (!z->zone) ? 
true : false; +} + + #ifdef CONFIG_SPARSEMEM #include <asm/sparsemem.h> #endif @@ -1481,7 +1528,7 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - ms = __nr_to_section(pfn_to_section_nr(pfn)); + ms = __pfn_to_section(pfn); if (!valid_section(ms)) return 0; /* @@ -1496,7 +1543,7 @@ static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - return present_section(__nr_to_section(pfn_to_section_nr(pfn))); + return present_section(__pfn_to_section(pfn)); } static inline unsigned long next_present_section_nr(unsigned long section_nr) diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h new file mode 100644 index 000000000000..ee5a217de2a8 --- /dev/null +++ b/include/linux/mnt_idmapping.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MNT_IDMAPPING_H +#define _LINUX_MNT_IDMAPPING_H + +#include <linux/types.h> +#include <linux/uidgid.h> + +struct user_namespace; +/* + * Carries the initial idmapping of 0:0:4294967295 which is an identity + * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is + * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. + */ +extern struct user_namespace init_user_ns; + +/** + * initial_idmapping - check whether this is the initial mapping + * @ns: idmapping to check + * + * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, + * [...], 1000 to 1000 [...]. + * + * Return: true if this is the initial mapping, false if not. + */ +static inline bool initial_idmapping(const struct user_namespace *ns) +{ + return ns == &init_user_ns; +} + +/** + * no_idmapping - check whether we can skip remapping a kuid/gid + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * This function can be used to check whether a remapping between two + * idmappings is required. 
+ * An idmapped mount is a mount that has an idmapping attached to it that + * is different from the filsystem's idmapping and the initial idmapping. + * If the initial mapping is used or the idmapping of the mount and the + * filesystem are identical no remapping is required. + * + * Return: true if remapping can be skipped, false if not. + */ +static inline bool no_idmapping(const struct user_namespace *mnt_userns, + const struct user_namespace *fs_userns) +{ + return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; +} + +/** + * mapped_kuid_fs - map a filesystem kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this + * function when preparing a @kuid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * from_kuid() so we can simply retrieve the value via __kuid_val() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. + */ +static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + if (initial_idmapping(fs_userns)) + uid = __kuid_val(kuid); + else + uid = from_kuid(fs_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + return make_kuid(mnt_userns, uid); +} + +/** + * mapped_kgid_fs - map a filesystem kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Take a @kgid and remap it from @fs_userns into @mnt_userns. 
Use this + * function when preparing a @kgid to be reported to userspace. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * from_kgid() so we can simply retrieve the value via __kgid_val() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + if (initial_idmapping(fs_userns)) + gid = __kgid_val(kgid); + else + gid = from_kgid(fs_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + return make_kgid(mnt_userns, gid); +} + +/** + * mapped_kuid_user - map a user kuid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kuid : kuid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this + * function when preparing a @kuid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kuid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kuid won't change when calling + * make_kuid() so we can simply retrieve the value via KUIDT_INIT() + * directly. + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is + * returned. 
+ */ +static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kuid; + uid = from_kuid(mnt_userns, kuid); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); +} + +/** + * mapped_kgid_user - map a user kgid into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @kgid : kgid to be mapped + * + * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this + * function when preparing a @kgid to be written to disk or inode. + * + * If no_idmapping() determines that this is not an idmapped mount we can + * simply return @kgid unchanged. + * If initial_idmapping() tells us that the filesystem is not mounted with an + * idmapping we know the value of @kgid won't change when calling + * make_kgid() so we can simply retrieve the value via KGIDT_INIT() + * directly. + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is + * returned. + */ +static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + gid_t gid; + + if (no_idmapping(mnt_userns, fs_userns)) + return kgid; + gid = from_kgid(mnt_userns, kgid); + if (gid == (gid_t)-1) + return INVALID_GID; + if (initial_idmapping(fs_userns)) + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); +} + +/** + * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsuid. A common example is initializing the i_uid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. 
Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsuid mapped up according to @mnt_userns. + */ +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) +{ + return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); +} + +/** + * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsgid. A common example is initializing the i_gid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsgid mapped up according to @mnt_userns. + */ +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) +{ + return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); +} + +#endif /* _LINUX_MNT_IDMAPPING_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index ae2e75d15b21..4bb71979a8fd 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -895,4 +895,18 @@ struct dfl_device_id { kernel_ulong_t driver_data; }; +/* ISHTP (Integrated Sensor Hub Transport Protocol) */ + +#define ISHTP_MODULE_PREFIX "ishtp:" + +/** + * struct ishtp_device_id - ISHTP device identifier + * @guid: GUID of the device. 
+ * @driver_data: pointer to driver specific data + */ +struct ishtp_device_id { + guid_t guid; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/module.h b/include/linux/module.h index c9f1200b2312..1e135fd5c076 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -290,7 +290,8 @@ extern typeof(name) __mod_##type##__##name##_device_table \ * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) -#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#define _MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#define MODULE_IMPORT_NS(ns) _MODULE_IMPORT_NS(ns) struct notifier_block; @@ -595,9 +596,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); -extern void __noreturn __module_put_and_exit(struct module *mod, +extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); -#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) +#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); @@ -790,7 +791,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb) return 0; } -#define module_put_and_exit(code) do_exit(code) +#define module_put_and_kthread_exit(code) kthread_exit(code) static inline void print_modules(void) { diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 8071148f29a6..e05ee9f001ff 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -12,6 +12,7 @@ /** * struct vif_device - interface representor for multicast routing * @dev: network device being used + * @dev_tracker: refcount tracker for @dev reference * @bytes_in: statistic; bytes ingressing * @bytes_out: 
statistic; bytes egresing * @pkt_in: statistic; packets ingressing @@ -26,6 +27,7 @@ */ struct vif_device { struct net_device *dev; + netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; unsigned long rate_limit; diff --git a/include/linux/msi.h b/include/linux/msi.h index 49cf6eb222e7..fc918a658d48 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -2,7 +2,23 @@ #ifndef LINUX_MSI_H #define LINUX_MSI_H -#include <linux/kobject.h> +/* + * This header file contains MSI data structures and functions which are + * only relevant for: + * - Interrupt core code + * - PCI/MSI core code + * - MSI interrupt domain implementations + * - IOMMU, low level VFIO, NTB and other justified exceptions + * dealing with low level MSI details. + * + * Regular device drivers have no business with any of these functions and + * especially storing MSI descriptor pointers in random code is considered + * abuse. The only function which is relevant for drivers is msi_get_virq(). 
+ */ + +#include <linux/cpumask.h> +#include <linux/xarray.h> +#include <linux/mutex.h> #include <linux/list.h> #include <asm/msi.h> @@ -56,6 +72,8 @@ struct irq_data; struct msi_desc; struct pci_dev; struct platform_msi_priv_data; +struct device_attribute; + void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); @@ -69,62 +87,59 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, struct msi_msg *msg); /** - * platform_msi_desc - Platform device specific msi descriptor data - * @msi_priv_data: Pointer to platform private data - * @msi_index: The index of the MSI descriptor for multi MSI - */ -struct platform_msi_desc { - struct platform_msi_priv_data *msi_priv_data; - u16 msi_index; -}; - -/** - * fsl_mc_msi_desc - FSL-MC device specific msi descriptor data - * @msi_index: The index of the MSI descriptor + * pci_msi_desc - PCI/MSI specific MSI descriptor data + * + * @msi_mask: [PCI MSI] MSI cached mask bits + * @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits + * @is_msix: [PCI MSI/X] True if MSI-X + * @multiple: [PCI MSI/X] log2 num of messages allocated + * @multi_cap: [PCI MSI/X] log2 num of messages supported + * @can_mask: [PCI MSI/X] Masking supported? 
+ * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit + * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq + * @mask_pos: [PCI MSI] Mask register position + * @mask_base: [PCI MSI-X] Mask register base address */ -struct fsl_mc_msi_desc { - u16 msi_index; +struct pci_msi_desc { + union { + u32 msi_mask; + u32 msix_ctrl; + }; + struct { + u8 is_msix : 1; + u8 multiple : 3; + u8 multi_cap : 3; + u8 can_mask : 1; + u8 is_64 : 1; + u8 is_virtual : 1; + unsigned default_irq; + } msi_attrib; + union { + u8 mask_pos; + void __iomem *mask_base; + }; }; -/** - * ti_sci_inta_msi_desc - TISCI based INTA specific msi descriptor data - * @dev_index: TISCI device index - */ -struct ti_sci_inta_msi_desc { - u16 dev_index; -}; +#define MSI_MAX_INDEX ((unsigned int)USHRT_MAX) /** * struct msi_desc - Descriptor structure for MSI based interrupts - * @list: List head for management * @irq: The base interrupt number * @nvec_used: The number of vectors used * @dev: Pointer to the device which uses this descriptor * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor + * @sysfs_attr: Pointer to sysfs device attribute * * @write_msi_msg: Callback that may be called when the MSI message * address or data changes * @write_msi_msg_data: Data parameter for the callback. * - * @msi_mask: [PCI MSI] MSI cached mask bits - * @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits - * @is_msix: [PCI MSI/X] True if MSI-X - * @multiple: [PCI MSI/X] log2 num of messages allocated - * @multi_cap: [PCI MSI/X] log2 num of messages supported - * @maskbit: [PCI MSI/X] Mask-Pending bit supported? 
- * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit - * @entry_nr: [PCI MSI/X] Entry which is described by this descriptor - * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq - * @mask_pos: [PCI MSI] Mask register position - * @mask_base: [PCI MSI-X] Mask register base address - * @platform: [platform] Platform device specific msi descriptor data - * @fsl_mc: [fsl-mc] FSL MC device specific msi descriptor data - * @inta: [INTA] TISCI based INTA specific msi descriptor data + * @msi_index: Index of the msi descriptor + * @pci: PCI specific msi descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ - struct list_head list; unsigned int irq; unsigned int nvec_used; struct device *dev; @@ -133,61 +148,71 @@ struct msi_desc { #ifdef CONFIG_IRQ_MSI_IOMMU const void *iommu_cookie; #endif +#ifdef CONFIG_SYSFS + struct device_attribute *sysfs_attrs; +#endif void (*write_msi_msg)(struct msi_desc *entry, void *data); void *write_msi_msg_data; - union { - /* PCI MSI/X specific data */ - struct { - union { - u32 msi_mask; - u32 msix_ctrl; - }; - struct { - u8 is_msix : 1; - u8 multiple : 3; - u8 multi_cap : 3; - u8 maskbit : 1; - u8 is_64 : 1; - u8 is_virtual : 1; - u16 entry_nr; - unsigned default_irq; - } msi_attrib; - union { - u8 mask_pos; - void __iomem *mask_base; - }; - }; - - /* - * Non PCI variants add their data structure here. New - * entries need to use a named structure. We want - * proper name spaces for this. The PCI part is - * anonymous for now as it would require an immediate - * tree wide cleanup. - */ - struct platform_msi_desc platform; - struct fsl_mc_msi_desc fsl_mc; - struct ti_sci_inta_msi_desc inta; - }; + u16 msi_index; + struct pci_msi_desc pci; +}; + +/* + * Filter values for the MSI descriptor iterators and accessor functions. 
+ */ +enum msi_desc_filter { + /* All descriptors */ + MSI_DESC_ALL, + /* Descriptors which have no interrupt associated */ + MSI_DESC_NOTASSOCIATED, + /* Descriptors which have an interrupt associated */ + MSI_DESC_ASSOCIATED, }; -/* Helpers to hide struct msi_desc implementation details */ +/** + * msi_device_data - MSI per device data + * @properties: MSI properties which are interesting to drivers + * @platform_data: Platform-MSI specific data + * @mutex: Mutex protecting the MSI descriptor store + * @__store: Xarray for storing MSI descriptor pointers + * @__iter_idx: Index to search the next entry for iterators + */ +struct msi_device_data { + unsigned long properties; + struct platform_msi_priv_data *platform_data; + struct mutex mutex; + struct xarray __store; + unsigned long __iter_idx; +}; + +int msi_setup_device_data(struct device *dev); + +unsigned int msi_get_virq(struct device *dev, unsigned int index); +void msi_lock_descs(struct device *dev); +void msi_unlock_descs(struct device *dev); + +struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter); +struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); + +/** + * msi_for_each_desc - Iterate the MSI descriptors + * + * @desc: struct msi_desc pointer used as iterator + * @dev: struct device pointer - device to iterate + * @filter: Filter for descriptor selection + * + * Notes: + * - The loop must be protected with a msi_lock_descs()/msi_unlock_descs() + * pair. + * - It is safe to remove a retrieved MSI descriptor in the loop. 
+ */ +#define msi_for_each_desc(desc, dev, filter) \ + for ((desc) = msi_first_desc((dev), (filter)); (desc); \ + (desc) = msi_next_desc((dev), (filter))) + #define msi_desc_to_dev(desc) ((desc)->dev) -#define dev_to_msi_list(dev) (&(dev)->msi_list) -#define first_msi_entry(dev) \ - list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list) -#define for_each_msi_entry(desc, dev) \ - list_for_each_entry((desc), dev_to_msi_list((dev)), list) -#define for_each_msi_entry_safe(desc, tmp, dev) \ - list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) -#define for_each_msi_vector(desc, __irq, dev) \ - for_each_msi_entry((desc), (dev)) \ - if ((desc)->irq) \ - for (__irq = (desc)->irq; \ - __irq < ((desc)->irq + (desc)->nvec_used); \ - __irq++) #ifdef CONFIG_IRQ_MSI_IOMMU static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) @@ -213,36 +238,33 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, #endif #ifdef CONFIG_PCI_MSI -#define first_pci_msi_entry(pdev) first_msi_entry(&(pdev)->dev) -#define for_each_pci_msi_entry(desc, pdev) \ - for_each_msi_entry((desc), &(pdev)->dev) - struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); -void *msi_desc_to_pci_sysdata(struct msi_desc *desc); void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); #else /* CONFIG_PCI_MSI */ -static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) -{ - return NULL; -} static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) { } #endif /* CONFIG_PCI_MSI */ -struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, - const struct irq_affinity_desc *affinity); -void free_msi_entry(struct msi_desc *entry); +int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); +void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, + unsigned int first_index, unsigned int last_index); + +/** + * msi_free_msi_descs - Free MSI descriptors of a device + * @dev: Device to free 
the descriptors + */ +static inline void msi_free_msi_descs(struct device *dev) +{ + msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX); +} + void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); -const struct attribute_group **msi_populate_sysfs(struct device *dev); -void msi_destroy_sysfs(struct device *dev, - const struct attribute_group **msi_irq_groups); - /* * The arch hooks to setup up msi irqs. Default functions are implemented * as weak symbols so that they /can/ be overriden by architecture specific @@ -256,25 +278,20 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); -#else -static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - WARN_ON_ONCE(1); - return -ENODEV; -} - -static inline void arch_teardown_msi_irqs(struct pci_dev *dev) -{ - WARN_ON_ONCE(1); -} -#endif +#ifdef CONFIG_SYSFS +int msi_device_populate_sysfs(struct device *dev); +void msi_device_destroy_sysfs(struct device *dev); +#else /* CONFIG_SYSFS */ +static inline int msi_device_populate_sysfs(struct device *dev) { return 0; } +static inline void msi_device_destroy_sysfs(struct device *dev) { } +#endif /* !CONFIG_SYSFS */ +#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */ /* - * The restore hooks are still available as they are useful even - * for fully irq domain based setups. Courtesy to XEN/X86. + * The restore hook is still available even for fully irq domain based + * setups. Courtesy to XEN/X86. 
*/ -void arch_restore_msi_irqs(struct pci_dev *dev); -void default_restore_msi_irqs(struct pci_dev *dev); +bool arch_restore_msi_irqs(struct pci_dev *dev); #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN @@ -294,20 +311,17 @@ struct msi_domain_info; * @msi_free: Domain specific function to free a MSI interrupts * @msi_check: Callback for verification of the domain/info/dev data * @msi_prepare: Prepare the allocation of the interrupts in the domain - * @msi_finish: Optional callback to finalize the allocation * @set_desc: Set the msi descriptor for an interrupt - * @handle_error: Optional error handler if the allocation fails * @domain_alloc_irqs: Optional function to override the default allocation * function. * @domain_free_irqs: Optional function to override the default free * function. * - * @get_hwirq, @msi_init and @msi_free are callbacks used by - * msi_create_irq_domain() and related interfaces + * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying + * irqdomain. * - * @msi_check, @msi_prepare, @msi_finish, @set_desc and @handle_error - * are callbacks used by msi_domain_alloc_irqs() and related - * interfaces which are based on msi_desc. + * @msi_check, @msi_prepare and @set_desc are callbacks used by + * msi_domain_alloc/free_irqs(). * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). 
This @@ -341,11 +355,8 @@ struct msi_domain_ops { int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); - void (*msi_finish)(msi_alloc_info_t *arg, int retval); void (*set_desc)(msi_alloc_info_t *arg, struct msi_desc *desc); - int (*handle_error)(struct irq_domain *domain, - struct msi_desc *desc, int error); int (*domain_alloc_irqs)(struct irq_domain *domain, struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, @@ -399,6 +410,14 @@ enum { MSI_FLAG_MUST_REACTIVATE = (1 << 5), /* Is level-triggered capable, using two messages */ MSI_FLAG_LEVEL_CAPABLE = (1 << 6), + /* Populate sysfs on alloc() and destroy it on free() */ + MSI_FLAG_DEV_SYSFS = (1 << 7), + /* MSI-X entries must be contiguous */ + MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8), + /* Allocate simple MSI descriptors */ + MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9), + /* Free MSI descriptors */ + MSI_FLAG_FREE_MSI_DESCS = (1 << 10), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, @@ -409,9 +428,12 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct irq_domain *parent); int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); +int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, + int nvec); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); +void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); @@ -440,20 +462,17 @@ __platform_msi_create_device_domain(struct device *dev, #define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \ __platform_msi_create_device_domain(dev, nvec, true, write, ops, data) -int 
platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs); -void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nvec); +int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN -void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int pci_msi_domain_check_cap(struct irq_domain *domain, - struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); diff --git a/include/linux/mtd/latch-addr-flash.h b/include/linux/mtd/latch-addr-flash.h deleted file mode 100644 index e94b8e128074..000000000000 --- a/include/linux/mtd/latch-addr-flash.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Interface for NOR flash driver whose high address lines are latched - * - * Copyright © 2008 MontaVista Software, Inc. <[email protected]> - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ -#ifndef __LATCH_ADDR_FLASH__ -#define __LATCH_ADDR_FLASH__ - -struct map_info; -struct mtd_partition; - -struct latch_addr_flash_data { - unsigned int width; - unsigned int size; - - int (*init)(void *data, int cs); - void (*done)(void *data); - void (*set_window)(unsigned long offset, void *data); - void *data; - - unsigned int nr_parts; - struct mtd_partition *parts; -}; - -#endif diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 88227044fc86..1ffa933121f6 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -72,8 +72,6 @@ struct mtd_oob_ops { uint8_t *oobbuf; }; -#define MTD_MAX_OOBFREE_ENTRIES_LARGE 32 -#define MTD_MAX_ECCPOS_ENTRIES_LARGE 640 /** * struct mtd_oob_region - oob region definition * @offset: region offset @@ -713,4 +711,7 @@ static inline int mtd_is_bitflip_or_eccerr(int err) { unsigned mtd_mmap_capabilities(struct mtd_info *mtd); +extern char *mtd_expert_analysis_warning; +extern bool mtd_expert_analysis_mode; + #endif /* __MTD_MTD_H__ */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index b2f9dd3cbd69..5b88cd51fadb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1539,6 +1539,8 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, bool force_8bit, bool check_only); int nand_write_data_op(struct nand_chip *chip, const void *buf, unsigned int len, bool force_8bit); +int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, + int oob_required, int page); /* Scan and identify a NAND device */ int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips, diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index f67457748ed8..fc90fce26e33 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -371,7 +371,6 @@ struct spi_nor_flash_parameter; * @bouncebuf_size: size of the bounce buffer * @info: SPI NOR part JEDEC MFR ID and other info * @manufacturer: SPI NOR 
manufacturer - * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector * @read_opcode: the read opcode @@ -401,7 +400,6 @@ struct spi_nor { size_t bouncebuf_size; const struct flash_info *info; const struct spi_nor_manufacturer *manufacturer; - u32 page_size; u8 addr_width; u8 erase_opcode; u8 read_opcode; diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h index 5fc6bb2fefad..7a09b040ac39 100644 --- a/include/linux/mux/consumer.h +++ b/include/linux/mux/consumer.h @@ -16,10 +16,25 @@ struct device; struct mux_control; unsigned int mux_control_states(struct mux_control *mux); -int __must_check mux_control_select(struct mux_control *mux, - unsigned int state); -int __must_check mux_control_try_select(struct mux_control *mux, - unsigned int state); +int __must_check mux_control_select_delay(struct mux_control *mux, + unsigned int state, + unsigned int delay_us); +int __must_check mux_control_try_select_delay(struct mux_control *mux, + unsigned int state, + unsigned int delay_us); + +static inline int __must_check mux_control_select(struct mux_control *mux, + unsigned int state) +{ + return mux_control_select_delay(mux, state, 0); +} + +static inline int __must_check mux_control_try_select(struct mux_control *mux, + unsigned int state) +{ + return mux_control_try_select_delay(mux, state, 0); +} + int mux_control_deselect(struct mux_control *mux); struct mux_control *mux_control_get(struct device *dev, const char *mux_name); diff --git a/include/linux/mux/driver.h b/include/linux/mux/driver.h index 627a2c6bc02d..18824064f8c0 100644 --- a/include/linux/mux/driver.h +++ b/include/linux/mux/driver.h @@ -12,6 +12,7 @@ #include <dt-bindings/mux/mux.h> #include <linux/device.h> +#include <linux/ktime.h> #include <linux/semaphore.h> struct mux_chip; @@ -33,6 +34,7 @@ struct mux_control_ops { * @states: The number of mux controller states. 
* @idle_state: The mux controller state to use when inactive, or one * of MUX_IDLE_AS_IS and MUX_IDLE_DISCONNECT. + * @last_change: Timestamp of last change * * Mux drivers may only change @states and @idle_state, and may only do so * between allocation and registration of the mux controller. Specifically, @@ -47,6 +49,8 @@ struct mux_control { unsigned int states; int idle_state; + + ktime_t last_change; }; /** diff --git a/include/linux/nd.h b/include/linux/nd.h index ee9ad76afbba..8a8c63edb1b2 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -88,7 +88,7 @@ struct nd_namespace_pmem { struct nd_namespace_io nsio; unsigned long lbasize; char *alt_name; - u8 *uuid; + uuid_t *uuid; int id; }; @@ -105,7 +105,7 @@ struct nd_namespace_pmem { struct nd_namespace_blk { struct nd_namespace_common common; char *alt_name; - u8 *uuid; + uuid_t *uuid; int id; unsigned long lbasize; resource_size_t size; diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index e32f6712aee0..1289593411d3 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -26,6 +26,11 @@ enum iidc_reset_type { IIDC_GLOBR, }; +enum iidc_rdma_protocol { + IIDC_RDMA_PROTOCOL_IWARP = BIT(0), + IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), +}; + #define IIDC_MAX_USER_PRIORITY 8 /* Struct to hold per RDMA Qset info */ @@ -70,8 +75,6 @@ int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); -#define IIDC_RDMA_ROCE_NAME "roce" - /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an * instance of this struct dedicated to it. 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d79163208dfd..3213c7227b59 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -48,6 +48,7 @@ #include <uapi/linux/pkt_cls.h> #include <linux/hashtable.h> #include <linux/rbtree.h> +#include <net/net_trackers.h> struct netpoll_info; struct device; @@ -299,7 +300,6 @@ enum netdev_state_t { __LINK_STATE_TESTING, }; - struct gro_list { struct list_head list; int count; @@ -579,6 +579,8 @@ struct netdev_queue { * read-mostly part */ struct net_device *dev; + netdevice_tracker dev_tracker; + struct Qdisc __rcu *qdisc; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS @@ -592,7 +594,7 @@ struct netdev_queue { * Number of TX timeouts for this queue * (/sys/class/net/DEV/Q/trans_timeout) */ - unsigned long trans_timeout; + atomic_long_t trans_timeout; /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; @@ -734,6 +736,8 @@ struct netdev_rx_queue { #endif struct kobject kobj; struct net_device *dev; + netdevice_tracker dev_tracker; + #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif @@ -916,6 +920,7 @@ enum tc_setup_type { TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, TC_SETUP_QDISC_HTB, + TC_SETUP_ACT, }; /* These structures hold the attributes of bpf state that are being passed @@ -1297,11 +1302,6 @@ struct netdev_net_notifier { * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. - * void (*ndo_change_proto_down)(struct net_device *dev, - * bool proto_down); - * This function is used to pass protocol port error state information - * to the switch driver. The switch driver can react to the proto_down - * by doing a phys down on the associated switch port. * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); * This function is used to get egress tunnel information for given skb. 
* This is useful for retrieving outer tunnel header parameters while @@ -1542,8 +1542,6 @@ struct net_device_ops { int queue_index, u32 maxrate); int (*ndo_get_iflink)(const struct net_device *dev); - int (*ndo_change_proto_down)(struct net_device *dev, - bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, @@ -1612,6 +1610,7 @@ struct net_device_ops { * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) + * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1646,6 +1645,7 @@ enum netdev_priv_flags { IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -1869,6 +1870,7 @@ enum netdev_ml_priv_type { * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @dev_refcnt: Number of references to this device + * @refcnt_tracker: Tracker directory for tracked references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * @@ -1916,7 +1918,6 @@ enum netdev_ml_priv_type { * @sfp_bus: attached &struct sfp_bus structure. 
* * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock - * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the @@ -1937,10 +1938,16 @@ enum netdev_ml_priv_type { * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * - * @nested_level: Used as as a parameter of spin_lock_nested() of + * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. + * @gro_max_size: Maximum size of aggregated packet in generic + * receive offload (GRO) + * + * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. + * @linkwatch_dev_tracker: refcount tracker used by linkwatch. + * @watchdog_dev_tracker: refcount tracker used by watchdog. * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
@@ -1980,7 +1987,7 @@ struct net_device { /* Read-mostly cache-line for fast-path access */ unsigned int flags; - unsigned int priv_flags; + unsigned long long priv_flags; const struct net_device_ops *netdev_ops; int ifindex; unsigned short gflags; @@ -2093,7 +2100,7 @@ struct net_device { #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; #endif -#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) +#if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif struct in_device __rcu *ip_ptr; @@ -2117,7 +2124,7 @@ struct net_device { * Cache lines mostly used on receive path (including eth_type_trans()) */ /* Interface address info used in eth_type_trans() */ - unsigned char *dev_addr; + const unsigned char *dev_addr; struct netdev_rx_queue *_rx; unsigned int num_rx_queues; @@ -2126,6 +2133,8 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; +#define GRO_MAX_SIZE 65536 + unsigned int gro_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -2161,6 +2170,9 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); @@ -2178,6 +2190,7 @@ struct net_device { #else refcount_t dev_refcnt; #endif + struct ref_tracker_dir refcnt_tracker; struct list_head link_watch_list; @@ -2250,7 +2263,6 @@ struct net_device { struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; - struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; unsigned threaded:1; @@ -2266,6 +2278,10 @@ struct net_device { /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; + + u8 dev_addr_shadow[MAX_ADDR_LEN]; + netdevice_tracker linkwatch_dev_tracker; + netdevice_tracker watchdog_dev_tracker; }; #define to_net_dev(d) container_of(d, struct 
net_device, dev) @@ -2360,13 +2376,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, #define netdev_lockdep_set_classes(dev) \ { \ static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_running_key; \ static struct lock_class_key qdisc_xmit_lock_key; \ static struct lock_class_key dev_addr_list_lock_key; \ unsigned int i; \ \ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - (dev)->qdisc_running_key = &qdisc_running_key; \ lockdep_set_class(&(dev)->addr_list_lock, \ &dev_addr_list_lock_key); \ for (i = 0; i < (dev)->num_tx_queues; i++) \ @@ -2520,113 +2534,11 @@ static inline void netif_napi_del(struct napi_struct *napi) synchronize_net(); } -struct napi_gro_cb { - /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; - - /* Length of frag0. */ - unsigned int frag0_len; - - /* This indicates where we are processing relative to skb->data. */ - int data_offset; - - /* This is non-zero if the packet cannot be merged with the new skb. */ - u16 flush; - - /* Save the IP ID here and check when we get to the transport layer */ - u16 flush_id; - - /* Number of segments aggregated. */ - u16 count; - - /* Start offset for remote checksum offload */ - u16 gro_remcsum_start; - - /* jiffies when first packet was created/queued */ - unsigned long age; - - /* Used in ipv6_gro_receive() and foo-over-udp */ - u16 proto; - - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow:1; - - /* Used in tunnel GRO receive */ - u8 encap_mark:1; - - /* GRO checksum is valid */ - u8 csum_valid:1; - - /* Number of checksums via CHECKSUM_UNNECESSARY */ - u8 csum_cnt:3; - - /* Free the skb? 
*/ - u8 free:2; -#define NAPI_GRO_FREE 1 -#define NAPI_GRO_FREE_STOLEN_HEAD 2 - - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; - - /* Used in GRE, set in fou/gue_gro_receive */ - u8 is_fou:1; - - /* Used to determine if flush_id can be ignored */ - u8 is_atomic:1; - - /* Number of gro_receive callbacks this packet already went through */ - u8 recursion_counter:4; - - /* GRO is done by frag_list pointer chaining. */ - u8 is_flist:1; - - /* used to support CHECKSUM_COMPLETE for tunneling protocols */ - __wsum csum; - - /* used in skb_gro_receive() slow path */ - struct sk_buff *last; -}; - -#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) - -#define GRO_RECURSION_LIMIT 15 -static inline int gro_recursion_inc_test(struct sk_buff *skb) -{ - return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; -} - -typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); -static inline struct sk_buff *call_gro_receive(gro_receive_t cb, - struct list_head *head, - struct sk_buff *skb) -{ - if (unlikely(gro_recursion_inc_test(skb))) { - NAPI_GRO_CB(skb)->flush |= 1; - return NULL; - } - - return cb(head, skb); -} - -typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, - struct sk_buff *); -static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, - struct sock *sk, - struct list_head *head, - struct sk_buff *skb) -{ - if (unlikely(gro_recursion_inc_test(skb))) { - NAPI_GRO_CB(skb)->flush |= 1; - return NULL; - } - - return cb(sk, head, skb); -} - struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ + netdevice_tracker dev_tracker; int (*func) (struct sk_buff *, struct net_device *, struct packet_type *, @@ -2955,6 +2867,7 @@ struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); +bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void dev_close(struct net_device *dev); @@ -3004,254 +2917,7 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); -int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); -int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); - -static inline unsigned int skb_gro_offset(const struct sk_buff *skb) -{ - return NAPI_GRO_CB(skb)->data_offset; -} - -static inline unsigned int skb_gro_len(const struct sk_buff *skb) -{ - return skb->len - NAPI_GRO_CB(skb)->data_offset; -} - -static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) -{ - NAPI_GRO_CB(skb)->data_offset += len; -} - -static inline void *skb_gro_header_fast(struct sk_buff *skb, - unsigned int offset) -{ - return NAPI_GRO_CB(skb)->frag0 + offset; -} - -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) -{ - return NAPI_GRO_CB(skb)->frag0_len < hlen; -} - -static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; -} - -static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, - unsigned int 
offset) -{ - if (!pskb_may_pull(skb, hlen)) - return NULL; - - skb_gro_frag0_invalidate(skb); - return skb->data + offset; -} - -static inline void *skb_gro_network_header(struct sk_buff *skb) -{ - return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + - skb_network_offset(skb); -} - -static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (NAPI_GRO_CB(skb)->csum_valid) - NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, - csum_partial(start, len, 0)); -} - -/* GRO checksum functions. These are logical equivalents of the normal - * checksum functions (in skbuff.h) except that they operate on the GRO - * offsets and fields in sk_buff. - */ - -__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); - -static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) -{ - return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); -} - -static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, - bool zero_okay, - __sum16 check) -{ - return ((skb->ip_summed != CHECKSUM_PARTIAL || - skb_checksum_start_offset(skb) < - skb_gro_offset(skb)) && - !skb_at_gro_remcsum_start(skb) && - NAPI_GRO_CB(skb)->csum_cnt == 0 && - (!zero_okay || check)); -} - -static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, - __wsum psum) -{ - if (NAPI_GRO_CB(skb)->csum_valid && - !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) - return 0; - - NAPI_GRO_CB(skb)->csum = psum; - - return __skb_gro_checksum_complete(skb); -} - -static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) -{ - if (NAPI_GRO_CB(skb)->csum_cnt > 0) { - /* Consume a checksum from CHECKSUM_UNNECESSARY */ - NAPI_GRO_CB(skb)->csum_cnt--; - } else { - /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we - * verified a new top level checksum or an encapsulated one - * during GRO. This saves work if we fallback to normal path. 
- */ - __skb_incr_checksum_unnecessary(skb); - } -} - -#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ - compute_pseudo) \ -({ \ - __sum16 __ret = 0; \ - if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ - __ret = __skb_gro_checksum_validate_complete(skb, \ - compute_pseudo(skb, proto)); \ - if (!__ret) \ - skb_gro_incr_csum_unnecessary(skb); \ - __ret; \ -}) - -#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ - __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) - -#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ - compute_pseudo) \ - __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) - -#define skb_gro_checksum_simple_validate(skb) \ - __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) - -static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) -{ - return (NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid); -} - -static inline void __skb_gro_checksum_convert(struct sk_buff *skb, - __wsum pseudo) -{ - NAPI_GRO_CB(skb)->csum = ~pseudo; - NAPI_GRO_CB(skb)->csum_valid = 1; -} - -#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ -do { \ - if (__skb_gro_checksum_convert_check(skb)) \ - __skb_gro_checksum_convert(skb, \ - compute_pseudo(skb, proto)); \ -} while (0) - -struct gro_remcsum { - int offset; - __wsum delta; -}; - -static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) -{ - grc->offset = 0; - grc->delta = 0; -} - -static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, - unsigned int off, size_t hdrlen, - int start, int offset, - struct gro_remcsum *grc, - bool nopartial) -{ - __wsum delta; - size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - - BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); - - if (!nopartial) { - NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; - return ptr; - } - - ptr = skb_gro_header_fast(skb, off); - if 
(skb_gro_header_hard(skb, off + plen)) { - ptr = skb_gro_header_slow(skb, off + plen, off); - if (!ptr) - return NULL; - } - - delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, - start, offset); - /* Adjust skb->csum since we changed the packet */ - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); - - grc->offset = off + hdrlen + offset; - grc->delta = delta; - - return ptr; -} - -static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, - struct gro_remcsum *grc) -{ - void *ptr; - size_t plen = grc->offset + sizeof(u16); - - if (!grc->delta) - return; - - ptr = skb_gro_header_fast(skb, grc->offset); - if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { - ptr = skb_gro_header_slow(skb, plen, grc->offset); - if (!ptr) - return; - } - - remcsum_unadjust((__sum16 *)ptr, grc->delta); -} - -#ifdef CONFIG_XFRM_OFFLOAD -static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) -{ - if (PTR_ERR(pp) != -EINPROGRESS) - NAPI_GRO_CB(skb)->flush |= flush; -} -static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, - struct sk_buff *pp, - int flush, - struct gro_remcsum *grc) -{ - if (PTR_ERR(pp) != -EINPROGRESS) { - NAPI_GRO_CB(skb)->flush |= flush; - skb_gro_remcsum_cleanup(skb, grc); - skb->remcsum_offload = 0; - } -} -#else -static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) -{ - NAPI_GRO_CB(skb)->flush |= flush; -} -static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, - struct sk_buff *pp, - int flush, - struct gro_remcsum *grc) -{ - NAPI_GRO_CB(skb)->flush |= flush; - skb_gro_remcsum_cleanup(skb, grc); - skb->remcsum_offload = 0; -} -#endif static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -4006,6 +3672,7 @@ int netif_rx_ni(struct sk_buff *skb); int netif_rx_any_context(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); 
+void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); @@ -4079,7 +3746,6 @@ int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_proto_down_generic(struct net_device *dev, bool proto_down); void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); @@ -4155,6 +3821,7 @@ void netdev_run_todo(void); * @dev: network device * * Release reference to device to allow it to be freed. + * Try using dev_put_track() instead. */ static inline void dev_put(struct net_device *dev) { @@ -4172,6 +3839,7 @@ static inline void dev_put(struct net_device *dev) * @dev: network device * * Hold reference to device to keep it from being freed. + * Try using dev_hold_track() instead. 
*/ static inline void dev_hold(struct net_device *dev) { @@ -4184,6 +3852,55 @@ static inline void dev_hold(struct net_device *dev) } } +static inline void netdev_tracker_alloc(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); +#endif +} + +static inline void netdev_tracker_free(struct net_device *dev, + netdevice_tracker *tracker) +{ +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER + ref_tracker_free(&dev->refcnt_tracker, tracker); +#endif +} + +static inline void dev_hold_track(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) +{ + if (dev) { + dev_hold(dev); + netdev_tracker_alloc(dev, tracker, gfp); + } +} + +static inline void dev_put_track(struct net_device *dev, + netdevice_tracker *tracker) +{ + if (dev) { + netdev_tracker_free(dev, tracker); + dev_put(dev); + } +} + +static inline void dev_replace_track(struct net_device *odev, + struct net_device *ndev, + netdevice_tracker *tracker, + gfp_t gfp) +{ + if (odev) + netdev_tracker_free(odev, tracker); + + dev_hold(ndev); + dev_put(odev); + + if (ndev) + netdev_tracker_alloc(ndev, tracker, gfp); +} + /* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. 
@@ -4403,7 +4120,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) @@ -4420,33 +4138,50 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + + if (likely(ok)) { + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } +/* + * txq->trans_start can be read locklessly from dev_watchdog() + */ static inline void txq_trans_update(struct netdev_queue *txq) { if (txq->xmit_lock_owner != -1) - txq->trans_start = jiffies; + WRITE_ONCE(txq->trans_start, jiffies); +} + +static inline void txq_trans_cond_update(struct netdev_queue *txq) +{ + unsigned long now = jiffies; + + if (READ_ONCE(txq->trans_start) != now) + WRITE_ONCE(txq->trans_start, now); } /* legacy drivers only, netdev_start_xmit() sets txq->trans_start */ 
@@ -4454,8 +4189,7 @@ static inline void netif_trans_update(struct net_device *dev) { struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); - if (txq->trans_start != jiffies) - txq->trans_start = jiffies; + txq_trans_cond_update(txq); } /** @@ -4464,27 +4198,7 @@ static inline void netif_trans_update(struct net_device *dev) * * Get network device transmit lock */ -static inline void netif_tx_lock(struct net_device *dev) -{ - unsigned int i; - int cpu; - - spin_lock(&dev->tx_global_lock); - cpu = smp_processor_id(); - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - /* We are the only thread of execution doing a - * freeze, but we have to grab the _xmit_lock in - * order to synchronize with threads which are in - * the ->hard_start_xmit() handler and already - * checked the frozen bit. - */ - __netif_tx_lock(txq, cpu); - set_bit(__QUEUE_STATE_FROZEN, &txq->state); - __netif_tx_unlock(txq); - } -} +void netif_tx_lock(struct net_device *dev); static inline void netif_tx_lock_bh(struct net_device *dev) { @@ -4492,22 +4206,7 @@ static inline void netif_tx_lock_bh(struct net_device *dev) netif_tx_lock(dev); } -static inline void netif_tx_unlock(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - /* No need to grab the _xmit_lock here. If the - * queue is not stopped for another reason, we - * force a schedule. 
- */ - clear_bit(__QUEUE_STATE_FROZEN, &txq->state); - netif_schedule_queue(txq); - } - spin_unlock(&dev->tx_global_lock); -} +void netif_tx_unlock(struct net_device *dev); static inline void netif_tx_unlock_bh(struct net_device *dev) { @@ -4641,10 +4340,13 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ +void dev_addr_mod(struct net_device *dev, unsigned int offset, + const void *addr, size_t len); + static inline void -__dev_addr_set(struct net_device *dev, const u8 *addr, size_t len) +__dev_addr_set(struct net_device *dev, const void *addr, size_t len) { - memcpy(dev->dev_addr, addr, len); + dev_addr_mod(dev, 0, addr, len); } static inline void dev_addr_set(struct net_device *dev, const u8 *addr) @@ -4652,19 +4354,13 @@ static inline void dev_addr_set(struct net_device *dev, const u8 *addr) __dev_addr_set(dev, addr, dev->addr_len); } -static inline void -dev_addr_mod(struct net_device *dev, unsigned int offset, - const u8 *addr, size_t len) -{ - memcpy(&dev->dev_addr[offset], addr, len); -} - int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); +void dev_addr_check(struct net_device *dev); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); @@ -4800,8 +4496,6 @@ struct netdev_nested_priv { bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter); #ifdef CONFIG_LOCKDEP static LIST_HEAD(net_unlink_list); @@ -5105,7 +4799,22 @@ static 
inline bool netif_needs_gso(struct sk_buff *skb, static inline void netif_set_gso_max_size(struct net_device *dev, unsigned int size) { - dev->gso_max_size = size; + /* dev->gso_max_size is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_max_size, size); +} + +static inline void netif_set_gso_max_segs(struct net_device *dev, + unsigned int segs) +{ + /* dev->gso_max_segs is read locklessly from sk_setup_caps() */ + WRITE_ONCE(dev->gso_max_segs, segs); +} + +static inline void netif_set_gro_max_size(struct net_device *dev, + unsigned int size) +{ + /* This pairs with the READ_ONCE() in skb_gro_receive() */ + WRITE_ONCE(dev->gro_max_size, size); } static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, @@ -5236,7 +4945,7 @@ static inline void netif_keep_dst(struct net_device *dev) static inline bool netif_reduces_vlan_mtu(struct net_device *dev) { /* TODO: reserve and use an additional IFF bit, if we get more users */ - return dev->priv_flags & IFF_MACSEC; + return netif_is_macsec(dev); } extern struct pernet_operations __net_initdata loopback_net_ops; @@ -5292,7 +5001,7 @@ void netdev_info(const struct net_device *dev, const char *format, ...); #define netdev_level_once(level, dev, fmt, ...) 
\ do { \ - static bool __print_once __read_mostly; \ + static bool __section(".data.once") __print_once; \ \ if (!__print_once) { \ __print_once = true; \ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3fda1a508733..15e71bfff726 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -381,13 +381,13 @@ struct nf_nat_hook { enum ip_conntrack_dir dir); }; -extern struct nf_nat_hook __rcu *nf_nat_hook; +extern const struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) - struct nf_nat_hook *nat_hook; + const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); @@ -440,7 +440,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_zones_common.h> -extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, @@ -463,8 +462,9 @@ struct nf_ct_hook { void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); + void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); }; -extern struct nf_ct_hook __rcu *nf_ct_hook; +extern const struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; @@ -479,7 +479,7 @@ struct nfnl_ct_hook { void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; -extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; +extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; /** * nf_skb_duplicated - TEE target has sent a packet diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 700ea077ce2d..2770db2fa080 100644 --- 
a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -2,7 +2,7 @@ #ifndef _NF_CONNTRACK_COMMON_H #define _NF_CONNTRACK_COMMON_H -#include <linux/atomic.h> +#include <linux/refcount.h> #include <uapi/linux/netfilter/nf_conntrack_common.h> struct ip_conntrack_stat { @@ -25,19 +25,21 @@ struct ip_conntrack_stat { #define NFCT_PTRMASK ~(NFCT_INFOMASK) struct nf_conntrack { - atomic_t use; + refcount_t use; }; void nf_conntrack_destroy(struct nf_conntrack *nfct); + +/* like nf_ct_put, but without module dependency on nf_conntrack */ static inline void nf_conntrack_put(struct nf_conntrack *nfct) { - if (nfct && atomic_dec_and_test(&nfct->use)) + if (nfct && refcount_dec_and_test(&nfct->use)) nf_conntrack_destroy(nfct); } static inline void nf_conntrack_get(struct nf_conntrack *nfct) { if (nfct) - atomic_inc(&nfct->use); + refcount_inc(&nfct->use); } #endif /* _NF_CONNTRACK_COMMON_H */ diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 4f9a4b3c5892..a40aaf645fa4 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -54,9 +54,8 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); void arpt_unregister_table_pre_exit(struct net *net, const char *name); -extern unsigned int arpt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 10a01978bc0d..a13296d6c7ce 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -112,9 +112,8 @@ extern int 
ebt_register_table(struct net *net, const struct nf_hook_ops *ops); extern void ebt_unregister_table(struct net *net, const char *tablename); void ebt_unregister_table_pre_exit(struct net *net, const char *tablename); -extern unsigned int ebt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct ebt_table *table); +extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h deleted file mode 100644 index a13774be2eb5..000000000000 --- a/include/linux/netfilter_ingress.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ - -#include <linux/netfilter.h> -#include <linux/netdevice.h> - -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) -{ -#ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) - return false; -#endif - return rcu_access_pointer(skb->dev->nf_hooks_ingress); -} - -/* caller must hold rcu_read_lock */ -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); - struct nf_hook_state state; - int ret; - - /* Must recheck the ingress hook head, in the event it became NULL - * after the check in nf_hook_ingress_active evaluated to true. 
- */ - if (unlikely(!e)) - return 0; - - nf_hook_state_init(&state, NF_NETDEV_INGRESS, - NFPROTO_NETDEV, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e, 0); - if (ret == 0) - return -1; - - return ret; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) -{ - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -} -#else /* CONFIG_NETFILTER_INGRESS */ -static inline int nf_hook_ingress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return 0; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) {} -#endif /* CONFIG_NETFILTER_INGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 8d09bfe850dc..132b0e4a6d4d 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -63,9 +63,9 @@ struct ipt_error { } extern void *ipt_alloc_initial_table(const struct xt_table *); -extern unsigned int ipt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int ipt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 79e73fd7d965..8b8885a73c76 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,9 +29,8 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops); void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); -extern unsigned int ip6t_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table); +extern unsigned int ip6t_do_table(void *priv, struct 
sk_buff *skb, + const struct nf_hook_state *state); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include <net/compat.h> diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h new file mode 100644 index 000000000000..b4dd96e4dc8d --- /dev/null +++ b/include/linux/netfilter_netdev.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NETFILTER_NETDEV_H_ +#define _NETFILTER_NETDEV_H_ + +#include <linux/netfilter.h> +#include <linux/netdevice.h> + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return rcu_access_pointer(skb->dev->nf_hooks_ingress); +} + +/* caller must hold rcu_read_lock */ +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_state state; + int ret; + + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. 
+ */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, NF_NETDEV_INGRESS, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + if (ret == 0) + return -1; + + return ret; +} + +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} +#endif /* CONFIG_NETFILTER_INGRESS */ + +#ifdef CONFIG_NETFILTER_EGRESS +static inline bool nf_hook_egress_active(void) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS])) + return false; +#endif + return true; +} + +/** + * nf_hook_egress - classify packets before transmission + * @skb: packet to be classified + * @rc: result code which shall be returned by __dev_queue_xmit() on failure + * @dev: netdev whose egress hooks shall be applied to @skb + * + * Returns @skb on success or %NULL if the packet was consumed or filtered. + * Caller must hold rcu_read_lock. + * + * On ingress, packets are classified first by tc, then by netfilter. + * On egress, the order is reversed for symmetry. Conceptually, tc and + * netfilter can be thought of as layers, with netfilter layered above tc: + * When tc redirects a packet to another interface, netfilter is not applied + * because the packet is on the tc layer. + * + * The nf_skip_egress flag controls whether netfilter is applied on egress. + * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the + * packet passes through tc and netfilter. Because __dev_queue_xmit() may be + * called recursively by tunnel drivers such as vxlan, the flag is reverted to + * false after sch_handle_egress(). This ensures that netfilter is applied + * both on the overlay and underlying network. 
+ */ +static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, + struct net_device *dev) +{ + struct nf_hook_entries *e; + struct nf_hook_state state; + int ret; + +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + if (skb->nf_skip_egress) + return skb; +#endif + + e = rcu_dereference_check(dev->nf_hooks_egress, rcu_read_lock_bh_held()); + if (!e) + return skb; + + nf_hook_state_init(&state, NF_NETDEV_EGRESS, + NFPROTO_NETDEV, dev, NULL, NULL, + dev_net(dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + + if (ret == 1) { + return skb; + } else if (ret < 0) { + *rc = NET_XMIT_DROP; + return NULL; + } else { /* ret == 0 */ + *rc = NET_XMIT_SUCCESS; + return NULL; + } +} +#else /* CONFIG_NETFILTER_EGRESS */ +static inline bool nf_hook_egress_active(void) +{ + return false; +} + +static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, + struct net_device *dev) +{ + return skb; +} +#endif /* CONFIG_NETFILTER_EGRESS */ + +static inline void nf_skip_egress(struct sk_buff *skb, bool skip) +{ +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + skb->nf_skip_egress = skip; +#endif +} + +static inline void nf_hook_netdev_init(struct net_device *dev) +{ +#ifdef CONFIG_NETFILTER_INGRESS + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); +#endif +} + +#endif /* _NETFILTER_NETDEV_H_ */ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d6a4158a9a6..b46c39d98bbd 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -22,6 +22,7 @@ * Overload PG_private_2 to give us PG_fscache - this is used to indicate that * a page is currently backed by a local disk cache */ +#define folio_test_fscache(folio) folio_test_private_2(folio) #define PageFsCache(page) PagePrivate2((page)) #define SetPageFsCache(page) SetPagePrivate2((page)) #define ClearPageFsCache(page) ClearPagePrivate2((page)) @@ -29,60 +30,80 @@ #define TestClearPageFsCache(page) 
TestClearPagePrivate2((page)) /** - * set_page_fscache - Set PG_fscache on a page and take a ref - * @page: The page. + * folio_start_fscache - Start an fscache write on a folio. + * @folio: The folio. * - * Set the PG_fscache (PG_private_2) flag on a page and take the reference - * needed for the VM to handle its lifetime correctly. This sets the flag and - * takes the reference unconditionally, so care must be taken not to set the - * flag again if it's already set. + * Call this function before writing a folio to a local cache. Starting a + * second write before the first one finishes is not allowed. */ -static inline void set_page_fscache(struct page *page) +static inline void folio_start_fscache(struct folio *folio) { - set_page_private_2(page); + VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); + folio_get(folio); + folio_set_private_2(folio); } /** - * end_page_fscache - Clear PG_fscache and release any waiters - * @page: The page - * - * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers - * waiting for this. The page ref held for PG_private_2 being set is released. + * folio_end_fscache - End an fscache write on a folio. + * @folio: The folio. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be - * serialised. + * Call this function after the folio has been written to the local cache. + * This will wake any sleepers waiting on this folio. */ -static inline void end_page_fscache(struct page *page) +static inline void folio_end_fscache(struct folio *folio) { - end_page_private_2(page); + folio_end_private_2(folio); } /** - * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page. 
+ * If this folio is currently being written to a local cache, wait for + * the write to finish. Another write may start after this one finishes, + * unless the caller holds the folio lock. */ -static inline void wait_on_page_fscache(struct page *page) +static inline void folio_wait_fscache(struct folio *folio) { - wait_on_page_private_2(page); + folio_wait_private_2(folio); } /** - * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a - * fatal signal is received by the calling task. + * If this folio is currently being written to a local cache, wait + * for the write to finish or for a fatal signal to be received. + * Another write may start after this one finishes, unless the caller + * holds the folio lock. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. 
*/ +static inline int folio_wait_fscache_killable(struct folio *folio) +{ + return folio_wait_private_2_killable(folio); +} + +static inline void set_page_fscache(struct page *page) +{ + folio_start_fscache(page_folio(page)); +} + +static inline void end_page_fscache(struct page *page) +{ + folio_end_private_2(page_folio(page)); +} + +static inline void wait_on_page_fscache(struct page *page) +{ + folio_wait_private_2(page_folio(page)); +} + static inline int wait_on_page_fscache_killable(struct page *page) { - return wait_on_page_private_2_killable(page); + return folio_wait_private_2_killable(page_folio(page)); } enum netfs_read_source { @@ -103,6 +124,7 @@ struct netfs_cache_resources { void *cache_priv; void *cache_priv2; unsigned int debug_id; /* Cookie debug ID */ + unsigned int inval_counter; /* object->inval_counter at begin_op */ }; /* @@ -145,13 +167,13 @@ struct netfs_read_request { short error; /* 0 or error that occurred */ loff_t i_size; /* Size of the file */ loff_t start; /* Start position */ - pgoff_t no_unlock_page; /* Don't unlock this page after read */ + pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ refcount_t usage; unsigned long flags; #define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ #define NETFS_RREQ_WRITE_TO_CACHE 1 /* Need to write to the cache */ -#define NETFS_RREQ_NO_UNLOCK_PAGE 2 /* Don't unlock no_unlock_page on completion */ -#define NETFS_RREQ_DONT_UNLOCK_PAGES 3 /* Don't unlock the pages on completion */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ const struct netfs_read_request_ops *netfs_ops; @@ -169,12 +191,21 @@ struct netfs_read_request_ops { void (*issue_op)(struct netfs_read_subrequest *subreq); bool 
(*is_still_valid)(struct netfs_read_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct page *page, void **_fsdata); + struct folio *folio, void **_fsdata); void (*done)(struct netfs_read_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; /* + * How to handle reading from a hole. + */ +enum netfs_read_from_hole { + NETFS_READ_HOLE_IGNORE, + NETFS_READ_HOLE_CLEAR, + NETFS_READ_HOLE_FAIL, +}; + +/* * Table of operations for access to a cache. This is obtained by * rreq->ops->begin_cache_operation(). */ @@ -186,7 +217,7 @@ struct netfs_cache_ops { int (*read)(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, - bool seek_data, + enum netfs_read_from_hole read_hole, netfs_io_terminated_t term_func, void *term_func_priv); @@ -211,7 +242,8 @@ struct netfs_cache_ops { * actually do. */ int (*prepare_write)(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size); + loff_t *_start, size_t *_len, loff_t i_size, + bool no_space_allocated_yet); }; struct readahead_control; @@ -219,11 +251,11 @@ extern void netfs_readahead(struct readahead_control *, const struct netfs_read_request_ops *, void *); extern int netfs_readpage(struct file *, - struct page *, + struct folio *, const struct netfs_read_request_ops *, void *); extern int netfs_write_begin(struct file *, struct address_space *, - loff_t, unsigned int, unsigned int, struct page **, + loff_t, unsigned int, unsigned int, struct folio **, void **, const struct netfs_read_request_ops *, void *); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 61b1c7fcc401..1ec631838af9 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -156,10 +156,6 @@ bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 
group, gfp_t allocation); -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, - __u32 portid, __u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), - void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e6a2d72e0dc7..bd19c4b91e31 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -24,6 +24,7 @@ union inet_addr { struct netpoll { struct net_device *dev; + netdevice_tracker dev_tracker; char dev_name[IFNAMSIZ]; const char *name; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 15004c469807..5662d8be04eb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -292,6 +292,10 @@ enum nfsstat4 { NFS4ERR_XATTR2BIG = 10096, }; +/* error codes for internal client use */ +#define NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + static inline bool seqid_mutating_err(u32 err) { /* See RFC 7530, section 9.1.7 */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b9a8b925db43..00835bacd236 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -81,7 +81,7 @@ struct nfs_open_context { fl_owner_t flock_owner; struct dentry *dentry; const struct cred *cred; - struct rpc_cred *ll_cred; /* low-level cred - use to check for expiry */ + struct rpc_cred __rcu *ll_cred; /* low-level cred - use to check for expiry */ struct nfs4_state *state; fmode_t mode; @@ -103,6 +103,7 @@ struct nfs_open_dir_context { __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; __u64 dup_cookie; + pgoff_t page_index; signed char duped; }; @@ -154,36 +155,39 @@ struct nfs_inode { unsigned long attrtimeo_timestamp; unsigned long attr_gencount; - /* "Generation counter" for the attribute cache. 
This is - * bumped whenever we update the metadata on the - * server. - */ - unsigned long cache_change_attribute; struct rb_root access_cache; struct list_head access_cache_entry_lru; struct list_head access_cache_inode_lru; - /* - * This is the cookie verifier used for NFSv3 readdir - * operations - */ - __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; - - atomic_long_t nrequests; - struct nfs_mds_commit_info commit_info; + union { + /* Directory */ + struct { + /* "Generation counter" for the attribute cache. + * This is bumped whenever we update the metadata + * on the server. + */ + unsigned long cache_change_attribute; + /* + * This is the cookie verifier used for NFSv3 readdir + * operations + */ + __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; + /* Readers: in-flight sillydelete RPC calls */ + /* Writers: rmdir */ + struct rw_semaphore rmdir_sem; + }; + /* Regular file */ + struct { + atomic_long_t nrequests; + struct nfs_mds_commit_info commit_info; + struct mutex commit_mutex; + }; + }; /* Open contexts for shared mmap writes */ struct list_head open_files; - /* Readers: in-flight sillydelete RPC calls */ - /* Writers: rmdir */ - struct rw_semaphore rmdir_sem; - struct mutex commit_mutex; - - /* track last access to cached pages */ - unsigned long page_index; - #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ @@ -271,7 +275,7 @@ struct nfs4_copy_state { #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ -#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ +#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ @@ -383,7 +387,7 @@ extern void nfs_zap_caches(struct inode *); extern void 
nfs_set_inode_stale(struct inode *inode); extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *); + struct nfs_fattr *); struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); @@ -404,8 +408,7 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map extern int nfs_revalidate_mapping_rcu(struct inode *inode); extern int nfs_setattr(struct user_namespace *, struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); -extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label); +extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct cred *cred, fmode_t mode); @@ -421,9 +424,22 @@ extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr); extern unsigned long nfs_inc_attr_generation_counter(void); extern struct nfs_fattr *nfs_alloc_fattr(void); +extern struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server); + +static inline void nfs4_label_free(struct nfs4_label *label) +{ +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + if (label) { + kfree(label->label); + kfree(label); + } +#endif +} static inline void nfs_free_fattr(const struct nfs_fattr *fattr) { + if (fattr) + nfs4_label_free(fattr->label); kfree(fattr); } @@ -511,10 +527,9 @@ extern void nfs_set_verifier(struct dentry * dentry, unsigned long verf); extern void nfs_clear_verifier_delegated(struct inode *inode); #endif /* 
IS_ENABLED(CONFIG_NFS_V4) */ extern struct dentry *nfs_add_or_obtain(struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr, - struct nfs4_label *label); + struct nfs_fh *fh, struct nfs_fattr *fattr); extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr, struct nfs4_label *label); + struct nfs_fattr *fattr); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, @@ -569,11 +584,14 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); +bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline int nfs_have_writebacks(struct inode *inode) { - return atomic_long_read(&NFS_I(inode)->nrequests) != 0; + if (S_ISREG(inode->i_mode)) + return atomic_long_read(&NFS_I(inode)->nrequests) != 0; + return 0; } /* diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2a9acbfe00f0..77b2dba27bbb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -120,11 +120,6 @@ struct nfs_client { * This is used to generate the mv0 callback address. 
*/ char cl_ipaddr[48]; - -#ifdef CONFIG_NFS_FSCACHE - struct fscache_cookie *fscache; /* client index cache cookie */ -#endif - struct net *cl_net; struct list_head pending_cb_stateids; }; @@ -194,8 +189,8 @@ struct nfs_server { struct nfs_auth_info auth_info; /* parsed auth flavors */ #ifdef CONFIG_NFS_FSCACHE - struct nfs_fscache_key *fscache_key; /* unique key for superblock */ - struct fscache_cookie *fscache; /* superblock cookie */ + struct fscache_volume *fscache; /* superblock cookie */ + char *fscache_uniq; /* Uniquifier (or NULL) */ #endif u32 pnfs_blksize; /* layout_blksize attr */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e9698b6278a5..967a0098f0a9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -488,7 +488,6 @@ struct nfs_openres { struct nfs4_change_info cinfo; __u32 rflags; struct nfs_fattr * f_attr; - struct nfs4_label *f_label; struct nfs_seqid * seqid; const struct nfs_server *server; fmode_t delegation_type; @@ -753,7 +752,6 @@ struct nfs_entry { int eof; struct nfs_fh * fh; struct nfs_fattr * fattr; - struct nfs4_label *label; unsigned char d_type; struct nfs_server * server; }; @@ -834,7 +832,6 @@ struct nfs_getaclres { struct nfs_setattrres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - struct nfs4_label *label; const struct nfs_server * server; }; @@ -1041,7 +1038,6 @@ struct nfs4_create_res { const struct nfs_server * server; struct nfs_fh * fh; struct nfs_fattr * fattr; - struct nfs4_label *label; struct nfs4_change_info dir_cinfo; }; @@ -1066,7 +1062,6 @@ struct nfs4_getattr_res { struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; - struct nfs4_label *label; }; struct nfs4_link_arg { @@ -1081,7 +1076,6 @@ struct nfs4_link_res { struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; - struct nfs4_label *label; struct nfs4_change_info cinfo; struct nfs_fattr * dir_attr; }; @@ -1098,7 +1092,6 @@ 
struct nfs4_lookup_res { const struct nfs_server * server; struct nfs_fattr * fattr; struct nfs_fh * fh; - struct nfs4_label *label; }; struct nfs4_lookupp_arg { @@ -1112,7 +1105,6 @@ struct nfs4_lookupp_res { const struct nfs_server *server; struct nfs_fattr *fattr; struct nfs_fh *fh; - struct nfs4_label *label; }; struct nfs4_lookup_root_arg { @@ -1738,15 +1730,13 @@ struct nfs_rpc_ops { int (*submount) (struct fs_context *, struct nfs_server *); int (*try_get_tree) (struct fs_context *); int (*getattr) (struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *, - struct inode *); + struct nfs_fattr *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); int (*lookup) (struct inode *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *, - struct nfs4_label *); + struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, - struct nfs_fattr *, struct nfs4_label *); + struct nfs_fattr *); int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); diff --git a/include/linux/node.h b/include/linux/node.h index 8e5a29897936..bb21fd631b16 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,7 +85,7 @@ struct node { struct device dev; struct list_head access_list; -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) struct work_struct node_work; #endif #ifdef CONFIG_HMEM_REPORTING @@ -98,7 +98,7 @@ struct memory_block; extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, enum meminit_context context); diff --git a/include/linux/numa.h b/include/linux/numa.h index 
cb44cfe2b725..59df211d051f 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -58,4 +58,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP +extern const struct attribute_group arch_node_dev_group; +#endif + #endif /* _LINUX_NUMA_H */ diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 2a38f2b477a5..cb909edb76c4 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -7,6 +7,7 @@ #define _NVME_FC_DRIVER_H 1 #include <linux/scatterlist.h> +#include <linux/blk-mq.h> /* @@ -497,6 +498,8 @@ struct nvme_fc_port_template { int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *rport, struct nvmefc_ls_rsp *ls_rsp); + void (*map_queues)(struct nvme_fc_local_port *localport, + struct blk_mq_queue_map *map); u32 max_hw_queues; u16 max_sgl_segments; @@ -779,6 +782,10 @@ struct nvmet_fc_target_port { * LS received. * Entrypoint is Mandatory. * + * @map_queues: This functions lets the driver expose the queue mapping + * to the block layer. + * Entrypoint is Optional. + * * @fcp_op: Called to perform a data transfer or transmit a response. 
* The nvmefc_tgt_fcp_req structure is the same LLDD-supplied * exchange structure specified in the nvmet_fc_rcv_fcp_req() call diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 3ec8e50efa16..4dd7e6fe92fb 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,6 +6,8 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H +#define NVME_RDMA_MAX_QUEUE_SIZE 128 + enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b7c4c4130b65..855dd9b3e84b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -27,8 +27,20 @@ #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { - NVME_NQN_DISC = 1, /* Discovery type target subsystem */ - NVME_NQN_NVME = 2, /* NVME type target subsystem */ + /* Referral to another discovery type target subsystem */ + NVME_NQN_DISC = 1, + + /* NVME type target subsystem */ + NVME_NQN_NVME = 2, + + /* Current discovery type target subsystem */ + NVME_NQN_CURR = 3, +}; + +enum nvme_ctrl_type { + NVME_CTRL_IO = 1, /* I/O controller */ + NVME_CTRL_DISC = 2, /* Discovery controller */ + NVME_CTRL_ADMIN = 3, /* Administrative controller */ }; /* Address Family codes for Discovery Log Page entry ADRFAM field */ @@ -244,7 +256,9 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[28]; + __u8 rsvd100[11]; + __u8 cntrltype; + __u8 fguid[16]; __le16 crdt1; __le16 crdt2; __le16 crdt3; @@ -312,6 +326,7 @@ struct nvme_id_ctrl { }; enum { + NVME_CTRL_CMIC_MULTI_PORT = 1 << 0, NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, NVME_CTRL_CMIC_ANA = 1 << 3, NVME_CTRL_ONCS_COMPARE = 1 << 0, @@ -1303,6 +1318,12 @@ struct nvmf_common_command { #define MAX_DISC_LOGS 255 +/* Discovery log page entry flags (EFLAGS): */ +enum { + NVME_DISC_EFLAGS_EPCSD = (1 << 1), + NVME_DISC_EFLAGS_DUPRETINFO = (1 << 0), +}; + /* Discovery log page entry */ struct nvmf_disc_rsp_page_entry { __u8 trtype; @@ -1312,7 +1333,8 @@ struct nvmf_disc_rsp_page_entry 
{ __le16 portid; __le16 cntlid; __le16 asqsz; - __u8 resv8[22]; + __le16 eflags; + __u8 resv10[20]; char trsvcid[NVMF_TRSVCID_SIZE]; __u8 resv64[192]; char subnqn[NVMF_NQN_FIELD_LEN]; diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 104505e9028f..98efb7b5660d 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -19,6 +19,9 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, void *val, size_t bytes); typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, void *val, size_t bytes); +/* used for vendor specific post processing of cell data */ +typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, unsigned int offset, + void *buf, size_t bytes); enum nvmem_type { NVMEM_TYPE_UNKNOWN = 0, @@ -62,6 +65,7 @@ struct nvmem_keepout { * @no_of_node: Device should not use the parent's of_node even if it's !NULL. * @reg_read: Callback to read data. * @reg_write: Callback to write data. + * @cell_post_process: Callback for vendor specific post processing of cell data * @size: Device size. * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. @@ -92,6 +96,7 @@ struct nvmem_config { bool no_of_node; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; + nvmem_cell_post_process_t cell_post_process; int size; int word_size; int stride; diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 7e72d975cb76..aca52db2f3f3 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -66,6 +66,17 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but otherwise needs objtool/ORC coverage when frame pointers are disabled. 
+ */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + #else /* __ASSEMBLY__ */ /* @@ -127,6 +138,7 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 diff --git a/include/linux/of.h b/include/linux/of.h index 6f1c41f109bb..ff143a027abc 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -185,7 +185,7 @@ static inline bool of_node_is_root(const struct device_node *node) return node && (node->parent == NULL); } -static inline int of_node_check_flag(struct device_node *n, unsigned long flag) +static inline int of_node_check_flag(const struct device_node *n, unsigned long flag) { return test_bit(flag, &n->_flags); } @@ -353,6 +353,7 @@ extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index); +extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; pp = pp->next) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index cf6a65b94d40..d69ad5bb1eb1 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -58,20 +58,17 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); extern uint32_t of_get_flat_dt_phandle(unsigned long node); -extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, - int depth, void *data); -extern int early_init_dt_scan_memory(unsigned long node, const char *uname, - int depth, void *data); +extern int 
early_init_dt_scan_chosen(char *cmdline); +extern int early_init_dt_scan_memory(void); +extern void early_init_dt_check_for_usable_mem_range(void); extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); -extern void __init early_init_dt_scan_chosen_arch(unsigned long node); extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern u64 dt_mem_next_cell(int s, const __be32 **cellp); /* Early flat tree scan hooks */ -extern int early_init_dt_scan_root(unsigned long node, const char *uname, - int depth, void *data); +extern int early_init_dt_scan_root(void); extern bool early_init_dt_scan(void *params); extern bool early_init_dt_verify(void *params); @@ -87,6 +84,7 @@ extern void unflatten_and_copy_device_tree(void); extern void early_init_devtree(void *); extern void early_get_first_memblock_info(void *, phys_addr_t *); #else /* CONFIG_OF_EARLY_FLATTREE */ +static inline void early_init_dt_check_for_usable_mem_range(void) {} static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; } static inline void early_init_fdt_scan_reserved_mem(void) {} static inline void early_init_fdt_reserve_self(void) {} diff --git a/include/linux/of_net.h b/include/linux/of_net.h index daef3b0d9270..0484b613ca64 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -8,12 +8,13 @@ #include <linux/phy.h> -#ifdef CONFIG_OF_NET +#if defined(CONFIG_OF) && defined(CONFIG_NET) #include <linux/of.h> struct net_device; extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface); extern int of_get_mac_address(struct device_node *np, u8 *mac); +int of_get_ethdev_address(struct device_node *np, struct net_device *dev); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np, @@ -27,6 +28,11 @@ static inline int of_get_mac_address(struct device_node *np, u8 *mac) 
return -ENODEV; } +static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev) +{ + return -ENODEV; +} + static inline struct net_device *of_find_net_device_by_node(struct device_node *np) { return NULL; diff --git a/include/linux/once.h b/include/linux/once.h index d361fb14ac3a..f54523052bbc 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -38,7 +38,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, #define DO_ONCE(func, ...) \ ({ \ bool ___ret = false; \ - static bool ___done = false; \ + static bool __section(".data.once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index fbfd3fad48f2..129421002443 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -68,9 +68,6 @@ * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as * a result of MADV_FREE). * - * PG_uptodate tells whether the page's contents is valid. When a read - * completes, the page becomes uptodate, unless a disk I/O error happened. - * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * @@ -143,6 +140,8 @@ enum pageflags { #endif __NR_PAGEFLAGS, + PG_readahead = PG_reclaim, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -202,6 +201,34 @@ static inline unsigned long _compound_head(const struct page *page) #define compound_head(page) ((typeof(page))_compound_head(page)) +/** + * page_folio - Converts from page to folio. + * @p: The page. + * + * Every page is part of a folio. This function cannot be called on a + * NULL pointer. + * + * Context: No reference, nor lock is required on @page. 
If the caller + * does not hold a reference, this call may race with a folio split, so + * it should re-check the folio still contains this page after gaining + * a reference on the folio. + * Return: The folio which contains this page. + */ +#define page_folio(p) (_Generic((p), \ + const struct page *: (const struct folio *)_compound_head(p), \ + struct page *: (struct folio *)_compound_head(p))) + +/** + * folio_page - Return a page from a folio. + * @folio: The folio. + * @n: The page number to return. + * + * @n is relative to the start of the folio. This function does not + * check that the page number lies within @folio; the caller is presumed + * to have a reference to the page. + */ +#define folio_page(folio, n) nth_page(&(folio)->page, n) + static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; @@ -215,7 +242,7 @@ static __always_inline int PageCompound(struct page *page) #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { - return page->flags == PAGE_POISON_PATTERN; + return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM @@ -226,6 +253,15 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static unsigned long *folio_flags(struct folio *folio, unsigned n) +{ + struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + /* * Page flags policies wrt compound pages * @@ -270,36 +306,64 @@ static inline void page_init_poison(struct page *page, size_t size) VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) +/* Which page is the flag stored in */ +#define FOLIO_PF_ANY 0 +#define FOLIO_PF_HEAD 0 +#define FOLIO_PF_ONLY_HEAD 0 +#define FOLIO_PF_NO_TAIL 0 +#define FOLIO_PF_NO_COMPOUND 0 +#define FOLIO_PF_SECOND 1 + /* * Macros to create function definitions for page flags */ #define 
TESTPAGEFLAG(uname, lname, policy) \ +static __always_inline bool folio_test_##lname(struct folio *folio) \ +{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int Page##uname(struct page *page) \ - { return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_set_##lname(struct folio *folio) \ +{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void SetPage##uname(struct page *page) \ - { set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_clear_##lname(struct folio *folio) \ +{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void ClearPage##uname(struct page *page) \ - { clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_set_##lname(struct folio *folio) \ +{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __SetPage##uname(struct page *page) \ - { __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_clear_##lname(struct folio *folio) \ +{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __ClearPage##uname(struct page *page) \ - { __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_set_##lname(struct folio *folio) \ +{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } 
\ static __always_inline int TestSetPage##uname(struct page *page) \ - { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_clear_##lname(struct folio *folio) \ +{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestClearPage##uname(struct page *page) \ - { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -315,29 +379,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) -#define TESTPAGEFLAG_FALSE(uname) \ +#define TESTPAGEFLAG_FALSE(uname, lname) \ +static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ static inline int Page##uname(const struct page *page) { return 0; } -#define SETPAGEFLAG_NOOP(uname) \ +#define SETPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_set_##lname(struct folio *folio) { } \ static inline void SetPage##uname(struct page *page) { } -#define CLEARPAGEFLAG_NOOP(uname) \ +#define CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_clear_##lname(struct folio *folio) { } \ static inline void ClearPage##uname(struct page *page) { } -#define __CLEARPAGEFLAG_NOOP(uname) \ +#define __CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void __folio_clear_##lname(struct folio *folio) { } \ static inline void __ClearPage##uname(struct page *page) { } -#define TESTSETFLAG_FALSE(uname) \ +#define TESTSETFLAG_FALSE(uname, lname) \ +static inline bool folio_test_set_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestSetPage##uname(struct page *page) { return 0; } -#define TESTCLEARFLAG_FALSE(uname) \ +#define 
TESTCLEARFLAG_FALSE(uname, lname) \ +static inline bool folio_test_clear_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestClearPage##uname(struct page *page) { return 0; } -#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ - SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) +#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ + SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) -#define TESTSCFLAG_FALSE(uname) \ - TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) +#define TESTSCFLAG_FALSE(uname, lname) \ + TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) @@ -393,8 +465,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND) +PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) + TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* @@ -403,22 +475,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else -PAGEFLAG_FALSE(HighMem) +PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline int PageSwapCache(struct page *page) +static __always_inline bool folio_test_swapcache(struct folio *folio) { -#ifdef CONFIG_THP_SWAP - page = compound_head(page); -#endif - return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); + return folio_test_swapbacked(folio) && + test_bit(PG_swapcache, folio_flags(folio, 0)); +} +static __always_inline bool PageSwapCache(struct page *page) +{ + return folio_test_swapcache(page_folio(page)); } + SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, 
PF_NO_TAIL) #else -PAGEFLAG_FALSE(SwapCache) +PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) @@ -430,23 +505,27 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else -PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) - TESTSCFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) + TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else -PAGEFLAG_FALSE(Uncached) +PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) +#define MAGIC_HWPOISON 0x48575053U /* HWPS */ +extern void SetPageHWPoisonTakenOff(struct page *page); +extern void ClearPageHWPoisonTakenOff(struct page *page); extern bool take_page_off_buddy(struct page *page); +extern bool put_page_back_buddy(struct page *page); #else -PAGEFLAG_FALSE(HWPoison) +PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif @@ -460,7 +539,7 @@ PAGEFLAG(Idle, idle, PF_ANY) #ifdef CONFIG_KASAN_HW_TAGS PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) #else -PAGEFLAG_FALSE(SkipKASanPoison) +PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) #endif /* @@ -498,10 +577,14 @@ static __always_inline int PageMappingFlags(struct page *page) return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageAnon(struct page *page) +static __always_inline bool folio_test_anon(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; +} + +static __always_inline bool PageAnon(struct page *page) +{ + return folio_test_anon(page_folio(page)); } static __always_inline int __PageMovable(struct page 
*page) @@ -517,30 +600,42 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline int PageKsm(struct page *page) +static __always_inline bool folio_test_ksm(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } + +static __always_inline bool PageKsm(struct page *page) +{ + return folio_test_ksm(page_folio(page)); +} #else -TESTPAGEFLAG_FALSE(Ksm) +TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(struct page *page); -static inline int PageUptodate(struct page *page) +/** + * folio_test_uptodate - Is this folio up to date? + * @folio: The folio. + * + * The uptodate flag is set on a folio when every byte in the folio is + * at least as new as the corresponding bytes on storage. Anonymous + * and CoW folios are always uptodate. If the folio is not uptodate, + * some of the bytes in it may be; see the is_partially_uptodate() + * address_space operation. + */ +static inline bool folio_test_uptodate(struct folio *folio) { - int ret; - page = compound_head(page); - ret = test_bit(PG_uptodate, &(page)->flags); + bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); /* - * Must ensure that the data we read out of the page is loaded - * _after_ we've loaded page->flags to check for PageUptodate. - * We can skip the barrier if the page is not uptodate, because + * Must ensure that the data we read out of the folio is loaded + * _after_ we've loaded folio->flags to check the uptodate bit. + * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * - * See SetPageUptodate() for the other side of the story. + * See folio_mark_uptodate() for the other side of the story. 
*/ if (ret) smp_rmb(); @@ -548,47 +643,71 @@ static inline int PageUptodate(struct page *page) return ret; } -static __always_inline void __SetPageUptodate(struct page *page) +static inline int PageUptodate(struct page *page) +{ + return folio_test_uptodate(page_folio(page)); +} + +static __always_inline void __folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); - __set_bit(PG_uptodate, &page->flags); + __set_bit(PG_uptodate, folio_flags(folio, 0)); } -static __always_inline void SetPageUptodate(struct page *page) +static __always_inline void folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); /* * Memory barrier must be issued before setting the PG_uptodate bit, - * so that all previous stores issued in order to bring the page - * uptodate are actually visible before PageUptodate becomes true. + * so that all previous stores issued in order to bring the folio + * uptodate are actually visible before folio_test_uptodate becomes true. 
*/ smp_wmb(); - set_bit(PG_uptodate, &page->flags); + set_bit(PG_uptodate, folio_flags(folio, 0)); +} + +static __always_inline void __SetPageUptodate(struct page *page) +{ + __folio_mark_uptodate((struct folio *)page); +} + +static __always_inline void SetPageUptodate(struct page *page) +{ + folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int test_clear_page_writeback(struct page *page); -int __test_set_page_writeback(struct page *page, bool keep_write); +bool __folio_start_writeback(struct folio *folio, bool keep_write); +bool set_page_writeback(struct page *page); -#define test_set_page_writeback(page) \ - __test_set_page_writeback(page, false) -#define test_set_page_writeback_keepwrite(page) \ - __test_set_page_writeback(page, true) +#define folio_start_writeback(folio) \ + __folio_start_writeback(folio, false) +#define folio_start_writeback_keepwrite(folio) \ + __folio_start_writeback(folio, true) -static inline void set_page_writeback(struct page *page) +static inline void set_page_writeback_keepwrite(struct page *page) { - test_set_page_writeback(page); + folio_start_writeback_keepwrite(page_folio(page)); } -static inline void set_page_writeback_keepwrite(struct page *page) +static inline bool test_set_page_writeback(struct page *page) { - test_set_page_writeback_keepwrite(page); + return set_page_writeback(page); } __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +/** + * folio_test_large() - Does this folio contain more than one page? + * @folio: The folio to test. + * + * Return: True if the folio is larger than one page. 
+ */ +static inline bool folio_test_large(struct folio *folio) +{ + return folio_test_head(folio); +} + static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); @@ -612,12 +731,15 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); +static inline bool folio_test_hugetlb(struct folio *folio) +{ + return PageHeadHuge(&folio->page); +} #else -TESTPAGEFLAG_FALSE(Huge) -TESTPAGEFLAG_FALSE(HeadHuge) +TESTPAGEFLAG_FALSE(Huge, hugetlb) +TESTPAGEFLAG_FALSE(HeadHuge, headhuge) #endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -633,6 +755,11 @@ static inline int PageTransHuge(struct page *page) return PageHead(page); } +static inline bool folio_test_transhuge(struct folio *folio) +{ + return folio_test_head(folio); +} + /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -669,12 +796,12 @@ static inline int PageTransTail(struct page *page) PAGEFLAG(DoubleMap, double_map, PF_SECOND) TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else -TESTPAGEFLAG_FALSE(TransHuge) -TESTPAGEFLAG_FALSE(TransCompound) -TESTPAGEFLAG_FALSE(TransCompoundMap) -TESTPAGEFLAG_FALSE(TransTail) -PAGEFLAG_FALSE(DoubleMap) - TESTSCFLAG_FALSE(DoubleMap) +TESTPAGEFLAG_FALSE(TransHuge, transhuge) +TESTPAGEFLAG_FALSE(TransCompound, transcompound) +TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) +TESTPAGEFLAG_FALSE(TransTail, transtail) +PAGEFLAG_FALSE(DoubleMap, double_map) + TESTSCFLAG_FALSE(DoubleMap, double_map) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) @@ -687,8 +814,8 @@ PAGEFLAG_FALSE(DoubleMap) PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) #else 
-PAGEFLAG_FALSE(HasHWPoisoned) - TESTSCFLAG_FALSE(HasHWPoisoned) +PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* @@ -872,6 +999,11 @@ static inline int page_has_private(struct page *page) return !!(page->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_has_private(struct folio *folio) +{ + return page_has_private(&folio->page); +} + #undef PF_ANY #undef PF_HEAD #undef PF_ONLY_HEAD diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index d8a6aecf99cb..4663dfed1293 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -8,46 +8,15 @@ #ifdef CONFIG_PAGE_IDLE_FLAG -#ifdef CONFIG_64BIT -static inline bool page_is_young(struct page *page) -{ - return PageYoung(page); -} - -static inline void set_page_young(struct page *page) -{ - SetPageYoung(page); -} - -static inline bool test_and_clear_page_young(struct page *page) -{ - return TestClearPageYoung(page); -} - -static inline bool page_is_idle(struct page *page) -{ - return PageIdle(page); -} - -static inline void set_page_idle(struct page *page) -{ - SetPageIdle(page); -} - -static inline void clear_page_idle(struct page *page) -{ - ClearPageIdle(page); -} -#else /* !CONFIG_64BIT */ +#ifndef CONFIG_64BIT /* * If there is not enough space to store Idle and Young bits in page flags, use * page ext flags instead. 
*/ -extern struct page_ext_operations page_idle_ops; -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -55,9 +24,9 @@ static inline bool page_is_young(struct page *page) return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -65,9 +34,9 @@ static inline void set_page_young(struct page *page) set_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -75,9 +44,9 @@ static inline bool test_and_clear_page_young(struct page *page) return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -85,9 +54,9 @@ static inline bool page_is_idle(struct page *page) return test_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -95,46 +64,75 @@ static inline void set_page_idle(struct page *page) set_bit(PAGE_EXT_IDLE, 
&page_ext->flags); } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } -#endif /* CONFIG_64BIT */ +#endif /* !CONFIG_64BIT */ #else /* !CONFIG_PAGE_IDLE_FLAG */ -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { return false; } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { return false; } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { return false; } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { } #endif /* CONFIG_PAGE_IDLE_FLAG */ +static inline bool page_is_young(struct page *page) +{ + return folio_test_young(page_folio(page)); +} + +static inline void set_page_young(struct page *page) +{ + folio_set_young(page_folio(page)); +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return folio_test_clear_young(page_folio(page)); +} + +static inline bool page_is_idle(struct page *page) +{ + return folio_test_idle(page_folio(page)); +} + +static inline void set_page_idle(struct page *page) +{ + folio_set_idle(page_folio(page)); +} + +static inline void clear_page_idle(struct page *page) +{ + folio_clear_idle(page_folio(page)); +} #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 719bfe5108c5..119a0c9d2a8b 100644 --- 
a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -8,24 +8,24 @@ extern struct static_key_false page_owner_inited; extern struct page_ext_operations page_owner_ops; -extern void __reset_page_owner(struct page *page, unsigned int order); +extern void __reset_page_owner(struct page *page, unsigned short order); extern void __set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask); + unsigned short order, gfp_t gfp_mask); extern void __split_page_owner(struct page *page, unsigned int nr); -extern void __copy_page_owner(struct page *oldpage, struct page *newpage); +extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); -static inline void reset_page_owner(struct page *page, unsigned int order) +static inline void reset_page_owner(struct page *page, unsigned short order) { if (static_branch_unlikely(&page_owner_inited)) __reset_page_owner(page, order); } static inline void set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { if (static_branch_unlikely(&page_owner_inited)) __set_page_owner(page, order, gfp_mask); @@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr) if (static_branch_unlikely(&page_owner_inited)) __split_page_owner(page, nr); } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) { if (static_branch_unlikely(&page_owner_inited)) - __copy_page_owner(oldpage, newpage); + __folio_copy_owner(newfolio, old); } static inline void set_page_owner_migrate_reason(struct page *page, int reason) { @@ -52,7 +52,7 @@ static inline void dump_page_owner(const struct page *page) 
__dump_page_owner(page); } #else -static inline void reset_page_owner(struct page *page, unsigned int order) +static inline void reset_page_owner(struct page *page, unsigned short order) { } static inline void set_page_owner(struct page *page, @@ -60,10 +60,10 @@ static inline void set_page_owner(struct page *page, { } static inline void split_page_owner(struct page *page, - unsigned int order) + unsigned short order) { } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) { } static inline void set_page_owner_migrate_reason(struct page *page, int reason) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 7ad46f45df39..2e677e6ad09f 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page) return atomic_read(&page->_refcount); } +/** + * folio_ref_count - The reference count on this folio. + * @folio: The folio. + * + * The refcount is usually incremented by calls to folio_get() and + * decremented by calls to folio_put(). Some typical users of the + * folio refcount: + * + * - Each reference from a page table + * - The page cache + * - Filesystem private data + * - The LRU list + * - Pipes + * - Direct IO which references this page in the process address space + * + * Return: The number of references to this folio. 
+ */ +static inline int folio_ref_count(const struct folio *folio) +{ + return page_ref_count(&folio->page); +} + static inline int page_count(const struct page *page) { - return atomic_read(&compound_head(page)->_refcount); + return folio_ref_count(page_folio(page)); } static inline void set_page_count(struct page *page, int v) @@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v) __page_ref_set(page, v); } +static inline void folio_set_count(struct folio *folio, int v) +{ + set_page_count(&folio->page, v); +} + /* * Setup the page count before being freed into the page allocator for * the first time (boot or memory hotplug) @@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr) __page_ref_mod(page, nr); } +static inline void folio_ref_add(struct folio *folio, int nr) +{ + page_ref_add(&folio->page, nr); +} + static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); @@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr) __page_ref_mod(page, -nr); } +static inline void folio_ref_sub(struct folio *folio, int nr) +{ + page_ref_sub(&folio->page, nr); +} + static inline int page_ref_sub_return(struct page *page, int nr) { int ret = atomic_sub_return(nr, &page->_refcount); @@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_return(struct folio *folio, int nr) +{ + return page_ref_sub_return(&folio->page, nr); +} + static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); @@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page) __page_ref_mod(page, 1); } +static inline void folio_ref_inc(struct folio *folio) +{ + page_ref_inc(&folio->page); +} + static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); @@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page) __page_ref_mod(page, -1); } +static inline 
void folio_ref_dec(struct folio *folio) +{ + page_ref_dec(&folio->page); +} + static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); @@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_and_test(struct folio *folio, int nr) +{ + return page_ref_sub_and_test(&folio->page, nr); +} + static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); @@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page) return ret; } +static inline int folio_ref_inc_return(struct folio *folio) +{ + return page_ref_inc_return(&folio->page); +} + static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); @@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page) return ret; } +static inline int folio_ref_dec_and_test(struct folio *folio) +{ + return page_ref_dec_and_test(&folio->page); +} + static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); @@ -161,15 +228,91 @@ static inline int page_ref_dec_return(struct page *page) return ret; } -static inline int page_ref_add_unless(struct page *page, int nr, int u) +static inline int folio_ref_dec_return(struct folio *folio) +{ + return page_ref_dec_return(&folio->page); +} + +static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - int ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = atomic_add_unless(&page->_refcount, nr, u); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); return ret; } +static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) +{ + return page_ref_add_unless(&folio->page, nr, u); +} + +/** + * folio_try_get - Attempt to increase the refcount on a folio. + * @folio: The folio. 
+ * + * If you do not already have a reference to a folio, you can attempt to + * get one using this function. It may fail if, for example, the folio + * has been freed since you found a pointer to it, or it is frozen for + * the purposes of splitting or migration. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get(struct folio *folio) +{ + return folio_ref_add_unless(folio, 1, 0); +} + +static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) +{ +#ifdef CONFIG_TINY_RCU + /* + * The caller guarantees the folio will not be freed from interrupt + * context, so (on !SMP) we only need preemption to be disabled + * and TINY_RCU does that for us. + */ +# ifdef CONFIG_PREEMPT_COUNT + VM_BUG_ON(!in_atomic() && !irqs_disabled()); +# endif + VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); + folio_ref_add(folio, count); +#else + if (unlikely(!folio_ref_add_unless(folio, count, 0))) { + /* Either the folio has been freed, or will be freed. */ + return false; + } +#endif + return true; +} + +/** + * folio_try_get_rcu - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * This is a version of folio_try_get() optimised for non-SMP kernels. + * If you are still holding the rcu_read_lock() after looking up the + * page and know that the page cannot have its refcount decreased to + * zero in interrupt context, you can use this instead of folio_try_get(). + * + * Example users include get_user_pages_fast() (as pages are not unmapped + * from interrupt context) and the page cache lookups (as pages are not + * truncated from interrupt context). We also know that pages are not + * frozen in interrupt context for the purposes of splitting or migration. + * + * You can also use this function if you're holding a lock that prevents + * pages being frozen & removed; eg the i_pages lock for the page cache + * or the mmap_sem or page table lock for page tables. 
In this case, + * it will always succeed, and you could have used a plain folio_get(), + * but it's sometimes more convenient to have a common function called + * from both locked and RCU-protected contexts. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get_rcu(struct folio *folio) +{ + return folio_ref_try_add_rcu(folio, 1); +} + static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); @@ -179,6 +322,11 @@ static inline int page_ref_freeze(struct page *page, int count) return ret; } +static inline int folio_ref_freeze(struct folio *folio, int count) +{ + return page_ref_freeze(&folio->page, count); +} + static inline void page_ref_unfreeze(struct page *page, int count) { VM_BUG_ON_PAGE(page_count(page) != 0, page); @@ -189,4 +337,8 @@ static inline void page_ref_unfreeze(struct page *page, int count) __page_ref_unfreeze(page, count); } +static inline void folio_ref_unfreeze(struct folio *folio, int count) +{ + page_ref_unfreeze(&folio->page, count); +} #endif diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h new file mode 100644 index 000000000000..38cace1da7b6 --- /dev/null +++ b/include/linux/page_table_check.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2021, Google LLC. 
+ * Pasha Tatashin <[email protected]> + */ +#ifndef __LINUX_PAGE_TABLE_CHECK_H +#define __LINUX_PAGE_TABLE_CHECK_H + +#ifdef CONFIG_PAGE_TABLE_CHECK +#include <linux/jump_label.h> + +extern struct static_key_true page_table_check_disabled; +extern struct page_ext_operations page_table_check_ops; + +void __page_table_check_zero(struct page *page, unsigned int order); +void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t pte); +void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr, + pmd_t pmd); +void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr, + pud_t pud); +void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud); + +static inline void page_table_check_alloc(struct page *page, unsigned int order) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_zero(page, order); +} + +static inline void page_table_check_free(struct page *page, unsigned int order) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_zero(page, order); +} + +static inline void page_table_check_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t pte) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear(mm, addr, pte); +} + +static inline void page_table_check_pmd_clear(struct mm_struct *mm, + unsigned long addr, pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pmd_clear(mm, addr, pmd); +} + +static inline void page_table_check_pud_clear(struct mm_struct *mm, + unsigned long addr, pud_t pud) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + 
__page_table_check_pud_clear(mm, addr, pud); +} + +static inline void page_table_check_pte_set(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pte_t pte) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_set(mm, addr, ptep, pte); +} + +static inline void page_table_check_pmd_set(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pmd_set(mm, addr, pmdp, pmd); +} + +static inline void page_table_check_pud_set(struct mm_struct *mm, + unsigned long addr, pud_t *pudp, + pud_t pud) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pud_set(mm, addr, pudp, pud); +} + +#else + +static inline void page_table_check_alloc(struct page *page, unsigned int order) +{ +} + +static inline void page_table_check_free(struct page *page, unsigned int order) +{ +} + +static inline void page_table_check_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t pte) +{ +} + +static inline void page_table_check_pmd_clear(struct mm_struct *mm, + unsigned long addr, pmd_t pmd) +{ +} + +static inline void page_table_check_pud_clear(struct mm_struct *mm, + unsigned long addr, pud_t pud) +{ +} + +static inline void page_table_check_pte_set(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pte_t pte) +{ +} + +static inline void page_table_check_pmd_set(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + pmd_t pmd) +{ +} + +static inline void page_table_check_pud_set(struct mm_struct *mm, + unsigned long addr, pud_t *pudp, + pud_t pud) +{ +} + +#endif /* CONFIG_PAGE_TABLE_CHECK */ +#endif /* __LINUX_PAGE_TABLE_CHECK_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 62db6b0176b9..270bf5136c34 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -16,7 +16,7 @@ #include <linux/hardirq.h> /* for in_interrupt() */ #include 
<linux/hugetlb_inline.h> -struct pagevec; +struct folio_batch; static inline bool mapping_empty(struct address_space *mapping) { @@ -24,6 +24,56 @@ static inline bool mapping_empty(struct address_space *mapping) } /* + * mapping_shrinkable - test if page cache state allows inode reclaim + * @mapping: the page cache mapping + * + * This checks the mapping's cache state for the purpose of inode + * reclaim and LRU management. + * + * The caller is expected to hold the i_lock, but is not required to + * hold the i_pages lock, which usually protects cache state. That's + * because the i_lock and the list_lru lock that protect the inode and + * its LRU state don't nest inside the irq-safe i_pages lock. + * + * Cache deletions are performed under the i_lock, which ensures that + * when an inode goes empty, it will reliably get queued on the LRU. + * + * Cache additions do not acquire the i_lock and may race with this + * check, in which case we'll report the inode as shrinkable when it + * has cache pages. This is okay: the shrinker also checks the + * refcount and the referenced bit, which will be elevated or set in + * the process of adding new cache pages to an inode. + */ +static inline bool mapping_shrinkable(struct address_space *mapping) +{ + void *head; + + /* + * On highmem systems, there could be lowmem pressure from the + * inodes before there is highmem pressure from the page + * cache. Make inodes shrinkable regardless of cache state. + */ + if (IS_ENABLED(CONFIG_HIGHMEM)) + return true; + + /* Cache completely empty? Shrink away. */ + head = rcu_access_pointer(mapping->i_pages.xa_head); + if (!head) + return true; + + /* + * The xarray stores single offset-0 entries directly in the + * head pointer, which allows non-resident page cache entries + * to escape the shadow shrinker's list of xarray nodes. The + * inode shrinker needs to pick them up under memory pressure. 
+ */ + if (!xa_is_node(head) && xa_is_value(head)) + return true; + + return false; +} + +/* * Bits in mapping->flags. */ enum mapping_flags { @@ -34,7 +84,7 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, - AS_THP_SUPPORT = 6, /* THPs supported */ + AS_LARGE_FOLIO_SUPPORT = 6, }; /** @@ -126,9 +176,25 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) m->gfp_mask = mask; } -static inline bool mapping_thp_support(struct address_space *mapping) +/** + * mapping_set_large_folios() - Indicate the file supports large folios. + * @mapping: The file. + * + * The filesystem should call this function in its inode constructor to + * indicate that the VFS can use large folios to cache the contents of + * the file. + * + * Context: This should not be called while the inode is active as it + * is non-atomic. + */ +static inline void mapping_set_large_folios(struct address_space *mapping) { - return test_bit(AS_THP_SUPPORT, &mapping->flags); + __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); +} + +static inline bool mapping_large_folio_support(struct address_space *mapping) +{ + return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } static inline int filemap_nr_thps(struct address_space *mapping) @@ -143,7 +209,7 @@ static inline int filemap_nr_thps(struct address_space *mapping) static inline void filemap_nr_thps_inc(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_thp_support(mapping)) + if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else WARN_ON_ONCE(1); @@ -153,7 +219,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping) static inline void filemap_nr_thps_dec(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_thp_support(mapping)) + if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else WARN_ON_ONCE(1); @@ 
-162,149 +228,151 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) void release_pages(struct page **pages, int nr); +struct address_space *page_mapping(struct page *); +struct address_space *folio_mapping(struct folio *); +struct address_space *swapcache_mapping(struct folio *); + +/** + * folio_file_mapping - Find the mapping this folio belongs to. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the mapping of the + * swap file or swap device where the data is stored. This is different + * from the mapping returned by folio_mapping(). The only reason to + * use it is if, like NFS, you return 0 from ->activate_swapfile. + * + * Do not call this for folios which aren't in the page cache or swap cache. + */ +static inline struct address_space *folio_file_mapping(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_mapping(folio); + + return folio->mapping; +} + +static inline struct address_space *page_file_mapping(struct page *page) +{ + return folio_file_mapping(page_folio(page)); +} + /* * For file cache pages, return the address_space, otherwise return NULL */ static inline struct address_space *page_mapping_file(struct page *page) { - if (unlikely(PageSwapCache(page))) + struct folio *folio = page_folio(page); + + if (unlikely(folio_test_swapcache(folio))) return NULL; - return page_mapping(page); + return folio_mapping(folio); } -/* - * speculatively take a reference to a page. - * If the page is free (_refcount == 0), then _refcount is untouched, and 0 - * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned. - * - * This function must be called inside the same rcu_read_lock() section as has - * been used to lookup the page in the pagecache radix-tree (or page table): - * this allows allocators to use a synchronize_rcu() to stabilize _refcount. 
- * - * Unless an RCU grace period has passed, the count of all pages coming out - * of the allocator must be considered unstable. page_count may return higher - * than expected, and put_page must be able to do the right thing when the - * page has been finished with, no matter what it is subsequently allocated - * for (because put_page is what is used here to drop an invalid speculative - * reference). - * - * This is the interesting part of the lockless pagecache (and lockless - * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page) - * has the following pattern: - * 1. find page in radix tree - * 2. conditionally increment refcount - * 3. check the page is still in pagecache (if no, goto 1) - * - * Remove-side that cares about stability of _refcount (eg. reclaim) has the - * following (with the i_pages lock held): - * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) - * B. remove page from pagecache - * C. free the page - * - * There are 2 critical interleavings that matter: - * - 2 runs before A: in this case, A sees elevated refcount and bails out - * - A runs before 2: in this case, 2 sees zero refcount and retries; - * subsequently, B will complete and 1 will find no page, causing the - * lookup to return NULL. +/** + * folio_inode - Get the host inode for this folio. + * @folio: The folio. * - * It is possible that between 1 and 2, the page is removed then the exact same - * page is inserted into the same position in pagecache. That's OK: the - * old find_get_page using a lock could equally have run before or after - * such a re-insertion, depending on order that locks are granted. + * For folios which are in the page cache, return the inode that this folio + * belongs to. * - * Lookups racing against pagecache insertion isn't a big problem: either 1 - * will find the page or it will not. Likewise, the old find_get_page could run - * either before the insertion or afterwards, depending on timing. 
+ * Do not call this for folios which aren't in the page cache. */ -static inline int __page_cache_add_speculative(struct page *page, int count) +static inline struct inode *folio_inode(struct folio *folio) { -#ifdef CONFIG_TINY_RCU -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - /* - * Preempt must be disabled here - we rely on rcu_read_lock doing - * this for us. - * - * Pagecache won't be truncated from interrupt context, so if we have - * found a page in the radix tree here, we have pinned its refcount by - * disabling preempt, and hence no need for the "speculative get" that - * SMP requires. - */ - VM_BUG_ON_PAGE(page_count(page) == 0, page); - page_ref_add(page, count); - -#else - if (unlikely(!page_ref_add_unless(page, count, 0))) { - /* - * Either the page has been freed, or will be freed. - * In either case, retry here and the caller should - * do the right thing (see comments above). - */ - return 0; - } -#endif - VM_BUG_ON_PAGE(PageTail(page), page); + return folio->mapping->host; +} - return 1; +static inline bool page_cache_add_speculative(struct page *page, int count) +{ + return folio_ref_try_add_rcu((struct folio *)page, count); } -static inline int page_cache_get_speculative(struct page *page) +static inline bool page_cache_get_speculative(struct page *page) { - return __page_cache_add_speculative(page, 1); + return page_cache_add_speculative(page, 1); } -static inline int page_cache_add_speculative(struct page *page, int count) +/** + * folio_attach_private - Attach private data to a folio. + * @folio: Folio to attach data to. + * @data: Data to attach to folio. + * + * Attaching private data to a folio increments the page's reference count. + * The data must be detached before the folio will be freed. 
+ */ +static inline void folio_attach_private(struct folio *folio, void *data) { - return __page_cache_add_speculative(page, count); + folio_get(folio); + folio->private = data; + folio_set_private(folio); } /** - * attach_page_private - Attach private data to a page. - * @page: Page to attach data to. - * @data: Data to attach to page. + * folio_change_private - Change private data on a folio. + * @folio: Folio to change the data on. + * @data: Data to set on the folio. * - * Attaching private data to a page increments the page's reference count. - * The data must be detached before the page will be freed. + * Change the private data attached to a folio and return the old + * data. The page must previously have had data attached and the data + * must be detached before the folio will be freed. + * + * Return: Data that was previously attached to the folio. */ -static inline void attach_page_private(struct page *page, void *data) +static inline void *folio_change_private(struct folio *folio, void *data) { - get_page(page); - set_page_private(page, (unsigned long)data); - SetPagePrivate(page); + void *old = folio_get_private(folio); + + folio->private = data; + return old; } /** - * detach_page_private - Detach private data from a page. - * @page: Page to detach data from. + * folio_detach_private - Detach private data from a folio. + * @folio: Folio to detach data from. * - * Removes the data that was previously attached to the page and decrements + * Removes the data that was previously attached to the folio and decrements * the refcount on the page. * - * Return: Data that was attached to the page. + * Return: Data that was attached to the folio. 
*/ -static inline void *detach_page_private(struct page *page) +static inline void *folio_detach_private(struct folio *folio) { - void *data = (void *)page_private(page); + void *data = folio_get_private(folio); - if (!PagePrivate(page)) + if (!folio_test_private(folio)) return NULL; - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); + folio_clear_private(folio); + folio->private = NULL; + folio_put(folio); return data; } +static inline void attach_page_private(struct page *page, void *data) +{ + folio_attach_private(page_folio(page), data); +} + +static inline void *detach_page_private(struct page *page) +{ + return folio_detach_private(page_folio(page)); +} + #ifdef CONFIG_NUMA -extern struct page *__page_cache_alloc(gfp_t gfp); +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else -static inline struct page *__page_cache_alloc(gfp_t gfp) +static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } #endif +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return &filemap_alloc_folio(gfp, 0)->page; +} + static inline struct page *page_cache_alloc(struct address_space *x) { return __page_cache_alloc(mapping_gfp_mask(x)); @@ -331,9 +399,28 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_FOR_MMAP 0x00000040 #define FGP_HEAD 0x00000080 #define FGP_ENTRY 0x00000100 +#define FGP_STABLE 0x00000200 -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, - int fgp_flags, gfp_t cache_gfp_mask); +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); + +/** + * filemap_get_folio - Find and get a folio. + * @mapping: The address_space to search. + * @index: The page index. 
+ * + * Looks up the page cache entry at @mapping & @index. If a folio is + * present, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct folio *filemap_get_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, 0, 0); +} /** * find_get_page - find and get a page reference @@ -377,25 +464,6 @@ static inline struct page *find_lock_page(struct address_space *mapping, } /** - * find_lock_head - Locate, pin and lock a pagecache page. - * @mapping: The address_space to search. - * @index: The page index. - * - * Looks up the page cache entry at @mapping & @index. If there is a - * page cache page, its head page is returned locked and with an increased - * refcount. - * - * Context: May sleep. - * Return: A struct page which is !PageTail, or %NULL if there is no page - * in the cache for this index. - */ -static inline struct page *find_lock_head(struct address_space *mapping, - pgoff_t index) -{ - return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0); -} - -/** * find_or_create_page - locate or add a pagecache page * @mapping: the page's address_space * @index: the page's index into the mapping @@ -443,13 +511,71 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -/* Does this page contain this index? */ -static inline bool thp_contains(struct page *head, pgoff_t index) +#define swapcache_index(folio) __page_file_index(&(folio)->page) + +/** + * folio_index - File index of a folio. + * @folio: The folio. + * + * For a folio which is either in the page cache or the swap cache, + * return its index within the address_space it belongs to. If you know + * the page is definitely in the page cache, you can look at the folio's + * index directly. + * + * Return: The index (offset in units of pages) of a folio in its file. 
+ */ +static inline pgoff_t folio_index(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_index(folio); + return folio->index; +} + +/** + * folio_next_index - Get the index of the next folio. + * @folio: The current folio. + * + * Return: The index of the folio which follows this folio in the file. + */ +static inline pgoff_t folio_next_index(struct folio *folio) +{ + return folio->index + folio_nr_pages(folio); +} + +/** + * folio_file_page - The page for a particular index. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Sometimes after looking up a folio in the page cache, we need to + * obtain the specific page for an index (eg a page fault). + * + * Return: The page containing the file data for this index. + */ +static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) { /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (PageHuge(head)) - return head->index == index; - return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); + if (folio_test_hugetlb(folio)) + return &folio->page; + return folio_page(folio, index & (folio_nr_pages(folio) - 1)); +} + +/** + * folio_contains - Does this folio contain this index? + * @folio: The folio. + * @index: The page index within the file. + * + * Context: The caller should have the page locked in order to prevent + * (eg) shmem from moving the page between the page cache and swap cache + * and changing its index in the middle of the operation. + * Return: true or false. 
+ */ +static inline bool folio_contains(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return folio->index == index; + return index - folio_index(folio) < folio_nr_pages(folio); } /* @@ -465,8 +591,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } -unsigned find_get_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); @@ -502,8 +626,10 @@ static inline struct page *grab_cache_page(struct address_space *mapping, return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } -extern struct page * read_cache_page(struct address_space *mapping, - pgoff_t index, filler_t *filler, void *data); +struct folio *read_cache_folio(struct address_space *, pgoff_t index, + filler_t *filler, void *data); +struct page *read_cache_page(struct address_space *, pgoff_t index, + filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern int read_cache_pages(struct address_space *mapping, @@ -515,6 +641,12 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, NULL, data); } +static inline struct folio *read_mapping_folio(struct address_space *mapping, + pgoff_t index, void *data) +{ + return read_cache_folio(mapping, index, NULL, data); +} + /* * Get index of the page within radix-tree (but not for hugetlb pages). 
* (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) @@ -560,6 +692,27 @@ static inline loff_t page_file_offset(struct page *page) return ((loff_t)page_index(page)) << PAGE_SHIFT; } +/** + * folio_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + */ +static inline loff_t folio_pos(struct folio *folio) +{ + return page_offset(&folio->page); +} + +/** + * folio_file_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + * + * This differs from folio_pos() for folios which belong to a swap file. + * NFS is the only filesystem today which needs to use folio_file_pos(). + */ +static inline loff_t folio_file_pos(struct folio *folio) +{ + return page_file_offset(&folio->page); +} + extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, unsigned long address); @@ -575,13 +728,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } struct wait_page_key { - struct page *page; + struct folio *folio; int bit_nr; int page_match; }; struct wait_page_queue { - struct page *page; + struct folio *folio; int bit_nr; wait_queue_entry_t wait; }; @@ -589,7 +742,7 @@ struct wait_page_queue { static inline bool wake_page_match(struct wait_page_queue *wait_page, struct wait_page_key *key) { - if (wait_page->page != key->page) + if (wait_page->folio != key->folio) return false; key->page_match = 1; @@ -599,20 +752,31 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, return true; } -extern void __lock_page(struct page *page); -extern int __lock_page_killable(struct page *page); -extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); -extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +void __folio_lock(struct folio *folio); +int __folio_lock_killable(struct folio *folio); +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags); -extern void unlock_page(struct page *page); +void 
unlock_page(struct page *page); +void folio_unlock(struct folio *folio); + +static inline bool folio_trylock(struct folio *folio) +{ + return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); +} /* * Return true if the page was successfully locked */ static inline int trylock_page(struct page *page) { - page = compound_head(page); - return (likely(!test_and_set_bit_lock(PG_locked, &page->flags))); + return folio_trylock(page_folio(page)); +} + +static inline void folio_lock(struct folio *folio) +{ + might_sleep(); + if (!folio_trylock(folio)) + __folio_lock(folio); } /* @@ -620,38 +784,30 @@ static inline int trylock_page(struct page *page) */ static inline void lock_page(struct page *page) { + struct folio *folio; might_sleep(); - if (!trylock_page(page)) - __lock_page(page); + + folio = page_folio(page); + if (!folio_trylock(folio)) + __folio_lock(folio); } -/* - * lock_page_killable is like lock_page but can be interrupted by fatal - * signals. It returns 0 if it locked the page and -EINTR if it was - * killed while waiting. - */ -static inline int lock_page_killable(struct page *page) +static inline int folio_lock_killable(struct folio *folio) { might_sleep(); - if (!trylock_page(page)) - return __lock_page_killable(page); + if (!folio_trylock(folio)) + return __folio_lock_killable(folio); return 0; } /* - * lock_page_async - Lock the page, unless this would block. If the page - * is already locked, then queue a callback when the page becomes unlocked. - * This callback can then retry the operation. - * - * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page - * was already locked and the callback defined in 'wait' was queued. + * lock_page_killable is like lock_page but can be interrupted by fatal + * signals. It returns 0 if it locked the page and -EINTR if it was + * killed while waiting. 
*/ -static inline int lock_page_async(struct page *page, - struct wait_page_queue *wait) +static inline int lock_page_killable(struct page *page) { - if (!trylock_page(page)) - return __lock_page_async(page, wait); - return 0; + return folio_lock_killable(page_folio(page)); } /* @@ -659,145 +815,129 @@ static inline int lock_page_async(struct page *page, * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see - * __lock_page_or_retry(). + * __folio_lock_or_retry(). */ -static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, +static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { + struct folio *folio; might_sleep(); - return trylock_page(page) || __lock_page_or_retry(page, mm, flags); + + folio = page_folio(page); + return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); } /* - * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc., + * This is exported only for folio_wait_locked/folio_wait_writeback, etc., * and should not be used directly. */ -extern void wait_on_page_bit(struct page *page, int bit_nr); -extern int wait_on_page_bit_killable(struct page *page, int bit_nr); +void folio_wait_bit(struct folio *folio, int bit_nr); +int folio_wait_bit_killable(struct folio *folio, int bit_nr); /* - * Wait for a page to be unlocked. + * Wait for a folio to be unlocked. * - * This must be called with the caller "holding" the page, - * ie with increased "page->count" so that the page won't + * This must be called with the caller "holding" the folio, + * ie with increased "page->count" so that the folio won't * go away during the wait.. 
*/ +static inline void folio_wait_locked(struct folio *folio) +{ + if (folio_test_locked(folio)) + folio_wait_bit(folio, PG_locked); +} + +static inline int folio_wait_locked_killable(struct folio *folio) +{ + if (!folio_test_locked(folio)) + return 0; + return folio_wait_bit_killable(folio, PG_locked); +} + static inline void wait_on_page_locked(struct page *page) { - if (PageLocked(page)) - wait_on_page_bit(compound_head(page), PG_locked); + folio_wait_locked(page_folio(page)); } static inline int wait_on_page_locked_killable(struct page *page) { - if (!PageLocked(page)) - return 0; - return wait_on_page_bit_killable(compound_head(page), PG_locked); + return folio_wait_locked_killable(page_folio(page)); } -int put_and_wait_on_page_locked(struct page *page, int state); +int folio_put_wait_locked(struct folio *folio, int state); void wait_on_page_writeback(struct page *page); -int wait_on_page_writeback_killable(struct page *page); -extern void end_page_writeback(struct page *page); +void folio_wait_writeback(struct folio *folio); +int folio_wait_writeback_killable(struct folio *folio); +void end_page_writeback(struct page *page); +void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); +void folio_wait_stable(struct folio *folio); +void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); +static inline void __set_page_dirty(struct page *page, + struct address_space *mapping, int warn) +{ + __folio_mark_dirty(page_folio(page), mapping, warn); +} +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, + struct bdi_writeback *wb); +void __folio_cancel_dirty(struct folio *folio); +static inline void folio_cancel_dirty(struct folio *folio) +{ + /* Avoid atomic ops, locking, etc. when not actually needed. 
*/ + if (folio_test_dirty(folio)) + __folio_cancel_dirty(folio); +} +static inline void cancel_dirty_page(struct page *page) +{ + folio_cancel_dirty(page_folio(page)); +} +bool folio_clear_dirty_for_io(struct folio *folio); +bool clear_page_dirty_for_io(struct page *page); +int __must_check folio_write_one(struct folio *folio); +static inline int __must_check write_one_page(struct page *page) +{ + return folio_write_one(page_folio(page)); +} -void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); void page_endio(struct page *page, bool is_write, int err); -/** - * set_page_private_2 - Set PG_private_2 on a page and take a ref - * @page: The page. - * - * Set the PG_private_2 flag on a page and take the reference needed for the VM - * to handle its lifetime correctly. This sets the flag and takes the - * reference unconditionally, so care must be taken not to set the flag again - * if it's already set. - */ -static inline void set_page_private_2(struct page *page) -{ - page = compound_head(page); - get_page(page); - SetPagePrivate2(page); -} - -void end_page_private_2(struct page *page); -void wait_on_page_private_2(struct page *page); -int wait_on_page_private_2_killable(struct page *page); +void folio_end_private_2(struct folio *folio); +void folio_wait_private_2(struct folio *folio); +int folio_wait_private_2_killable(struct folio *folio); /* * Add an arbitrary waiter to a page's wait queue */ -extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); /* - * Fault everything in given userspace address range in. + * Fault in userspace address range. 
*/ -static inline int fault_in_pages_writeable(char __user *uaddr, size_t size) -{ - char __user *end = uaddr + size - 1; - - if (unlikely(size == 0)) - return 0; - - if (unlikely(uaddr > end)) - return -EFAULT; - /* - * Writing zeroes into userspace here is OK, because we know that if - * the zero gets there, we'll be overwriting it. - */ - do { - if (unlikely(__put_user(0, uaddr) != 0)) - return -EFAULT; - uaddr += PAGE_SIZE; - } while (uaddr <= end); - - /* Check whether the range spilled into the next page. */ - if (((unsigned long)uaddr & PAGE_MASK) == - ((unsigned long)end & PAGE_MASK)) - return __put_user(0, end); - - return 0; -} - -static inline int fault_in_pages_readable(const char __user *uaddr, size_t size) -{ - volatile char c; - const char __user *end = uaddr + size - 1; - - if (unlikely(size == 0)) - return 0; - - if (unlikely(uaddr > end)) - return -EFAULT; - - do { - if (unlikely(__get_user(c, uaddr) != 0)) - return -EFAULT; - uaddr += PAGE_SIZE; - } while (uaddr <= end); - - /* Check whether the range spilled into the next page. 
*/ - if (((unsigned long)uaddr & PAGE_MASK) == - ((unsigned long)end & PAGE_MASK)) { - return __get_user(c, end); - } - - (void)c; - return 0; -} +size_t fault_in_writeable(char __user *uaddr, size_t size); +size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); +size_t fault_in_readable(const char __user *uaddr, size_t size); int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow); + pgoff_t index, gfp_t gfp); +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp); +void filemap_remove_folio(struct folio *folio); +void delete_from_page_cache(struct page *page); +void __filemap_remove_folio(struct folio *folio, void *shadow); +static inline void __delete_from_page_cache(struct page *page, void *shadow) +{ + __filemap_remove_folio(page_folio(page), shadow); +} void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); + struct folio_batch *fbatch); +int try_to_release_page(struct page *page, gfp_t gfp); +bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); @@ -817,6 +957,39 @@ static inline int add_to_page_cache(struct page *page, return error; } +/* Must be non-static for BPF error injection */ +int __filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp, void **shadowp); + +bool filemap_range_has_writeback(struct address_space *mapping, + loff_t start_byte, loff_t end_byte); + +/** + * filemap_range_needs_writeback - check if range potentially needs 
writeback + * @mapping: address space within which to check + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Find at least one page in the range supplied, usually used to check if + * direct writing in this range will trigger a writeback. Used by O_DIRECT + * read/write with IOCB_NOWAIT, to see if the caller needs to do + * filemap_write_and_wait_range() before proceeding. + * + * Return: %true if the caller should do filemap_write_and_wait_range() before + * doing O_DIRECT to a page in this range, %false otherwise. + */ +static inline bool filemap_range_needs_writeback(struct address_space *mapping, + loff_t start_byte, + loff_t end_byte) +{ + if (!mapping->nrpages) + return false; + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return false; + return filemap_range_has_writeback(mapping, start_byte, end_byte); +} + /** * struct readahead_control - Describes a readahead request. 
* @@ -856,7 +1029,7 @@ struct readahead_control { void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); -void page_cache_async_ra(struct readahead_control *, struct page *, +void page_cache_async_ra(struct readahead_control *, struct folio *, unsigned long req_count); void readahead_expand(struct readahead_control *ractl, loff_t new_start, size_t new_len); @@ -903,36 +1076,60 @@ void page_cache_async_readahead(struct address_space *mapping, struct page *page, pgoff_t index, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, index); - page_cache_async_ra(&ractl, page, req_count); + page_cache_async_ra(&ractl, page_folio(page), req_count); +} + +static inline struct folio *__readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio; + + BUG_ON(ractl->_batch_count > ractl->_nr_pages); + ractl->_nr_pages -= ractl->_batch_count; + ractl->_index += ractl->_batch_count; + + if (!ractl->_nr_pages) { + ractl->_batch_count = 0; + return NULL; + } + + folio = xa_load(&ractl->mapping->i_pages, ractl->_index); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + ractl->_batch_count = folio_nr_pages(folio); + + return folio; } /** * readahead_page - Get the next page to read. - * @rac: The current readahead request. + * @ractl: The current readahead request. * * Context: The page is locked and has an elevated refcount. The caller * should decreases the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: A pointer to the next page, or %NULL if we are done. 
*/ -static inline struct page *readahead_page(struct readahead_control *rac) +static inline struct page *readahead_page(struct readahead_control *ractl) { - struct page *page; - - BUG_ON(rac->_batch_count > rac->_nr_pages); - rac->_nr_pages -= rac->_batch_count; - rac->_index += rac->_batch_count; + struct folio *folio = __readahead_folio(ractl); - if (!rac->_nr_pages) { - rac->_batch_count = 0; - return NULL; - } + return &folio->page; +} - page = xa_load(&rac->mapping->i_pages, rac->_index); - VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = thp_nr_pages(page); +/** + * readahead_folio - Get the next folio to read. + * @ractl: The current readahead request. + * + * Context: The folio is locked. The caller should unlock the folio once + * all I/O to that folio has completed. + * Return: A pointer to the next folio, or %NULL if we are done. + */ +static inline struct folio *readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio = __readahead_folio(ractl); - return page; + if (folio) + folio_put(folio); + return folio; } static inline unsigned int __readahead_batch(struct readahead_control *rac, @@ -956,16 +1153,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; rac->_batch_count += thp_nr_pages(page); - - /* - * The page cache isn't using multi-index entries yet, - * so the xas cursor needs to be manually moved to the - * next index. This can be removed once the page cache - * is converted. - */ - if (PageHead(page)) - xas_set(&xas, rac->_index + rac->_batch_count); - if (i == array_sz) break; } @@ -1040,6 +1227,34 @@ static inline unsigned long dir_pages(struct inode *inode) } /** + * folio_mkwrite_check_truncate - check if folio was truncated + * @folio: the folio to check + * @inode: the inode to check the folio against + * + * Return: the number of bytes in the folio up to EOF, + * or -EFAULT if the folio was truncated. 
+ */ +static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, + struct inode *inode) +{ + loff_t size = i_size_read(inode); + pgoff_t index = size >> PAGE_SHIFT; + size_t offset = offset_in_folio(folio, size); + + if (!folio->mapping) + return -EFAULT; + + /* folio is wholly inside EOF */ + if (folio_next_index(folio) - 1 < index) + return folio_size(folio); + /* folio is wholly past EOF */ + if (folio->index > index || !offset) + return -EFAULT; + /* folio is partially inside EOF */ + return offset; +} + +/** * page_mkwrite_check_truncate - check if page was truncated * @page: the page to check * @inode: the inode to check the page against @@ -1068,19 +1283,25 @@ static inline int page_mkwrite_check_truncate(struct page *page, } /** - * i_blocks_per_page - How many blocks fit in this page. + * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. - * @page: The page (head page if the page is a THP). + * @folio: The folio. * - * If the block size is larger than the size of this page, return zero. + * If the block size is larger than the size of this folio, return zero. * - * Context: The caller should hold a refcount on the page to prevent it + * Context: The caller should hold a refcount on the folio to prevent it * from being split. - * Return: The number of filesystem blocks covered by this page. + * Return: The number of filesystem blocks covered by this folio. 
*/ static inline +unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +{ + return folio_size(folio) >> inode->i_blkbits; +} + +static inline unsigned int i_blocks_per_page(struct inode *inode, struct page *page) { - return thp_size(page) >> inode->i_blkbits; + return i_blocks_per_folio(inode, page_folio(page)); } #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7f3f19065a9f..dda8d5868c81 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -15,8 +15,10 @@ #define PAGEVEC_SIZE 15 struct page; +struct folio; struct address_space; +/* Layout must match folio_batch */ struct pagevec { unsigned char nr; bool percpu_pvec_drained; @@ -25,7 +27,6 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); -void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); @@ -81,4 +82,68 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } +/** + * struct folio_batch - A collection of folios. + * + * The folio_batch is used to amortise the cost of retrieving and + * operating on a set of folios. The order of folios in the batch may be + * significant (eg delete_from_page_cache_batch()). Some users of the + * folio_batch store "exceptional" entries in it which can be removed + * by calling folio_batch_remove_exceptionals(). + */ +struct folio_batch { + unsigned char nr; + bool percpu_pvec_drained; + struct folio *folios[PAGEVEC_SIZE]; +}; + +/* Layout must match pagevec */ +static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch)); +static_assert(offsetof(struct pagevec, pages) == + offsetof(struct folio_batch, folios)); + +/** + * folio_batch_init() - Initialise a batch of folios + * @fbatch: The folio batch. + * + * A freshly initialised folio_batch contains zero folios. 
+ */ +static inline void folio_batch_init(struct folio_batch *fbatch) +{ + fbatch->nr = 0; +} + +static inline unsigned int folio_batch_count(struct folio_batch *fbatch) +{ + return fbatch->nr; +} + +static inline unsigned int fbatch_space(struct folio_batch *fbatch) +{ + return PAGEVEC_SIZE - fbatch->nr; +} + +/** + * folio_batch_add() - Add a folio to a batch. + * @fbatch: The folio batch. + * @folio: The folio to add. + * + * The folio is added to the end of the batch. + * The batch must have previously been initialised using folio_batch_init(). + * + * Return: The number of slots still available. + */ +static inline unsigned folio_batch_add(struct folio_batch *fbatch, + struct folio *folio) +{ + fbatch->folios[fbatch->nr++] = folio; + return fbatch_space(fbatch); +} + +static inline void folio_batch_release(struct folio_batch *fbatch) +{ + pagevec_release((struct pagevec *)fbatch); +} + +void folio_batch_remove_exceptionals(struct folio_batch *fbatch); #endif /* _LINUX_PAGEVEC_H */ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index d2558121d48c..6f7949b2fd8d 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -3,6 +3,7 @@ #define _LINUX_PART_STAT_H #include <linux/genhd.h> +#include <asm/local.h> struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index f16de399d2de..078225b514d4 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -84,6 +84,14 @@ extern struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, void acpi_pci_add_bus(struct pci_bus *bus); void acpi_pci_remove_bus(struct pci_bus *bus); +#ifdef CONFIG_PCI +void pci_acpi_setup(struct device *dev, struct acpi_device *adev); +void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev); +#else +static inline void pci_acpi_setup(struct device *dev, struct acpi_device *adev) {} +static inline void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev) {} 
+#endif + #ifdef CONFIG_ACPI_PCI_SLOT void acpi_pci_slot_init(void); void acpi_pci_slot_enumerate(struct pci_bus *bus); diff --git a/include/linux/pci.h b/include/linux/pci.h index cd8aa6fce204..8253a5413d7c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -155,6 +155,15 @@ enum pci_interrupt_pin { #define PCI_NUM_INTX 4 /* + * Reading from a device that doesn't respond typically returns ~0. A + * successful read from a device may also return ~0, so you need additional + * information to reliably identify errors. + */ +#define PCI_ERROR_RESPONSE (~0ULL) +#define PCI_SET_ERROR_RESPONSE(val) (*(val) = ((typeof(*(val))) PCI_ERROR_RESPONSE)) +#define PCI_POSSIBLE_ERROR(val) ((val) == ((typeof(val)) PCI_ERROR_RESPONSE)) + +/* * pci_power_t values must match the bits in the Capabilities PME_Support * and Control/Status PowerState fields in the Power Management capability. */ @@ -233,6 +242,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), /* Don't use Relaxed Ordering for TLPs directed at this device */ PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), + /* Device does honor MSI masking despite saying otherwise */ + PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { @@ -423,7 +434,8 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ - unsigned int is_managed:1; + unsigned int is_managed:1; /* Managed via devres */ + unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ unsigned int state_saved:1; unsigned int is_physfn:1; @@ -453,6 +465,7 @@ struct pci_dev { unsigned int link_active_reporting:1;/* Device capable of reporting link active */ unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ unsigned int 
no_command_memory:1; /* No PCI_COMMAND_MEMORY */ + unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -471,7 +484,8 @@ struct pci_dev { u8 ptm_granularity; #endif #ifdef CONFIG_PCI_MSI - const struct attribute_group **msi_irq_groups; + void __iomem *msix_base; + raw_spinlock_t msi_lock; #endif struct pci_vpd vpd; #ifdef CONFIG_PCIE_DPC @@ -900,7 +914,10 @@ struct pci_driver { struct pci_dynids dynids; }; -#define to_pci_driver(drv) container_of(drv, struct pci_driver, driver) +static inline struct pci_driver *to_pci_driver(struct device_driver *drv) +{ + return drv ? container_of(drv, struct pci_driver, driver) : NULL; +} /** * PCI_DEVICE - macro used to describe a specific PCI device @@ -1130,6 +1147,7 @@ u16 pci_find_ext_capability(struct pci_dev *dev, int cap); u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap); +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec); u64 pci_get_dsn(struct pci_dev *dev); @@ -1350,6 +1368,8 @@ void pci_unlock_rescan_remove(void); /* Vital Product Data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +ssize_t pci_read_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, void *buf); +ssize_t pci_write_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); @@ -1498,19 +1518,8 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, #define PCI_IRQ_ALL_TYPES \ (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) -/* kmem_cache style wrapper around pci_alloc_consistent() */ - 
#include <linux/dmapool.h> -#define pci_pool dma_pool -#define pci_pool_create(name, pdev, size, align, allocation) \ - dma_pool_create(name, &pdev->dev, size, align, allocation) -#define pci_pool_destroy(pool) dma_pool_destroy(pool) -#define pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle) -#define pci_pool_zalloc(pool, flags, handle) \ - dma_pool_zalloc(pool, flags, handle) -#define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr) - struct msix_entry { u32 vector; /* Kernel uses to write allocated vector */ u16 entry; /* Driver uses to specify entry, OS writes */ @@ -1671,6 +1680,7 @@ void pci_cfg_access_lock(struct pci_dev *dev); bool pci_cfg_access_trylock(struct pci_dev *dev); void pci_cfg_access_unlock(struct pci_dev *dev); +void pci_dev_lock(struct pci_dev *dev); int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); @@ -1777,7 +1787,10 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } -#define pci_dev_present(ids) (0) + +static inline int pci_dev_present(const struct pci_device_id *ids) +{ return 0; } + #define no_pci_devices() (1) #define pci_dev_put(dev) do { } while (0) @@ -2126,7 +2139,7 @@ void pcibios_disable_device(struct pci_dev *dev); void pcibios_set_master(struct pci_dev *dev); int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); -int pcibios_add_device(struct pci_dev *dev); +int pcibios_device_add(struct pci_dev *dev); void pcibios_release_device(struct pci_dev *dev); #ifdef CONFIG_PCI void pcibios_penalize_isa_irq(int irq, int active); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 011f2f1ea5bb..aad54c666407 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -555,6 +555,7 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 +#define 
PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 @@ -1964,24 +1965,6 @@ #define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003 #define PCI_VENDOR_ID_MOXA 0x1393 -#define PCI_DEVICE_ID_MOXA_RC7000 0x0001 -#define PCI_DEVICE_ID_MOXA_CP102 0x1020 -#define PCI_DEVICE_ID_MOXA_CP102UL 0x1021 -#define PCI_DEVICE_ID_MOXA_CP102U 0x1022 -#define PCI_DEVICE_ID_MOXA_C104 0x1040 -#define PCI_DEVICE_ID_MOXA_CP104U 0x1041 -#define PCI_DEVICE_ID_MOXA_CP104JU 0x1042 -#define PCI_DEVICE_ID_MOXA_CP104EL 0x1043 -#define PCI_DEVICE_ID_MOXA_CT114 0x1140 -#define PCI_DEVICE_ID_MOXA_CP114 0x1141 -#define PCI_DEVICE_ID_MOXA_CP118U 0x1180 -#define PCI_DEVICE_ID_MOXA_CP118EL 0x1181 -#define PCI_DEVICE_ID_MOXA_CP132 0x1320 -#define PCI_DEVICE_ID_MOXA_CP132U 0x1321 -#define PCI_DEVICE_ID_MOXA_CP134U 0x1340 -#define PCI_DEVICE_ID_MOXA_C168 0x1680 -#define PCI_DEVICE_ID_MOXA_CP168U 0x1681 -#define PCI_DEVICE_ID_MOXA_CP168EL 0x1682 #define PCI_DEVICE_ID_MOXA_CP204J 0x2040 #define PCI_DEVICE_ID_MOXA_C218 0x2180 #define PCI_DEVICE_ID_MOXA_C320 0x3200 @@ -2635,8 +2618,8 @@ #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -#define PCI_DEVICE_ID_INTEL_PXH_1 0x032A -#define PCI_DEVICE_ID_INTEL_PXHV 0x032C +#define PCI_DEVICE_ID_INTEL_PXH_1 0x032a +#define PCI_DEVICE_ID_INTEL_PXHV 0x032c #define PCI_DEVICE_ID_INTEL_80332_0 0x0330 #define PCI_DEVICE_ID_INTEL_80332_1 0x0332 #define PCI_DEVICE_ID_INTEL_80333_0 0x0370 @@ -2654,14 +2637,14 @@ #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 -#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F -#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095E +#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084f +#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define 
PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 #define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085 -#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108F +#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 @@ -2755,12 +2738,6 @@ #define PCI_DEVICE_ID_INTEL_82801EB_11 0x24db #define PCI_DEVICE_ID_INTEL_82801EB_12 0x24dc #define PCI_DEVICE_ID_INTEL_82801EB_13 0x24dd -#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1 -#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2 -#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 -#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 -#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab -#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac #define PCI_DEVICE_ID_INTEL_82820_HB 0x2500 #define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501 #define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 @@ -2775,14 +2752,15 @@ #define PCI_DEVICE_ID_INTEL_82915G_IG 0x2582 #define PCI_DEVICE_ID_INTEL_82915GM_HB 0x2590 #define PCI_DEVICE_ID_INTEL_82915GM_IG 0x2592 -#define PCI_DEVICE_ID_INTEL_5000_ERR 0x25F0 -#define PCI_DEVICE_ID_INTEL_5000_FBD0 0x25F5 -#define PCI_DEVICE_ID_INTEL_5000_FBD1 0x25F6 -#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 -#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 -#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 -#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27A0 -#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27A2 +#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1 +#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2 +#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 +#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 +#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac +#define PCI_DEVICE_ID_INTEL_5000_ERR 0x25f0 +#define PCI_DEVICE_ID_INTEL_5000_FBD0 0x25f5 +#define PCI_DEVICE_ID_INTEL_5000_FBD1 0x25f6 #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 #define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641 #define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642 @@ 
-2794,6 +2772,11 @@ #define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698 #define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b #define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e +#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 +#define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 +#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 +#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27a0 +#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27a2 #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 #define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 @@ -2846,7 +2829,7 @@ #define PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_PHY0 0x2c91 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR 0x2c98 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD 0x2c99 -#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST 0x2c9C +#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST 0x2c9c #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL 0x2ca0 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR 0x2ca1 #define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK 0x2ca2 @@ -2958,16 +2941,16 @@ #define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */ #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f -#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 -#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 -#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 #define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 #define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 -#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff #define PCI_DEVICE_ID_INTEL_EP80579_0 0x5031 #define PCI_DEVICE_ID_INTEL_EP80579_1 0x5032 +#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 +#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 +#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 +#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 diff --git a/include/linux/pcs-lynx.h 
b/include/linux/pcs-lynx.h index a6440d6ebe95..5712cc2ce775 100644 --- a/include/linux/pcs-lynx.h +++ b/include/linux/pcs-lynx.h @@ -9,13 +9,10 @@ #include <linux/mdio.h> #include <linux/phylink.h> -struct lynx_pcs { - struct phylink_pcs pcs; - struct mdio_device *mdio; -}; +struct mdio_device *lynx_get_mdio_device(struct phylink_pcs *pcs); -struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio); +struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio); -void lynx_pcs_destroy(struct lynx_pcs *pcs); +void lynx_pcs_destroy(struct phylink_pcs *pcs); #endif /* __LINUX_PCS_LYNX_H */ diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index ae16a9856305..d73a1c08c3e3 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -51,9 +51,9 @@ #define _LINUX_PERCPU_REFCOUNT_H #include <linux/atomic.h> -#include <linux/kernel.h> #include <linux/percpu.h> #include <linux/rcupdate.h> +#include <linux/types.h> #include <linux/gfp.h> struct percpu_ref; @@ -267,6 +267,28 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) } /** + * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the + * caller is responsible for taking RCU. + * + * This function is safe to call as long as @ref is between init and exit. 
+ */ +static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref) +{ + unsigned long __percpu *percpu_count; + bool ret = false; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (likely(__ref_is_percpu(ref, &percpu_count))) { + this_cpu_inc(*percpu_count); + ret = true; + } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { + ret = atomic_long_inc_not_zero(&ref->data->count); + } + return ret; +} + +/** * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get * @@ -283,20 +305,11 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { - unsigned long __percpu *percpu_count; bool ret = false; rcu_read_lock(); - - if (__ref_is_percpu(ref, &percpu_count)) { - this_cpu_inc(*percpu_count); - ret = true; - } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { - ret = atomic_long_inc_not_zero(&ref->data->count); - } - + ret = percpu_ref_tryget_live_rcu(ref); rcu_read_unlock(); - return ret; } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 5e76af742c80..ae4004e7957e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -6,7 +6,6 @@ #include <linux/preempt.h> #include <linux/smp.h> #include <linux/cpumask.h> -#include <linux/printk.h> #include <linux/pfn.h> #include <linux/init.h> @@ -123,7 +122,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1); extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); @@ -131,8 +130,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); extern void __init setup_per_cpu_areas(void); #endif -extern void __percpu *__alloc_percpu_gfp(size_t size, size_t 
align, gfp_t gfp); -extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); +extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9b60bb89d86a..117f230bcdfd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,11 +26,13 @@ # include <asm/local64.h> #endif +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + struct perf_guest_info_callbacks { - int (*is_in_guest)(void); - int (*is_user_mode)(void); - unsigned long (*get_guest_ip)(void); - void (*handle_intel_pt_intr)(void); + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -57,6 +59,7 @@ struct perf_guest_info_callbacks { #include <linux/cgroup.h> #include <linux/refcount.h> #include <linux/security.h> +#include <linux/static_call.h> #include <asm/local.h> struct perf_callchain_entry { @@ -129,6 +132,15 @@ struct hw_perf_event_extra { }; /** + * hw_perf_event::flag values + * + * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific + * usage. 
+ */ +#define PERF_EVENT_FLAG_ARCH 0x0000ffff +#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 + +/** * struct hw_perf_event - performance event hardware details: */ struct hw_perf_event { @@ -610,6 +622,7 @@ struct swevent_hlist { #define PERF_ATTACH_SCHED_CB 0x20 #define PERF_ATTACH_CHILD 0x40 +struct bpf_prog; struct perf_cgroup; struct perf_buffer; @@ -821,6 +834,7 @@ struct perf_event_context { int nr_events; int nr_active; + int nr_user; int is_active; int nr_stat; int nr_freq; @@ -1239,9 +1253,32 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); -extern struct perf_guest_info_callbacks *perf_guest_cbs; -extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +#ifdef CONFIG_GUEST_PERF_EVENTS +extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; + +DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); +DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); +DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); + +static inline unsigned int perf_guest_state(void) +{ + return static_call(__perf_guest_state)(); +} +static inline unsigned long perf_guest_get_ip(void) +{ + return static_call(__perf_guest_get_ip)(); +} +static inline unsigned int perf_guest_handle_intel_pt_intr(void) +{ + return static_call(__perf_guest_handle_intel_pt_intr)(); +} +extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); +extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); +#else +static inline unsigned int perf_guest_state(void) { return 0; } +static inline unsigned long perf_guest_get_ip(void) { return 0; } +static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } +#endif /* CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); 
extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1400,6 +1437,7 @@ perf_event_addr_filters(struct perf_event *event) } extern void perf_event_addr_filters_sync(struct perf_event *event); +extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); extern int perf_output_begin(struct perf_output_handle *handle, struct perf_sample_data *data, @@ -1484,11 +1522,6 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void perf_bp_event(struct perf_event *event, void *data) { } -static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } -static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } - static inline void perf_event_mmap(struct vm_area_struct *vma) { } typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); @@ -1614,4 +1647,26 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event, extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); #endif +/* + * Snapshot branch stack on software events. + * + * Branch stack can be very useful in understanding software events. For + * example, when a long function, e.g. sys_perf_event_open, returns an + * errno, it is not obvious why the function failed. Branch stack could + * provide very helpful information in this type of scenarios. + * + * On software event, it is necessary to stop the hardware branch recorder + * fast. Otherwise, the hardware register/buffer will be flushed with + * entries of the triggering event. Therefore, static call is used to + * stop the hardware recorder. + */ + +/* + * cnt is the number of entries allocated for entries. + * Return number of entries copied to . 
+ */ +typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries, + unsigned int cnt); +DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t); + #endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e24d2c992b11..bc8713a76e03 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -258,6 +258,14 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef __HAVE_ARCH_PTEP_CLEAR +static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_clear(mm, addr, ptep); +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, diff --git a/include/linux/phy.h b/include/linux/phy.h index 736e1d1a47c4..6de8d7a90d78 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -99,7 +99,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay * @PHY_INTERFACE_MODE_RTBI: Reduced TBI - * @PHY_INTERFACE_MODE_SMII: ??? 
MII + * @PHY_INTERFACE_MODE_SMII: Serial MII * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax @@ -155,6 +155,40 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; +/* PHY interface mode bitmap handling */ +#define DECLARE_PHY_INTERFACE_MASK(name) \ + DECLARE_BITMAP(name, PHY_INTERFACE_MODE_MAX) + +static inline void phy_interface_zero(unsigned long *intf) +{ + bitmap_zero(intf, PHY_INTERFACE_MODE_MAX); +} + +static inline bool phy_interface_empty(const unsigned long *intf) +{ + return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); +} + +static inline void phy_interface_and(unsigned long *dst, const unsigned long *a, + const unsigned long *b) +{ + bitmap_and(dst, a, b, PHY_INTERFACE_MODE_MAX); +} + +static inline void phy_interface_or(unsigned long *dst, const unsigned long *a, + const unsigned long *b) +{ + bitmap_or(dst, a, b, PHY_INTERFACE_MODE_MAX); +} + +static inline void phy_interface_set_rgmii(unsigned long *intf) +{ + __set_bit(PHY_INTERFACE_MODE_RGMII, intf); + __set_bit(PHY_INTERFACE_MODE_RGMII_ID, intf); + __set_bit(PHY_INTERFACE_MODE_RGMII_RXID, intf); + __set_bit(PHY_INTERFACE_MODE_RGMII_TXID, intf); +} + /* * phy_supported_speeds - return all speeds currently supported by a PHY device */ @@ -504,11 +538,12 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. - * Bits [15:0] are free to use by the PHY driver to communicate - * driver specific behavior. - * Bits [23:16] are currently reserved for future use. - * Bits [31:24] are reserved for defining generic - * PHY driver behavior. + * + * - Bits [15:0] are free to use by the PHY driver to communicate + * driver specific behavior. + * - Bits [23:16] are currently reserved for future use. 
+ * - Bits [31:24] are reserved for defining generic + * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY @@ -1584,6 +1619,7 @@ int genphy_c45_config_aneg(struct phy_device *phydev); int genphy_c45_loopback(struct phy_device *phydev, bool enable); int genphy_c45_pma_resume(struct phy_device *phydev); int genphy_c45_pma_suspend(struct phy_device *phydev); +int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 237291196ce2..713a0c928b7c 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -20,6 +20,29 @@ enum { MLO_AN_PHY = 0, /* Conventional PHY */ MLO_AN_FIXED, /* Fixed-link mode */ MLO_AN_INBAND, /* In-band protocol */ + + MAC_SYM_PAUSE = BIT(0), + MAC_ASYM_PAUSE = BIT(1), + MAC_10HD = BIT(2), + MAC_10FD = BIT(3), + MAC_10 = MAC_10HD | MAC_10FD, + MAC_100HD = BIT(4), + MAC_100FD = BIT(5), + MAC_100 = MAC_100HD | MAC_100FD, + MAC_1000HD = BIT(6), + MAC_1000FD = BIT(7), + MAC_1000 = MAC_1000HD | MAC_1000FD, + MAC_2500FD = BIT(8), + MAC_5000FD = BIT(9), + MAC_10000FD = BIT(10), + MAC_20000FD = BIT(11), + MAC_25000FD = BIT(12), + MAC_40000FD = BIT(13), + MAC_50000FD = BIT(14), + MAC_56000FD = BIT(15), + MAC_100000FD = BIT(16), + MAC_200000FD = BIT(17), + MAC_400000FD = BIT(18), }; static inline bool phylink_autoneg_inband(unsigned int mode) @@ -61,26 +84,35 @@ enum phylink_op_type { * struct phylink_config - PHYLINK configuration structure * @dev: a pointer to a struct device associated with the MAC * @type: operation type of PHYLINK instance + * @legacy_pre_march2020: driver has not been updated for March 2020 updates + * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls") * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: 
if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. + * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx + * are supported by the MAC/PCS. + * @mac_capabilities: MAC pause/speed/duplex capabilities. */ struct phylink_config { struct device *dev; enum phylink_op_type type; + bool legacy_pre_march2020; bool pcs_poll; bool poll_fixed_state; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); + DECLARE_PHY_INTERFACE_MASK(supported_interfaces); + unsigned long mac_capabilities; }; /** * struct phylink_mac_ops - MAC operations structure. * @validate: Validate and update the link configuration. + * @mac_select_pcs: Select a PCS for the interface mode. * @mac_pcs_get_state: Read the current link state from the hardware. * @mac_prepare: prepare for a major reconfiguration of the interface. * @mac_config: configure the MAC for the selected mode and state. @@ -95,6 +127,8 @@ struct phylink_mac_ops { void (*validate)(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); + struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config, + phy_interface_t interface); void (*mac_pcs_get_state)(struct phylink_config *config, struct phylink_link_state *state); int (*mac_prepare)(struct phylink_config *config, unsigned int mode, @@ -133,14 +167,35 @@ struct phylink_mac_ops { * based on @state->advertising and/or @state->speed and update * @state->interface accordingly. See phylink_helper_basex_speed(). * - * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink expects the - * MAC driver to return all supported link modes. + * When @config->supported_interfaces has been set, phylink will iterate + * over the supported interfaces to determine the full capability of the + * MAC. 
The validation function must not print errors if @state->interface + * is set to an unexpected value. + * + * When @config->supported_interfaces is empty, phylink will call this + * function with @state->interface set to %PHY_INTERFACE_MODE_NA, and + * expects the MAC driver to return all supported link modes. * * If the @state->interface mode is not supported, then the @supported * mask must be cleared. */ void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); +/** + * mac_select_pcs: Select a PCS for the interface mode. + * @config: a pointer to a &struct phylink_config. + * @interface: PHY interface mode for PCS + * + * Return the &struct phylink_pcs for the specified interface mode, or + * NULL if none is required, or an error pointer on error. + * + * This must not modify any state. It is used to query which PCS should + * be used. Phylink will use this during validation to ensure that the + * configuration is valid, and when setting a configuration to internally + * set the PCS that will be used. + */ +struct phylink_pcs *mac_select_pcs(struct phylink_config *config, + phy_interface_t interface); /** * mac_pcs_get_state() - Read the current inband link state from the hardware @@ -153,6 +208,10 @@ void validate(struct phylink_config *config, unsigned long *supported, * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. + * + * Note: This is a legacy method. This function will not be called unless + * legacy_pre_march2020 is set in &struct phylink_config and there is no + * PCS attached. */ void mac_pcs_get_state(struct phylink_config *config, struct phylink_link_state *state); @@ -193,6 +252,15 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. 
* + * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set + * in their &phylnk_config and which don't have a PCS), this function will be + * called on each link up event, and to also change the in-band advert. For + * non-legacy drivers, it will only be called to reconfigure the MAC for a + * "major" change in e.g. interface mode. It will not be called for changes + * in speed, duplex or pause modes or to change the in-band advertisement. + * In any case, it is strongly preferred that speed, duplex and pause settings + * are handled in the mac_link_up() method and not in this method. + * * (this requires a rewrite - please refer to mac_link_up() for situations * where the PCS and MAC are not tightly integrated.) * @@ -277,6 +345,10 @@ int mac_finish(struct phylink_config *config, unsigned int mode, /** * mac_an_restart() - restart 802.3z BaseX autonegotiation * @config: a pointer to a &struct phylink_config. + * + * Note: This is a legacy method. This function will not be called unless + * legacy_pre_march2020 is set in &struct phylink_config and there is no + * PCS attached. */ void mac_an_restart(struct phylink_config *config); @@ -344,6 +416,7 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. + * @pcs_validate: validate the link configuration. * @pcs_get_state: read the current MAC PCS link state from the hardware. * @pcs_config: configure the MAC PCS for the selected mode and state. * @pcs_an_restart: restart 802.3z BaseX autonegotiation. @@ -351,6 +424,8 @@ struct phylink_pcs { * (where necessary). */ struct phylink_pcs_ops { + int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state); void (*pcs_get_state)(struct phylink_pcs *pcs, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int mode, @@ -364,6 +439,23 @@ struct phylink_pcs_ops { #if 0 /* For kernel-doc purposes only. 
*/ /** + * pcs_validate() - validate the link configuration. + * @pcs: a pointer to a &struct phylink_pcs. + * @supported: ethtool bitmask for supported link modes. + * @state: a const pointer to a &struct phylink_link_state. + * + * Validate the interface mode, and advertising's autoneg bit, removing any + * media ethtool link modes that would not be supportable from the supported + * mask. Phylink will propagate the changes to the advertising mask. See the + * &struct phylink_mac_ops validate() method. + * + * Returns -EINVAL if the interface mode/autoneg mode is not supported. + * Returns non-zero positive if the link state can be supported. + */ +int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state); + +/** * pcs_get_state() - Read the current inband link state from the hardware * @pcs: a pointer to a &struct phylink_pcs. * @state: a pointer to a &struct phylink_link_state. @@ -433,6 +525,12 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); #endif +void phylink_get_linkmodes(unsigned long *linkmodes, phy_interface_t interface, + unsigned long mac_capabilities); +void phylink_generic_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state); + struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); @@ -484,13 +582,15 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); +void phylink_set_10g_modes(unsigned long *mask); void phylink_helper_basex_speed(struct phylink_link_state *state); +void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, + u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, struct phylink_link_state *state); -int 
phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, - phy_interface_t interface, - const unsigned long *advertising); +int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface, + const unsigned long *advertising); int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, phy_interface_t interface, const unsigned long *advertising); diff --git a/include/linux/pid.h b/include/linux/pid.h index af308e15f174..343abf22092e 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -78,6 +78,7 @@ struct file; extern struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); +struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); int pidfd_create(struct pid *pid, unsigned int flags); static inline struct pid *get_pid(struct pid *pid) diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index eee0e3948537..2422211d6a5a 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -91,6 +91,8 @@ struct pinctrl_map; * configuration (eg. the currently selected mux function) drive values on * the line. Use argument 1 to enable output mode, argument 0 to disable * it. + * @PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS: this will configure the output impedance + * of the pin with the value passed as argument. The argument is in ohms. 
* @PIN_CONFIG_PERSIST_STATE: retain pin state across sleep or controller reset * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells @@ -129,6 +131,7 @@ enum pin_config_param { PIN_CONFIG_MODE_PWM, PIN_CONFIG_OUTPUT, PIN_CONFIG_OUTPUT_ENABLE, + PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS, PIN_CONFIG_PERSIST_STATE, PIN_CONFIG_POWER_SOURCE, PIN_CONFIG_SKEW_DELAY, diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 174601554b06..f9c5ac80d59b 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -152,14 +152,6 @@ struct packet_stacked_data }; #define PSD_POOL_SIZE 64 -struct pktcdvd_kobj -{ - struct kobject kobj; - struct pktcdvd_device *pd; -}; -#define to_pktcdvdkobj(_k) \ - ((struct pktcdvd_kobj*)container_of(_k,struct pktcdvd_kobj,kobj)) - struct pktcdvd_device { struct block_device *bdev; /* dev attached */ @@ -183,6 +175,8 @@ struct pktcdvd_device spinlock_t lock; /* Serialize access to bio_queue */ struct rb_root bio_queue; /* Work queue of bios we need to handle */ int bio_queue_size; /* Number of nodes in bio_queue */ + bool congested; /* Someone is waiting for bio_queue_size + * to drop. */ sector_t current_sector; /* Keep track of where the elevator is */ atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ /* needs to be run. 
*/ @@ -195,8 +189,6 @@ struct pktcdvd_device int write_congestion_on; struct device *dev; /* sysfs pktcdvd[0-7] dev */ - struct pktcdvd_kobj *kobj_stat; /* sysfs pktcdvd[0-7]/stat/ */ - struct pktcdvd_kobj *kobj_wqueue; /* sysfs pktcdvd[0-7]/write_queue/ */ struct dentry *dfs_d_root; /* debugfs: devname directory */ struct dentry *dfs_f_info; /* debugfs: info file */ diff --git a/include/linux/platform_data/ad5755.h b/include/linux/platform_data/ad5755.h deleted file mode 100644 index e371e08f04bc..000000000000 --- a/include/linux/platform_data/ad5755.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2012 Analog Devices Inc. - */ -#ifndef __LINUX_PLATFORM_DATA_AD5755_H__ -#define __LINUX_PLATFORM_DATA_AD5755_H__ - -enum ad5755_mode { - AD5755_MODE_VOLTAGE_0V_5V = 0, - AD5755_MODE_VOLTAGE_0V_10V = 1, - AD5755_MODE_VOLTAGE_PLUSMINUS_5V = 2, - AD5755_MODE_VOLTAGE_PLUSMINUS_10V = 3, - AD5755_MODE_CURRENT_4mA_20mA = 4, - AD5755_MODE_CURRENT_0mA_20mA = 5, - AD5755_MODE_CURRENT_0mA_24mA = 6, -}; - -enum ad5755_dc_dc_phase { - AD5755_DC_DC_PHASE_ALL_SAME_EDGE = 0, - AD5755_DC_DC_PHASE_A_B_SAME_EDGE_C_D_OPP_EDGE = 1, - AD5755_DC_DC_PHASE_A_C_SAME_EDGE_B_D_OPP_EDGE = 2, - AD5755_DC_DC_PHASE_90_DEGREE = 3, -}; - -enum ad5755_dc_dc_freq { - AD5755_DC_DC_FREQ_250kHZ = 0, - AD5755_DC_DC_FREQ_410kHZ = 1, - AD5755_DC_DC_FREQ_650kHZ = 2, -}; - -enum ad5755_dc_dc_maxv { - AD5755_DC_DC_MAXV_23V = 0, - AD5755_DC_DC_MAXV_24V5 = 1, - AD5755_DC_DC_MAXV_27V = 2, - AD5755_DC_DC_MAXV_29V5 = 3, -}; - -enum ad5755_slew_rate { - AD5755_SLEW_RATE_64k = 0, - AD5755_SLEW_RATE_32k = 1, - AD5755_SLEW_RATE_16k = 2, - AD5755_SLEW_RATE_8k = 3, - AD5755_SLEW_RATE_4k = 4, - AD5755_SLEW_RATE_2k = 5, - AD5755_SLEW_RATE_1k = 6, - AD5755_SLEW_RATE_500 = 7, - AD5755_SLEW_RATE_250 = 8, - AD5755_SLEW_RATE_125 = 9, - AD5755_SLEW_RATE_64 = 10, - AD5755_SLEW_RATE_32 = 11, - AD5755_SLEW_RATE_16 = 12, - AD5755_SLEW_RATE_8 = 13, - AD5755_SLEW_RATE_4 = 14, - 
AD5755_SLEW_RATE_0_5 = 15, -}; - -enum ad5755_slew_step_size { - AD5755_SLEW_STEP_SIZE_1 = 0, - AD5755_SLEW_STEP_SIZE_2 = 1, - AD5755_SLEW_STEP_SIZE_4 = 2, - AD5755_SLEW_STEP_SIZE_8 = 3, - AD5755_SLEW_STEP_SIZE_16 = 4, - AD5755_SLEW_STEP_SIZE_32 = 5, - AD5755_SLEW_STEP_SIZE_64 = 6, - AD5755_SLEW_STEP_SIZE_128 = 7, - AD5755_SLEW_STEP_SIZE_256 = 8, -}; - -/** - * struct ad5755_platform_data - AD5755 DAC driver platform data - * @ext_dc_dc_compenstation_resistor: Whether an external DC-DC converter - * compensation register is used. - * @dc_dc_phase: DC-DC converter phase. - * @dc_dc_freq: DC-DC converter frequency. - * @dc_dc_maxv: DC-DC maximum allowed boost voltage. - * @dac.mode: The mode to be used for the DAC output. - * @dac.ext_current_sense_resistor: Whether an external current sense resistor - * is used. - * @dac.enable_voltage_overrange: Whether to enable 20% voltage output overrange. - * @dac.slew.enable: Whether to enable digital slew. - * @dac.slew.rate: Slew rate of the digital slew. - * @dac.slew.step_size: Slew step size of the digital slew. 
- **/ -struct ad5755_platform_data { - bool ext_dc_dc_compenstation_resistor; - enum ad5755_dc_dc_phase dc_dc_phase; - enum ad5755_dc_dc_freq dc_dc_freq; - enum ad5755_dc_dc_maxv dc_dc_maxv; - - struct { - enum ad5755_mode mode; - bool ext_current_sense_resistor; - bool enable_voltage_overrange; - struct { - bool enable; - enum ad5755_slew_rate rate; - enum ad5755_slew_step_size step_size; - } slew; - } dac[4]; -}; - -#endif diff --git a/include/linux/platform_data/bcm7038_wdt.h b/include/linux/platform_data/bcm7038_wdt.h new file mode 100644 index 000000000000..e18cfd9ec8f9 --- /dev/null +++ b/include/linux/platform_data/bcm7038_wdt.h @@ -0,0 +1,8 @@ +#ifndef __BCM7038_WDT_PDATA_H +#define __BCM7038_WDT_PDATA_H + +struct bcm7038_wdt_platform_data { + const char *clk_name; +}; + +#endif /* __BCM7038_WDT_PDATA_H */ diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index 1d30bf278231..2b5676ff35be 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -125,7 +125,7 @@ struct brcmfmac_pd_cc_entry { */ struct brcmfmac_pd_cc { int table_size; - struct brcmfmac_pd_cc_entry table[0]; + struct brcmfmac_pd_cc_entry table[]; }; /** diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h index b9f682459f08..11a2a23fd9b2 100644 --- a/include/linux/platform_data/clk-fch.h +++ b/include/linux/platform_data/clk-fch.h @@ -12,7 +12,7 @@ struct fch_clk_data { void __iomem *base; - u32 is_rv; + char *name; }; #endif /* __CLK_FCH_H */ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 02599687770c..df3c78c92ca2 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -205,7 +205,7 @@ struct cros_ec_dev { struct cros_ec_debugfs *debug_info; bool has_kb_wake_angle; u16 cmd_offset; - u32 features[2]; + struct ec_response_get_features features; }; #define 
to_cros_ec_dev(dev) container_of(dev, struct cros_ec_dev, class_dev) @@ -227,10 +227,13 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); -int cros_ec_check_features(struct cros_ec_dev *ec, int feature); +bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); +int cros_ec_command(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, + int outsize, void *indata, int insize); + /** * cros_ec_get_time_ns() - Return time in ns. * diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 101333fe2b8d..40185f9d7c14 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -25,12 +25,75 @@ enum mlxreg_wdt_type { }; /** + * enum mlxreg_hotplug_kind - kind of hotplug entry + * + * @MLXREG_HOTPLUG_DEVICE_NA: do not care; + * @MLXREG_HOTPLUG_LC_PRESENT: entry for line card presence in/out events; + * @MLXREG_HOTPLUG_LC_VERIFIED: entry for line card verification status events + * coming after line card security signature validation; + * @MLXREG_HOTPLUG_LC_POWERED: entry for line card power on/off events; + * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming + * after hardware-firmware synchronization handshake; + * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card + PHYs ready / unready state; + * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware + * availability / unavailability for the ports on line card; + * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive + * event indicates that system should power off the line + * card for which this event has been received; + */ +enum mlxreg_hotplug_kind { + MLXREG_HOTPLUG_DEVICE_NA = 0, + MLXREG_HOTPLUG_LC_PRESENT = 1, + MLXREG_HOTPLUG_LC_VERIFIED = 2, + MLXREG_HOTPLUG_LC_POWERED = 
3, + MLXREG_HOTPLUG_LC_SYNCED = 4, + MLXREG_HOTPLUG_LC_READY = 5, + MLXREG_HOTPLUG_LC_ACTIVE = 6, + MLXREG_HOTPLUG_LC_THERMAL = 7, +}; + +/** + * enum mlxreg_hotplug_device_action - hotplug device action required for + * driver's connectivity + * + * @MLXREG_HOTPLUG_DEVICE_DEFAULT_ACTION: probe device for 'on' event, remove + * for 'off' event; + * @MLXREG_HOTPLUG_DEVICE_PLATFORM_ACTION: probe platform device for 'on' + * event, remove for 'off' event; + * @MLXREG_HOTPLUG_DEVICE_NO_ACTION: no connectivity action is required; + */ +enum mlxreg_hotplug_device_action { + MLXREG_HOTPLUG_DEVICE_DEFAULT_ACTION = 0, + MLXREG_HOTPLUG_DEVICE_PLATFORM_ACTION = 1, + MLXREG_HOTPLUG_DEVICE_NO_ACTION = 2, +}; + +/** + * struct mlxreg_core_hotplug_notifier - hotplug notifier block: + * + * @identity: notifier identity name; + * @handle: user handle to be passed by user handler function; + * @user_handler: user handler function associated with the event; + */ +struct mlxreg_core_hotplug_notifier { + char identity[MLXREG_CORE_LABEL_MAX_SIZE]; + void *handle; + int (*user_handler)(void *handle, enum mlxreg_hotplug_kind kind, u8 action); +}; + +/** * struct mlxreg_hotplug_device - I2C device data: * * @adapter: I2C device adapter; * @client: I2C device client; * @brdinfo: device board information; * @nr: I2C device adapter number, to which device is to be attached; + * @pdev: platform device, if device is instantiated as a platform device; + * @action: action to be performed upon event receiving; + * @handle: user handle to be passed by user handler function; + * @user_handler: user handler function associated with the event; + * @notifier: pointer to event notifier block; * * Structure represents I2C hotplug device static data (board topology) and * dynamic data (related kernel objects handles). 
@@ -40,6 +103,11 @@ struct mlxreg_hotplug_device { struct i2c_client *client; struct i2c_board_info *brdinfo; int nr; + struct platform_device *pdev; + enum mlxreg_hotplug_device_action action; + void *handle; + int (*user_handler)(void *handle, enum mlxreg_hotplug_kind kind, u8 action); + struct mlxreg_core_hotplug_notifier *notifier; }; /** @@ -51,12 +119,18 @@ struct mlxreg_hotplug_device { * @bit: attribute effective bit; * @capability: attribute capability register; * @reg_prsnt: attribute presence register; + * @reg_sync: attribute synch register; + * @reg_pwr: attribute power register; + * @reg_ena: attribute enable register; * @mode: access mode; * @np - pointer to node platform associated with attribute; * @hpdev - hotplug device data; + * @notifier: pointer to event notifier block; * @health_cntr: dynamic device health indication counter; * @attached: true if device has been attached after good health indication; * @regnum: number of registers occupied by multi-register attribute; + * @slot: slot number, at which device is located; + * @secured: if set indicates that entry access is secured; */ struct mlxreg_core_data { char label[MLXREG_CORE_LABEL_MAX_SIZE]; @@ -65,18 +139,25 @@ struct mlxreg_core_data { u32 bit; u32 capability; u32 reg_prsnt; + u32 reg_sync; + u32 reg_pwr; + u32 reg_ena; umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; + struct mlxreg_core_hotplug_notifier *notifier; u32 health_cntr; bool attached; u8 regnum; + u8 slot; + u8 secured; }; /** * struct mlxreg_core_item - same type components controlled by the driver: * * @data: component data; + * @kind: kind of hotplug attribute; * @aggr_mask: group aggregation mask; * @reg: group interrupt status register; * @mask: group interrupt mask; @@ -89,6 +170,7 @@ struct mlxreg_core_data { */ struct mlxreg_core_item { struct mlxreg_core_data *data; + enum mlxreg_hotplug_kind kind; u32 aggr_mask; u32 reg; u32 mask; diff --git a/include/linux/platform_data/mtd-nand-omap2.h 
b/include/linux/platform_data/mtd-nand-omap2.h index de6ada739121..8c2f1f185353 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -7,6 +7,7 @@ #define _MTD_NAND_OMAP2_H #include <linux/mtd/partitions.h> +#include <linux/mod_devicetable.h> #define GPMC_BCH_NUM_REMAINDER 8 @@ -61,4 +62,11 @@ struct gpmc_nand_regs { void __iomem *gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER]; }; -#endif + +static const struct of_device_id omap_nand_ids[] = { + { .compatible = "ti,omap2-nand", }, + { .compatible = "ti,am64-nand", }, + {}, +}; + +#endif /* _MTD_NAND_OMAP2_H */ diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h deleted file mode 100644 index b324d03e580c..000000000000 --- a/include/linux/platform_data/ntc_thermistor.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * ntc_thermistor.h - NTC Thermistors - * - * Copyright (C) 2010 Samsung Electronics - * MyungJoo Ham <[email protected]> - */ -#ifndef _LINUX_NTC_H -#define _LINUX_NTC_H - -struct iio_channel; - -enum ntc_thermistor_type { - TYPE_B57330V2103, - TYPE_B57891S0103, - TYPE_NCPXXWB473, - TYPE_NCPXXWF104, - TYPE_NCPXXWL333, - TYPE_NCPXXXH103, -}; - -struct ntc_thermistor_platform_data { - /* - * One (not both) of read_uV and read_ohm should be provided and only - * one of the two should be provided. - * Both functions should return negative value for an error case. - * - * pullup_uV, pullup_ohm, pulldown_ohm, and connect are required to use - * read_uV() - * - * How to setup pullup_ohm, pulldown_ohm, and connect is - * described at Documentation/hwmon/ntc_thermistor.rst - * - * pullup/down_ohm: 0 for infinite / not-connected - * - * chan: iio_channel pointer to communicate with the ADC which the - * thermistor is using for conversion of the analog values. 
- */ - int (*read_uv)(struct ntc_thermistor_platform_data *); - unsigned int pullup_uv; - - unsigned int pullup_ohm; - unsigned int pulldown_ohm; - enum { NTC_CONNECTED_POSITIVE, NTC_CONNECTED_GROUND } connect; - struct iio_channel *chan; - - int (*read_ohm)(void); -}; - -#endif /* _LINUX_NTC_H */ diff --git a/include/linux/platform_data/pata_ixp4xx_cf.h b/include/linux/platform_data/pata_ixp4xx_cf.h deleted file mode 100644 index e60fa41da4a5..000000000000 --- a/include/linux/platform_data/pata_ixp4xx_cf.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_PATA_IXP4XX_H -#define __PLATFORM_DATA_PATA_IXP4XX_H - -#include <linux/types.h> - -/* - * This structure provide a means for the board setup code - * to give information to th pata_ixp4xx driver. It is - * passed as platform_data. - */ -struct ixp4xx_pata_data { - volatile u32 *cs0_cfg; - volatile u32 *cs1_cfg; - unsigned long cs0_bits; - unsigned long cs1_bits; - void __iomem *cmd; - void __iomem *ctl; -}; - -#endif diff --git a/include/linux/platform_data/spi-clps711x.h b/include/linux/platform_data/spi-clps711x.h deleted file mode 100644 index efaa596848c9..000000000000 --- a/include/linux/platform_data/spi-clps711x.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CLPS711X SPI bus driver definitions - * - * Copyright (C) 2012 Alexander Shiyan <[email protected]> - */ - -#ifndef ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H -#define ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H - -/* Board specific platform_data */ -struct spi_clps711x_pdata { - int *chipselect; /* Array of GPIO-numbers */ - int num_chipselect; /* Total count of GPIOs */ -}; - -#endif diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 9837fb011f2f..eb556f988d57 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,9 @@ struct sysc_regbits { s8 emufree_shift; }; +#define 
SYSC_MODULE_QUIRK_OTG BIT(30) +#define SYSC_QUIRK_RESET_ON_CTX_LOST BIT(29) +#define SYSC_QUIRK_REINIT_ON_CTX_LOST BIT(28) #define SYSC_QUIRK_REINIT_ON_RESUME BIT(27) #define SYSC_QUIRK_GPMC_DEBUG BIT(26) #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h new file mode 100644 index 000000000000..126d082c3f2e --- /dev/null +++ b/include/linux/platform_data/tps68470.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * TI TPS68470 PMIC platform data definition. + * + * Copyright (c) 2021 Red Hat Inc. + * + * Red Hat authors: + * Hans de Goede <[email protected]> + */ +#ifndef __PDATA_TPS68470_H +#define __PDATA_TPS68470_H + +enum tps68470_regulators { + TPS68470_CORE, + TPS68470_ANA, + TPS68470_VCM, + TPS68470_VIO, + TPS68470_VSIO, + TPS68470_AUX1, + TPS68470_AUX2, + TPS68470_NUM_REGULATORS +}; + +struct regulator_init_data; + +struct tps68470_regulator_platform_data { + const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS]; +}; + +struct tps68470_clk_platform_data { + const char *consumer_dev_name; + const char *consumer_con_id; +}; + +#endif diff --git a/include/linux/platform_data/ux500_wdt.h b/include/linux/platform_data/ux500_wdt.h deleted file mode 100644 index de6a4ad41e76..000000000000 --- a/include/linux/platform_data/ux500_wdt.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST Ericsson SA 2011 - * - * STE Ux500 Watchdog platform data - */ -#ifndef __UX500_WDT_H -#define __UX500_WDT_H - -/** - * struct ux500_wdt_data - */ -struct ux500_wdt_data { - unsigned int timeout; - bool has_28_bits_resolution; -}; - -#endif /* __UX500_WDT_H */ diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 17dc5cb6f3f2..a571b47ff362 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -77,6 
+77,8 @@ #define ASUS_WMI_DEVID_THERMAL_CTRL 0x00110011 #define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */ #define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013 +#define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024 +#define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025 /* Power */ #define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012 diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h new file mode 100644 index 000000000000..62d2bc774067 --- /dev/null +++ b/include/linux/platform_data/x86/simatic-ipc-base.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Siemens SIMATIC IPC drivers + * + * Copyright (c) Siemens AG, 2018-2021 + * + * Authors: + * Henning Schild <[email protected]> + * Gerd Haeussler <[email protected]> + */ + +#ifndef __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H +#define __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H + +#include <linux/types.h> + +#define SIMATIC_IPC_DEVICE_NONE 0 +#define SIMATIC_IPC_DEVICE_227D 1 +#define SIMATIC_IPC_DEVICE_427E 2 +#define SIMATIC_IPC_DEVICE_127E 3 +#define SIMATIC_IPC_DEVICE_227E 4 + +struct simatic_ipc_platform { + u8 devmode; +}; + +u32 simatic_ipc_get_membase0(unsigned int p2sb); + +#endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H */ diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h new file mode 100644 index 000000000000..f3b76b39776b --- /dev/null +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Siemens SIMATIC IPC drivers + * + * Copyright (c) Siemens AG, 2018-2021 + * + * Authors: + * Henning Schild <[email protected]> + * Gerd Haeussler <[email protected]> + */ + +#ifndef __PLATFORM_DATA_X86_SIMATIC_IPC_H +#define __PLATFORM_DATA_X86_SIMATIC_IPC_H + +#include <linux/dmi.h> +#include <linux/platform_data/x86/simatic-ipc-base.h> + +#define SIMATIC_IPC_DMI_ENTRY_OEM 129 +/* binary type */ +#define SIMATIC_IPC_DMI_TYPE 0xff +#define 
SIMATIC_IPC_DMI_GROUP 0x05 +#define SIMATIC_IPC_DMI_ENTRY 0x02 +#define SIMATIC_IPC_DMI_TID 0x02 + +enum simatic_ipc_station_ids { + SIMATIC_IPC_INVALID_STATION_ID = 0, + SIMATIC_IPC_IPC227D = 0x00000501, + SIMATIC_IPC_IPC427D = 0x00000701, + SIMATIC_IPC_IPC227E = 0x00000901, + SIMATIC_IPC_IPC277E = 0x00000902, + SIMATIC_IPC_IPC427E = 0x00000A01, + SIMATIC_IPC_IPC477E = 0x00000A02, + SIMATIC_IPC_IPC127E = 0x00000D01, +}; + +static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) +{ + struct { + u8 type; /* type (0xff = binary) */ + u8 len; /* len of data entry */ + u8 group; + u8 entry; + u8 tid; + __le32 station_id; /* station id (LE) */ + } __packed * data_entry = (void *)data + sizeof(struct dmi_header); + + while ((u8 *)data_entry < data + max_len) { + if (data_entry->type == SIMATIC_IPC_DMI_TYPE && + data_entry->len == sizeof(*data_entry) && + data_entry->group == SIMATIC_IPC_DMI_GROUP && + data_entry->entry == SIMATIC_IPC_DMI_ENTRY && + data_entry->tid == SIMATIC_IPC_DMI_TID) { + return le32_to_cpu(data_entry->station_id); + } + data_entry = (void *)((u8 *)(data_entry) + data_entry->len); + } + + return SIMATIC_IPC_INVALID_STATION_ID; +} + +static inline void +simatic_ipc_find_dmi_entry_helper(const struct dmi_header *dh, void *_data) +{ + u32 *id = _data; + + if (dh->type != SIMATIC_IPC_DMI_ENTRY_OEM) + return; + + *id = simatic_ipc_get_station_id((u8 *)dh, dh->length); +} + +#endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_H */ diff --git a/include/linux/platform_data/x86/soc.h b/include/linux/platform_data/x86/soc.h new file mode 100644 index 000000000000..da05f425587a --- /dev/null +++ b/include/linux/platform_data/x86/soc.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Helpers for Intel SoC model detection + * + * Copyright (c) 2019, Intel Corporation. 
+ */ + +#ifndef __PLATFORM_DATA_X86_SOC_H +#define __PLATFORM_DATA_X86_SOC_H + +#if IS_ENABLED(CONFIG_X86) + +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> + +#define SOC_INTEL_IS_CPU(soc, type) \ +static inline bool soc_intel_is_##soc(void) \ +{ \ + static const struct x86_cpu_id soc##_cpu_ids[] = { \ + X86_MATCH_INTEL_FAM6_MODEL(type, NULL), \ + {} \ + }; \ + const struct x86_cpu_id *id; \ + \ + id = x86_match_cpu(soc##_cpu_ids); \ + if (id) \ + return true; \ + return false; \ +} + +SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT); +SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT); +SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); +SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); +SOC_INTEL_IS_CPU(cml, KABYLAKE_L); + +#else /* IS_ENABLED(CONFIG_X86) */ + +static inline bool soc_intel_is_byt(void) +{ + return false; +} + +static inline bool soc_intel_is_cht(void) +{ + return false; +} + +static inline bool soc_intel_is_apl(void) +{ + return false; +} + +static inline bool soc_intel_is_glk(void) +{ + return false; +} + +static inline bool soc_intel_is_cml(void) +{ + return false; +} +#endif /* IS_ENABLED(CONFIG_X86) */ + +#endif /* __PLATFORM_DATA_X86_SOC_H */ diff --git a/include/linux/plist.h b/include/linux/plist.h index 66bab1bca35c..0f352c1d3c80 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -73,8 +73,11 @@ #ifndef _LINUX_PLIST_H_ #define _LINUX_PLIST_H_ -#include <linux/kernel.h> +#include <linux/container_of.h> #include <linux/list.h> +#include <linux/types.h> + +#include <asm/bug.h> struct plist_head { struct list_head node_list; diff --git a/include/linux/pm.h b/include/linux/pm.h index 1d8209c09686..e1e9402180b9 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -300,47 +300,59 @@ struct dev_pm_ops { int (*runtime_idle)(struct device *dev); }; +#define SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend = pm_sleep_ptr(suspend_fn), \ + .resume = pm_sleep_ptr(resume_fn), \ + .freeze = pm_sleep_ptr(suspend_fn), \ + .thaw = pm_sleep_ptr(resume_fn), \ 
+ .poweroff = pm_sleep_ptr(suspend_fn), \ + .restore = pm_sleep_ptr(resume_fn), + +#define LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_late = pm_sleep_ptr(suspend_fn), \ + .resume_early = pm_sleep_ptr(resume_fn), \ + .freeze_late = pm_sleep_ptr(suspend_fn), \ + .thaw_early = pm_sleep_ptr(resume_fn), \ + .poweroff_late = pm_sleep_ptr(suspend_fn), \ + .restore_early = pm_sleep_ptr(resume_fn), + +#define NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_noirq = pm_sleep_ptr(suspend_fn), \ + .resume_noirq = pm_sleep_ptr(resume_fn), \ + .freeze_noirq = pm_sleep_ptr(suspend_fn), \ + .thaw_noirq = pm_sleep_ptr(resume_fn), \ + .poweroff_noirq = pm_sleep_ptr(suspend_fn), \ + .restore_noirq = pm_sleep_ptr(resume_fn), + +#define RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + .runtime_suspend = suspend_fn, \ + .runtime_resume = resume_fn, \ + .runtime_idle = idle_fn, + #ifdef CONFIG_PM_SLEEP #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend = suspend_fn, \ - .resume = resume_fn, \ - .freeze = suspend_fn, \ - .thaw = resume_fn, \ - .poweroff = suspend_fn, \ - .restore = resume_fn, + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM_SLEEP #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend_late = suspend_fn, \ - .resume_early = resume_fn, \ - .freeze_late = suspend_fn, \ - .thaw_early = resume_fn, \ - .poweroff_late = suspend_fn, \ - .restore_early = resume_fn, + LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM_SLEEP #define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ - .suspend_noirq = suspend_fn, \ - .resume_noirq = resume_fn, \ - .freeze_noirq = suspend_fn, \ - .thaw_noirq = resume_fn, \ - .poweroff_noirq = suspend_fn, \ - .restore_noirq = resume_fn, + NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #else #define 
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif #ifdef CONFIG_PM #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ - .runtime_suspend = suspend_fn, \ - .runtime_resume = resume_fn, \ - .runtime_idle = idle_fn, + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #else #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) #endif @@ -349,9 +361,9 @@ struct dev_pm_ops { * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. */ -#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ -const struct dev_pm_ops __maybe_unused name = { \ - SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +#define DEFINE_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +static const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } /* @@ -367,17 +379,27 @@ const struct dev_pm_ops __maybe_unused name = { \ * .resume_early(), to the same routines as .runtime_suspend() and * .runtime_resume(), respectively (and analogously for hibernation). */ +#define DEFINE_UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ +static const struct dev_pm_ops name = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ +} + +/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ +#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +const struct dev_pm_ops __maybe_unused name = { \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + +/* Deprecated. Use DEFINE_UNIVERSAL_DEV_PM_OPS() instead. 
*/ #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ const struct dev_pm_ops __maybe_unused name = { \ SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define pm_ptr(_ptr) (_ptr) -#else -#define pm_ptr(_ptr) NULL -#endif +#define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr)) +#define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr)) /* * PM_EVENT_ messages @@ -499,6 +521,7 @@ const struct dev_pm_ops __maybe_unused name = { \ */ enum rpm_status { + RPM_INVALID = -1, RPM_ACTIVE = 0, RPM_RESUMING, RPM_SUSPENDED, @@ -612,6 +635,7 @@ struct dev_pm_info { unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; + enum rpm_status last_status; int runtime_error; int autosuspend_delay; u64 last_busy; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 84150a22fd7c..879c138c7b8e 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -156,9 +156,9 @@ int devm_pm_opp_set_clkname(struct device *dev, const char *name); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); -struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); void dev_pm_opp_detach_genpd(struct opp_table *opp_table); -int devm_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); +int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, 
struct dev_pm_opp *src_opp); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -376,7 +376,7 @@ static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) return -EOPNOTSUPP; } -static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs) +static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs) { return ERR_PTR(-EOPNOTSUPP); } @@ -384,7 +384,7 @@ static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, cons static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {} static inline int devm_pm_opp_attach_genpd(struct device *dev, - const char **names, + const char * const *names, struct device ***virt_devs) { return -EOPNOTSUPP; @@ -439,7 +439,9 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev) #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); +int devm_pm_opp_of_add_table_indexed(struct device *dev, int index); int dev_pm_opp_of_add_table_noclk(struct device *dev, int index); +int devm_pm_opp_of_add_table_noclk(struct device *dev, int index); void dev_pm_opp_of_remove_table(struct device *dev); int devm_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); @@ -465,11 +467,21 @@ static inline int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) return -EOPNOTSUPP; } +static inline int devm_pm_opp_of_add_table_indexed(struct device *dev, int index) +{ + return -EOPNOTSUPP; +} + static inline int dev_pm_opp_of_add_table_noclk(struct device *dev, int index) { return -EOPNOTSUPP; } +static inline int devm_pm_opp_of_add_table_noclk(struct 
device *dev, int index) +{ + return -EOPNOTSUPP; +} + static inline void dev_pm_opp_of_remove_table(struct device *dev) { } diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 222da43b7096..016de5776b6d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); +extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); extern int devm_pm_runtime_enable(struct device *dev); @@ -129,7 +130,7 @@ static inline bool pm_runtime_suspended(struct device *dev) * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * - * Return %true if runtime PM is enabled for @dev and its runtime PM status is + * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. 
* * Note that the return value of this function can only be trusted if it is @@ -283,6 +284,8 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} +static inline void pm_runtime_release_supplier(struct device_link *link, + bool check_idle) {} #endif /* !CONFIG_PM */ diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index cd5b62db9084..e63a63aa47a3 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -17,8 +17,8 @@ #ifdef CONFIG_PM extern int dev_pm_set_wake_irq(struct device *dev, int irq); -extern int dev_pm_set_dedicated_wake_irq(struct device *dev, - int irq); +extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq); +extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq); extern void dev_pm_clear_wake_irq(struct device *dev); extern void dev_pm_enable_wake_irq(struct device *dev); extern void dev_pm_disable_wake_irq(struct device *dev); @@ -35,6 +35,11 @@ static inline int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) return 0; } +static inline int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq) +{ + return 0; +} + static inline void dev_pm_clear_wake_irq(struct device *dev) { } diff --git a/include/linux/pmu.h b/include/linux/pmu.h index 52453a24a24f..c677442d007c 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -13,7 +13,7 @@ #include <uapi/linux/pmu.h> -extern int find_via_pmu(void); +extern int __init find_via_pmu(void); extern int pmu_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h deleted file mode 100644 index 17d7d0d20eca..000000000000 --- a/include/linux/pnfs_osd_xdr.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - * 
pNFS-osd on-the-wire data structures - * - * Copyright (C) 2007 Panasas Inc. [year of first publication] - * All rights reserved. - * - * Benny Halevy <[email protected]> - * Boaz Harrosh <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * See the file COPYING included with this distribution for more details. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Panasas company nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef __PNFS_OSD_XDR_H__ -#define __PNFS_OSD_XDR_H__ - -#include <linux/nfs_fs.h> - -/* - * draft-ietf-nfsv4-minorversion-22 - * draft-ietf-nfsv4-pnfs-obj-12 - */ - -/* Layout Structure */ - -enum pnfs_osd_raid_algorithm4 { - PNFS_OSD_RAID_0 = 1, - PNFS_OSD_RAID_4 = 2, - PNFS_OSD_RAID_5 = 3, - PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ -}; - -/* struct pnfs_osd_data_map4 { - * uint32_t odm_num_comps; - * length4 odm_stripe_unit; - * uint32_t odm_group_width; - * uint32_t odm_group_depth; - * uint32_t odm_mirror_cnt; - * pnfs_osd_raid_algorithm4 odm_raid_algorithm; - * }; - */ -struct pnfs_osd_data_map { - u32 odm_num_comps; - u64 odm_stripe_unit; - u32 odm_group_width; - u32 odm_group_depth; - u32 odm_mirror_cnt; - u32 odm_raid_algorithm; -}; - -/* struct pnfs_osd_objid4 { - * deviceid4 oid_device_id; - * uint64_t oid_partition_id; - * uint64_t oid_object_id; - * }; - */ -struct pnfs_osd_objid { - struct nfs4_deviceid oid_device_id; - u64 oid_partition_id; - u64 oid_object_id; -}; - -/* For printout. 
I use: - * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); - * BE style - */ -#define _DEVID_LO(oid_device_id) \ - (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) - -#define _DEVID_HI(oid_device_id) \ - (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) - -enum pnfs_osd_version { - PNFS_OSD_MISSING = 0, - PNFS_OSD_VERSION_1 = 1, - PNFS_OSD_VERSION_2 = 2 -}; - -struct pnfs_osd_opaque_cred { - u32 cred_len; - void *cred; -}; - -enum pnfs_osd_cap_key_sec { - PNFS_OSD_CAP_KEY_SEC_NONE = 0, - PNFS_OSD_CAP_KEY_SEC_SSV = 1, -}; - -/* struct pnfs_osd_object_cred4 { - * pnfs_osd_objid4 oc_object_id; - * pnfs_osd_version4 oc_osd_version; - * pnfs_osd_cap_key_sec4 oc_cap_key_sec; - * opaque oc_capability_key<>; - * opaque oc_capability<>; - * }; - */ -struct pnfs_osd_object_cred { - struct pnfs_osd_objid oc_object_id; - u32 oc_osd_version; - u32 oc_cap_key_sec; - struct pnfs_osd_opaque_cred oc_cap_key; - struct pnfs_osd_opaque_cred oc_cap; -}; - -/* struct pnfs_osd_layout4 { - * pnfs_osd_data_map4 olo_map; - * uint32_t olo_comps_index; - * pnfs_osd_object_cred4 olo_components<>; - * }; - */ -struct pnfs_osd_layout { - struct pnfs_osd_data_map olo_map; - u32 olo_comps_index; - u32 olo_num_comps; - struct pnfs_osd_object_cred *olo_comps; -}; - -/* Device Address */ -enum pnfs_osd_targetid_type { - OBJ_TARGET_ANON = 1, - OBJ_TARGET_SCSI_NAME = 2, - OBJ_TARGET_SCSI_DEVICE_ID = 3, -}; - -/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { - * case OBJ_TARGET_SCSI_NAME: - * string oti_scsi_name<>; - * - * case OBJ_TARGET_SCSI_DEVICE_ID: - * opaque oti_scsi_device_id<>; - * - * default: - * void; - * }; - * - * union pnfs_osd_targetaddr4 switch (bool ota_available) { - * case TRUE: - * netaddr4 ota_netaddr; - * case FALSE: - * void; - * }; - * - * struct pnfs_osd_deviceaddr4 { - * pnfs_osd_targetid4 oda_targetid; - * pnfs_osd_targetaddr4 oda_targetaddr; - * uint64_t oda_lun; - * opaque oda_systemid<>; - * 
pnfs_osd_object_cred4 oda_root_obj_cred; - * opaque oda_osdname<>; - * }; - */ -struct pnfs_osd_targetid { - u32 oti_type; - struct nfs4_string oti_scsi_device_id; -}; - -/* struct netaddr4 { - * // see struct rpcb in RFC1833 - * string r_netid<>; // network id - * string r_addr<>; // universal address - * }; - */ -struct pnfs_osd_net_addr { - struct nfs4_string r_netid; - struct nfs4_string r_addr; -}; - -struct pnfs_osd_targetaddr { - u32 ota_available; - struct pnfs_osd_net_addr ota_netaddr; -}; - -struct pnfs_osd_deviceaddr { - struct pnfs_osd_targetid oda_targetid; - struct pnfs_osd_targetaddr oda_targetaddr; - u8 oda_lun[8]; - struct nfs4_string oda_systemid; - struct pnfs_osd_object_cred oda_root_obj_cred; - struct nfs4_string oda_osdname; -}; - -/* LAYOUTCOMMIT: layoutupdate */ - -/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { - * case TRUE: - * int64_t dsu_delta; - * case FALSE: - * void; - * }; - * - * struct pnfs_osd_layoutupdate4 { - * pnfs_osd_deltaspaceused4 olu_delta_space_used; - * bool olu_ioerr_flag; - * }; - */ -struct pnfs_osd_layoutupdate { - u32 dsu_valid; - s64 dsu_delta; - u32 olu_ioerr_flag; -}; - -/* LAYOUTRETURN: I/O Rrror Report */ - -enum pnfs_osd_errno { - PNFS_OSD_ERR_EIO = 1, - PNFS_OSD_ERR_NOT_FOUND = 2, - PNFS_OSD_ERR_NO_SPACE = 3, - PNFS_OSD_ERR_BAD_CRED = 4, - PNFS_OSD_ERR_NO_ACCESS = 5, - PNFS_OSD_ERR_UNREACHABLE = 6, - PNFS_OSD_ERR_RESOURCE = 7 -}; - -/* struct pnfs_osd_ioerr4 { - * pnfs_osd_objid4 oer_component; - * length4 oer_comp_offset; - * length4 oer_comp_length; - * bool oer_iswrite; - * pnfs_osd_errno4 oer_errno; - * }; - */ -struct pnfs_osd_ioerr { - struct pnfs_osd_objid oer_component; - u64 oer_comp_offset; - u64 oer_comp_length; - u32 oer_iswrite; - u32 oer_errno; -}; - -/* OSD XDR Client API */ -/* Layout helpers */ -/* Layout decoding is done in two parts: - * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part - * of the layout. @iter members need not be initialized. 
- * Returned: - * @layout members are set. (@layout->olo_comps set to NULL). - * - * Zero on success, or negative error if passed xdr is broken. - * - * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns - * false, to decode the next component. - * Returned: - * true if there is more to decode or false if we are done or error. - * - * Example: - * struct pnfs_osd_xdr_decode_layout_iter iter; - * struct pnfs_osd_layout layout; - * struct pnfs_osd_object_cred comp; - * int status; - * - * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); - * if (unlikely(status)) - * goto err; - * while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { - * // All of @comp strings point to inside the xdr_buffer - * // or scrach buffer. Copy them out to user memory eg. - * copy_single_comp(dest_comp++, &comp); - * } - * if (unlikely(status)) - * goto err; - */ - -struct pnfs_osd_xdr_decode_layout_iter { - unsigned total_comps; - unsigned decoded_comps; -}; - -extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, - struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); - -extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, - struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, - int *err); - -/* Device Info helpers */ - -/* Note: All strings inside @deviceaddr point to space inside @p. - * @p should stay valid while @deviceaddr is in use. 
- */ -extern void pnfs_osd_xdr_decode_deviceaddr( - struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); - -/* layoutupdate (layout_commit) xdr helpers */ -extern int -pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, - struct pnfs_osd_layoutupdate *lou); - -/* osd_ioerror encoding (layout_return) */ -extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); -extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); - -#endif /* __PNFS_OSD_XDR_H__ */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 00fef0064355..5bbcd280bfd2 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -184,8 +184,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK +void clear_posix_cputimers_work(struct task_struct *p); void posix_cputimers_init_work(void); #else +static inline void clear_posix_cputimers_work(struct task_struct *p) { } static inline void posix_cputimers_init_work(void) { } #endif diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index dd24756a8af7..c417abd2ab70 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -78,7 +78,7 @@ enum max17042_register { MAX17042_T_empty = 0x34, MAX17042_FullCAP0 = 0x35, - MAX17042_LAvg_empty = 0x36, + MAX17042_IAvg_empty = 0x36, MAX17042_FCTC = 0x37, MAX17042_RCOMP0 = 0x38, MAX17042_TempCo = 0x39, @@ -221,7 +221,7 @@ struct max17042_config_data { u16 fullcap; /* 0x10 */ u16 fullcapnom; /* 0x23 */ u16 socempty; /* 0x33 */ - u16 lavg_empty; /* 0x36 */ + u16 iavg_empty; /* 0x36 */ u16 dqacc; /* 0x45 */ u16 dpacc; /* 0x46 */ u16 qrtbl00; /* 0x12 */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9ca1f120a211..e218041cc000 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -66,6 +66,7 @@ enum { POWER_SUPPLY_HEALTH_WARM, 
POWER_SUPPLY_HEALTH_COOL, POWER_SUPPLY_HEALTH_HOT, + POWER_SUPPLY_HEALTH_NO_BATTERY, }; enum { @@ -132,6 +133,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, POWER_SUPPLY_PROP_CHARGE_CONTROL_START_THRESHOLD, /* in percents! */ POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD, /* in percents! */ + POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR, POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT, POWER_SUPPLY_PROP_INPUT_VOLTAGE_LIMIT, POWER_SUPPLY_PROP_INPUT_POWER_LIMIT, @@ -202,6 +204,12 @@ enum power_supply_usb_type { POWER_SUPPLY_USB_TYPE_APPLE_BRICK_ID, /* Apple Charging Method */ }; +enum power_supply_charge_behaviour { + POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO = 0, + POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE, + POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE, +}; + enum power_supply_notifier_events { PSY_EVENT_PROP_CHANGED, }; @@ -342,37 +350,206 @@ struct power_supply_resistance_temp_table { #define POWER_SUPPLY_OCV_TEMP_MAX 20 -/* +/** + * struct power_supply_battery_info - information about batteries + * @technology: from the POWER_SUPPLY_TECHNOLOGY_* enum + * @energy_full_design_uwh: energy content when fully charged in microwatt + * hours + * @charge_full_design_uah: charge content when fully charged in microampere + * hours + * @voltage_min_design_uv: minimum voltage across the poles when the battery + * is at minimum voltage level in microvolts. If the voltage drops below this + * level the battery will need precharging when using CC/CV charging. + * @voltage_max_design_uv: voltage across the poles when the battery is fully + * charged in microvolts. This is the "nominal voltage" i.e. the voltage + * printed on the label of the battery. + * @tricklecharge_current_ua: the tricklecharge current used when trickle + * charging the battery in microamperes. This is the charging phase when the + * battery is completely empty and we need to carefully trickle in some + * charge until we reach the precharging voltage. 
+ * @precharge_current_ua: current to use in the precharge phase in microamperes, + * the precharge rate is limited by limiting the current to this value. + * @precharge_voltage_max_uv: the maximum voltage allowed when precharging in + * microvolts. When we pass this voltage we will nominally switch over to the + * CC (constant current) charging phase defined by constant_charge_current_ua + * and constant_charge_voltage_max_uv. + * @charge_term_current_ua: when the current in the CV (constant voltage) + * charging phase drops below this value in microamperes the charging will + * terminate completely and not restart until the voltage over the battery + * poles reach charge_restart_voltage_uv unless we use maintenance charging. + * @charge_restart_voltage_uv: when the battery has been fully charged by + * CC/CV charging and charging has been disabled, and the voltage subsequently + * drops below this value in microvolts, the charging will be restarted + * (typically using CV charging). + * @overvoltage_limit_uv: If the voltage exceeds the nominal voltage + * voltage_max_design_uv and we reach this voltage level, all charging must + * stop and emergency procedures take place, such as shutting down the system + * in some cases. + * @constant_charge_current_max_ua: current in microamperes to use in the CC + * (constant current) charging phase. The charging rate is limited + * by this current. This is the main charging phase and as the current is + * constant into the battery the voltage slowly ascends to + * constant_charge_voltage_max_uv. + * @constant_charge_voltage_max_uv: voltage in microvolts signifying the end of + * the CC (constant current) charging phase and the beginning of the CV + * (constant voltage) charging phase. + * @factory_internal_resistance_uohm: the internal resistance of the battery + * at fabrication time, expressed in microohms. 
This resistance will vary + * depending on the lifetime and charge of the battery, so this is just a + * nominal ballpark figure. + * @ocv_temp: array indicating the open circuit voltage (OCV) capacity + * temperature indices. This is an array of temperatures in degrees Celsius + * indicating which capacity table to use for a certain temperature, since + * the capacity for reasons of chemistry will be different at different + * temperatures. Determining capacity is a multivariate problem and the + * temperature is the first variable we determine. + * @temp_ambient_alert_min: the battery will go outside of operating conditions + * when the ambient temperature goes below this temperature in degrees + * Celsius. + * @temp_ambient_alert_max: the battery will go outside of operating conditions + * when the ambient temperature goes above this temperature in degrees + * Celsius. + * @temp_alert_min: the battery should issue an alert if the internal + * temperature goes below this temperature in degrees Celsius. + * @temp_alert_max: the battery should issue an alert if the internal + * temperature goes above this temperature in degrees Celsius. + * @temp_min: the battery will go outside of operating conditions when + * the internal temperature goes below this temperature in degrees Celsius. + * Normally this means the system should shut down. + * @temp_max: the battery will go outside of operating conditions when + * the internal temperature goes above this temperature in degrees Celsius. + * Normally this means the system should shut down. + * @ocv_table: for each entry in ocv_temp there is a corresponding entry in + * ocv_table and a size for each entry in ocv_table_size. These arrays + * determine the capacity in percent in relation to the voltage in microvolts + * at the indexed temperature. + * @ocv_table_size: for each entry in ocv_temp this array is giving the size of + * each entry in the array of capacity arrays in ocv_table. 
+ * @resist_table: this is a table that correlates a battery temperature to the + * expected internal resistance at this temperature. The resistance is given + * as a percentage of factory_internal_resistance_uohm. Knowing the + * resistance of the battery is usually necessary for calculating the open + * circuit voltage (OCV) that is then used with the ocv_table to calculate + * the capacity of the battery. The resist_table must be ordered descending + * by temperature: highest temperature with lowest resistance first, lowest + * temperature with highest resistance last. + * @resist_table_size: the number of items in the resist_table. + * * This is the recommended struct to manage static battery parameters, * populated by power_supply_get_battery_info(). Most platform drivers should * use these for consistency. + * * Its field names must correspond to elements in enum power_supply_property. * The default field value is -EINVAL. - * Power supply class itself doesn't use this. + * + * The charging parameters here assume a CC/CV charging scheme. This method + * is most common with Lithium Ion batteries (other methods are possible) and + * looks as follows: + * + * ^ Battery voltage + * | --- overvoltage_limit_uv + * | + * | ................................................... + * | .. constant_charge_voltage_max_uv + * | .. + * | . + * | . + * | . + * | . + * | . + * | .. precharge_voltage_max_uv + * | .. + * |. (trickle charging) + * +------------------------------------------------------------------> time + * + * ^ Current into the battery + * | + * | ............. constant_charge_current_max_ua + * | . . + * | . . + * | . . + * | . . + * | . .. + * | . .... + * | . ..... + * | ... precharge_current_ua ....... charge_term_current_ua + * | . . + * | . . + * |.... tricklecharge_current_ua . + * | . 
+ * +-----------------------------------------------------------------> time + * + * These diagrams are synchronized on time and the voltage and current + * follow each other. + * + * With CC/CV charging commence over time like this for an empty battery: + * + * 1. When the battery is completely empty it may need to be charged with + * an especially small current so that electrons just "trickle in", + * this is the tricklecharge_current_ua. + * + * 2. Next a small initial pre-charge current (precharge_current_ua) + * is applied if the voltage is below precharge_voltage_max_uv until we + * reach precharge_voltage_max_uv. CAUTION: in some texts this is referred + * to as "trickle charging" but the use in the Linux kernel is different + * see below! + * + * 3. Then the main charging current is applied, which is called the constant + * current (CC) phase. A current regulator is set up to allow + * constant_charge_current_max_ua of current to flow into the battery. + * The chemical reaction in the battery will make the voltage go up as + * charge goes into the battery. This current is applied until we reach + * the constant_charge_voltage_max_uv voltage. + * + * 4. At this voltage we switch over to the constant voltage (CV) phase. This + * means we allow current to go into the battery, but we keep the voltage + * fixed. This current will continue to charge the battery while keeping + * the voltage the same. A chemical reaction in the battery goes on + * storing energy without affecting the voltage. Over time the current + * will slowly drop and when we reach charge_term_current_ua we will + * end the constant voltage phase. + * + * After this the battery is fully charged, and if we do not support maintenance + * charging, the charging will not restart until power dissipation makes the + * voltage fall so that we reach charge_restart_voltage_uv and at this point + * we restart charging at the appropriate phase, usually this will be inside + * the CV phase. 
+ * + * If we support maintenance charging the voltage is however kept high after + * the CV phase with a very low current. This is meant to let the same charge + * go in for usage while the charger is still connected, mainly for + * dissipation for the power consuming entity while connected to the + * charger. + * + * All charging MUST terminate if the overvoltage_limit_uv is ever reached. + * Overcharging Lithium Ion cells can be DANGEROUS and lead to fire or + * explosions. + * + * The power supply class itself doesn't use this struct as of now. */ struct power_supply_battery_info { - unsigned int technology; /* from the enum above */ - int energy_full_design_uwh; /* microWatt-hours */ - int charge_full_design_uah; /* microAmp-hours */ - int voltage_min_design_uv; /* microVolts */ - int voltage_max_design_uv; /* microVolts */ - int tricklecharge_current_ua; /* microAmps */ - int precharge_current_ua; /* microAmps */ - int precharge_voltage_max_uv; /* microVolts */ - int charge_term_current_ua; /* microAmps */ - int charge_restart_voltage_uv; /* microVolts */ - int overvoltage_limit_uv; /* microVolts */ - int constant_charge_current_max_ua; /* microAmps */ - int constant_charge_voltage_max_uv; /* microVolts */ - int factory_internal_resistance_uohm; /* microOhms */ - int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];/* celsius */ - int temp_ambient_alert_min; /* celsius */ - int temp_ambient_alert_max; /* celsius */ - int temp_alert_min; /* celsius */ - int temp_alert_max; /* celsius */ - int temp_min; /* celsius */ - int temp_max; /* celsius */ + unsigned int technology; + int energy_full_design_uwh; + int charge_full_design_uah; + int voltage_min_design_uv; + int voltage_max_design_uv; + int tricklecharge_current_ua; + int precharge_current_ua; + int precharge_voltage_max_uv; + int charge_term_current_ua; + int charge_restart_voltage_uv; + int overvoltage_limit_uv; + int constant_charge_current_max_ua; + int constant_charge_voltage_max_uv; + int 
factory_internal_resistance_uohm; + int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX]; + int temp_ambient_alert_min; + int temp_ambient_alert_max; + int temp_alert_min; + int temp_alert_max; + int temp_min; + int temp_max; struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; struct power_supply_resistance_temp_table *resist_table; @@ -405,7 +582,7 @@ devm_power_supply_get_by_phandle(struct device *dev, const char *property) #endif /* CONFIG_OF */ extern int power_supply_get_battery_info(struct power_supply *psy, - struct power_supply_battery_info *info); + struct power_supply_battery_info **info_out); extern void power_supply_put_battery_info(struct power_supply *psy, struct power_supply_battery_info *info); extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, @@ -539,4 +716,28 @@ static inline void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} #endif +#ifdef CONFIG_SYSFS +ssize_t power_supply_charge_behaviour_show(struct device *dev, + unsigned int available_behaviours, + enum power_supply_charge_behaviour behaviour, + char *buf); + +int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf); +#else +static inline +ssize_t power_supply_charge_behaviour_show(struct device *dev, + unsigned int available_behaviours, + enum power_supply_charge_behaviour behaviour, + char *buf) +{ + return -EOPNOTSUPP; +} + +static inline int power_supply_charge_behaviour_parse(unsigned int available_behaviours, + const char *buf) +{ + return -EOPNOTSUPP; +} +#endif + #endif /* __LINUX_POWER_SUPPLY_H__ */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 4d244e295e85..b4381f255a5c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -77,6 +77,27 @@ /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> +/** + * interrupt_context_level - return interrupt context 
level + * + * Returns the current interrupt context level. + * 0 - normal context + * 1 - softirq context + * 2 - hardirq context + * 3 - NMI context + */ +static __always_inline unsigned char interrupt_context_level(void) +{ + unsigned long pc = preempt_count(); + unsigned char level = 0; + + level += !!(pc & (NMI_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + + return level; +} + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT @@ -122,9 +143,10 @@ * The preempt_count offset after spin_lock() */ #if !defined(CONFIG_PREEMPT_RT) -#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET #else -#define PREEMPT_LOCK_OFFSET 0 +/* Locks on RT do not disable preemption */ +#define PREEMPT_LOCK_OFFSET 0 #endif /* diff --git a/include/linux/printk.h b/include/linux/printk.h index 85b656f82d75..9497f6b98339 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -198,6 +198,7 @@ void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; +void printk_trigger_flush(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -274,6 +275,9 @@ static inline void dump_stack_lvl(const char *log_lvl) static inline void dump_stack(void) { } +static inline void printk_trigger_flush(void) +{ +} #endif #ifdef CONFIG_SMP diff --git a/include/linux/profile.h b/include/linux/profile.h index fd18ca96f557..11db1ec516e2 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -31,11 +31,6 @@ static inline int create_proc_profile(void) } #endif -enum profile_type { - PROFILE_TASK_EXIT, - PROFILE_MUNMAP -}; - #ifdef CONFIG_PROFILING extern int prof_on __read_mostly; @@ -66,23 +61,6 @@ static 
inline void profile_hit(int type, void *ip) struct task_struct; struct mm_struct; -/* task is in do_exit() */ -void profile_task_exit(struct task_struct * task); - -/* task is dead, free task struct ? Returns 1 if - * the task was taken, 0 if the task should be freed. - */ -int profile_handoff_task(struct task_struct * task); - -/* sys_munmap */ -void profile_munmap(unsigned long addr); - -int task_handoff_register(struct notifier_block * n); -int task_handoff_unregister(struct notifier_block * n); - -int profile_event_register(enum profile_type, struct notifier_block * n); -int profile_event_unregister(enum profile_type, struct notifier_block * n); - #else #define prof_on 0 @@ -107,29 +85,6 @@ static inline void profile_hit(int type, void *ip) return; } -static inline int task_handoff_register(struct notifier_block * n) -{ - return -ENOSYS; -} - -static inline int task_handoff_unregister(struct notifier_block * n) -{ - return -ENOSYS; -} - -static inline int profile_event_register(enum profile_type t, struct notifier_block * n) -{ - return -ENOSYS; -} - -static inline int profile_event_unregister(enum profile_type t, struct notifier_block * n) -{ - return -ENOSYS; -} - -#define profile_task_exit(a) do { } while (0) -#define profile_handoff_task(a) (0) -#define profile_munmap(a) do { } while (0) #endif /* CONFIG_PROFILING */ diff --git a/include/linux/property.h b/include/linux/property.h index 357513a977e5..7399a0b45f98 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -15,6 +15,7 @@ #include <linux/types.h> struct device; +struct net_device; enum dev_prop_type { DEV_PROP_U8, @@ -121,6 +122,8 @@ void fwnode_handle_put(struct fwnode_handle *fwnode); int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); +void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index); + unsigned int device_get_child_node_count(struct device *dev); static inline bool device_property_read_bool(struct device *dev, @@ -377,10 +380,6 @@ 
property_entries_dup(const struct property_entry *properties); void property_entries_free(const struct property_entry *properties); -int device_add_properties(struct device *dev, - const struct property_entry *properties); -void device_remove_properties(struct device *dev); - bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); @@ -389,11 +388,7 @@ const void *device_get_match_data(struct device *dev); int device_get_phy_mode(struct device *dev); -void *device_get_mac_address(struct device *dev, char *addr, int alen); - int fwnode_get_phy_mode(struct fwnode_handle *fwnode); -void *fwnode_get_mac_address(struct fwnode_handle *fwnode, - char *addr, int alen); struct fwnode_handle *fwnode_graph_get_next_endpoint( const struct fwnode_handle *fwnode, struct fwnode_handle *prev); struct fwnode_handle * @@ -404,9 +399,6 @@ struct fwnode_handle *fwnode_graph_get_remote_port( const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_endpoint( const struct fwnode_handle *fwnode); -struct fwnode_handle * -fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port, - u32 endpoint); static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) { @@ -421,7 +413,8 @@ static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) * one. * @FWNODE_GRAPH_DEVICE_DISABLED: That the device to which the remote * endpoint of the given endpoint belongs to, - * may be disabled. + * may be disabled, or that the endpoint is not + * connected. 
*/ #define FWNODE_GRAPH_ENDPOINT_NEXT BIT(0) #define FWNODE_GRAPH_DEVICE_DISABLED BIT(1) @@ -429,6 +422,8 @@ static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode) struct fwnode_handle * fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, u32 port, u32 endpoint, unsigned long flags); +unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode, + unsigned long flags); #define fwnode_graph_for_each_endpoint(fwnode, child) \ for (child = NULL; \ diff --git a/include/linux/psi.h b/include/linux/psi.h index 65eb1476ac70..a70ca833c6d7 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PSI_H #define _LINUX_PSI_H diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 0a23300d49af..516c0fe836fd 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PSI_TYPES_H #define _LINUX_PSI_TYPES_H @@ -21,7 +22,17 @@ enum psi_task_count { * don't have to special case any state tracking for it. */ NR_ONCPU, - NR_PSI_TASK_COUNTS = 4, + /* + * For IO and CPU stalls the presence of running/oncpu tasks + * in the domain means a partial rather than a full stall. + * For memory it's not so simple because of page reclaimers: + * they are running/oncpu while representing a stall. To tell + * whether a domain has productivity left or not, we need to + * distinguish between regular running (i.e. productive) + * threads and memstall ones. 
+ */ + NR_MEMSTALL_RUNNING, + NR_PSI_TASK_COUNTS = 5, }; /* Task state bitmasks */ @@ -29,6 +40,7 @@ enum psi_task_count { #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) #define TSK_ONCPU (1 << NR_ONCPU) +#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) /* Resources that workloads could be stalled on */ enum psi_res { diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d48a7192e881..1595088c428b 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -52,6 +52,7 @@ enum sev_cmd { SEV_CMD_DF_FLUSH = 0x00A, SEV_CMD_DOWNLOAD_FIRMWARE = 0x00B, SEV_CMD_GET_ID = 0x00C, + SEV_CMD_INIT_EX = 0x00D, /* Guest commands */ SEV_CMD_DECOMMISSION = 0x020, @@ -102,6 +103,26 @@ struct sev_data_init { u32 tmr_len; /* In */ } __packed; +/** + * struct sev_data_init_ex - INIT_EX command parameters + * + * @length: len of the command buffer read by the PSP + * @flags: processing flags + * @tmr_address: system physical address used for SEV-ES + * @tmr_len: len of tmr_address + * @nv_address: system physical address used for PSP NV storage + * @nv_len: len of nv_address + */ +struct sev_data_init_ex { + u32 length; /* In */ + u32 flags; /* In */ + u64 tmr_address; /* In */ + u32 tmr_len; /* In */ + u32 reserved; /* In */ + u64 nv_address; /* In/Out */ + u32 nv_len; /* In */ +} __packed; + #define SEV_INIT_FLAGS_SEV_ES 0x01 /** diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index ae04968a3a47..9afd34a2d36c 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -37,6 +37,7 @@ #define PTP_MSGTYPE_PDELAY_RESP 0x3 #define PTP_EV_PORT 319 +#define PTP_GEN_PORT 320 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 2e5565067355..554454cb8693 100644 --- a/include/linux/ptp_clock_kernel.h +++ 
b/include/linux/ptp_clock_kernel.h @@ -351,15 +351,17 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); * * @hwtstamps: skb_shared_hwtstamps structure pointer * @vclock_index: phc index of ptp vclock. + * + * Returns converted timestamp, or 0 on error. */ -void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, - int vclock_index); +ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, + int vclock_index); #else static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } -static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, - int vclock_index) -{ } +static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ return 0; } #endif diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b5ebf6c01292..8aee2945ff08 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -362,29 +362,25 @@ static inline void user_single_step_report(struct pt_regs *regs) #ifndef arch_ptrace_stop_needed /** * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called - * @code: current->exit_code value ptrace will stop with - * @info: siginfo_t pointer (or %NULL) for signal ptrace will stop with * * This is called with the siglock held, to decide whether or not it's - * necessary to release the siglock and call arch_ptrace_stop() with the - * same @code and @info arguments. It can be defined to a constant if - * arch_ptrace_stop() is never required, or always is. On machines where - * this makes sense, it should be defined to a quick test to optimize out - * calling arch_ptrace_stop() when it would be superfluous. For example, - * if the thread has not been back to user mode since the last stop, the - * thread state might indicate that nothing needs to be done. + * necessary to release the siglock and call arch_ptrace_stop(). 
It can be + * defined to a constant if arch_ptrace_stop() is never required, or always + * is. On machines where this makes sense, it should be defined to a quick + * test to optimize out calling arch_ptrace_stop() when it would be + * superfluous. For example, if the thread has not been back to user mode + * since the last stop, the thread state might indicate that nothing needs + * to be done. * * This is guaranteed to be invoked once before a task stops for ptrace and * may include arch-specific operations necessary prior to a ptrace stop. */ -#define arch_ptrace_stop_needed(code, info) (0) +#define arch_ptrace_stop_needed() (0) #endif #ifndef arch_ptrace_stop /** * arch_ptrace_stop - Do machine-specific work before stopping for ptrace - * @code: current->exit_code value ptrace will stop with - * @info: siginfo_t pointer (or %NULL) for signal ptrace will stop with * * This is called with no locks held when arch_ptrace_stop_needed() has * just returned nonzero. It is allowed to block, e.g. for user memory @@ -394,7 +390,7 @@ static inline void user_single_step_report(struct pt_regs *regs) * we only do it when the arch requires it for this particular stop, as * indicated by arch_ptrace_stop_needed(). 
*/ -#define arch_ptrace_stop(code, info) do { } while (0) +#define arch_ptrace_stop() do { } while (0) #endif #ifndef current_pt_regs diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 725c9b784e60..9771a0761a40 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -414,6 +414,8 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args); +struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, + const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, @@ -429,16 +431,19 @@ struct pwm_device *devm_fwnode_pwm_get(struct device *dev, #else static inline struct pwm_device *pwm_request(int pwm_id, const char *label) { + might_sleep(); return ERR_PTR(-ENODEV); } static inline void pwm_free(struct pwm_device *pwm) { + might_sleep(); } static inline int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) { + might_sleep(); return -ENOTSUPP; } @@ -450,6 +455,7 @@ static inline int pwm_adjust_config(struct pwm_device *pwm) static inline int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns) { + might_sleep(); return -EINVAL; } @@ -462,11 +468,13 @@ static inline int pwm_capture(struct pwm_device *pwm, static inline int pwm_enable(struct pwm_device *pwm) { + might_sleep(); return -EINVAL; } static inline void pwm_disable(struct pwm_device *pwm) { + might_sleep(); } static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) @@ -493,12 +501,14 @@ static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label) { + might_sleep(); return ERR_PTR(-ENODEV); } static inline struct pwm_device *pwm_get(struct device *dev, const char *consumer) { + might_sleep(); return ERR_PTR(-ENODEV); } @@ -506,16 +516,19 @@ static inline struct pwm_device 
*of_pwm_get(struct device *dev, struct device_node *np, const char *con_id) { + might_sleep(); return ERR_PTR(-ENODEV); } static inline void pwm_put(struct pwm_device *pwm) { + might_sleep(); } static inline struct pwm_device *devm_pwm_get(struct device *dev, const char *consumer) { + might_sleep(); return ERR_PTR(-ENODEV); } @@ -523,6 +536,7 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, const char *con_id) { + might_sleep(); return ERR_PTR(-ENODEV); } @@ -530,6 +544,7 @@ static inline struct pwm_device * devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, const char *con_id) { + might_sleep(); return ERR_PTR(-ENODEV); } #endif diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 0a3807e927c5..827624840ee2 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* QLogic qed NIC Driver * Copyright (c) 2015-2016 QLogic Corporation - * Copyright (c) 2019-2020 Marvell International Ltd. + * Copyright (c) 2019-2021 Marvell International Ltd. 
*/ #ifndef _COMMON_HSI_H @@ -47,10 +47,10 @@ #define ISCSI_CDU_TASK_SEG_TYPE 0 #define FCOE_CDU_TASK_SEG_TYPE 0 #define RDMA_CDU_TASK_SEG_TYPE 1 +#define ETH_CDU_TASK_SEG_TYPE 2 #define FW_ASSERT_GENERAL_ATTN_IDX 32 - /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 #define MSTORM_QZONE_SIZE 16 @@ -60,9 +60,12 @@ #define PSTORM_QZONE_SIZE 0 #define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG 7 -#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT 16 -#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE 48 -#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD 112 +#define ETH_MAX_RXQ_VF_DEFAULT 16 +#define ETH_MAX_RXQ_VF_DOUBLE 48 +#define ETH_MAX_RXQ_VF_QUAD 112 + +#define ETH_RGSRC_CTX_SIZE 6 +#define ETH_TGSRC_CTX_SIZE 6 /********************************/ /* CORE (LIGHT L2) FW CONSTANTS */ @@ -89,8 +92,8 @@ #define MAX_NUM_LL2_TX_STATS_COUNTERS 48 #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 42 -#define FW_REVISION_VERSION 2 +#define FW_MINOR_VERSION 59 +#define FW_REVISION_VERSION 1 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -112,6 +115,7 @@ #define MAX_NUM_VFS (MAX_NUM_VFS_K2) #define MAX_NUM_FUNCTIONS_BB (MAX_NUM_PFS_BB + MAX_NUM_VFS_BB) +#define MAX_NUM_FUNCTIONS_K2 (MAX_NUM_PFS_K2 + MAX_NUM_VFS_K2) #define MAX_FUNCTION_NUMBER_BB (MAX_NUM_PFS + MAX_NUM_VFS_BB) #define MAX_FUNCTION_NUMBER_K2 (MAX_NUM_PFS + MAX_NUM_VFS_K2) @@ -133,7 +137,7 @@ #define NUM_OF_TCS (NUM_OF_PHYS_TCS + 1) /* CIDs */ -#define NUM_OF_CONNECTION_TYPES_E4 (8) +#define NUM_OF_CONNECTION_TYPES (8) #define NUM_OF_LCIDS (320) #define NUM_OF_LTIDS (320) @@ -144,7 +148,7 @@ #define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) /* Tools Version */ -#define TOOLS_VERSION 10 +#define TOOLS_VERSION 11 /*****************/ /* CDU CONSTANTS */ @@ -162,6 +166,7 @@ #define CDU_CONTEXT_VALIDATION_CFG_USE_REGION (3) #define CDU_CONTEXT_VALIDATION_CFG_USE_CID (4) #define CDU_CONTEXT_VALIDATION_CFG_USE_ACTIVE (5) +#define CDU_CONTEXT_VALIDATION_DEFAULT_CFG (0x3d) /*****************/ /* DQ CONSTANTS */ @@ -302,6 
+307,9 @@ /* PWM address mapping */ #define DQ_PWM_OFFSET_DPM_BASE 0x0 #define DQ_PWM_OFFSET_DPM_END 0x27 +#define DQ_PWM_OFFSET_XCM32_24ICID_BASE 0x28 +#define DQ_PWM_OFFSET_UCM32_24ICID_BASE 0x30 +#define DQ_PWM_OFFSET_TCM32_24ICID_BASE 0x38 #define DQ_PWM_OFFSET_XCM16_BASE 0x40 #define DQ_PWM_OFFSET_XCM32_BASE 0x44 #define DQ_PWM_OFFSET_UCM16_BASE 0x48 @@ -325,6 +333,13 @@ #define DQ_PWM_OFFSET_TCM_LL2_PROD_UPDATE \ (DQ_PWM_OFFSET_TCM32_BASE + DQ_TCM_AGG_VAL_SEL_REG9 - 4) +#define DQ_PWM_OFFSET_XCM_RDMA_24B_ICID_SQ_PROD \ + (DQ_PWM_OFFSET_XCM32_24ICID_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_24B_ICID_CQ_CONS_32BIT \ + (DQ_PWM_OFFSET_UCM32_24ICID_BASE + 4) +#define DQ_PWM_OFFSET_TCM_ROCE_24B_ICID_RQ_PROD \ + (DQ_PWM_OFFSET_TCM32_24ICID_BASE + 1) + #define DQ_REGION_SHIFT (12) /* DPM */ @@ -360,6 +375,7 @@ /* Number of global Vport/QCN rate limiters */ #define MAX_QM_GLOBAL_RLS 256 +#define COMMON_MAX_QM_GLOBAL_RLS MAX_QM_GLOBAL_RLS /* QM registers data */ #define QM_LINE_CRD_REG_WIDTH 16 @@ -379,7 +395,7 @@ #define CAU_FSM_ETH_TX 1 /* Number of Protocol Indices per Status Block */ -#define PIS_PER_SB_E4 12 +#define PIS_PER_SB 12 #define MAX_PIS_PER_SB PIS_PER_SB #define CAU_HC_STOPPED_STATE 3 @@ -700,6 +716,13 @@ enum mf_mode { MAX_MF_MODE }; +/* Per protocol packet duplication enable bit vector. If set, duplicate + * offloaded traffic to LL2 debug queue. 
+ */ +struct offload_pkt_dup_enable { + __le16 enable_vector; +}; + /* Per-protocol connection types */ enum protocol_type { PROTOCOLID_TCP_ULP, @@ -717,6 +740,12 @@ enum protocol_type { MAX_PROTOCOL_TYPE }; +/* Pstorm packet duplication config */ +struct pstorm_pkt_dup_cfg { + struct offload_pkt_dup_enable enable; + __le16 reserved[3]; +}; + struct regpair { __le32 lo; __le32 hi; @@ -728,10 +757,24 @@ struct rdma_eqe_destroy_qp { u8 reserved[4]; }; +/* RoCE Suspend Event Data */ +struct rdma_eqe_suspend_qp { + __le32 cid; + u8 reserved[4]; +}; + /* RDMA Event Data Union */ union rdma_eqe_data { struct regpair async_handle; struct rdma_eqe_destroy_qp rdma_destroy_qp_data; + struct rdma_eqe_suspend_qp rdma_suspend_qp_data; +}; + +/* Tstorm packet duplication config */ +struct tstorm_pkt_dup_cfg { + struct offload_pkt_dup_enable enable; + __le16 reserved; + __le32 cid; }; struct tstorm_queue_zone { @@ -891,6 +934,15 @@ struct db_legacy_addr { #define DB_LEGACY_ADDR_ICID_SHIFT 5 }; +/* Structure for doorbell address, in legacy mode, without DEMS */ +struct db_legacy_wo_dems_addr { + __le32 addr; +#define DB_LEGACY_WO_DEMS_ADDR_RESERVED0_MASK 0x3 +#define DB_LEGACY_WO_DEMS_ADDR_RESERVED0_SHIFT 0 +#define DB_LEGACY_WO_DEMS_ADDR_ICID_MASK 0x3FFFFFFF +#define DB_LEGACY_WO_DEMS_ADDR_ICID_SHIFT 2 +}; + /* Structure for doorbell address, in PWM mode */ struct db_pwm_addr { __le32 addr; @@ -907,6 +959,31 @@ struct db_pwm_addr { }; /* Parameters to RDMA firmware, passed in EDPM doorbell */ +struct db_rdma_24b_icid_dpm_params { + __le32 params; +#define DB_RDMA_24B_ICID_DPM_PARAMS_SIZE_MASK 0x3F +#define DB_RDMA_24B_ICID_DPM_PARAMS_SIZE_SHIFT 0 +#define DB_RDMA_24B_ICID_DPM_PARAMS_DPM_TYPE_MASK 0x3 +#define DB_RDMA_24B_ICID_DPM_PARAMS_DPM_TYPE_SHIFT 6 +#define DB_RDMA_24B_ICID_DPM_PARAMS_OPCODE_MASK 0xFF +#define DB_RDMA_24B_ICID_DPM_PARAMS_OPCODE_SHIFT 8 +#define DB_RDMA_24B_ICID_DPM_PARAMS_ICID_EXT_MASK 0xFF +#define DB_RDMA_24B_ICID_DPM_PARAMS_ICID_EXT_SHIFT 16 +#define 
DB_RDMA_24B_ICID_DPM_PARAMS_INV_BYTE_CNT_MASK 0x7 +#define DB_RDMA_24B_ICID_DPM_PARAMS_INV_BYTE_CNT_SHIFT 24 +#define DB_RDMA_24B_ICID_DPM_PARAMS_EXT_ICID_MODE_EN_MASK 0x1 +#define DB_RDMA_24B_ICID_DPM_PARAMS_EXT_ICID_MODE_EN_SHIFT 27 +#define DB_RDMA_24B_ICID_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_RDMA_24B_ICID_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_RDMA_24B_ICID_DPM_PARAMS_S_FLG_MASK 0x1 +#define DB_RDMA_24B_ICID_DPM_PARAMS_S_FLG_SHIFT 29 +#define DB_RDMA_24B_ICID_DPM_PARAMS_RESERVED1_MASK 0x1 +#define DB_RDMA_24B_ICID_DPM_PARAMS_RESERVED1_SHIFT 30 +#define DB_RDMA_24B_ICID_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK 0x1 +#define DB_RDMA_24B_ICID_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT 31 +}; + +/* Parameters to RDMA firmware, passed in EDPM doorbell */ struct db_rdma_dpm_params { __le32 params; #define DB_RDMA_DPM_PARAMS_SIZE_MASK 0x3F @@ -1220,21 +1297,41 @@ struct rdif_task_context { __le32 reserved2; }; +/* Searcher Table struct */ +struct src_entry_header { + __le32 flags; +#define SRC_ENTRY_HEADER_NEXT_PTR_TYPE_MASK 0x1 +#define SRC_ENTRY_HEADER_NEXT_PTR_TYPE_SHIFT 0 +#define SRC_ENTRY_HEADER_EMPTY_MASK 0x1 +#define SRC_ENTRY_HEADER_EMPTY_SHIFT 1 +#define SRC_ENTRY_HEADER_RESERVED_MASK 0x3FFFFFFF +#define SRC_ENTRY_HEADER_RESERVED_SHIFT 2 + __le32 magic_number; + struct regpair next_ptr; +}; + +/* Enumeration for address type */ +enum src_header_next_ptr_type_enum { + e_physical_addr, + e_logical_addr, + MAX_SRC_HEADER_NEXT_PTR_TYPE_ENUM +}; + /* Status block structure */ -struct status_block_e4 { - __le16 pi_array[PIS_PER_SB_E4]; +struct status_block { + __le16 pi_array[PIS_PER_SB]; __le32 sb_num; -#define STATUS_BLOCK_E4_SB_NUM_MASK 0x1FF -#define STATUS_BLOCK_E4_SB_NUM_SHIFT 0 -#define STATUS_BLOCK_E4_ZERO_PAD_MASK 0x7F -#define STATUS_BLOCK_E4_ZERO_PAD_SHIFT 9 -#define STATUS_BLOCK_E4_ZERO_PAD2_MASK 0xFFFF -#define STATUS_BLOCK_E4_ZERO_PAD2_SHIFT 16 +#define STATUS_BLOCK_SB_NUM_MASK 0x1FF +#define STATUS_BLOCK_SB_NUM_SHIFT 0 +#define 
STATUS_BLOCK_ZERO_PAD_MASK 0x7F +#define STATUS_BLOCK_ZERO_PAD_SHIFT 9 +#define STATUS_BLOCK_ZERO_PAD2_MASK 0xFFFF +#define STATUS_BLOCK_ZERO_PAD2_SHIFT 16 __le32 prod_index; -#define STATUS_BLOCK_E4_PROD_INDEX_MASK 0xFFFFFF -#define STATUS_BLOCK_E4_PROD_INDEX_SHIFT 0 -#define STATUS_BLOCK_E4_ZERO_PAD3_MASK 0xFF -#define STATUS_BLOCK_E4_ZERO_PAD3_SHIFT 24 +#define STATUS_BLOCK_PROD_INDEX_MASK 0xFFFFFF +#define STATUS_BLOCK_PROD_INDEX_SHIFT 0 +#define STATUS_BLOCK_ZERO_PAD3_MASK 0xFF +#define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; /* Tdif context */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index cd1207ad4ada..c84e08bc6802 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -67,6 +67,7 @@ /* Ethernet vport update constants */ #define ETH_FILTER_RULES_COUNT 10 #define ETH_RSS_IND_TABLE_ENTRIES_NUM 128 +#define ETH_RSS_IND_TABLE_MASK_SIZE_REGS (ETH_RSS_IND_TABLE_ENTRIES_NUM / 32) #define ETH_RSS_KEY_SIZE_REGS 10 #define ETH_RSS_ENGINE_NUM_K2 207 #define ETH_RSS_ENGINE_NUM_BB 127 diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h index 68eda1c21cde..7ba0abc867f1 100644 --- a/include/linux/qed/fcoe_common.h +++ b/include/linux/qed/fcoe_common.h @@ -150,49 +150,49 @@ struct ystorm_fcoe_task_st_ctx { u8 reserved2[8]; }; -struct e4_ystorm_fcoe_task_ag_ctx { +struct ystorm_fcoe_task_ag_ctx { u8 byte0; u8 byte1; __le16 word0; u8 flags0; -#define E4_YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_MASK 0xF -#define E4_YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_SHIFT 0 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT0_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT0_SHIFT 4 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT2_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT 6 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT3_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT 7 +#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define 
YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define YSTORM_FCOE_TASK_AG_CTX_BIT0_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT0_SHIFT 4 +#define YSTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 +#define YSTORM_FCOE_TASK_AG_CTX_BIT2_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT 6 +#define YSTORM_FCOE_TASK_AG_CTX_BIT3_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT 7 u8 flags1; -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0_MASK 0x3 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0_SHIFT 0 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 2 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT 6 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 7 +#define YSTORM_FCOE_TASK_AG_CTX_CF0_MASK 0x3 +#define YSTORM_FCOE_TASK_AG_CTX_CF0_SHIFT 0 +#define YSTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 +#define YSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 2 +#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 +#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 +#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT 6 +#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 7 u8 flags2; -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT4_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT4_SHIFT 0 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 2 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 3 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 4 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define 
E4_YSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 5 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 6 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 7 +#define YSTORM_FCOE_TASK_AG_CTX_BIT4_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT4_SHIFT 0 +#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 7 u8 byte2; __le32 reg0; u8 byte3; @@ -206,73 +206,73 @@ struct e4_ystorm_fcoe_task_ag_ctx { __le32 reg2; }; -struct e4_tstorm_fcoe_task_ag_ctx { +struct tstorm_fcoe_task_ag_ctx { u8 reserved; u8 byte1; __le16 icid; u8 flags0; -#define E4_TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF -#define E4_TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 -#define E4_TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 -#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 -#define E4_TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_SHIFT 6 -#define E4_TSTORM_FCOE_TASK_AG_CTX_VALID_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_VALID_SHIFT 7 +#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define 
TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 +#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_VALID_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_VALID_SHIFT 7 u8 flags1; -#define E4_TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_SHIFT 0 -#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT5_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT5_SHIFT 1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_SHIFT 2 -#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_SHIFT 4 -#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_BIT5_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_BIT5_SHIFT 1 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 6 u8 flags2; -#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_SHIFT 0 -#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT 2 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_SHIFT 4 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_SHIFT 0 
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_SHIFT 6 u8 flags3; -#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_MASK 0x3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_SHIFT 0 -#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_SHIFT 2 -#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_SHIFT 3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 4 -#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_SHIFT 5 -#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT 6 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_SHIFT 7 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_SHIFT 3 +#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_SHIFT 5 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_SHIFT 7 u8 flags4; -#define 
E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_SHIFT 0 -#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_SHIFT 1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 2 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 3 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 4 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 5 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 6 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 7 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_SHIFT 1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 3 +#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 5 +#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 7 u8 cleanup_state; __le16 last_sent_tid; __le32 rec_rr_tov_exp_timeout; @@ -352,49 +352,49 @@ struct tstorm_fcoe_task_st_ctx { struct fcoe_tstorm_fcoe_task_st_ctx_read_only read_only; }; -struct e4_mstorm_fcoe_task_ag_ctx { +struct mstorm_fcoe_task_ag_ctx { u8 byte0; u8 byte1; __le16 icid; u8 
flags0; -#define E4_MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF -#define E4_MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 -#define E4_MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_SHIFT 5 -#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT2_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT 6 -#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT3_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT 7 +#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_SHIFT 5 +#define MSTORM_FCOE_TASK_AG_CTX_BIT2_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT 6 +#define MSTORM_FCOE_TASK_AG_CTX_BIT3_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT 7 u8 flags1; -#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK 0x3 -#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT 0 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 2 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 4 -#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT 6 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 7 +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK 0x3 +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT 0 +#define MSTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 +#define MSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 2 +#define MSTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 +#define MSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 4 +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK 0x1 +#define 
MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT 6 +#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 7 u8 flags2; -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 0 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 2 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 3 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 4 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 5 -#define E4_MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_SHIFT 6 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 7 +#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 0 +#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_SHIFT 6 +#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 7 u8 cleanup_state; __le32 received_bytes; u8 byte3; @@ -440,56 +440,56 @@ struct mstorm_fcoe_task_st_ctx { struct scsi_cached_sges data_desc; }; -struct e4_ustorm_fcoe_task_ag_ctx { 
+struct ustorm_fcoe_task_ag_ctx { u8 reserved; u8 byte1; __le16 icid; u8 flags0; -#define E4_USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF -#define E4_USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 -#define E4_USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 -#define E4_USTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF0_MASK 0x3 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF0_SHIFT 6 +#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 +#define USTORM_FCOE_TASK_AG_CTX_CF0_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF0_SHIFT 6 u8 flags1; -#define E4_USTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 0 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 2 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF3_MASK 0x3 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF3_SHIFT 4 -#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 -#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 +#define USTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 2 +#define USTORM_FCOE_TASK_AG_CTX_CF3_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF3_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 u8 flags2; -#define E4_USTORM_FCOE_TASK_AG_CTX_CF0EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT 0 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 1 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 -#define 
E4_USTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 2 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF3EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_CF3EN_SHIFT 3 -#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 5 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 6 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 7 +#define USTORM_FCOE_TASK_AG_CTX_CF0EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 1 +#define USTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_FCOE_TASK_AG_CTX_CF3EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 5 +#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 6 +#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 7 u8 flags3; -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 0 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 2 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_USTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 3 -#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF -#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define 
USTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 1 +#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 2 +#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 3 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 __le32 dif_err_intervals; __le32 dif_error_1st_interval; __le32 global_cq_num; @@ -499,18 +499,18 @@ struct e4_ustorm_fcoe_task_ag_ctx { }; /* FCoE task context */ -struct e4_fcoe_task_context { +struct fcoe_task_context { struct ystorm_fcoe_task_st_ctx ystorm_st_context; struct regpair ystorm_st_padding[2]; struct tdif_task_context tdif_context; - struct e4_ystorm_fcoe_task_ag_ctx ystorm_ag_context; - struct e4_tstorm_fcoe_task_ag_ctx tstorm_ag_context; + struct ystorm_fcoe_task_ag_ctx ystorm_ag_context; + struct tstorm_fcoe_task_ag_ctx tstorm_ag_context; struct timers_context timer_context; struct tstorm_fcoe_task_st_ctx tstorm_st_context; struct regpair tstorm_st_padding[2]; - struct e4_mstorm_fcoe_task_ag_ctx mstorm_ag_context; + struct mstorm_fcoe_task_ag_ctx mstorm_ag_context; struct mstorm_fcoe_task_st_ctx mstorm_st_context; - struct e4_ustorm_fcoe_task_ag_ctx ustorm_ag_context; + struct ustorm_fcoe_task_ag_ctx ustorm_ag_context; struct rdif_task_context rdif_context; }; diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 157019f716f1..1a60285a01e3 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -714,49 +714,49 @@ struct ystorm_iscsi_task_st_ctx { union iscsi_task_hdr pdu_hdr; }; -struct e4_ystorm_iscsi_task_ag_ctx { +struct ystorm_iscsi_task_ag_ctx { u8 reserved; u8 byte1; __le16 word0; u8 flags0; -#define E4_YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF -#define E4_YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 -#define 
E4_YSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_MASK 0x1 /* bit3 */ -#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_SHIFT 7 +#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_MASK 0x1 /* bit3 */ +#define YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_SHIFT 7 u8 flags1; -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 0 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 6 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 +#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 +#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 u8 flags2; -#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 u8 byte2; __le32 TTT; u8 byte3; @@ -764,49 +764,49 @@ struct e4_ystorm_iscsi_task_ag_ctx { __le16 word1; }; -struct e4_mstorm_iscsi_task_ag_ctx { +struct mstorm_iscsi_task_ag_ctx { u8 cdu_validation; u8 byte1; __le16 task_cid; u8 flags0; -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 -#define 
E4_MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT 7 +#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5 +#define MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT 7 u8 flags1; -#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_MASK 0x3 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT 0 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 4 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT 6 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 2 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK 0x1 +#define 
MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 7 u8 flags2; -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 0 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 0 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 7 u8 byte2; __le32 reg0; u8 byte3; @@ -814,56 +814,56 @@ struct e4_mstorm_iscsi_task_ag_ctx { __le16 word1; }; -struct e4_ustorm_iscsi_task_ag_ctx { +struct ustorm_iscsi_task_ag_ctx { 
u8 reserved; u8 state; __le16 icid; u8 flags0; -#define E4_USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF -#define E4_USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 -#define E4_USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5 -#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK 0x3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT 6 +#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT 6 u8 flags1; -#define E4_USTORM_ISCSI_TASK_AG_CTX_RESERVED1_MASK 0x3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RESERVED1_SHIFT 0 -#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_MASK 0x3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_SHIFT 2 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 4 -#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 +#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 u8 flags2; -#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT 0 
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT 1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_SHIFT 2 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT 5 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 6 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT 1 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT 5 +#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 6 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7 u8 flags3; -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 0 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 1 -#define 
E4_USTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 2 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 3 -#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF -#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 +#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 0 +#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 2 +#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT 3 +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF +#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 __le32 dif_err_intervals; __le32 dif_error_1st_interval; __le32 rcv_cont_len; @@ -952,14 +952,14 @@ struct ustorm_iscsi_task_st_ctx { }; /* iscsi task context */ -struct e4_iscsi_task_context { +struct iscsi_task_context { struct ystorm_iscsi_task_st_ctx ystorm_st_context; - struct e4_ystorm_iscsi_task_ag_ctx ystorm_ag_context; + struct ystorm_iscsi_task_ag_ctx ystorm_ag_context; struct regpair ystorm_ag_padding[2]; struct tdif_task_context tdif_context; - struct e4_mstorm_iscsi_task_ag_ctx mstorm_ag_context; + struct mstorm_iscsi_task_ag_ctx mstorm_ag_context; struct regpair mstorm_ag_padding[2]; - struct e4_ustorm_iscsi_task_ag_ctx ustorm_ag_context; + struct ustorm_iscsi_task_ag_ctx ustorm_ag_context; struct mstorm_iscsi_task_st_ctx mstorm_st_context; struct ustorm_iscsi_task_st_ctx ustorm_st_context; struct rdif_task_context rdif_context; @@ -1431,73 +1431,73 @@ struct ystorm_iscsi_stats_drv { struct regpair iscsi_tx_tcp_pkt_cnt; }; -struct e4_tstorm_iscsi_task_ag_ctx { +struct tstorm_iscsi_task_ag_ctx { u8 byte0; u8 byte1; __le16 word0; u8 flags0; -#define E4_TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF -#define 
E4_TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT2_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT2_SHIFT 6 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 +#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 u8 flags1; -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT5_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT5_SHIFT 1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 2 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 4 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_SHIFT 1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT 6 u8 flags2; -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 -#define 
E4_TSTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 0 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4_SHIFT 2 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5_SHIFT 4 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6_SHIFT 6 u8 flags3; -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7_MASK 0x3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7_SHIFT 0 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 2 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 4 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 5 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4EN_SHIFT 6 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5EN_SHIFT 7 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7_MASK 0x3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT 3 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_MASK 0x1 +#define 
TSTORM_ISCSI_TASK_AG_CTX_CF4EN_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_SHIFT 7 u8 flags4; -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6EN_SHIFT 0 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7EN_SHIFT 1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 2 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 3 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 4 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 5 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 6 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 7 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_SHIFT 0 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_SHIFT 1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT 2 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT 3 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT 4 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT 5 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT 6 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT 7 u8 byte2; __le16 word1; __le32 reg0; diff --git a/include/linux/qed/nvmetcp_common.h b/include/linux/qed/nvmetcp_common.h index 5a2ab0606308..cc7c7481a0e0 100644 --- a/include/linux/qed/nvmetcp_common.h 
+++ b/include/linux/qed/nvmetcp_common.h @@ -410,7 +410,7 @@ struct e5_ystorm_nvmetcp_task_ag_ctx { u8 byte2; u8 byte3; u8 byte4; - u8 e4_reserved7; + u8 reserved7; }; struct e5_mstorm_nvmetcp_task_ag_ctx { @@ -445,7 +445,7 @@ struct e5_mstorm_nvmetcp_task_ag_ctx { u8 byte2; u8 byte3; u8 byte4; - u8 e4_reserved7; + u8 reserved7; }; struct e5_ustorm_nvmetcp_task_ag_ctx { @@ -489,17 +489,17 @@ struct e5_ustorm_nvmetcp_task_ag_ctx { #define E5_USTORM_NVMETCP_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7 u8 flags3; u8 flags4; -#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED5_MASK 0x3 -#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED5_SHIFT 0 -#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED6_MASK 0x1 -#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED6_SHIFT 2 -#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED7_MASK 0x1 -#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED7_SHIFT 3 +#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED5_MASK 0x3 +#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED5_SHIFT 0 +#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED6_MASK 0x1 +#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED6_SHIFT 2 +#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED7_MASK 0x1 +#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED7_SHIFT 3 #define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF #define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 u8 byte2; u8 byte3; - u8 e4_reserved8; + u8 reserved8; __le32 dif_err_intervals; __le32 dif_error_1st_interval; __le32 rcv_cont_len; diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index f34dbd0db795..a84063492c71 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -268,14 +268,15 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain) } /** - * @brief qed_chain_advance_page - + * qed_chain_advance_page(): Advance the next element across pages for a + * linked chain. * - * Advance the next element across pages for a linked chain + * @p_chain: P_chain. 
+ * @p_next_elem: P_next_elem. + * @idx_to_inc: Idx_to_inc. + * @page_to_inc: page_to_inc. * - * @param p_chain - * @param p_next_elem - * @param idx_to_inc - * @param page_to_inc + * Return: Void. */ static inline void qed_chain_advance_page(struct qed_chain *p_chain, @@ -336,12 +337,14 @@ qed_chain_advance_page(struct qed_chain *p_chain, } while (0) /** - * @brief qed_chain_return_produced - + * qed_chain_return_produced(): A chain in which the driver "Produces" + * elements should use this API + * to indicate previous produced elements + * are now consumed. * - * A chain in which the driver "Produces" elements should use this API - * to indicate previous produced elements are now consumed. + * @p_chain: Chain. * - * @param p_chain + * Return: Void. */ static inline void qed_chain_return_produced(struct qed_chain *p_chain) { @@ -353,15 +356,15 @@ static inline void qed_chain_return_produced(struct qed_chain *p_chain) } /** - * @brief qed_chain_produce - + * qed_chain_produce(): A chain in which the driver "Produces" + * elements should use this to get a pointer to + * the next element which can be "Produced". It's driver + * responsibility to validate that the chain has room for + * new element. * - * A chain in which the driver "Produces" elements should use this to get - * a pointer to the next element which can be "Produced". It's driver - * responsibility to validate that the chain has room for new element. + * @p_chain: Chain. * - * @param p_chain - * - * @return void*, a pointer to next element + * Return: void*, a pointer to next element. */ static inline void *qed_chain_produce(struct qed_chain *p_chain) { @@ -395,14 +398,11 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) } /** - * @brief qed_chain_get_capacity - - * - * Get the maximum number of BDs in chain + * qed_chain_get_capacity(): Get the maximum number of BDs in chain * - * @param p_chain - * @param num + * @p_chain: Chain. 
* - * @return number of unusable BDs + * Return: number of unusable BDs. */ static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain) { @@ -410,12 +410,14 @@ static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain) } /** - * @brief qed_chain_recycle_consumed - + * qed_chain_recycle_consumed(): Returns an element which was + * previously consumed; + * Increments producers so they could + * be written to FW. * - * Returns an element which was previously consumed; - * Increments producers so they could be written to FW. + * @p_chain: Chain. * - * @param p_chain + * Return: Void. */ static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain) { @@ -427,14 +429,13 @@ static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain) } /** - * @brief qed_chain_consume - + * qed_chain_consume(): A Chain in which the driver utilizes data written + * by a different source (i.e., FW) should use this to + * access passed buffers. * - * A Chain in which the driver utilizes data written by a different source - * (i.e., FW) should use this to access passed buffers. + * @p_chain: Chain. * - * @param p_chain - * - * @return void*, a pointer to the next buffer written + * Return: void*, a pointer to the next buffer written. */ static inline void *qed_chain_consume(struct qed_chain *p_chain) { @@ -468,9 +469,11 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) } /** - * @brief qed_chain_reset - Resets the chain to its start state + * qed_chain_reset(): Resets the chain to its start state. + * + * @p_chain: pointer to a previously allocated chain. * - * @param p_chain pointer to a previously allocated chain + * Return Void. */ static inline void qed_chain_reset(struct qed_chain *p_chain) { @@ -519,13 +522,12 @@ static inline void qed_chain_reset(struct qed_chain *p_chain) } /** - * @brief qed_chain_get_last_elem - + * qed_chain_get_last_elem(): Returns a pointer to the last element of the + * chain. 
* - * Returns a pointer to the last element of the chain + * @p_chain: Chain. * - * @param p_chain - * - * @return void* + * Return: void*. */ static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) { @@ -563,10 +565,13 @@ out: } /** - * @brief qed_chain_set_prod - sets the prod to the given value + * qed_chain_set_prod(): sets the prod to the given value. + * + * @p_chain: Chain. + * @prod_idx: Prod Idx. + * @p_prod_elem: Prod elem. * - * @param prod_idx - * @param p_prod_elem + * Return Void. */ static inline void qed_chain_set_prod(struct qed_chain *p_chain, u32 prod_idx, void *p_prod_elem) @@ -610,9 +615,11 @@ static inline void qed_chain_set_prod(struct qed_chain *p_chain, } /** - * @brief qed_chain_pbl_zero_mem - set chain memory to 0 + * qed_chain_pbl_zero_mem(): set chain memory to 0. + * + * @p_chain: Chain. * - * @param p_chain + * Return: Void. */ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) { diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 812a4d751163..e1bf3219b4e6 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -145,12 +145,6 @@ struct qed_filter_mcast_params { unsigned char mac[64][ETH_ALEN]; }; -union qed_filter_type_params { - enum qed_filter_rx_mode_type accept_flags; - struct qed_filter_ucast_params ucast; - struct qed_filter_mcast_params mcast; -}; - enum qed_filter_type { QED_FILTER_TYPE_UCAST, QED_FILTER_TYPE_MCAST, @@ -158,11 +152,6 @@ enum qed_filter_type { QED_MAX_FILTER_TYPES, }; -struct qed_filter_params { - enum qed_filter_type type; - union qed_filter_type_params filter; -}; - struct qed_tunn_params { u16 vxlan_port; u8 update_vxlan_port; @@ -314,8 +303,14 @@ struct qed_eth_ops { int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); - int (*filter_config)(struct qed_dev *cdev, - struct qed_filter_params *params); + int (*filter_config_rx_mode)(struct qed_dev *cdev, + enum qed_filter_rx_mode_type type); + + int 
(*filter_config_ucast)(struct qed_dev *cdev, + struct qed_filter_ucast_params *params); + + int (*filter_config_mcast)(struct qed_dev *cdev, + struct qed_filter_mcast_params *params); int (*fastpath_stop)(struct qed_dev *cdev); @@ -336,7 +331,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, enum qed_filter_config_mode mode); int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); - int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac); + int (*req_bulletin_update_mac)(struct qed_dev *cdev, const u8 *mac); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 850b98991670..6dc4943d8aec 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -24,6 +24,9 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <net/devlink.h> +#define QED_TX_SWS_TIMER_DFLT 500 +#define QED_TWO_MSL_TIMER_DFLT 4000 + enum dcbx_protocol_type { DCBX_PROTOCOL_ISCSI, DCBX_PROTOCOL_FCOE, @@ -588,7 +591,7 @@ enum qed_int_mode { }; struct qed_sb_info { - struct status_block_e4 *sb_virt; + struct status_block *sb_virt; dma_addr_t sb_phys; u32 sb_ack; /* Last given ack */ u16 igu_sb_id; @@ -613,7 +616,6 @@ enum qed_hw_err_type { enum qed_dev_type { QED_DEV_TYPE_BB, QED_DEV_TYPE_AH, - QED_DEV_TYPE_E5, }; struct qed_dev_info { @@ -650,6 +652,7 @@ struct qed_dev_info { bool wol_support; bool smart_an; + bool esl; /* MBI version */ u32 mbi_version; @@ -805,6 +808,12 @@ struct qed_devlink { struct devlink_health_reporter *fw_reporter; }; +struct qed_sb_info_dbg { + u32 igu_prod; + u32 igu_cons; + u16 pi[PIS_PER_SB]; +}; + struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); @@ -819,47 +828,47 @@ struct qed_common_cb_ops { struct qed_selftest_ops { /** - * @brief selftest_interrupt - Perform interrupt test + * selftest_interrupt(): Perform interrupt test. 
* - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*selftest_interrupt)(struct qed_dev *cdev); /** - * @brief selftest_memory - Perform memory test + * selftest_memory(): Perform memory test. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*selftest_memory)(struct qed_dev *cdev); /** - * @brief selftest_register - Perform register test + * selftest_register(): Perform register test. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*selftest_register)(struct qed_dev *cdev); /** - * @brief selftest_clock - Perform clock test + * selftest_clock(): Perform clock test. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*selftest_clock)(struct qed_dev *cdev); /** - * @brief selftest_nvram - Perform nvram test + * selftest_nvram(): Perform nvram test. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*selftest_nvram) (struct qed_dev *cdev); }; @@ -927,47 +936,53 @@ struct qed_common_ops { enum qed_hw_err_type err_type); /** - * @brief can_link_change - can the instance change the link or not + * can_link_change(): can the instance change the link or not. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return true if link-change is allowed, false otherwise. + * Return: true if link-change is allowed, false otherwise. */ bool (*can_link_change)(struct qed_dev *cdev); /** - * @brief set_link - set links according to params + * set_link(): set links according to params. * - * @param cdev - * @param params - values used to override the default link configuration + * @cdev: Qed dev pointer. 
+ * @params: values used to override the default link configuration. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*set_link)(struct qed_dev *cdev, struct qed_link_params *params); /** - * @brief get_link - returns the current link state. + * get_link(): returns the current link state. * - * @param cdev - * @param if_link - structure to be filled with current link configuration. + * @cdev: Qed dev pointer. + * @if_link: structure to be filled with current link configuration. + * + * Return: Void. */ void (*get_link)(struct qed_dev *cdev, struct qed_link_output *if_link); /** - * @brief - drains chip in case Tx completions fail to arrive due to pause. + * drain(): drains chip in case Tx completions fail to arrive due to pause. + * + * @cdev: Qed dev pointer. * - * @param cdev + * Return: Int. */ int (*drain)(struct qed_dev *cdev); /** - * @brief update_msglvl - update module debug level + * update_msglvl(): update module debug level. * - * @param cdev - * @param dp_module - * @param dp_level + * @cdev: Qed dev pointer. + * @dp_module: Debug module. + * @dp_level: Debug level. + * + * Return: Void. */ void (*update_msglvl)(struct qed_dev *cdev, u32 dp_module, @@ -981,70 +996,73 @@ struct qed_common_ops { struct qed_chain *p_chain); /** - * @brief nvm_flash - Flash nvm data. + * nvm_flash(): Flash nvm data. * - * @param cdev - * @param name - file containing the data + * @cdev: Qed dev pointer. + * @name: file containing the data. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*nvm_flash)(struct qed_dev *cdev, const char *name); /** - * @brief nvm_get_image - reads an entire image from nvram + * nvm_get_image(): reads an entire image from nvram. * - * @param cdev - * @param type - type of the request nvram image - * @param buf - preallocated buffer to fill with the image - * @param len - length of the allocated buffer + * @cdev: Qed dev pointer. 
+ * @type: type of the request nvram image. + * @buf: preallocated buffer to fill with the image. + * @len: length of the allocated buffer. * - * @return 0 on success, error otherwise + * Return: 0 on success, error otherwise. */ int (*nvm_get_image)(struct qed_dev *cdev, enum qed_nvm_images type, u8 *buf, u16 len); /** - * @brief set_coalesce - Configure Rx coalesce value in usec + * set_coalesce(): Configure Rx coalesce value in usec. * - * @param cdev - * @param rx_coal - Rx coalesce value in usec - * @param tx_coal - Tx coalesce value in usec - * @param qid - Queue index - * @param sb_id - Status Block Id + * @cdev: Qed dev pointer. + * @rx_coal: Rx coalesce value in usec. + * @tx_coal: Tx coalesce value in usec. + * @handle: Handle. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, void *handle); /** - * @brief set_led - Configure LED mode + * set_led() - Configure LED mode. * - * @param cdev - * @param mode - LED mode + * @cdev: Qed dev pointer. + * @mode: LED mode. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); /** - * @brief attn_clr_enable - Prevent attentions from being reasserted + * attn_clr_enable(): Prevent attentions from being reasserted. + * + * @cdev: Qed dev pointer. + * @clr_enable: Clear enable. * - * @param cdev - * @param clr_enable + * Return: Void. */ void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable); /** - * @brief db_recovery_add - add doorbell information to the doorbell - * recovery mechanism. + * db_recovery_add(): add doorbell information to the doorbell + * recovery mechanism. + * + * @cdev: Qed dev pointer. + * @db_addr: Doorbell address. + * @db_data: Dddress of where db_data is stored. + * @db_width: Doorbell is 32b or 64b. + * @db_space: Doorbell recovery addresses are user or kernel space. 
* - * @param cdev - * @param db_addr - doorbell address - * @param db_data - address of where db_data is stored - * @param db_is_32b - doorbell is 32b pr 64b - * @param db_is_user - doorbell recovery addresses are user or kernel space + * Return: Int. */ int (*db_recovery_add)(struct qed_dev *cdev, void __iomem *db_addr, @@ -1053,120 +1071,143 @@ struct qed_common_ops { enum qed_db_rec_space db_space); /** - * @brief db_recovery_del - remove doorbell information from the doorbell + * db_recovery_del(): remove doorbell information from the doorbell * recovery mechanism. db_data serves as key (db_addr is not unique). * - * @param cdev - * @param db_addr - doorbell address - * @param db_data - address where db_data is stored. Serves as key for the - * entry to delete. + * @cdev: Qed dev pointer. + * @db_addr: Doorbell address. + * @db_data: Address where db_data is stored. Serves as key for the + * entry to delete. + * + * Return: Int. */ int (*db_recovery_del)(struct qed_dev *cdev, void __iomem *db_addr, void *db_data); /** - * @brief recovery_process - Trigger a recovery process + * recovery_process(): Trigger a recovery process. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*recovery_process)(struct qed_dev *cdev); /** - * @brief recovery_prolog - Execute the prolog operations of a recovery process + * recovery_prolog(): Execute the prolog operations of a recovery process. * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*recovery_prolog)(struct qed_dev *cdev); /** - * @brief update_drv_state - API to inform the change in the driver state. + * update_drv_state(): API to inform the change in the driver state. * - * @param cdev - * @param active + * @cdev: Qed dev pointer. + * @active: Active * + * Return: Int. 
*/ int (*update_drv_state)(struct qed_dev *cdev, bool active); /** - * @brief update_mac - API to inform the change in the mac address + * update_mac(): API to inform the change in the mac address. * - * @param cdev - * @param mac + * @cdev: Qed dev pointer. + * @mac: MAC. * + * Return: Int. */ - int (*update_mac)(struct qed_dev *cdev, u8 *mac); + int (*update_mac)(struct qed_dev *cdev, const u8 *mac); /** - * @brief update_mtu - API to inform the change in the mtu + * update_mtu(): API to inform the change in the mtu. * - * @param cdev - * @param mtu + * @cdev: Qed dev pointer. + * @mtu: MTU. * + * Return: Int. */ int (*update_mtu)(struct qed_dev *cdev, u16 mtu); /** - * @brief update_wol - update of changes in the WoL configuration + * update_wol(): Update of changes in the WoL configuration. + * + * @cdev: Qed dev pointer. + * @enabled: true iff WoL should be enabled. * - * @param cdev - * @param enabled - true iff WoL should be enabled. + * Return: Int. */ int (*update_wol) (struct qed_dev *cdev, bool enabled); /** - * @brief read_module_eeprom + * read_module_eeprom(): Read EEPROM. * - * @param cdev - * @param buf - buffer - * @param dev_addr - PHY device memory region - * @param offset - offset into eeprom contents to be read - * @param len - buffer length, i.e., max bytes to be read + * @cdev: Qed dev pointer. + * @buf: buffer. + * @dev_addr: PHY device memory region. + * @offset: offset into eeprom contents to be read. + * @len: buffer length, i.e., max bytes to be read. + * + * Return: Int. */ int (*read_module_eeprom)(struct qed_dev *cdev, char *buf, u8 dev_addr, u32 offset, u32 len); /** - * @brief get_affin_hwfn_idx + * get_affin_hwfn_idx(): Get affine HW function. + * + * @cdev: Qed dev pointer. * - * @param cdev + * Return: u8. */ u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev); /** - * @brief read_nvm_cfg - Read NVM config attribute value. 
- * @param cdev - * @param buf - buffer - * @param cmd - NVM CFG command id - * @param entity_id - Entity id + * read_nvm_cfg(): Read NVM config attribute value. + * + * @cdev: Qed dev pointer. + * @buf: Buffer. + * @cmd: NVM CFG command id. + * @entity_id: Entity id. * + * Return: Int. */ int (*read_nvm_cfg)(struct qed_dev *cdev, u8 **buf, u32 cmd, u32 entity_id); /** - * @brief read_nvm_cfg - Read NVM config attribute value. - * @param cdev - * @param cmd - NVM CFG command id + * read_nvm_cfg_len(): Read NVM config attribute value. * - * @return config id length, 0 on error. + * @cdev: Qed dev pointer. + * @cmd: NVM CFG command id. + * + * Return: config id length, 0 on error. */ int (*read_nvm_cfg_len)(struct qed_dev *cdev, u32 cmd); /** - * @brief set_grc_config - Configure value for grc config id. - * @param cdev - * @param cfg_id - grc config id - * @param val - grc config value + * set_grc_config(): Configure value for grc config id. + * + * @cdev: Qed dev pointer. + * @cfg_id: grc config id + * @val: grc config value * + * Return: Int. 
*/ int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val); struct devlink* (*devlink_register)(struct qed_dev *cdev); void (*devlink_unregister)(struct devlink *devlink); + + __printf(2, 3) void (*mfw_report)(struct qed_dev *cdev, char *fmt, ...); + + int (*get_sb_info)(struct qed_dev *cdev, struct qed_sb_info *sb, + u16 qid, struct qed_sb_info_dbg *sb_dbg); + + int (*get_esl_status)(struct qed_dev *cdev, bool *esl_active); }; #define MASK_FIELD(_name, _value) \ @@ -1386,7 +1427,7 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) u16 rc = 0; prod = le32_to_cpu(sb_info->sb_virt->prod_index) & - STATUS_BLOCK_E4_PROD_INDEX_MASK; + STATUS_BLOCK_PROD_INDEX_MASK; if (sb_info->sb_ack != prod) { sb_info->sb_ack = prod; rc |= QED_SB_IDX; @@ -1397,18 +1438,16 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) } /** + * qed_sb_ack(): This function creates an update command for interrupts + * that is written to the IGU. * - * @brief This function creates an update command for interrupts that is - * written to the IGU. - * - * @param sb_info - This is the structure allocated and - * initialized per status block. Assumption is - * that it was initialized using qed_sb_init - * @param int_cmd - Enable/Disable/Nop - * @param upd_flg - whether igu consumer should be - * updated. + * @sb_info: This is the structure allocated and + * initialized per status block. Assumption is + * that it was initialized using qed_sb_init + * @int_cmd: Enable/Disable/Nop + * @upd_flg: Whether igu consumer should be updated. * - * @return inline void + * Return: inline void. 
*/ static inline void qed_sb_ack(struct qed_sb_info *sb_info, enum igu_int_cmd int_cmd, diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index 04180d9af560..494cdc3cd840 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -182,7 +182,7 @@ struct qed_iscsi_cb_ops { * @param stats - pointer to struck that would be filled * we stats * @return 0 on success, error otherwise. - * @change_mac Change MAC of interface + * @change_mac: Change MAC of interface * @param cdev * @param handle - the connection handle. * @param mac - new MAC to configure. diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index ff808d248883..5b67cd03276e 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -208,57 +208,57 @@ enum qed_ll2_xmit_flags { struct qed_ll2_ops { /** - * @brief start - initializes ll2 + * start(): Initializes ll2. * - * @param cdev - * @param params - protocol driver configuration for the ll2. + * @cdev: Qed dev pointer. + * @params: Protocol driver configuration for the ll2. * - * @return 0 on success, otherwise error value. + * Return: 0 on success, otherwise error value. */ int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params); /** - * @brief stop - stops the ll2 + * stop(): Stops the ll2 * - * @param cdev + * @cdev: Qed dev pointer. * - * @return 0 on success, otherwise error value. + * Return: 0 on success, otherwise error value. */ int (*stop)(struct qed_dev *cdev); /** - * @brief start_xmit - transmits an skb over the ll2 interface + * start_xmit(): Transmits an skb over the ll2 interface * - * @param cdev - * @param skb - * @param xmit_flags - Transmit options defined by the enum qed_ll2_xmit_flags. + * @cdev: Qed dev pointer. + * @skb: SKB. + * @xmit_flags: Transmit options defined by the enum qed_ll2_xmit_flags. * - * @return 0 on success, otherwise error value. + * Return: 0 on success, otherwise error value. 
*/ int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb, unsigned long xmit_flags); /** - * @brief register_cb_ops - protocol driver register the callback for Rx/Tx + * register_cb_ops(): Protocol driver register the callback for Rx/Tx * packets. Should be called before `start'. * - * @param cdev - * @param cookie - to be passed to the callback functions. - * @param ops - the callback functions to register for Rx / Tx. + * @cdev: Qed dev pointer. + * @cookie: to be passed to the callback functions. + * @ops: the callback functions to register for Rx / Tx. * - * @return 0 on success, otherwise error value. + * Return: 0 on success, otherwise error value. */ void (*register_cb_ops)(struct qed_dev *cdev, const struct qed_ll2_cb_ops *ops, void *cookie); /** - * @brief get LL2 related statistics + * get_stats(): Get LL2 related statistics. * - * @param cdev - * @param stats - pointer to struct that would be filled with stats + * @cdev: Qed dev pointer. + * @stats: Pointer to struct that would be filled with stats. * - * @return 0 on success, error otherwise. + * Return: 0 on success, error otherwise. */ int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats); }; diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h index 14671bc19ed1..1d51df347560 100644 --- a/include/linux/qed/qed_nvmetcp_if.h +++ b/include/linux/qed/qed_nvmetcp_if.h @@ -171,6 +171,23 @@ struct nvmetcp_task_params { * @param dest_port * @clear_all_filters: Clear all filters. * @param cdev + * @init_read_io: Init read IO. + * @task_params + * @cmd_pdu_header + * @nvme_cmd + * @sgl_task_params + * @init_write_io: Init write IO. + * @task_params + * @cmd_pdu_header + * @nvme_cmd + * @sgl_task_params + * @init_icreq_exchange: Exchange ICReq. + * @task_params + * @init_conn_req_pdu_hdr + * @tx_sgl_task_params + * @rx_sgl_task_params + * @init_task_cleanup: Init task cleanup. 
+ * @task_params */ struct qed_nvmetcp_ops { const struct qed_common_ops *common; diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index aeb242cefebf..3b76c07fbcf8 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -662,7 +662,8 @@ struct qed_rdma_ops { u8 connection_handle, struct qed_ll2_stats *p_stats); int (*ll2_set_mac_filter)(struct qed_dev *cdev, - u8 *old_mac_address, u8 *new_mac_address); + u8 *old_mac_address, + const u8 *new_mac_address); int (*iwarp_set_engine_affin)(struct qed_dev *cdev, bool b_reset); diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index bab078b25834..6dfed163ab6c 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -27,6 +27,7 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_MAX_XRC_SRQS (1024) #define RDMA_MAX_SRQS (32 * 1024) +#define RDMA_MAX_IRQ_ELEMS_IN_PAGE (128) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS #define RDMA_NUM_STATISTIC_COUNTERS_K2 MAX_NUM_VPORTS_K2 diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 64ad900ac742..f7c1d21c2f39 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -9,8 +9,10 @@ #define _LINUX_RADIX_TREE_H #include <linux/bitops.h> -#include <linux/kernel.h> +#include <linux/gfp.h> #include <linux/list.h> +#include <linux/lockdep.h> +#include <linux/math.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/rcupdate.h> diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 154e954b711d..d6e5a1feb947 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -81,7 +81,7 @@ struct raid6_calls { void (*xor_syndrome)(int, int, int, size_t, void **); int (*valid)(void); /* Returns 1 if this routine set is usable */ const char *name; /* Name of this routine set */ - int prefer; /* Has special performance attribute */ + int priority; /* Relative priority ranking if non-zero */ }; /* 
Selected algorithm */ diff --git a/include/linux/random.h b/include/linux/random.h index f45b8be3e3c4..c45b2693e51f 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; +extern void add_interrupt_randomness(int irq) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index b676aa419eef..c21c7f8103e2 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -4,7 +4,7 @@ #include <linux/bits.h> #include <linux/param.h> -#include <linux/spinlock_types.h> +#include <linux/spinlock_types_raw.h> #define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) #define DEFAULT_RATELIMIT_BURST 10 diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 3db96c4f45fd..659d13a7ddaa 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -69,7 +69,7 @@ struct rcu_cblist { * * * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | @@ -77,7 +77,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_OFFLOADED | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. 
Waking up CB and GP kthreads, | @@ -89,7 +89,9 @@ struct rcu_cblist { * | | * v v * --------------------------------------- ----------------------------------| - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | * | | | | * | | | | @@ -104,9 +106,10 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | - * | SEGCBLIST_KTHREAD_GP | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | | + * | SEGCBLIST_KTHREAD_CB | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -120,7 +123,8 @@ struct rcu_cblist { * * * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -130,6 +134,22 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_CB | | + * | SEGCBLIST_KTHREAD_GP | + * | | + * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | + * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | Invoke RCU core so we make sure not to preempt it in the middle with | + * | leaving some urgent work unattended within a jiffy. 
| + * ---------------------------------------------------------------------------- + * | + * v + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -143,7 +163,9 @@ struct rcu_cblist { * | | * v v * ---------------------------------------------------------------------------| - * | | + * | | | + * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | * | | | * | GP kthread woke up and | CB kthread woke up and | @@ -159,7 +181,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | 0 | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | @@ -168,17 +190,18 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. 
| * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_SOFTIRQ_ONLY BIT(1) -#define SEGCBLIST_KTHREAD_CB BIT(2) -#define SEGCBLIST_KTHREAD_GP BIT(3) -#define SEGCBLIST_OFFLOADED BIT(4) +#define SEGCBLIST_RCU_CORE BIT(1) +#define SEGCBLIST_LOCKING BIT(2) +#define SEGCBLIST_KTHREAD_CB BIT(3) +#define SEGCBLIST_KTHREAD_GP BIT(4) +#define SEGCBLIST_OFFLOADED BIT(5) struct rcu_segcblist { struct rcu_head *head; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 434d12fe2d4f..88b42eb46406 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -71,7 +71,8 @@ static inline void __rcu_read_lock(void) static inline void __rcu_read_unlock(void) { preempt_enable(); - rcu_read_unlock_strict(); + if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) + rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) @@ -363,6 +364,12 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ +#define __unrcu_pointer(p, local) \ +({ \ + typeof(*p) *local = (typeof(*p) *__force)(p); \ + rcu_check_sparse(p, __rcu); \ + ((typeof(*p) __force __kernel *)(local)); \ +}) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property @@ -370,39 +377,35 @@ static inline void rcu_preempt_sleep_check(void) { } * Converts @p from an __rcu pointer to a __kernel pointer. * This allows an __rcu pointer to be used with xchg() and friends. 
*/ -#define unrcu_pointer(p) \ -({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \ - rcu_check_sparse(p, __rcu); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ -}) +#define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu)) -#define __rcu_access_pointer(p, space) \ +#define __rcu_access_pointer(p, local, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_check(p, c, space) \ +#define __rcu_dereference_check(p, local, c, space) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_protected(p, c, space) \ +#define __rcu_dereference_protected(p, local, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) -#define rcu_dereference_raw(p) \ +#define __rcu_dereference_raw(p, local) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(p) ________p1 = READ_ONCE(p); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + typeof(p) local = READ_ONCE(p); \ + ((typeof(*p) __force __kernel *)(local)); \ }) +#define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu)) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable @@ -489,7 +492,7 @@ do { \ * when tearing down multi-linked structures after a grace period * has elapsed. 
*/ -#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) +#define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking @@ -525,7 +528,8 @@ do { \ * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking @@ -540,7 +544,8 @@ do { \ * rcu_read_lock() but also rcu_read_lock_bh() into account. */ #define rcu_dereference_bh_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking @@ -555,7 +560,8 @@ do { \ * only rcu_read_lock() but also rcu_read_lock_sched() into account. */ #define rcu_dereference_sched_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_sched_held(), \ __rcu) /* @@ -565,7 +571,8 @@ do { \ * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ -#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) +#define rcu_dereference_raw_check(p) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented @@ -584,7 +591,7 @@ do { \ * but very ugly failures. 
*/ #define rcu_dereference_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) + __rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu) /** diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 86c8f6c98412..6f9c35817398 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -31,7 +31,7 @@ static inline int rcu_read_lock_trace_held(void) #ifdef CONFIG_TASKS_TRACE_RCU -void rcu_read_unlock_trace_special(struct task_struct *t, int nesting); +void rcu_read_unlock_trace_special(struct task_struct *t); /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section @@ -80,7 +80,8 @@ static inline void rcu_read_unlock_trace(void) WRITE_ONCE(t->trc_reader_nesting, nesting); return; // We assume shallow reader nesting. } - rcu_read_unlock_trace_special(t, nesting); + WARN_ON_ONCE(nesting != 0); + rcu_read_unlock_trace_special(t); } void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9be015305f9f..858f4d429946 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) + (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h new file mode 100644 index 000000000000..c11c9db5825c --- /dev/null +++ b/include/linux/ref_tracker.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#ifndef _LINUX_REF_TRACKER_H +#define _LINUX_REF_TRACKER_H +#include <linux/refcount.h> +#include <linux/types.h> +#include 
<linux/spinlock.h> + +struct ref_tracker; + +struct ref_tracker_dir { +#ifdef CONFIG_REF_TRACKER + spinlock_t lock; + unsigned int quarantine_avail; + refcount_t untracked; + struct list_head list; /* List of active trackers */ + struct list_head quarantine; /* List of dead trackers */ +#endif +}; + +#ifdef CONFIG_REF_TRACKER +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, + unsigned int quarantine_count) +{ + INIT_LIST_HEAD(&dir->list); + INIT_LIST_HEAD(&dir->quarantine); + spin_lock_init(&dir->lock); + dir->quarantine_avail = quarantine_count; + refcount_set(&dir->untracked, 1); +} + +void ref_tracker_dir_exit(struct ref_tracker_dir *dir); + +void ref_tracker_dir_print(struct ref_tracker_dir *dir, + unsigned int display_limit); + +int ref_tracker_alloc(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp, gfp_t gfp); + +int ref_tracker_free(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp); + +#else /* CONFIG_REF_TRACKER */ + +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, + unsigned int quarantine_count) +{ +} + +static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir) +{ +} + +static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir, + unsigned int display_limit) +{ +} + +static inline int ref_tracker_alloc(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp, + gfp_t gfp) +{ + return 0; +} + +static inline int ref_tracker_free(struct ref_tracker_dir *dir, + struct ref_tracker **trackerp) +{ + return 0; +} + +#endif + +#endif /* _LINUX_REF_TRACKER_H */ diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e3c9a25a853a..22652e5fbc38 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -290,6 +290,11 @@ typedef void (*regmap_unlock)(void *); * read operation on a bus such as SPI, I2C, etc. Most of the * devices do not need this. * @reg_write: Same as above for writing. 
+ * @reg_update_bits: Optional callback that if filled will be used to perform + * all the update_bits(rmw) operation. Should only be provided + * if the function require special handling with lock and reg + * handling and the operation cannot be represented as a simple + * update_bits operation on a bus such as SPI, I2C, etc. * @fast_io: Register IO is fast. Use a spinlock instead of a mutex * to perform locking. This field is ignored if custom lock/unlock * functions are used (see fields lock/unlock of struct regmap_config). @@ -372,6 +377,8 @@ struct regmap_config { int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); bool fast_io; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bd7a73db2e66..0228caaa6741 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -90,22 +90,28 @@ enum regulator_detection_severity { * @set_over_current_protection: Support enabling of and setting limits for over * current situation detection. Detection can be configured for three * levels of severity. - * REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s). - * REGULATOR_SEVERITY_ERR should indicate that over-current situation is - * caused by an unrecoverable error but HW does not perform - * automatic shut down. - * REGULATOR_SEVERITY_WARN should indicate situation where hardware is - * still believed to not be damaged but that a board sepcific - * recovery action is needed. If lim_uA is 0 the limit should not - * be changed but the detection should just be enabled/disabled as - * is requested. + * + * - REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s). 
+ * + * - REGULATOR_SEVERITY_ERR should indicate that over-current situation is + * caused by an unrecoverable error but HW does not perform + * automatic shut down. + * + * - REGULATOR_SEVERITY_WARN should indicate situation where hardware is + * still believed to not be damaged but that a board sepcific + * recovery action is needed. If lim_uA is 0 the limit should not + * be changed but the detection should just be enabled/disabled as + * is requested. + * * @set_over_voltage_protection: Support enabling of and setting limits for over * voltage situation detection. Detection can be configured for same - * severities as over current protection. + * severities as over current protection. Units of uV. * @set_under_voltage_protection: Support enabling of and setting limits for - * under situation detection. + * under voltage situation detection. Detection can be configured for same + * severities as over current protection. Units of uV. * @set_thermal_protection: Support enabling of and setting limits for over - * temperature situation detection. + * temperature situation detection.Detection can be configured for same + * severities as over current protection. Units of degree Kelvin. * * @set_active_discharge: Set active discharge enable/disable of regulators. * @@ -499,7 +505,8 @@ struct regulator_irq_data { * best to shut-down regulator(s) or reboot the SOC if error * handling is repeatedly failing. If fatal_cnt is given the IRQ * handling is aborted if it fails for fatal_cnt times and die() - * callback (if populated) or BUG() is called to try to prevent + * callback (if populated) is called. If die() is not populated + * poweroff for the system is attempted in order to prevent any * further damage. * @reread_ms: The time which is waited before attempting to re-read status * at the worker if IC reading fails. 
Immediate re-read is done @@ -516,11 +523,12 @@ struct regulator_irq_data { * @data: Driver private data pointer which will be passed as such to * the renable, map_event and die callbacks in regulator_irq_data. * @die: Protection callback. If IC status reading or recovery actions - * fail fatal_cnt times this callback or BUG() is called. This - * callback should implement a final protection attempt like - * disabling the regulator. If protection succeeded this may - * return 0. If anything else is returned the core assumes final - * protection failed and calls BUG() as a last resort. + * fail fatal_cnt times this callback is called or system is + * powered off. This callback should implement a final protection + * attempt like disabling the regulator. If protection succeeded + * die() may return 0. If anything else is returned the core + * assumes final protection failed and attempts to perform a + * poweroff as a last resort. * @map_event: Driver callback to map IRQ status into regulator devices with * events / errors. NOTE: callback MUST initialize both the * errors and notifs for all rdevs which it signals having @@ -552,7 +560,6 @@ struct regulator_irq_data { */ struct regulator_irq_desc { const char *name; - int irq_flags; int fatal_cnt; int reread_ms; int irq_off_ms; @@ -644,6 +651,40 @@ struct regulator_dev { spinlock_t err_lock; }; +/* + * Convert error flags to corresponding notifications. + * + * Can be used by drivers which use the notification helpers to + * find out correct notification flags based on the error flags. Drivers + * can avoid storing both supported notification and error flags which + * may save few bytes. 
+ */ +static inline int regulator_err2notif(int err) +{ + switch (err) { + case REGULATOR_ERROR_UNDER_VOLTAGE: + return REGULATOR_EVENT_UNDER_VOLTAGE; + case REGULATOR_ERROR_OVER_CURRENT: + return REGULATOR_EVENT_OVER_CURRENT; + case REGULATOR_ERROR_REGULATION_OUT: + return REGULATOR_EVENT_REGULATION_OUT; + case REGULATOR_ERROR_FAIL: + return REGULATOR_EVENT_FAIL; + case REGULATOR_ERROR_OVER_TEMP: + return REGULATOR_EVENT_OVER_TEMP; + case REGULATOR_ERROR_UNDER_VOLTAGE_WARN: + return REGULATOR_EVENT_UNDER_VOLTAGE_WARN; + case REGULATOR_ERROR_OVER_CURRENT_WARN: + return REGULATOR_EVENT_OVER_CURRENT_WARN; + case REGULATOR_ERROR_OVER_VOLTAGE_WARN: + return REGULATOR_EVENT_OVER_VOLTAGE_WARN; + case REGULATOR_ERROR_OVER_TEMP_WARN: + return REGULATOR_EVENT_OVER_TEMP_WARN; + } + return 0; +} + + struct regulator_dev * regulator_register(const struct regulator_desc *regulator_desc, const struct regulator_config *config); @@ -665,6 +706,8 @@ void *regulator_irq_helper(struct device *dev, int irq_flags, int common_errs, int *per_rdev_errs, struct regulator_dev **rdev, int rdev_amount); void regulator_irq_helper_cancel(void **handle); +int regulator_irq_map_event_simple(int irq, struct regulator_irq_data *rid, + unsigned long *dev_mask); void *rdev_get_drvdata(struct regulator_dev *rdev); struct device *rdev_get_dev(struct regulator_dev *rdev); diff --git a/include/linux/regulator/lp872x.h b/include/linux/regulator/lp872x.h index d780dbb8b423..b62e45aa1dd3 100644 --- a/include/linux/regulator/lp872x.h +++ b/include/linux/regulator/lp872x.h @@ -10,7 +10,7 @@ #include <linux/regulator/machine.h> #include <linux/platform_device.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #define LP872X_MAX_REGULATORS 9 @@ -40,11 +40,6 @@ enum lp872x_regulator_id { LP872X_ID_MAX, }; -enum lp872x_dvs_state { - DVS_LOW = GPIOF_OUT_INIT_LOW, - DVS_HIGH = GPIOF_OUT_INIT_HIGH, -}; - enum lp872x_dvs_sel { SEL_V1, SEL_V2, @@ -52,14 +47,14 @@ enum lp872x_dvs_sel { /** * lp872x_dvs - * 
@gpio : gpio pin number for dvs control + * @gpio : gpio descriptor for dvs control * @vsel : dvs selector for buck v1 or buck v2 register * @init_state : initial dvs pin state */ struct lp872x_dvs { - int gpio; + struct gpio_desc *gpio; enum lp872x_dvs_sel vsel; - enum lp872x_dvs_state init_state; + enum gpiod_flags init_state; }; /** @@ -78,14 +73,14 @@ struct lp872x_regulator_data { * @update_config : if LP872X_GENERAL_CFG register is updated, set true * @regulator_data : platform regulator id and init data * @dvs : dvs data for buck voltage control - * @enable_gpio : gpio pin number for enable control + * @enable_gpio : gpio descriptor for enable control */ struct lp872x_platform_data { u8 general_config; bool update_config; struct lp872x_regulator_data regulator_data[LP872X_MAX_REGULATORS]; struct lp872x_dvs *dvs; - int enable_gpio; + struct gpio_desc *enable_gpio; }; #endif diff --git a/include/linux/regulator/tps62360.h b/include/linux/regulator/tps62360.h index 94a90c06f1e5..398e74a1d941 100644 --- a/include/linux/regulator/tps62360.h +++ b/include/linux/regulator/tps62360.h @@ -19,10 +19,6 @@ * @en_discharge: Enable discharge the output capacitor via internal * register. * @en_internal_pulldn: internal pull down enable or not. - * @vsel0_gpio: Gpio number for vsel0. It should be -1 if this is tied with - * fixed logic. - * @vsel1_gpio: Gpio number for vsel1. It should be -1 if this is tied with - * fixed logic. * @vsel0_def_state: Default state of vsel0. 1 if it is high else 0. * @vsel1_def_state: Default state of vsel1. 1 if it is high else 0. 
*/ @@ -30,8 +26,6 @@ struct tps62360_regulator_platform_data { struct regulator_init_data *reg_init_data; bool en_discharge; bool en_internal_pulldn; - int vsel0_gpio; - int vsel1_gpio; int vsel0_def_state; int vsel1_def_state; }; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 83c09ac36b13..e0600e1e5c17 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -684,18 +684,6 @@ int rproc_coredump_add_custom_segment(struct rproc *rproc, void *priv); int rproc_coredump_set_elf_info(struct rproc *rproc, u8 class, u16 machine); -static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev) -{ - return container_of(vdev->dev.parent, struct rproc_vdev, dev); -} - -static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev) -{ - struct rproc_vdev *rvdev = vdev_to_rvdev(vdev); - - return rvdev->rproc; -} - void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev); void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev); diff --git a/include/linux/reset.h b/include/linux/reset.h index db0e6115a2f6..8a21b5756c3e 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -455,6 +455,26 @@ static inline struct reset_control *of_reset_control_get_exclusive( } /** + * of_reset_control_get_optional_exclusive - Lookup and obtain an optional exclusive + * reference to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * Optional variant of of_reset_control_get_exclusive(). If the requested reset + * is not specified in the device tree, this function returns NULL instead of + * an error. + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. 
+ */ +static inline struct reset_control *of_reset_control_get_optional_exclusive( + struct device_node *node, const char *id) +{ + return __of_reset_control_get(node, id, 0, false, true, true); +} + +/** * of_reset_control_get_shared - Lookup and obtain a shared reference * to a reset controller. * @node: device to be reset by the controller diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 231e06b74b50..c35f3962dc4f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -230,6 +230,13 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); bool rfkill_blocked(struct rfkill *rfkill); /** + * rfkill_soft_blocked - Query soft rfkill block state + * + * @rfkill: rfkill struct to query + */ +bool rfkill_soft_blocked(struct rfkill *rfkill); + +/** * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 4846f72759b2..c7e2f21dd5c1 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -9,18 +9,6 @@ #ifndef LINUX_RIO_IDS_H #define LINUX_RIO_IDS_H -#define RIO_VID_FREESCALE 0x0002 -#define RIO_DID_MPC8560 0x0003 - -#define RIO_VID_TUNDRA 0x000d -#define RIO_DID_TSI500 0x0500 -#define RIO_DID_TSI568 0x0568 -#define RIO_DID_TSI572 0x0572 -#define RIO_DID_TSI574 0x0574 -#define RIO_DID_TSI576 0x0578 /* Same ID as Tsi578 */ -#define RIO_DID_TSI577 0x0577 -#define RIO_DID_TSI578 0x0578 - #define RIO_VID_IDT 0x0038 #define RIO_DID_IDT70K200 0x0310 #define RIO_DID_IDTCPS8 0x035c @@ -33,7 +21,6 @@ #define RIO_DID_IDTCPS1616 0x0379 #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 -#define RIO_DID_TSI721 0x80ab #define RIO_DID_IDTRXS1632 0x80e5 #define RIO_DID_IDTRXS2448 0x80e6 diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c976cc6de257..e704b1a4c06c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct 
vm_area_struct *); * * returns the number of cleaned PTEs. */ -int page_mkclean(struct page *); +int folio_mkclean(struct folio *); /* * called in munlock()/munmap() path to check for other vmas holding @@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags) { } -static inline int page_mkclean(struct page *page) +static inline int folio_mkclean(struct folio *folio) { return 0; } - - #endif /* CONFIG_MMU */ +static inline int page_mkclean(struct page *page) +{ + return folio_mkclean(page_folio(page)); +} #endif /* _LINUX_RMAP_H */ diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index d97dcd049f18..02fa9116cd60 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -186,6 +186,8 @@ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait); +ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept); + #else static inline int rpmsg_register_device(struct rpmsg_device *rpdev) @@ -231,7 +233,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev /* This shouldn't be possible */ WARN_ON(1); - return ERR_PTR(-ENXIO); + return NULL; } static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) @@ -296,6 +298,14 @@ static inline __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, return 0; } +static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + #endif /* IS_ENABLED(CONFIG_RPMSG) */ /* use a macro to avoid include chaining to get THIS_MODULE */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index bd611e26291d..47fd1c2d3a57 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -66,6 +66,8 @@ struct rtc_class_ops { int (*alarm_irq_enable)(struct device *, unsigned int enabled); int (*read_offset)(struct device *, long *offset); int (*set_offset)(struct device *, long offset); + 
int (*param_get)(struct device *, struct rtc_param *param); + int (*param_set)(struct device *, struct rtc_param *param); }; struct rtc_device; @@ -80,6 +82,7 @@ struct rtc_timer { /* flags */ #define RTC_DEV_BUSY 0 +#define RTC_NO_CDEV 1 struct rtc_device { struct device dev; diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 9deedfeec2b1..7d049883a08a 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); +extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock); #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0) +#define rt_mutex_lock_nest_lock(lock, nest_lock) \ + do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ + } while (0) + #else extern void rt_mutex_lock(struct rt_mutex *lock); #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock) +#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock) #endif extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); +extern int rt_mutex_lock_killable(struct rt_mutex *lock); extern int rt_mutex_trylock(struct rt_mutex *lock); extern void rt_mutex_unlock(struct rt_mutex *lock); diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 7ce9a51ae5c0..2c0ad417ce3c 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -30,31 +30,16 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock); -#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock) extern int do_raw_read_trylock(rwlock_t *lock); extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock); extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); -#define do_raw_write_lock_flags(lock, flags) 
do_raw_write_lock(lock) extern int do_raw_write_trylock(rwlock_t *lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else - -#ifndef arch_read_lock_flags -# define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#endif - -#ifndef arch_write_lock_flags -# define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#endif - # define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_read_lock_flags(lock, flags) \ - do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) # define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) # define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_write_lock_flags(lock, flags) \ - do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) # define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index abfb53ab11be..f1db6f17c4fb 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -157,8 +157,7 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock, - do_raw_read_lock_flags, &flags); + LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock); return flags; } @@ -184,8 +183,7 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, 
do_raw_write_trylock, do_raw_write_lock, - do_raw_write_lock_flags, &flags); + LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); return flags; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 352c6127cb90..f9348769e558 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -11,7 +11,6 @@ #include <linux/linkage.h> #include <linux/types.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/atomic.h> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 2713e689ad66..fc0357a6e19b 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -9,8 +9,17 @@ #ifndef __LINUX_SCALE_BITMAP_H #define __LINUX_SCALE_BITMAP_H -#include <linux/kernel.h> +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/cache.h> +#include <linux/list.h> +#include <linux/log2.h> +#include <linux/minmax.h> +#include <linux/percpu.h> #include <linux/slab.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <linux/wait.h> struct seq_file; @@ -427,6 +436,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** + * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits + * @sbq: Bitmap queue to allocate from. + * @nr_tags: number of tags requested + * @offset: offset to add to returned bits + * + * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is + * a bit in the mask returned, and the caller must add @offset to the value to + * get the absolute tag value. + */ +unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, + unsigned int *offset); + +/** * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. 
@@ -515,6 +537,17 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); +/** + * sbitmap_queue_clear_batch() - Free a batch of allocated bits + * &struct sbitmap_queue. + * @sbq: Bitmap to free from. + * @offset: offset for each tag in array + * @tags: array of tags + * @nr_tags: number of tags in array + */ +void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, + int *tags, int nr_tags); + static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 266754a55327..7ff9d6386c12 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -69,10 +69,27 @@ struct sg_append_table { * a valid sg entry, or whether it points to the start of a new scatterlist. * Those low bits are there for everyone! (thanks mason :-) */ -#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) -#define sg_is_last(sg) ((sg)->page_link & SG_END) -#define sg_chain_ptr(sg) \ - ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END))) +#define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END) + +static inline unsigned int __sg_flags(struct scatterlist *sg) +{ + return sg->page_link & SG_PAGE_LINK_MASK; +} + +static inline struct scatterlist *sg_chain_ptr(struct scatterlist *sg) +{ + return (struct scatterlist *)(sg->page_link & ~SG_PAGE_LINK_MASK); +} + +static inline bool sg_is_chain(struct scatterlist *sg) +{ + return __sg_flags(sg) & SG_CHAIN; +} + +static inline bool sg_is_last(struct scatterlist *sg) +{ + return __sg_flags(sg) & SG_END; +} /** * sg_assign_page - Assign a given page to an SG entry @@ -92,7 +109,7 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) * In order for the low bit stealing approach to work, pages * must be aligned at a 32-bit boundary as a minimum. 
*/ - BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); + BUG_ON((unsigned long)page & SG_PAGE_LINK_MASK); #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif @@ -126,7 +143,7 @@ static inline struct page *sg_page(struct scatterlist *sg) #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif - return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); + return (struct page *)((sg)->page_link & ~SG_PAGE_LINK_MASK); } /** diff --git a/include/linux/sched.h b/include/linux/sched.h index c1a927ddec64..a6a2db5f85ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -503,6 +503,8 @@ struct sched_statistics { u64 block_start; u64 block_max; + s64 sum_block_runtime; + u64 exec_max; u64 slice_max; @@ -521,8 +523,12 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; + +#ifdef CONFIG_SCHED_CORE + u64 core_forceidle_sum; #endif -}; +#endif /* CONFIG_SCHEDSTATS */ +} ____cacheline_aligned; struct sched_entity { /* For load-balancing: */ @@ -538,8 +544,6 @@ struct sched_entity { u64 nr_migrations; - struct sched_statistics statistics; - #ifdef CONFIG_FAIR_GROUP_SCHED int depth; struct sched_entity *parent; @@ -750,10 +754,6 @@ struct task_struct { #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; -#ifdef CONFIG_THREAD_INFO_IN_TASK - /* Current CPU: */ - unsigned int cpu; -#endif unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -775,10 +775,10 @@ struct task_struct { int normal_prio; unsigned int rt_priority; - const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; + const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE struct rb_node core_node; @@ -803,6 +803,8 @@ struct task_struct { struct uclamp_se uclamp[UCLAMP_CNT]; #endif + struct sched_statistics stats; + #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head 
preempt_notifiers; @@ -989,8 +991,8 @@ struct task_struct { /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; - /* PF_IO_WORKER */ - void *pf_io_worker; + /* PF_KTHREAD | PF_IO_WORKER */ + void *worker_private; u64 utime; u64 stime; @@ -1160,10 +1162,8 @@ struct task_struct { /* Stacked block device info: */ struct bio_list *bio_list; -#ifdef CONFIG_BLOCK /* Stack plugging: */ struct blk_plug *plug; -#endif /* VM state: */ struct reclaim_state *reclaim_state; @@ -1343,6 +1343,9 @@ struct task_struct { #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events kcsan_save_irqtrace; #endif +#ifdef CONFIG_KCSAN_WEAK_MEMORY + int kcsan_stack_depth; +#endif #endif #if IS_ENABLED(CONFIG_KUNIT) @@ -1665,6 +1668,7 @@ extern struct pid *cad_pid; #define PF_VCPU 0x00000001 /* I'm a virtual CPU */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */ #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ @@ -1886,10 +1890,7 @@ extern struct thread_info init_thread_info; extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} +# define task_thread_info(task) (&(task)->thread_info) #elif !defined(__HAVE_THREAD_FUNCTIONS) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif @@ -2039,7 +2040,7 @@ static inline int _cond_resched(void) { return 0; } #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */ #define cond_resched() ({ \ - ___might_sleep(__FILE__, __LINE__, 0); \ + __might_resched(__FILE__, __LINE__, 0); \ _cond_resched(); \ }) @@ -2047,19 +2048,38 @@ extern int __cond_resched_lock(spinlock_t *lock); extern int 
__cond_resched_rwlock_read(rwlock_t *lock); extern int __cond_resched_rwlock_write(rwlock_t *lock); -#define cond_resched_lock(lock) ({ \ - ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ - __cond_resched_lock(lock); \ +#define MIGHT_RESCHED_RCU_SHIFT 8 +#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) + +#ifndef CONFIG_PREEMPT_RT +/* + * Non RT kernels have an elevated preempt count due to the held lock, + * but are not allowed to be inside a RCU read side critical section + */ +# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET +#else +/* + * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in + * cond_resched*lock() has to take that into account because it checks for + * preempt_count() and rcu_preempt_depth(). + */ +# define PREEMPT_LOCK_RESCHED_OFFSETS \ + (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) +#endif + +#define cond_resched_lock(lock) ({ \ + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ + __cond_resched_lock(lock); \ }) -#define cond_resched_rwlock_read(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_rwlock_read(lock); \ +#define cond_resched_rwlock_read(lock) ({ \ + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ + __cond_resched_rwlock_read(lock); \ }) -#define cond_resched_rwlock_write(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_rwlock_write(lock); \ +#define cond_resched_rwlock_write(lock) ({ \ + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ + __cond_resched_rwlock_write(lock); \ }) static inline void cond_resched_rcu(void) @@ -2114,11 +2134,7 @@ static __always_inline bool need_resched(void) static inline unsigned int task_cpu(const struct task_struct *p) { -#ifdef CONFIG_THREAD_INFO_IN_TASK - return READ_ONCE(p->cpu); -#else return READ_ONCE(task_thread_info(p)->cpu); -#endif } extern void set_task_cpu(struct task_struct *p, 
unsigned int cpu); @@ -2137,6 +2153,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ extern bool sched_task_on_rq(struct task_struct *p); +extern unsigned long get_wchan(struct task_struct *p); /* * In order to reduce various lock holder preemption latencies provide an @@ -2161,6 +2178,15 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #endif #ifdef CONFIG_SMP +static inline bool owner_on_cpu(struct task_struct *owner) +{ + /* + * As lock holder preemption issue, we both skip spinning if + * task is not on cpu or its cpu is preempted + */ + return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner)); +} + /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu, unsigned long max); #endif /* CONFIG_SMP */ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6c9f19a33865..ce3c58286062 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -18,15 +18,16 @@ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, +extern bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else -static inline void task_cputime(struct task_struct *t, +static inline bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; + return false; } static inline u64 task_gtime(struct task_struct *t) diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 22873d276be6..d73d314d59c6 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -11,7 +11,11 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +#ifdef CONFIG_SMP extern void wake_up_if_idle(int cpu); +#else +static inline void wake_up_if_idle(int cpu) { } +#endif /* * Idle thread specific functions to determine the need_resched diff --git 
a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 5561486fddef..aa5f09ca5bcf 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,35 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +#ifdef CONFIG_PREEMPT_RT +/* + * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is + * by far the least expensive way to do that. + */ +static inline void __mmdrop_delayed(struct rcu_head *rhp) +{ + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); + + __mmdrop(mm); +} + +/* + * Invoked from finish_task_switch(). Delegates the heavy lifting on RT + * kernels via RCU. + */ +static inline void mmdrop_sched(struct mm_struct *mm) +{ + /* Provides a full memory barrier. See mmdrop() */ + if (atomic_dec_and_test(&mm->mm_count)) + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else +static inline void mmdrop_sched(struct mm_struct *mm) +{ + mmdrop(mm); +} +#endif + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. @@ -185,6 +214,32 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif +/* Any memory-allocation retry loop should use + * memalloc_retry_wait(), and pass the flags for the most + * constrained allocation attempt that might have failed. + * This provides useful documentation of where loops are, + * and a central place to fine tune the waiting as the MM + * implementation changes. + */ +static inline void memalloc_retry_wait(gfp_t gfp_flags) +{ + /* We use io_schedule_timeout because waiting for memory + * typically included waiting for dirty pages to be + * written out, which requires IO. 
+ */ + __set_current_state(TASK_UNINTERRUPTIBLE); + gfp_flags = current_gfp_context(gfp_flags); + if (gfpflags_allow_blocking(gfp_flags) && + !(gfp_flags & __GFP_NORETRY)) + /* Probably waited already, no need for much more */ + io_schedule_timeout(1); + else + /* Probably didn't wait, and has now released a lock, + * so now is a good time to wait + */ + io_schedule_timeout(HZ/50); +} + /** * might_alloc - Mark possible allocation sites * @gfp_mask: gfp_t flags that would be used to allocate diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index e5f4ce622ee6..b6ecb9fc4cd2 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -72,6 +72,17 @@ struct multiprocess_signals { struct hlist_node node; }; +struct core_thread { + struct task_struct *task; + struct core_thread *next; +}; + +struct core_state { + atomic_t nr_threads; + struct core_thread dumper; + struct completion startup; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always @@ -98,18 +109,16 @@ struct signal_struct { /* thread group exit support */ int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. 
- */ + /* notify group_exec_task when notify_count is less or equal to 0 */ int notify_count; - struct task_struct *group_exit_task; + struct task_struct *group_exec_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ + struct core_state *core_state; /* coredumping support */ + /* * PR_SET_CHILD_SUBREAPER marks a process, like a service * manager, to re-parent orphan (double-forking) child processes @@ -243,7 +252,6 @@ struct signal_struct { #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ /* * Pending notifications to parent. */ @@ -259,31 +267,25 @@ struct signal_struct { static inline void signal_set_stop_flags(struct signal_struct *sig, unsigned int flags) { - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + WARN_ON(sig->flags & SIGNAL_GROUP_EXIT); sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *task, - sigset_t *mask, kernel_siginfo_t *info); +extern int dequeue_signal(struct task_struct *task, sigset_t *mask, + kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { struct task_struct *task = current; kernel_siginfo_t __info; + enum pid_type __type; int ret; spin_lock_irq(&task->sighand->siglock); - ret = dequeue_signal(task, 
&task->blocked, &__info); + ret = dequeue_signal(task, &task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; @@ -338,6 +340,8 @@ extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); +extern void force_fatal_sig(int); +extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ef02be869cf2..b9198a1b3a84 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,10 +54,12 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p); +extern void sched_post_fork(struct task_struct *p, + struct kernel_clone_args *kargs); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); +void __noreturn make_task_dead(int signr); extern void proc_caches_init(void); @@ -157,7 +159,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. + * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). 
* It must not be nested with write_lock_irq(&tasklist_lock), diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 2413427e439c..d10150587d81 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) static inline unsigned long *end_of_stack(const struct task_struct *task) { +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; +#else return task->stack; +#endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 8f0f778b7c91..8054641c0a7b 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -42,6 +42,13 @@ static inline int cpu_smt_flags(void) } #endif +#ifdef CONFIG_SCHED_CLUSTER +static inline int cpu_cluster_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + #ifdef CONFIG_SCHED_MC static inline int cpu_core_flags(void) { @@ -98,7 +105,7 @@ struct sched_domain { /* idle_balance() stats */ u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; + unsigned long last_decay_max_lb_cost; u64 avg_scan_cost; /* select_idle_sibling */ @@ -259,10 +266,10 @@ unsigned long arch_scale_thermal_pressure(int cpu) } #endif -#ifndef arch_set_thermal_pressure +#ifndef arch_update_thermal_pressure static __always_inline -void arch_set_thermal_pressure(const struct cpumask *cpus, - unsigned long th_pressure) +void arch_update_thermal_pressure(const struct cpumask *cpus, + unsigned long capped_frequency) { } #endif diff --git a/include/linux/sdb.h b/include/linux/sdb.h deleted file mode 100644 index a2404a2bbd10..000000000000 --- a/include/linux/sdb.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is the official version 1.1 of sdb.h - */ -#ifndef __SDB_H__ -#define __SDB_H__ -#ifdef __KERNEL__ -#include <linux/types.h> -#else 
-#include <stdint.h> -#endif - -/* - * All structures are 64 bytes long and are expected - * to live in an array, one for each interconnect. - * Most fields of the structures are shared among the - * various types, and most-specific fields are at the - * beginning (for alignment reasons, and to keep the - * magic number at the head of the interconnect record - */ - -/* Product, 40 bytes at offset 24, 8-byte aligned - * - * device_id is vendor-assigned; version is device-specific, - * date is hex (e.g 0x20120501), name is UTF-8, blank-filled - * and not terminated with a 0 byte. - */ -struct sdb_product { - uint64_t vendor_id; /* 0x18..0x1f */ - uint32_t device_id; /* 0x20..0x23 */ - uint32_t version; /* 0x24..0x27 */ - uint32_t date; /* 0x28..0x2b */ - uint8_t name[19]; /* 0x2c..0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* - * Component, 56 bytes at offset 8, 8-byte aligned - * - * The address range is first to last, inclusive - * (for example 0x100000 - 0x10ffff) - */ -struct sdb_component { - uint64_t addr_first; /* 0x08..0x0f */ - uint64_t addr_last; /* 0x10..0x17 */ - struct sdb_product product; /* 0x18..0x3f */ -}; - -/* Type of the SDB record */ -enum sdb_record_type { - sdb_type_interconnect = 0x00, - sdb_type_device = 0x01, - sdb_type_bridge = 0x02, - sdb_type_integration = 0x80, - sdb_type_repo_url = 0x81, - sdb_type_synthesis = 0x82, - sdb_type_empty = 0xFF, -}; - -/* Type 0: interconnect (first of the array) - * - * sdb_records is the length of the table including this first - * record, version is 1. The bus type is enumerated later. 
- */ -#define SDB_MAGIC 0x5344422d /* "SDB-" */ -struct sdb_interconnect { - uint32_t sdb_magic; /* 0x00-0x03 */ - uint16_t sdb_records; /* 0x04-0x05 */ - uint8_t sdb_version; /* 0x06 */ - uint8_t sdb_bus_type; /* 0x07 */ - struct sdb_component sdb_component; /* 0x08-0x3f */ -}; - -/* Type 1: device - * - * class is 0 for "custom device", other values are - * to be standardized; ABI version is for the driver, - * bus-specific bits are defined by each bus (see below) - */ -struct sdb_device { - uint16_t abi_class; /* 0x00-0x01 */ - uint8_t abi_ver_major; /* 0x02 */ - uint8_t abi_ver_minor; /* 0x03 */ - uint32_t bus_specific; /* 0x04-0x07 */ - struct sdb_component sdb_component; /* 0x08-0x3f */ -}; - -/* Type 2: bridge - * - * child is the address of the nested SDB table - */ -struct sdb_bridge { - uint64_t sdb_child; /* 0x00-0x07 */ - struct sdb_component sdb_component; /* 0x08-0x3f */ -}; - -/* Type 0x80: integration - * - * all types with bit 7 set are meta-information, so - * software can ignore the types it doesn't know. 
Here we - * just provide product information for an aggregate device - */ -struct sdb_integration { - uint8_t reserved[24]; /* 0x00-0x17 */ - struct sdb_product product; /* 0x08-0x3f */ -}; - -/* Type 0x81: Top module repository url - * - * again, an informative field that software can ignore - */ -struct sdb_repo_url { - uint8_t repo_url[63]; /* 0x00-0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* Type 0x82: Synthesis tool information - * - * this informative record - */ -struct sdb_synthesis { - uint8_t syn_name[16]; /* 0x00-0x0f */ - uint8_t commit_id[16]; /* 0x10-0x1f */ - uint8_t tool_name[8]; /* 0x20-0x27 */ - uint32_t tool_version; /* 0x28-0x2b */ - uint32_t date; /* 0x2c-0x2f */ - uint8_t user_name[15]; /* 0x30-0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* Type 0xff: empty - * - * this allows keeping empty slots during development, - * so they can be filled later with minimal efforts and - * no misleading description is ever shipped -- hopefully. - * It can also be used to pad a table to a desired length. 
- */ -struct sdb_empty { - uint8_t reserved[63]; /* 0x00-0x3e */ - uint8_t record_type; /* 0x3f */ -}; - -/* The type of bus, for bus-specific flags */ -enum sdb_bus_type { - sdb_wishbone = 0x00, - sdb_data = 0x01, -}; - -#define SDB_WB_WIDTH_MASK 0x0f -#define SDB_WB_ACCESS8 0x01 -#define SDB_WB_ACCESS16 0x02 -#define SDB_WB_ACCESS32 0x04 -#define SDB_WB_ACCESS64 0x08 -#define SDB_WB_LITTLE_ENDIAN 0x80 - -#define SDB_DATA_READ 0x04 -#define SDB_DATA_WRITE 0x02 -#define SDB_DATA_EXEC 0x01 - -#endif /* __SDB_H__ */ diff --git a/include/linux/security.h b/include/linux/security.h index 5b7288521300..6d72772182c8 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -179,7 +179,7 @@ struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; -struct sctp_endpoint; +struct sctp_association; #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; @@ -258,13 +258,13 @@ extern int security_init(void); extern int early_security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -313,12 +313,11 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb, 
unsigned long kern_flags, unsigned long *set_kern_flags); -int security_add_mnt_opt(const char *option, const char *val, - int len, void **mnt_opts); int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, - u32 *ctxlen); + const struct qstr *name, + const char **xattr_name, void **ctx, + u32 *ctxlen); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -417,7 +416,7 @@ int security_task_fix_setgid(struct cred *new, const struct cred *old, int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); -void security_task_getsecid_subj(struct task_struct *p, u32 *secid); +void security_current_getsecid_subj(u32 *secid); void security_task_getsecid_obj(struct task_struct *p, u32 *secid); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); @@ -508,25 +507,25 @@ static inline int early_security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file 
*file) { return 0; @@ -710,12 +709,6 @@ static inline int security_sb_clone_mnt_opts(const struct super_block *oldsb, return 0; } -static inline int security_add_mnt_opt(const char *option, const char *val, - int len, void **mnt_opts) -{ - return 0; -} - static inline int security_move_mount(const struct path *from_path, const struct path *to_path) { @@ -739,6 +732,7 @@ static inline void security_inode_free(struct inode *inode) static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, + const char **xattr_name, void **ctx, u32 *ctxlen) { @@ -1041,6 +1035,11 @@ static inline void security_transfer_creds(struct cred *new, { } +static inline void security_cred_getsecid(const struct cred *c, u32 *secid) +{ + *secid = 0; +} + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; @@ -1112,7 +1111,7 @@ static inline int security_task_getsid(struct task_struct *p) return 0; } -static inline void security_task_getsecid_subj(struct task_struct *p, u32 *secid) +static inline void security_current_getsecid_subj(u32 *secid) { *secid = 0; } @@ -1418,10 +1417,10 @@ int security_tun_dev_create(void); int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); -int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb); +int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb); int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); -void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, +void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); #else /* CONFIG_SECURITY_NETWORK */ @@ -1624,7 +1623,7 @@ static inline int security_tun_dev_open(void *security) return 0; } -static inline int security_sctp_assoc_request(struct sctp_endpoint *ep, +static inline int 
security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { return 0; @@ -1637,7 +1636,7 @@ static inline int security_sctp_bind_connect(struct sock *sk, int optname, return 0; } -static inline void security_sctp_sk_clone(struct sctp_endpoint *ep, +static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { @@ -2038,4 +2037,20 @@ static inline int security_perf_event_write(struct perf_event *event) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_PERF_EVENTS */ +#ifdef CONFIG_IO_URING +#ifdef CONFIG_SECURITY +extern int security_uring_override_creds(const struct cred *new); +extern int security_uring_sqpoll(void); +#else +static inline int security_uring_override_creds(const struct cred *new) +{ + return 0; +} +static inline int security_uring_sqpoll(void) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_IO_URING */ + #endif /* ! __LINUX_SECURITY_H */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dd99569595fd..72dbb44a4573 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/string.h> +#include <linux/string_helpers.h> #include <linux/bug.h> #include <linux/mutex.h> #include <linux/cpumask.h> @@ -135,7 +136,21 @@ static inline void seq_escape_str(struct seq_file *m, const char *src, seq_escape_mem(m, src, strlen(src), flags, esc); } -void seq_escape(struct seq_file *m, const char *s, const char *esc); +/** + * seq_escape - print string into buffer, escaping some characters + * @m: target buffer + * @s: NULL-terminated string + * @esc: set of characters that need escaping + * + * Puts string into buffer, replacing each occurrence of character from + * @esc with usual octal escape. + * + * Use seq_has_overflowed() to check for errors. 
+ */ +static inline void seq_escape(struct seq_file *m, const char *s, const char *esc) +{ + seq_escape_str(m, s, ESCAPE_OCTAL, esc); +} void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, @@ -194,7 +209,7 @@ static const struct file_operations __name ## _fops = { \ #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ - return single_open(file, __name ## _show, inode->i_private); \ + return single_open(file, __name ## _show, PDE_DATA(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h index 0fdbe1ddd8d1..b97912fdbae7 100644 --- a/include/linux/seq_file_net.h +++ b/include/linux/seq_file_net.h @@ -9,7 +9,8 @@ extern struct net init_net; struct seq_net_private { #ifdef CONFIG_NET_NS - struct net *net; + struct net *net; + netns_tracker ns_tracker; #endif }; diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h deleted file mode 100644 index 3cca2b8fac43..000000000000 --- a/include/linux/seqno-fence.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * seqno-fence, using a dma-buf to synchronize fencing - * - * Copyright (C) 2012 Texas Instruments - * Copyright (C) 2012 Canonical Ltd - * Authors: - * Rob Clark <[email protected]> - * Maarten Lankhorst <[email protected]> - */ - -#ifndef __LINUX_SEQNO_FENCE_H -#define __LINUX_SEQNO_FENCE_H - -#include <linux/dma-fence.h> -#include <linux/dma-buf.h> - -enum seqno_fence_condition { - SEQNO_FENCE_WAIT_GEQUAL, - SEQNO_FENCE_WAIT_NONZERO -}; - -struct seqno_fence { - struct dma_fence base; - - const struct dma_fence_ops *ops; - struct dma_buf *sync_buf; - uint32_t seqno_ofs; - enum seqno_fence_condition condition; -}; - -extern const struct dma_fence_ops seqno_fence_ops; - -/** - * to_seqno_fence - cast a fence to a seqno_fence - * 
@fence: fence to cast to a seqno_fence - * - * Returns NULL if the fence is not a seqno_fence, - * or the seqno_fence otherwise. - */ -static inline struct seqno_fence * -to_seqno_fence(struct dma_fence *fence) -{ - if (fence->ops != &seqno_fence_ops) - return NULL; - return container_of(fence, struct seqno_fence, base); -} - -/** - * seqno_fence_init - initialize a seqno fence - * @fence: seqno_fence to initialize - * @lock: pointer to spinlock to use for fence - * @sync_buf: buffer containing the memory location to signal on - * @context: the execution context this fence is a part of - * @seqno_ofs: the offset within @sync_buf - * @seqno: the sequence # to signal on - * @cond: fence wait condition - * @ops: the fence_ops for operations on this seqno fence - * - * This function initializes a struct seqno_fence with passed parameters, - * and takes a reference on sync_buf which is released on fence destruction. - * - * A seqno_fence is a dma_fence which can complete in software when - * enable_signaling is called, but it also completes when - * (s32)((sync_buf)[seqno_ofs] - seqno) >= 0 is true - * - * The seqno_fence will take a refcount on the sync_buf until it's - * destroyed, but actual lifetime of sync_buf may be longer if one of the - * callers take a reference to it. - * - * Certain hardware have instructions to insert this type of wait condition - * in the command stream, so no intervention from software would be needed. - * This type of fence can be destroyed before completed, however a reference - * on the sync_buf dma-buf can be taken. It is encouraged to re-use the same - * dma-buf for sync_buf, since mapping or unmapping the sync_buf to the - * device's vm can be expensive. - * - * It is recommended for creators of seqno_fence to call dma_fence_signal() - * before destruction. 
This will prevent possible issues from wraparound at - * time of issue vs time of check, since users can check dma_fence_is_signaled() - * before submitting instructions for the hardware to wait on the fence. - * However, when ops.enable_signaling is not called, it doesn't have to be - * done as soon as possible, just before there's any real danger of seqno - * wraparound. - */ -static inline void -seqno_fence_init(struct seqno_fence *fence, spinlock_t *lock, - struct dma_buf *sync_buf, uint32_t context, - uint32_t seqno_ofs, uint32_t seqno, - enum seqno_fence_condition cond, - const struct dma_fence_ops *ops) -{ - BUG_ON(!fence || !sync_buf || !ops); - BUG_ON(!ops->wait || !ops->enable_signaling || - !ops->get_driver_name || !ops->get_timeline_name); - - /* - * ops is used in dma_fence_init for get_driver_name, so needs to be - * initialized first - */ - fence->ops = ops; - dma_fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno); - get_dma_buf(sync_buf); - fence->sync_buf = sync_buf; - fence->seqno_ofs = seqno_ofs; - fence->condition = cond; -} - -#endif /* __LINUX_SEQNO_FENCE_H */ diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 5db211f43b29..ff84a3ed10ea 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -104,8 +104,6 @@ struct uart_8250_port { unsigned char ier; unsigned char lcr; unsigned char mcr; - unsigned char mcr_mask; /* mask of user bits */ - unsigned char mcr_force; /* mask of forced bits */ unsigned char cur_iotype; /* Running I/O type */ unsigned int rpm_tx_active; unsigned char canary; /* non-zero during system sleep diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index cf0de4a86640..f6c3323fc4c5 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -27,15 +27,6 @@ #define S3C2410_UERSTAT (0x14) #define S3C2410_UFSTAT (0x18) #define S3C2410_UMSTAT (0x1C) -#define USI_CON (0xC4) -#define USI_OPTION (0xC8) - -#define USI_CON_RESET (1<<0) 
-#define USI_CON_RESET_MASK (1<<0) - -#define USI_OPTION_HWACG_CLKREQ_ON (1<<1) -#define USI_OPTION_HWACG_CLKSTOP_ON (1<<2) -#define USI_OPTION_HWACG_MASK (3<<1) #define S3C2410_LCON_CFGMASK ((0xF<<3)|(0x3)) diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 9814fff58a69..76fbf92b04d9 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -93,4 +93,5 @@ extern void register_shrinker_prepared(struct shrinker *shrinker); extern int register_shrinker(struct shrinker *shrinker); extern void unregister_shrinker(struct shrinker *shrinker); extern void free_prealloced_shrinker(struct shrinker *shrinker); +extern void synchronize_shrinkers(void); #endif diff --git a/include/linux/signal.h b/include/linux/signal.h index 3f96a6374e4f..a6db6f2ae113 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -126,7 +126,6 @@ static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS -#include <linux/string.h> #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ @@ -464,6 +463,12 @@ int __save_altstack(stack_t __user *, unsigned long); unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ } while (0); +#ifdef CONFIG_DYNAMIC_SIGFRAME +bool sigaltstack_size_valid(size_t ss_size); +#else +static inline bool sigaltstack_size_valid(size_t size) { return true; } +#endif /* !CONFIG_DYNAMIC_SIGFRAME */ + #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 34cb28b8f16c..a70b2bdbf4d9 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -70,6 +70,9 @@ struct ksignal { int sig; }; +/* Used to kill the race between sigaction and forced signals */ +#define SA_IMMUTABLE 0x00800000 + #ifndef __ARCH_UAPI_SA_FLAGS #ifdef SA_RESTORER 
#define __ARCH_UAPI_SA_FLAGS SA_RESTORER diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e..cce8a9acc76c 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -21,15 +21,15 @@ typedef struct { u64 key[2]; } siphash_key_t; +#define siphash_aligned_key_t siphash_key_t __aligned(16) + static inline bool siphash_key_is_zero(const siphash_key_t *key) { return !(key->key[0] | key->key[1]); } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +82,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +95,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +132,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 
|| + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 841e2f0f5240..bf11e1fbd69b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -36,6 +36,7 @@ #include <linux/splice.h> #include <linux/in6.h> #include <linux/if_packet.h> +#include <linux/llist.h> #include <net/flow.h> #include <net/page_pool.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -286,14 +287,19 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; - bool post_ct; + __u16 zone; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; }; #endif struct sk_buff_head { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; + /* These two members must be first to match sk_buff. */ + struct_group_tagged(sk_buff_list, list, + struct sk_buff *next; + struct sk_buff *prev; + ); __u32 qlen; spinlock_t lock; @@ -301,6 +307,22 @@ struct sk_buff_head { struct sk_buff; +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_NO_SOCKET, + SKB_DROP_REASON_PKT_TOO_SMALL, + SKB_DROP_REASON_TCP_CSUM, + SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_UDP_CSUM, + SKB_DROP_REASON_MAX, +}; + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -454,9 +476,15 @@ enum { * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), + + /* segment contains only zerocopy data and should not be + * charged to the kernel memory. 
+ */ + SKBFL_PURE_ZEROCOPY = BIT(2), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) +#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY) /* * The callback notifies userspace to release buffers when skb DMA is done in @@ -620,6 +648,7 @@ typedef unsigned char *sk_buff_data_t; * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @list: queue head + * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in * fragmentation management @@ -652,6 +681,7 @@ typedef unsigned char *sk_buff_data_t; * @tc_at_ingress: used within tc_classify to distinguish in/egress * @redirected: packet was redirected by packet classifier * @from_ingress: packet was redirected from the ingress path + * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -721,7 +751,7 @@ typedef unsigned char *sk_buff_data_t; struct sk_buff { union { struct { - /* These two members must be first. */ + /* These two members must be first to match sk_buff_head. 
*/ struct sk_buff *next; struct sk_buff *prev; @@ -736,6 +766,7 @@ struct sk_buff { }; struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ struct list_head list; + struct llist_node ll_node; }; union { @@ -785,7 +816,7 @@ struct sk_buff { #else #define CLONED_MASK 1 #endif -#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) +#define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset) /* private: */ __u8 __cloned_offset[0]; @@ -801,25 +832,15 @@ struct sk_buff { __u8 active_extensions; #endif - /* fields enclosed in headers_start/headers_end are copied + /* Fields enclosed in headers group are copied * using a single memcpy() in __copy_skb_header() */ - /* private: */ - __u32 headers_start[0]; - /* public: */ - -/* if you move pkt_type around you also must adapt those constants */ -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + struct_group(headers, /* private: */ __u8 __pkt_type_offset[0]; /* public: */ - __u8 pkt_type:3; + __u8 pkt_type:3; /* see PKT_TYPE_MAX */ __u8 ignore_df:1; __u8 nf_trace:1; __u8 ip_summed:2; @@ -835,16 +856,10 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_VLAN_PRESENT_BIT 7 -#else -#define PKT_VLAN_PRESENT_BIT 0 -#endif -#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) /* private: */ __u8 __pkt_vlan_present_offset[0]; /* public: */ - __u8 vlan_present:1; + __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_not_inet:1; @@ -868,6 +883,9 @@ struct sk_buff { #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif +#ifdef CONFIG_NETFILTER_SKIP_EGRESS + __u8 nf_skip_egress:1; +#endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif @@ -922,9 +940,7 @@ struct sk_buff { u64 kcov_handle; #endif - /* private: */ - __u32 headers_end[0]; - /* 
public: */ + ); /* end headers group */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; @@ -940,6 +956,22 @@ struct sk_buff { #endif }; +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif +#define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) + +/* if you move pkt_vlan_present around you also must adapt these constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_VLAN_PRESENT_BIT 7 +#else +#define PKT_VLAN_PRESENT_BIT 0 +#endif +#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) + #ifdef __KERNEL__ /* * Handling routines are only of interest to the kernel @@ -1071,8 +1103,18 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); + +/** + * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason + * @skb: buffer to free + */ +static inline void kfree_skb(struct sk_buff *skb) +{ + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); +} + void skb_release_head_state(struct sk_buff *skb); -void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); @@ -1370,7 +1412,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, - bool post_ct); + bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -1460,6 +1502,17 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? 
skb_uarg(skb) : NULL; } +static inline bool skb_zcopy_pure(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; +} + +static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, + const struct sk_buff *skb2) +{ + return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); +} + static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); @@ -1524,7 +1577,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (!skb_zcopy_is_nouarg(skb)) uarg->callback(skb, uarg, zerocopy_success); - skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } } @@ -1671,6 +1724,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } +/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_cloned(skb)) { + unsigned int save = skb->truesize; + int res; + + res = pskb_expand_head(skb, 0, 0, pri); + skb->truesize = save; + return res; + } + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check @@ -1938,8 +2007,8 @@ static inline void __skb_insert(struct sk_buff *newsk, */ WRITE_ONCE(newsk->next, next); WRITE_ONCE(newsk->prev, prev); - WRITE_ONCE(next->prev, newsk); - WRITE_ONCE(prev->next, newsk); + WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk); + WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk); WRITE_ONCE(list->qlen, list->qlen + 1); } @@ -2035,7 +2104,7 @@ static inline void __skb_queue_after(struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *newsk) { - __skb_insert(newsk, prev, prev->next, list); + __skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list); } void skb_append(struct sk_buff *old, struct sk_buff *newsk, @@ -2045,7 +2114,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list, 
struct sk_buff *next, struct sk_buff *newsk) { - __skb_insert(newsk, next->prev, next, list); + __skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list); } /** @@ -2335,6 +2404,8 @@ static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len) return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); } +void *skb_pull_data(struct sk_buff *skb, size_t len); + void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len) @@ -3447,7 +3518,12 @@ __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - __skb_postpull_rcsum(skb, start, len, 0); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = wsum_negate(csum_partial(start, len, + wsum_negate(skb->csum))); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; } static __always_inline void @@ -4189,7 +4265,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, return; } - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { __skb_checksum_complete(skb); skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); } @@ -4240,6 +4316,9 @@ enum skb_ext_id { #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, #endif +#if IS_ENABLED(CONFIG_MCTP_FLOWS) + SKB_EXT_MCTP, +#endif SKB_EXT_NUM, /* must be last */ }; diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 1ce9a9eb223b..18a717fe62eb 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -507,10 +507,30 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; +} + #if IS_ENABLED(CONFIG_NET_SOCK_MSG) -/* We only have one bit 
so far. */ -#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) +#define BPF_F_STRPARSER (1UL << 1) + +/* We only have two bits so far. */ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) + +static inline bool skb_bpf_strparser(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_STRPARSER; +} + +static inline void skb_bpf_set_strparser(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_STRPARSER; +} static inline bool skb_bpf_ingress(const struct sk_buff *skb) { diff --git a/include/linux/slab.h b/include/linux/slab.h index 083f3ce550bc..367366f1d1ff 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -142,8 +142,6 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); -extern bool usercopy_fallback; - struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); @@ -152,8 +150,8 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); -void kmem_cache_destroy(struct kmem_cache *); -int kmem_cache_shrink(struct kmem_cache *); +void kmem_cache_destroy(struct kmem_cache *s); +int kmem_cache_shrink(struct kmem_cache *s); /* * Please use this macro to create slab caches. 
Simply specify the @@ -181,24 +179,16 @@ int kmem_cache_shrink(struct kmem_cache *); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *, size_t, gfp_t); -void kfree(const void *); -void kfree_sensitive(const void *); -size_t __ksize(const void *); -size_t ksize(const void *); +void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2); +void kfree(const void *objp); +void kfree_sensitive(const void *objp); +size_t __ksize(const void *objp); +size_t ksize(const void *objp); #ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); #endif -#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR -void __check_heap_object(const void *ptr, unsigned long n, struct page *page, - bool to_user); -#else -static inline void __check_heap_object(const void *ptr, unsigned long n, - struct page *page, bool to_user) { } -#endif - /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. @@ -425,9 +415,9 @@ static __always_inline unsigned int __kmalloc_index(size_t size, #define kmalloc_index(s) __kmalloc_index(s, true) #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; -void kmem_cache_free(struct kmem_cache *, void *); +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); +void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; +void kmem_cache_free(struct kmem_cache *s, void *objp); /* * Bulk allocation and freeing operations. These are accelerated in an @@ -436,8 +426,8 @@ void kmem_cache_free(struct kmem_cache *, void *); * * Note that interrupts must be enabled when calling these functions. 
*/ -void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p); /* * Caller must not use kfree_bulk() on memory not originally allocated @@ -449,10 +439,12 @@ static __always_inline void kfree_bulk(size_t size, void **p) } #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment + __alloc_size(1); +void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment + __malloc; #else -static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node) { return __kmalloc(size, flags); } @@ -464,25 +456,24 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment __malloc; +extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_slab_alignment __alloc_size(3); #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment __malloc; +extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_slab_alignment + __alloc_size(4); #else -static __always_inline void * -kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) +static __always_inline __alloc_size(4) void 
*kmem_cache_alloc_node_trace(struct kmem_cache *s, + gfp_t gfpflags, int node, size_t size) { return kmem_cache_alloc_trace(s, gfpflags, size); } #endif /* CONFIG_NUMA */ #else /* CONFIG_TRACING */ -static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, - gfp_t flags, size_t size) +static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s, + gfp_t flags, size_t size) { void *ret = kmem_cache_alloc(s, flags); @@ -490,10 +481,8 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, return ret; } -static __always_inline void * -kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) +static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = kmem_cache_alloc_node(s, gfpflags, node); @@ -502,19 +491,21 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment + __alloc_size(1); #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) + __assume_page_alignment __alloc_size(1); #else -static __always_inline void * -kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags, + unsigned int order) { return kmalloc_order(size, flags, order); } #endif -static __always_inline void *kmalloc_large(size_t size, gfp_t flags) +static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags) { unsigned int order = get_order(size); return kmalloc_order_trace(size, flags, order); @@ -574,7 +565,7 @@ 
static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * Try really hard to succeed the allocation but fail * eventually. */ -static __always_inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { #ifndef CONFIG_SLOB @@ -596,7 +587,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && @@ -620,7 +611,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags) { size_t bytes; @@ -638,8 +629,10 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static __must_check inline void * -krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p, + size_t new_n, + size_t new_size, + gfp_t flags) { size_t bytes; @@ -655,7 +648,7 @@ krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) * @size: element size. * @flags: the type of memory to allocate (see kmalloc). 
*/ -static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags) { return kmalloc_array(n, size, flags | __GFP_ZERO); } @@ -668,12 +661,13 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) + __alloc_size(1); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) -static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, - int node) +static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, + int node) { size_t bytes; @@ -684,14 +678,15 @@ static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, return __kmalloc_node(bytes, flags, node); } -static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) +static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) { return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); } #ifdef CONFIG_NUMA -extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); +extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, + unsigned long caller) __alloc_size(1); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ _RET_IP_) @@ -716,7 +711,7 @@ static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). 
*/ -static inline void *kzalloc(size_t size, gfp_t flags) +static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags) { return kmalloc(size, flags | __GFP_ZERO); } @@ -727,11 +722,45 @@ static inline void *kzalloc(size_t size, gfp_t flags) * @flags: the type of memory to allocate (see kmalloc). * @node: memory node from which to allocate */ -static inline void *kzalloc_node(size_t size, gfp_t flags, int node) +static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node) { return kmalloc_node(size, flags | __GFP_ZERO, node); } +extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1); +static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + +static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + return kvmalloc(bytes, flags); +} + +static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags) +{ + return kvmalloc_array(n, size, flags | __GFP_ZERO); +} + +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) + __alloc_size(3); +extern void kvfree(const void *addr); +extern void kvfree_sensitive(const void *addr, size_t len); + unsigned int kmem_cache_size(struct kmem_cache *s); void __init kmem_cache_init_late(void); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 3aa5e1e73ab6..e24c9aff6fed 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -87,11 +87,11 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -static inline void 
*nearest_obj(struct kmem_cache *cache, struct page *page, +static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, void *x) { - void *object = x - (x - page->s_mem) % cache->size; - void *last_object = page->s_mem + (cache->num - 1) * cache->size; + void *object = x - (x - slab->s_mem) % cache->size; + void *last_object = slab->s_mem + (cache->num - 1) * cache->size; if (unlikely(object > last_object)) return last_object; @@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, * reciprocal_divide(offset, cache->reciprocal_buffer_size) */ static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct page *page, void *obj) + const struct slab *slab, void *obj) { - u32 offset = (obj - page->s_mem); + u32 offset = (obj - slab->s_mem); return reciprocal_divide(offset, cache->reciprocal_buffer_size); } -static inline int objs_per_slab_page(const struct kmem_cache *cache, - const struct page *page) +static inline int objs_per_slab(const struct kmem_cache *cache, + const struct slab *slab) { - if (is_kfence_address(page_address(page))) + if (is_kfence_address(slab_address(slab))) return 1; return cache->num; } diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 85499f0586b0..33c5c0e3bd8d 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -48,9 +48,9 @@ enum stat_item { struct kmem_cache_cpu { void **freelist; /* Pointer to next available object */ unsigned long tid; /* Globally unique transaction id */ - struct page *page; /* The slab from which we are allocating */ + struct slab *slab; /* The slab from which we are allocating */ #ifdef CONFIG_SLUB_CPU_PARTIAL - struct page *partial; /* Partially allocated frozen slabs */ + struct slab *partial; /* Partially allocated frozen slabs */ #endif local_lock_t lock; /* Protects the fields above */ #ifdef CONFIG_SLUB_STATS @@ -99,6 +99,8 @@ struct kmem_cache { #ifdef CONFIG_SLUB_CPU_PARTIAL /* 
Number of per cpu partial objects to keep around */ unsigned int cpu_partial; + /* Number of per cpu partial slabs to keep around */ + unsigned int cpu_partial_slabs; #endif struct kmem_cache_order_objects oo; @@ -141,17 +143,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -#ifdef CONFIG_SLUB_CPU_PARTIAL -#define slub_cpu_partial(s) ((s)->cpu_partial) -#define slub_set_cpu_partial(s, n) \ -({ \ - slub_cpu_partial(s) = (n); \ -}) -#else -#define slub_cpu_partial(s) (0) -#define slub_set_cpu_partial(s, n) -#endif /* CONFIG_SLUB_CPU_PARTIAL */ - #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS void sysfs_slab_unlink(struct kmem_cache *); @@ -165,16 +156,13 @@ static inline void sysfs_slab_release(struct kmem_cache *s) } #endif -void object_err(struct kmem_cache *s, struct page *page, - u8 *object, char *reason); - void *fixup_red_left(struct kmem_cache *s, void *p); -static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, +static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, void *x) { - void *object = x - (x - page_address(page)) % cache->size; - void *last_object = page_address(page) + - (page->objects - 1) * cache->size; + void *object = x - (x - slab_address(slab)) % cache->size; + void *last_object = slab_address(slab) + + (slab->objects - 1) * cache->size; void *result = (unlikely(object > last_object)) ? 
last_object : object; result = fixup_red_left(cache, result); @@ -190,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache, } static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct page *page, void *obj) + const struct slab *slab, void *obj) { if (is_kfence_address(obj)) return 0; - return __obj_to_index(cache, page_address(page), obj); + return __obj_to_index(cache, slab_address(slab), obj); } -static inline int objs_per_slab_page(const struct kmem_cache *cache, - const struct page *page) +static inline int objs_per_slab(const struct kmem_cache *cache, + const struct slab *slab) { - return page->objects; + return slab->objects; } #endif /* _LINUX_SLUB_DEF_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 510519e8a1eb..a80ab58ae3f1 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -108,7 +108,6 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, #ifdef CONFIG_SMP #include <linux/preempt.h> -#include <linux/kernel.h> #include <linux/compiler.h> #include <linux/thread_info.h> #include <asm/smp.h> diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h index fa1d6af0164e..d683251a0b40 100644 --- a/include/linux/soc/marvell/octeontx2/asm.h +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -5,6 +5,7 @@ #ifndef __SOC_OTX2_ASM_H #define __SOC_OTX2_ASM_H +#include <linux/types.h> #if defined(CONFIG_ARM64) /* * otx2_lmt_flush is used for LMT store operation. 
@@ -34,9 +35,23 @@ : [rf] "+r"(val) \ : [rs] "r"(addr)); \ }) + +static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr) +{ + u64 result; + + asm volatile (".cpu generic+lse\n" + "ldadda %x[i], %x[r], [%[b]]" + : [r] "=r" (result), "+m" (*ptr) + : [i] "r" (incr), [b] "r" (ptr) + : "memory"); + return result; +} + #else #define otx2_lmt_flush(ioaddr) ({ 0; }) #define cn10k_lmt_flush(val, addr) ({ addr = val; }) +#define otx2_atomic64_fetch_add(incr, ptr) ({ incr; }) #endif #endif /* __SOC_OTX2_ASM_H */ diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 2228bf6133da..4bba275e235a 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -29,13 +29,16 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_OVL0, DDP_COMPONENT_OVL_2L0, DDP_COMPONENT_OVL_2L1, + DDP_COMPONENT_OVL_2L2, DDP_COMPONENT_OVL1, + DDP_COMPONENT_POSTMASK0, DDP_COMPONENT_PWM0, DDP_COMPONENT_PWM1, DDP_COMPONENT_PWM2, DDP_COMPONENT_RDMA0, DDP_COMPONENT_RDMA1, DDP_COMPONENT_RDMA2, + DDP_COMPONENT_RDMA4, DDP_COMPONENT_UFOE, DDP_COMPONENT_WDMA0, DDP_COMPONENT_WDMA1, diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index 137f9f2ac4c3..23c5b30f3511 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -7,6 +7,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <dt-bindings/soc/qcom,apr.h> +#include <dt-bindings/soc/qcom,gpr.h> extern struct bus_type aprbus; @@ -75,10 +76,65 @@ struct apr_resp_pkt { int payload_size; }; +struct gpr_hdr { + uint32_t version:4; + uint32_t hdr_size:4; + uint32_t pkt_size:24; + uint32_t dest_domain:8; + uint32_t src_domain:8; + uint32_t reserved:16; + uint32_t src_port; + uint32_t dest_port; + uint32_t token; + uint32_t opcode; +} __packed; + +struct gpr_pkt { + struct gpr_hdr hdr; + uint32_t payload[]; +}; + +struct gpr_resp_pkt { + struct gpr_hdr hdr; + void *payload; + int payload_size; +}; + +#define GPR_HDR_SIZE 
sizeof(struct gpr_hdr) +#define GPR_PKT_VER 0x0 +#define GPR_PKT_HEADER_WORD_SIZE ((sizeof(struct gpr_pkt) + 3) >> 2) +#define GPR_PKT_HEADER_BYTE_SIZE (GPR_PKT_HEADER_WORD_SIZE << 2) + +#define GPR_BASIC_RSP_RESULT 0x02001005 + +struct gpr_ibasic_rsp_result_t { + uint32_t opcode; + uint32_t status; +}; + +#define GPR_BASIC_EVT_ACCEPTED 0x02001006 + +struct gpr_ibasic_rsp_accepted_t { + uint32_t opcode; +}; + /* Bits 0 to 15 -- Minor version, Bits 16 to 31 -- Major version */ #define APR_SVC_MAJOR_VERSION(v) ((v >> 16) & 0xFF) #define APR_SVC_MINOR_VERSION(v) (v & 0xFF) +typedef int (*gpr_port_cb) (struct gpr_resp_pkt *d, void *priv, int op); +struct packet_router; +struct pkt_router_svc { + struct device *dev; + gpr_port_cb callback; + struct packet_router *pr; + spinlock_t lock; + int id; + void *priv; +}; + +typedef struct pkt_router_svc gpr_port_t; + struct apr_device { struct device dev; uint16_t svc_id; @@ -86,21 +142,26 @@ struct apr_device { uint32_t version; char name[APR_NAME_SIZE]; const char *service_path; - spinlock_t lock; + struct pkt_router_svc svc; struct list_head node; }; +typedef struct apr_device gpr_device_t; + #define to_apr_device(d) container_of(d, struct apr_device, dev) +#define svc_to_apr_device(d) container_of(d, struct apr_device, svc) struct apr_driver { int (*probe)(struct apr_device *sl); int (*remove)(struct apr_device *sl); int (*callback)(struct apr_device *a, struct apr_resp_pkt *d); + int (*gpr_callback)(struct gpr_resp_pkt *d, void *data, int op); struct device_driver driver; const struct apr_device_id *id_table; }; +typedef struct apr_driver gpr_driver_t; #define to_apr_driver(d) container_of(d, struct apr_driver, driver) /* @@ -123,7 +184,14 @@ void apr_driver_unregister(struct apr_driver *drv); #define module_apr_driver(__apr_driver) \ module_driver(__apr_driver, apr_driver_register, \ apr_driver_unregister) +#define module_gpr_driver(__gpr_driver) module_apr_driver(__gpr_driver) int apr_send_pkt(struct apr_device *adev, 
struct apr_pkt *pkt); +gpr_port_t *gpr_alloc_port(gpr_device_t *gdev, struct device *dev, + gpr_port_cb cb, void *priv); +void gpr_free_port(gpr_port_t *port); +int gpr_send_port_pkt(gpr_port_t *port, struct gpr_pkt *pkt); +int gpr_send_pkt(gpr_device_t *gdev, struct gpr_pkt *pkt); + #endif /* __QCOM_APR_H_ */ diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 437c9df13229..9e8fd92c96b7 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -33,6 +33,9 @@ #define LLCC_MODPE 29 #define LLCC_APTCM 30 #define LLCC_WRCACHE 31 +#define LLCC_CVPFW 32 +#define LLCC_CPUSS1 33 +#define LLCC_CPUHWT 36 /** * struct llcc_slice_desc - Cache slice descriptor diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h new file mode 100644 index 000000000000..3c2a82e606f8 --- /dev/null +++ b/include/linux/soc/qcom/qcom_aoss.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef __QCOM_AOSS_H__ +#define __QCOM_AOSS_H__ + +#include <linux/err.h> +#include <linux/device.h> + +struct qmp; + +#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP) + +int qmp_send(struct qmp *qmp, const void *data, size_t len); +struct qmp *qmp_get(struct device *dev); +void qmp_put(struct qmp *qmp); + +#else + +static inline int qmp_send(struct qmp *qmp, const void *data, size_t len) +{ + return -ENODEV; +} + +static inline struct qmp *qmp_get(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + +static inline void qmp_put(struct qmp *qmp) +{ +} + +#endif + +#endif diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 60e66fc9b6bf..860dd8cdf9f3 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -38,6 +38,8 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_IPA_CLK 0x617069 #define QCOM_SMD_RPM_CE_CLK 0x6563 #define QCOM_SMD_RPM_AGGR_CLK 0x72676761 +#define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768 +#define QCOM_SMD_RPM_PKA_CLK 0x616b70 int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index 7899a5b8c247..1f1fe8bfaa76 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -4,8 +4,10 @@ #ifdef CONFIG_RST_RCAR int rcar_rst_read_mode_pins(u32 *mode); +int rcar_rst_set_rproc_boot_addr(u64 boot_addr); #else static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } +static inline int rcar_rst_set_rproc_boot_addr(u64 boot_addr) { return -ENODEV; } #endif #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ diff --git a/include/linux/soc/samsung/exynos-chipid.h b/include/linux/soc/samsung/exynos-chipid.h index 8bca6763f99c..62f0e2531068 100644 --- a/include/linux/soc/samsung/exynos-chipid.h +++ b/include/linux/soc/samsung/exynos-chipid.h @@ -9,10 +9,8 @@ #define __LINUX_SOC_EXYNOS_CHIPID_H #define EXYNOS_CHIPID_REG_PRO_ID 0x00 -#define EXYNOS_SUBREV_MASK (0xf << 4) 
-#define EXYNOS_MAINREV_MASK (0xf << 0) -#define EXYNOS_REV_MASK (EXYNOS_SUBREV_MASK | \ - EXYNOS_MAINREV_MASK) +#define EXYNOS_REV_PART_MASK 0xf +#define EXYNOS_REV_PART_SHIFT 4 #define EXYNOS_MASK 0xfffff000 #define EXYNOS_CHIPID_REG_PKG_ID 0x04 diff --git a/include/linux/soc/ti/ti_sci_inta_msi.h b/include/linux/soc/ti/ti_sci_inta_msi.h index e3aa8b14612e..4dba2f2aff6f 100644 --- a/include/linux/soc/ti/ti_sci_inta_msi.h +++ b/include/linux/soc/ti/ti_sci_inta_msi.h @@ -18,6 +18,4 @@ struct irq_domain struct irq_domain *parent); int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev, struct ti_sci_resource *res); -unsigned int ti_sci_inta_msi_get_virq(struct device *dev, u32 index); -void ti_sci_inta_msi_domain_free_irqs(struct device *dev); #endif /* __INCLUDE_LINUX_IRQCHIP_TI_SCI_INTA_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index 041d6032a348..8ef26d89ef49 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -364,6 +364,8 @@ struct ucred { #define SOL_KCM 281 #define SOL_TLS 282 #define SOL_XDP 283 +#define SOL_MPTCP 284 +#define SOL_MCTP 285 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 8a463b8fc12a..67e0d3e750b5 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -92,7 +92,7 @@ * firmware. */ struct sdw_intel_stream_params_data { - struct snd_pcm_substream *substream; + int stream; struct snd_soc_dai *dai; struct snd_pcm_hw_params *hw_params; int link_id; @@ -105,7 +105,7 @@ struct sdw_intel_stream_params_data { * firmware. 
*/ struct sdw_intel_stream_free_data { - struct snd_pcm_substream *substream; + int stream; struct snd_soc_dai *dai; int link_id; }; diff --git a/include/linux/spi/max7301.h b/include/linux/spi/max7301.h index 21449067aedb..e392c53758bc 100644 --- a/include/linux/spi/max7301.h +++ b/include/linux/spi/max7301.h @@ -31,6 +31,6 @@ struct max7301_platform_data { u32 input_pullup_active; }; -extern int __max730x_remove(struct device *dev); +extern void __max730x_remove(struct device *dev); extern int __max730x_probe(struct max7301 *ts); #endif diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index eaab121ee575..ca74dce36706 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -9,9 +9,6 @@ #include <linux/pxa2xx_ssp.h> -#define PXA2XX_CS_ASSERT (0x01) -#define PXA2XX_CS_DEASSERT (0x02) - struct dma_chan; /* @@ -45,9 +42,7 @@ struct pxa2xx_spi_chip { u8 rx_threshold; u8 dma_burst_size; u32 timeout; - u8 enable_loopback; int gpio_cs; - void (*cs_control)(u32 command); }; #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6b0b686f6f90..7ab3fed7b804 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -14,12 +14,12 @@ #include <linux/completion.h> #include <linux/scatterlist.h> #include <linux/gpio/consumer.h> -#include <linux/ptp_clock_kernel.h> #include <uapi/linux/spi/spi.h> struct dma_chan; struct software_node; +struct ptp_system_timestamp; struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; @@ -78,10 +78,6 @@ struct spi_statistics { unsigned long transfers_split_maxsize; }; -void spi_statistics_add_transfer_stats(struct spi_statistics *stats, - struct spi_transfer *xfer, - struct spi_controller *ctlr); - #define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \ do { \ unsigned long flags; \ @@ -763,8 +759,6 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller 
*ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -extern struct spi_controller *spi_busnum_to_master(u16 busnum); - /* * SPI resource management while processing a SPI message */ @@ -788,15 +782,6 @@ struct spi_res { unsigned long long data[]; /* guarantee ull alignment */ }; -extern void *spi_res_alloc(struct spi_device *spi, - spi_res_release_t release, - size_t size, gfp_t gfp); -extern void spi_res_add(struct spi_message *message, void *res); -extern void spi_res_free(void *res); - -extern void spi_res_release(struct spi_controller *ctlr, - struct spi_message *message); - /*---------------------------------------------------------------------------*/ /* @@ -1114,8 +1099,6 @@ static inline void spi_message_free(struct spi_message *m) extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -extern int spi_async_locked(struct spi_device *spi, - struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); static inline size_t @@ -1198,15 +1181,6 @@ struct spi_replaced_transfers { struct spi_transfer inserted_transfers[]; }; -extern struct spi_replaced_transfers *spi_replace_transfers( - struct spi_message *msg, - struct spi_transfer *xfer_first, - size_t remove, - size_t insert, - spi_replaced_release_t release, - size_t extradatasize, - gfp_t gfp); - /*---------------------------------------------------------------------------*/ /* SPI transfer transformation methods */ @@ -1478,20 +1452,8 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). - * - * You can also use spi_alloc_device() and spi_add_device() to use a two - * stage registration sequence for each spi_device. 
This gives the caller - * some more control over the spi_device structure before it is registered, - * but requires that caller to initialize fields that would otherwise - * be defined using the board info. */ extern struct spi_device * -spi_alloc_device(struct spi_controller *ctlr); - -extern int -spi_add_device(struct spi_device *spi); - -extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); @@ -1505,23 +1467,6 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } -/* OF support code */ -#if IS_ENABLED(CONFIG_OF) - -/* must call put_device() when done with returned spi_device device */ -extern struct spi_device * -of_find_spi_device_by_node(struct device_node *node); - -#else - -static inline struct spi_device * -of_find_spi_device_by_node(struct device_node *node) -{ - return NULL; -} - -#endif /* IS_ENABLED(CONFIG_OF) */ - /* Compatibility layer */ #define spi_master spi_controller diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 45310ea1b1d7..5c0c5174155d 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -57,7 +57,6 @@ #include <linux/compiler.h> #include <linux/irqflags.h> #include <linux/thread_info.h> -#include <linux/kernel.h> #include <linux/stringify.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> @@ -172,12 +171,11 @@ do { \ * Architectures that can implement ACQUIRE better need to take care. 
*/ #ifndef smp_mb__after_spinlock -#define smp_mb__after_spinlock() do { } while (0) +#define smp_mb__after_spinlock() kcsan_mb() #endif #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); -#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock) extern int do_raw_spin_trylock(raw_spinlock_t *lock); extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else @@ -188,18 +186,6 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) mmiowb_spin_lock(); } -#ifndef arch_spin_lock_flags -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#endif - -static inline void -do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock) -{ - __acquire(lock); - arch_spin_lock_flags(&lock->raw_lock, *flags); - mmiowb_spin_lock(); -} - static inline int do_raw_spin_trylock(raw_spinlock_t *lock) { int ret = arch_spin_trylock(&(lock)->raw_lock); diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 6b8e1a0b137b..51fa0dab68c4 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -108,16 +108,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) local_irq_save(flags); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - /* - * On lockdep we dont want the hand-coded irq-enable of - * do_raw_spin_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#ifdef CONFIG_LOCKDEP LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -#else - do_raw_spin_lock_flags(lock, &flags); -#endif return flags; } diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index c09b6407ae1b..7f86a2016ac5 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_TYPES_UP_H #define __LINUX_SPINLOCK_TYPES_UP_H -#ifndef 
__LINUX_SPINLOCK_TYPES_H +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 0ac9112c1bbe..16521074b6f7 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -62,7 +62,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched/core.c and kernel_lock.c: */ # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) -# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) # define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0) # define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e6011a9975af..01226e4d960a 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -117,7 +117,8 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * lockdep_is_held() calls. 
*/ #define srcu_dereference_check(p, ssp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 6bb4bc1a5f54..c34b55a6e554 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -11,15 +11,24 @@ #ifndef _LINUX_STACKDEPOT_H #define _LINUX_STACKDEPOT_H +#include <linux/gfp.h> + typedef u32 depot_stack_handle_t; +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); -unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces); + +void stack_depot_print(depot_stack_handle_t stack); #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 9edecb494e9e..97455880ac41 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -8,21 +8,6 @@ struct task_struct; struct pt_regs; -#ifdef CONFIG_STACKTRACE -void stack_trace_print(const unsigned long *trace, unsigned int nr_entries, - int spaces); -int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries, - unsigned int nr_entries, int spaces); -unsigned int stack_trace_save(unsigned long *store, unsigned int size, - unsigned int skipnr); -unsigned int stack_trace_save_tsk(struct task_struct *task, - unsigned long *store, unsigned int size, - unsigned int skipnr); -unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, - unsigned int size, 
unsigned int skipnr); -unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); - -/* Internal interfaces. Do not use in generic code */ #ifdef CONFIG_ARCH_STACKWALK /** @@ -75,8 +60,25 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie, void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, const struct pt_regs *regs); +#endif /* CONFIG_ARCH_STACKWALK */ -#else /* CONFIG_ARCH_STACKWALK */ +#ifdef CONFIG_STACKTRACE +void stack_trace_print(const unsigned long *trace, unsigned int nr_entries, + int spaces); +int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries, + unsigned int nr_entries, int spaces); +unsigned int stack_trace_save(unsigned long *store, unsigned int size, + unsigned int skipnr); +unsigned int stack_trace_save_tsk(struct task_struct *task, + unsigned long *store, unsigned int size, + unsigned int skipnr); +unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, + unsigned int size, unsigned int skipnr); +unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); +unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); + +#ifndef CONFIG_ARCH_STACKWALK +/* Internal interfaces. 
Do not use in generic code */ struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 998a4ba28eba..ca507bd5f808 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -20,7 +20,7 @@ enum { #endif /** - * sizeof_field(TYPE, MEMBER) + * sizeof_field() - Report the size of a struct field in bytes * * @TYPE: The structure containing the field of interest * @MEMBER: The field to return the size of @@ -28,7 +28,7 @@ enum { #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) /** - * offsetofend(TYPE, MEMBER) + * offsetofend() - Report the offset of a struct field within the struct * * @TYPE: The type of the structure * @MEMBER: The member within the structure to get the end offset of @@ -36,4 +36,65 @@ enum { #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +/** + * struct_group() - Wrap a set of declarations in a mirrored struct + * + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. + */ +#define struct_group(NAME, MEMBERS...) \ + __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) + +/** + * struct_group_attr() - Create a struct_group() with trailing attributes + * + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes to apply + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. 
The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes structure attributes argument. + */ +#define struct_group_attr(NAME, ATTRS, MEMBERS...) \ + __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) + +/** + * struct_group_tagged() - Create a struct_group with a reusable tag + * + * @TAG: The tag name for the named sub-struct + * @NAME: The identifier name of the mirrored sub-struct + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical + * layout and size: one anonymous and one named. The former can be + * used normally without sub-struct naming, and the latter can be + * used to reason about the start, end, and size of the group of + * struct members. Includes struct tag argument for the named copy, + * so the specified layout can be reused later. + */ +#define struct_group_tagged(TAG, NAME, MEMBERS...) \ + __struct_group(TAG, NAME, /* no attrs */, MEMBERS) + +/** + * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. 
+ */ +#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ + __DECLARE_FLEX_ARRAY(TYPE, NAME) + #endif diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a6f03b36fc4f..24eea1b05ca2 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -233,6 +233,7 @@ struct plat_stmmacenet_data { int (*clks_config)(void *priv, bool enabled); int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, void *ctx); + void (*dump_debug_regs)(void *priv); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; @@ -241,6 +242,7 @@ struct plat_stmmacenet_data { unsigned int clk_ref_rate; unsigned int mult_fact_100ns; s32 ptp_max_adj; + u32 cdc_error_adj; struct reset_control *stmmac_rst; struct reset_control *stmmac_ahb_rst; struct stmmac_axi *axi; diff --git a/include/linux/string.h b/include/linux/string.h index 5e96d656be7a..b6572aeca2f5 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -249,36 +249,47 @@ static inline const char *kbasename(const char *path) return tail ? 
tail + 1 : path; } -#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) -#define __RENAME(x) __asm__(#x) - -void fortify_panic(const char *name) __noreturn __cold; -void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); -void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); -void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); -void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); - #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) #include <linux/fortify-string.h> #endif +void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, + int pad); + /** - * memcpy_and_pad - Copy one buffer to another with padding - * @dest: Where to copy to - * @dest_len: The destination buffer size - * @src: Where to copy from - * @count: The number of bytes to copy - * @pad: Character to use for padding if space is left in destination. + * memset_after - Set a value after a struct member to the end of a struct + * + * @obj: Address of target struct instance + * @v: Byte value to repeatedly write + * @member: after which struct member to start writing bytes + * + * This is good for clearing padding following the given member. 
*/ -static inline void memcpy_and_pad(void *dest, size_t dest_len, - const void *src, size_t count, int pad) -{ - if (dest_len > count) { - memcpy(dest, src, count); - memset(dest + count, pad, dest_len - count); - } else - memcpy(dest, src, dest_len); -} +#define memset_after(obj, v, member) \ +({ \ + u8 *__ptr = (u8 *)(obj); \ + typeof(v) __val = (v); \ + memset(__ptr + offsetofend(typeof(*(obj)), member), __val, \ + sizeof(*(obj)) - offsetofend(typeof(*(obj)), member)); \ +}) + +/** + * memset_startat - Set a value starting at a member to the end of a struct + * + * @obj: Address of target struct instance + * @v: Byte value to repeatedly write + * @member: struct member to start writing at + * + * Note that if there is padding between the prior member and the target + * member, memset_after() should be used to clear the prior padding. + */ +#define memset_startat(obj, v, member) \ +({ \ + u8 *__ptr = (u8 *)(obj); \ + typeof(v) __val = (v); \ + memset(__ptr + offsetof(typeof(*(obj)), member), __val, \ + sizeof(*(obj)) - offsetof(typeof(*(obj)), member)); \ +}) /** * str_has_prefix - Test if a string has a given prefix diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 68189c4a2eb1..7a22921c9db7 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -4,8 +4,10 @@ #include <linux/bits.h> #include <linux/ctype.h> +#include <linux/string.h> #include <linux/types.h> +struct device; struct file; struct task_struct; @@ -99,6 +101,9 @@ char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); +char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n); void kfree_strarray(char **array, size_t n); +char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n); + #endif diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 
a4661646adc9..267b7aeaf1a6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -40,6 +40,7 @@ struct rpc_clnt { unsigned int cl_clid; /* client id */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ + atomic_t cl_pid; /* task PID counter */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ const struct rpc_procinfo *cl_procinfo; /* procedure info */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index a237b8dbf608..db964bb63912 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -150,25 +150,13 @@ struct rpc_task_setup { #define RPC_TASK_MSG_PIN_WAIT 5 #define RPC_TASK_SIGNALLED 6 -#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_clear_running(t) \ - do { \ - smp_mb__before_atomic(); \ - clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ - smp_mb__after_atomic(); \ - } while (0) +#define rpc_clear_running(t) clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define RPC_IS_QUEUED(t) test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define rpc_set_queued(t) set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) -#define rpc_clear_queued(t) \ - do { \ - smp_mb__before_atomic(); \ - clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \ - smp_mb__after_atomic(); \ - } while (0) +#define rpc_clear_queued(t) clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 064c96157d1f..f35c22b3355f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -64,10 +64,9 @@ struct svc_serv_ops { /* queue up a transport for servicing */ void (*svo_enqueue_xprt)(struct svc_xprt *); - /* set 
up thread (or whatever) execution context */ - int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); - - /* optional module to count when adding threads (pooled svcs only) */ + /* optional module to count when adding threads. + * Thread function must call module_put_and_kthread_exit() to exit. + */ struct module *svo_module; }; @@ -85,6 +84,7 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -114,15 +114,43 @@ struct svc_serv { #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/* - * We use sv_nrthreads as a reference count. svc_destroy() drops - * this refcount, so we need to bump it up around operations that - * change the number of threads. Horrible, but there it is. - * Should be called with the "service mutex" held. +/** + * svc_get() - increment reference count on a SUNRPC serv + * @serv: the svc_serv to have count incremented + * + * Returns: the svc_serv that was passed in. + */ +static inline struct svc_serv *svc_get(struct svc_serv *serv) +{ + kref_get(&serv->sv_refcnt); + return serv; +} + +void svc_destroy(struct kref *); + +/** + * svc_put - decrement reference count on a SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * When the reference count reaches zero, svc_destroy() + * is called to clean up and free the serv. + */ +static inline void svc_put(struct svc_serv *serv) +{ + kref_put(&serv->sv_refcnt, svc_destroy); +} + +/** + * svc_put_not_last - decrement non-final reference count on SUNRPC serv + * @serv: the svc_serv to have count decremented + * + * Returns: %true is refcount was decremented. + * + * If the refcount is 1, it is not decremented and instead failure is reported. 
*/ -static inline void svc_get(struct svc_serv *serv) +static inline bool svc_put_not_last(struct svc_serv *serv) { - serv->sv_nrthreads++; + return refcount_dec_not_one(&serv->sv_refcnt.refcount); } /* @@ -443,10 +471,7 @@ struct svc_version { /* Need xprt with congestion control */ bool vs_need_cong_ctrl; - /* Override dispatch function (e.g. when caching replies). - * A return value of 0 means drop the request. - * vs_dispatch == NULL means use default dispatcher. - */ + /* Dispatch function */ int (*vs_dispatch)(struct svc_rqst *, __be32 *); }; @@ -457,9 +482,11 @@ struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ - int (*pc_decode)(struct svc_rqst *, __be32 *data); + bool (*pc_decode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR encode result: */ - int (*pc_encode)(struct svc_rqst *, __be32 *data); + bool (*pc_encode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ @@ -470,29 +497,6 @@ struct svc_procedure { }; /* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -extern struct svc_pool_map svc_pool_map; - -/* * Function prototypes. 
*/ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); @@ -502,20 +506,14 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int, const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); -struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, - struct svc_pool *pool, int node); void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -unsigned int svc_pool_map_get(void); -void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_destroy(struct svc_serv *); void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, @@ -532,8 +530,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct page **pages, - struct kvec *first, size_t total); + struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index 068e1982ad37..74bfdffaf7b0 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -792,8 +792,8 @@ enum ssam_event_mask { #define SSAM_EVENT_REGISTRY_KIP \ SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, 0x02, 0x27, 0x28) -#define SSAM_EVENT_REGISTRY_REG \ - SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 0x02, 0x01, 0x02) +#define SSAM_EVENT_REGISTRY_REG(tid)\ + SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 
tid, 0x01, 0x02) /** * enum ssam_event_notifier_flags - Flags for event notifiers. diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index f636c5310321..cc257097eb05 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -319,6 +319,15 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); ssam_device_driver_unregister) +/* -- Helpers for controller and hub devices. ------------------------------- */ + +#ifdef CONFIG_SURFACE_AGGREGATOR_BUS +void ssam_remove_clients(struct device *dev); +#else /* CONFIG_SURFACE_AGGREGATOR_BUS */ +static inline void ssam_remove_clients(struct device *dev) {} +#endif /* CONFIG_SURFACE_AGGREGATOR_BUS */ + + /* -- Helpers for client-device requests. ----------------------------------- */ /** diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8af13ba60c7e..5785d909c321 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern u32 swsusp_hardware_signature; extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); diff --git a/include/linux/swap.h b/include/linux/swap.h index ba52f3a3478e..1d38d9475c4d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,11 +320,17 @@ struct vma_swap_readahead { #endif }; +static inline swp_entry_t folio_swap_entry(struct folio *folio) +{ + swp_entry_t entry = { .val = page_private(&folio->page) }; + return entry; +} + /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); -void workingset_refault(struct page *page, void *shadow); -void 
workingset_activation(struct page *page); +void workingset_refault(struct folio *folio, void *shadow); +void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); @@ -335,7 +341,6 @@ void workingset_update_node(struct xa_node *node); /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; -extern unsigned long nr_free_buffer_pages(void); /* Definition of global_zone_page_state not available yet */ #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) @@ -344,9 +349,11 @@ extern unsigned long nr_free_buffer_pages(void); /* linux/mm/swap.c */ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); -extern void lru_note_cost_page(struct page *); +extern void lru_note_cost_folio(struct folio *); +extern void folio_add_lru(struct folio *); extern void lru_cache_add(struct page *); -extern void mark_page_accessed(struct page *); +void mark_page_accessed(struct page *); +void folio_mark_accessed(struct folio *); extern atomic_t lru_disable_count; @@ -365,7 +372,6 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); @@ -508,7 +514,7 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern bool reuse_swap_page(struct page *, int *); +extern bool reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); @@ -674,8 +680,8 
@@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page, total_map_swapcount) \ - (page_trans_huge_mapcount(page, total_map_swapcount) == 1) +#define reuse_swap_page(page) \ + (page_trans_huge_mapcount(page) == 1) static inline int try_to_free_swap(struct page *page) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b0cb2a9973f4..f6c3638255d5 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, size_t mapping_size, size_t alloc_size, - enum dma_data_direction dir, unsigned long attrs); + unsigned int alloc_aligned_mask, enum dma_data_direction dir, + unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, @@ -72,6 +73,9 @@ extern enum swiotlb_force swiotlb_force; * @end: The end address of the swiotlb memory pool. Used to do a quick * range check to see if the memory was in fact allocated by this * API. + * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool + * may be remapped in the memory encrypted case and store virtual + * address for bounce buffer operation. * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and * @end. For default swiotlb, this is command line adjustable via * setup_io_tlb_npages. 
@@ -91,6 +95,7 @@ extern enum swiotlb_force swiotlb_force; struct io_tlb_mem { phys_addr_t start; phys_addr_t end; + void *vaddr; unsigned long nslabs; unsigned long used; unsigned int index; @@ -185,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev) } #endif /* CONFIG_DMA_RESTRICTED_POOL */ +extern phys_addr_t swiotlb_unencrypted_base; + #endif /* __LINUX_SWIOTLB_H */ diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 082f1d51957a..48fabe36509e 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -19,6 +19,7 @@ #define SWITCHTEC_EVENT_EN_CLI BIT(2) #define SWITCHTEC_EVENT_EN_IRQ BIT(3) #define SWITCHTEC_EVENT_FATAL BIT(4) +#define SWITCHTEC_EVENT_NOT_SUPP BIT(31) #define SWITCHTEC_DMA_MRPC_EN BIT(0) @@ -336,8 +337,6 @@ enum { NTB_CTRL_REQ_ID_EN = 1 << 0, NTB_CTRL_LUT_EN = 1 << 0, - - NTB_PART_CTRL_ID_PROT_DIS = 1 << 0, }; struct ntb_ctrl_regs { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 252243c7783d..819c0cb00b6d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -58,6 +58,7 @@ struct mq_attr; struct compat_stat; struct old_timeval32; struct robust_list_head; +struct futex_waitv; struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; @@ -610,7 +611,7 @@ asmlinkage long sys_waitid(int which, pid_t pid, asmlinkage long sys_set_tid_address(int __user *tidptr); asmlinkage long sys_unshare(unsigned long unshare_flags); -/* kernel/futex.c */ +/* kernel/futex/syscalls.c */ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, const struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); @@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid, asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); +asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, + unsigned int nr_futexes, unsigned int flags, + struct __kernel_timespec __user *timeout, clockid_t clockid); + /* 
kernel/hrtimer.c */ asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); @@ -1052,6 +1057,9 @@ asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type ru const void __user *rule_attr, __u32 flags); asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags); asmlinkage long sys_memfd_secret(unsigned int flags); +asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long len, + unsigned long home_node, + unsigned long flags); /* * Architecture-specific system calls diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 96305a64a5a7..c635c2e014e3 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -3,7 +3,7 @@ #define _LINUX_T10_PI_H #include <linux/types.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> /* * A T10 PI-capable target device can be formatted with different diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 48d8a363319e..78b91bb92f0d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -512,11 +512,13 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); +void __tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_cork(struct sock *sk, bool on); int tcp_sock_set_keepcnt(struct sock *sk, int val); int tcp_sock_set_keepidle_locked(struct sock *sk, int val); int tcp_sock_set_keepidle(struct sock *sk, int val); int tcp_sock_set_keepintvl(struct sock *sk, int val); +void __tcp_sock_set_nodelay(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 3ebfea0781f1..5e1533ee3785 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -195,9 +195,13 @@ int tee_session_calc_client_uuid(uuid_t 
*uuid, u32 connection_method, * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h - * @id: unique id of a shared memory object on this device + * @id: unique id of a shared memory object on this device, shared + * with user space + * @sec_world_id: + * secure world assigned id of this shared memory object, not + * used by all drivers * * This pool is only supposed to be accessed directly from the TEE * subsystem and from drivers that implements their own shm pool manager. @@ -210,9 +214,10 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; + u64 sec_world_id; }; /** @@ -582,4 +587,18 @@ struct tee_client_driver { #define to_tee_client_driver(d) \ container_of(d, struct tee_client_driver, driver) +/** + * teedev_open() - Open a struct tee_device + * @teedev: Device to open + * + * @return a pointer to struct tee_context on success or an ERR_PTR on failure. + */ +struct tee_context *teedev_open(struct tee_device *teedev); + +/** + * teedev_close_context() - closes a struct tee_context + * @ctx: The struct tee_context to close + */ +void teedev_close_context(struct tee_context *ctx); + #endif /*__TEE_DRV_H*/ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 0999f6317978..73a6f34b3847 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -118,6 +118,15 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) return test_bit(flag, (unsigned long *)&ti->flags); } +/* + * This may be used in noinstr code, and needs to be __always_inline to prevent + * inadvertent instrumentation. 
+ */ +static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti) +{ + return READ_ONCE(ti->flags); +} + #define set_thread_flag(flag) \ set_ti_thread_flag(current_thread_info(), flag) #define clear_thread_flag(flag) \ @@ -130,6 +139,11 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) test_and_clear_ti_thread_flag(current_thread_info(), flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) +#define read_thread_flags() \ + read_ti_thread_flags(current_thread_info()) + +#define read_task_thread_flags(t) \ + read_ti_thread_flags(task_thread_info(t)) #ifdef CONFIG_GENERIC_ENTRY #define set_syscall_work(fl) \ @@ -203,7 +217,7 @@ static inline void copy_overflow(int size, unsigned long count) static __always_inline __must_check bool check_copy_size(const void *addr, size_t bytes, bool is_source) { - int sz = __compiletime_object_size(addr); + int sz = __builtin_object_size(addr, 0); if (unlikely(sz >= 0 && sz < bytes)) { if (!__builtin_constant_p(bytes)) copy_overflow(sz, bytes); diff --git a/include/linux/topology.h b/include/linux/topology.h index 7634cd737061..a6e201758ae9 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -180,24 +180,55 @@ static inline int cpu_to_mem(int cpu) #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ +#if defined(topology_die_id) && defined(topology_die_cpumask) +#define TOPOLOGY_DIE_SYSFS +#endif +#if defined(topology_cluster_id) && defined(topology_cluster_cpumask) +#define TOPOLOGY_CLUSTER_SYSFS +#endif +#if defined(topology_book_id) && defined(topology_book_cpumask) +#define TOPOLOGY_BOOK_SYSFS +#endif +#if defined(topology_drawer_id) && defined(topology_drawer_cpumask) +#define TOPOLOGY_DRAWER_SYSFS +#endif + #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_die_id #define topology_die_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_cluster_id +#define 
topology_cluster_id(cpu) ((void)(cpu), -1) +#endif #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif +#ifndef topology_book_id +#define topology_book_id(cpu) ((void)(cpu), -1) +#endif +#ifndef topology_drawer_id +#define topology_drawer_id(cpu) ((void)(cpu), -1) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif +#ifndef topology_cluster_cpumask +#define topology_cluster_cpumask(cpu) cpumask_of(cpu) +#endif #ifndef topology_die_cpumask #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif +#ifndef topology_book_cpumask +#define topology_book_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_drawer_cpumask +#define topology_drawer_cpumask(cpu) cpumask_of(cpu) +#endif #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) @@ -206,6 +237,13 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif +#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask) +static inline const struct cpumask *cpu_cluster_mask(int cpu) +{ + return topology_cluster_cpumask(cpu); +} +#endif + static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); diff --git a/include/linux/torture.h b/include/linux/torture.h index 0910c5803f35..63fa4196e51c 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -38,15 +38,18 @@ do { \ pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); \ } \ } while (0) -#define VERBOSE_TOROUT_ERRSTRING(s) \ -do { \ - if (verbose) { \ - verbose_torout_sleep(); \ - pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); \ - } \ -} while (0) +#define TOROUT_ERRSTRING(s) \ + pr_alert("%s" TORTURE_FLAG "!!! 
%s\n", torture_type, s) void verbose_torout_sleep(void); +#define torture_init_error(firsterr) \ +({ \ + int ___firsterr = (firsterr); \ + \ + WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \ + ___firsterr < 0; \ +}) + /* Definitions for online/offline exerciser. */ #ifdef CONFIG_HOTPLUG_CPU int torture_num_online_cpus(void); diff --git a/include/linux/tpm.h b/include/linux/tpm.h index aa11fe323c56..dfeb25a0362d 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -207,6 +207,7 @@ enum tpm2_return_codes { TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */ TPM2_RC_FAILURE = 0x0101, TPM2_RC_DISABLED = 0x0120, + TPM2_RC_UPGRADE = 0x012D, TPM2_RC_COMMAND_CODE = 0x0143, TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, @@ -269,6 +270,7 @@ enum tpm2_cc_attrs { #define TPM_VID_INTEL 0x8086 #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A +#define TPM_VID_ATML 0x1114 enum tpm_chip_flags { TPM_CHIP_FLAG_TPM2 = BIT(1), @@ -277,6 +279,7 @@ enum tpm_chip_flags { TPM_CHIP_FLAG_HAVE_TIMEOUTS = BIT(4), TPM_CHIP_FLAG_ALWAYS_POWERED = BIT(5), TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6), + TPM_CHIP_FLAG_FIRMWARE_UPGRADE = BIT(7), }; #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) @@ -398,6 +401,14 @@ static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value) tpm_buf_append(buf, (u8 *) &value2, 4); } +/* + * Check if TPM device is in the firmware upgrade mode. + */ +static inline bool tpm_is_firmware_upgrade(struct tpm_chip *chip) +{ + return chip->flags & TPM_CHIP_FLAG_FIRMWARE_UPGRADE; +} + static inline u32 tpm2_rc_value(u32 rc) { return (rc & BIT(7)) ? 
rc & 0xff : rc; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 3e475eeb5a99..70c069aef02c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -172,6 +172,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, }; #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -671,9 +672,9 @@ struct trace_event_file { } \ early_initcall(trace_init_perf_perm_##name); -#define PERF_MAX_TRACE_SIZE 2048 +#define PERF_MAX_TRACE_SIZE 8192 -#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ +#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ enum event_trigger_type { ETT_NONE = (0), @@ -782,6 +783,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, FILTER_COMM, diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index fe95f0922526..c303f7a114e9 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -116,13 +116,9 @@ enum { static __always_inline int trace_get_context_bit(void) { - unsigned long pc = preempt_count(); + unsigned char bit = interrupt_context_level(); - if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) - return TRACE_CTX_NORMAL; - else - return pc & NMI_MASK ? TRACE_CTX_NMI : - pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; + return TRACE_CTX_NORMAL - bit; } #ifdef CONFIG_FTRACE_RECORD_RECURSION @@ -139,6 +135,9 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif +/* + * Preemption is promised to be disabled when return bit >= 0. 
+ */ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start) { @@ -148,8 +147,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* - * It could be that preempt_count has not been updated during - * a switch between contexts. Allow for a single recursion. + * If an interrupt occurs during a trace, and another trace + * happens in that interrupt but before the preempt_count is + * updated to reflect the new interrupt context, then this + * will think a recursion occurred, and the event will be dropped. + * Let a single instance happen via the TRANSITION_BIT to + * not drop those events. */ bit = TRACE_CTX_TRANSITION + start; if (val & (1 << bit)) { @@ -162,11 +165,17 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign current->trace_recursion = val; barrier(); + preempt_disable_notrace(); + return bit; } +/* + * Preemption will be enabled (if it was previously enabled). + */ static __always_inline void trace_clear_recursion(int bit) { + preempt_enable_notrace(); barrier(); trace_recursion_clear(bit); } @@ -178,7 +187,7 @@ static __always_inline void trace_clear_recursion(int bit) * tracing recursed in the same context (normal vs interrupt), * * Returns: -1 if a recursion happened. - * >= 0 if no recursion + * >= 0 if no recursion. */ static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, unsigned long parent_ip) diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 2564b7434b4d..88c007ab5ebc 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -54,8 +54,7 @@ struct linux_binprm; /* * ptrace report for syscall entry and exit looks identical. 
*/ -static inline int ptrace_report_syscall(struct pt_regs *regs, - unsigned long message) +static inline int ptrace_report_syscall(unsigned long message) { int ptrace = current->ptrace; @@ -102,7 +101,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, static inline __must_check int tracehook_report_syscall_entry( struct pt_regs *regs) { - return ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_ENTRY); + return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); } /** @@ -127,7 +126,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) if (step) user_single_step_report(regs); else - ptrace_report_syscall(regs, PTRACE_EVENTMSG_SYSCALL_EXIT); + ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); } /** diff --git a/include/linux/tty.h b/include/linux/tty.h index 168e57e40bbb..7b0a5d478ef6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -122,33 +122,84 @@ struct tty_operations; /** * struct tty_struct - state associated with a tty while open * - * @flow.lock: lock for flow members - * @flow.stopped: tty stopped/started by tty_stop/tty_start - * @flow.tco_stopped: tty stopped/started by TCOOFF/TCOON ioctls (it has - * precedense over @flow.stopped) + * @magic: magic value set early in @alloc_tty_struct to %TTY_MAGIC, for + * debugging purposes + * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero + * frees the structure + * @dev: class device or %NULL (e.g. ptys, serdev) + * @driver: &struct tty_driver operating this tty + * @ops: &struct tty_operations of @driver for this tty (open, close, etc.) + * @index: index of this tty (e.g. to construct @name like tty12) + * @ldisc_sem: protects line discipline changes (@ldisc) -- lock tty not pty + * @ldisc: the current line discipline for this tty (n_tty by default) + * @atomic_write_lock: protects against concurrent writers, i.e. 
locks + * @write_cnt, @write_buf and similar + * @legacy_mutex: leftover from history (BKL -> BTM -> @legacy_mutex), + * protecting several operations on this tty + * @throttle_mutex: protects against concurrent tty_throttle_safe() and + * tty_unthrottle_safe() (but not tty_unthrottle()) + * @termios_rwsem: protects @termios and @termios_locked + * @winsize_mutex: protects @winsize + * @termios: termios for the current tty, copied from/to @driver.termios + * @termios_locked: locked termios (by %TIOCGLCKTRMIOS and %TIOCSLCKTRMIOS + * ioctls) + * @name: name of the tty constructed by tty_line_name() (e.g. ttyS3) + * @flags: bitwise OR of %TTY_THROTTLED, %TTY_IO_ERROR, ... + * @count: count of open processes, reaching zero cancels all the work for + * this tty and drops a @kref too (but does not free this tty) + * @winsize: size of the terminal "window" (cf. @winsize_mutex) + * @flow: flow settings grouped together, see also @flow.unused + * @flow.lock: lock for @flow members + * @flow.stopped: tty stopped/started by stop_tty()/start_tty() + * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has + * precedence over @flow.stopped) * @flow.unused: alignment for Alpha, so that no members other than @flow.* are * modified by the same 64b word store. The @flow's __aligned is * there for the very same reason. - * @ctrl.lock: lock for ctrl members + * @ctrl: control settings grouped together, see also @ctrl.unused + * @ctrl.lock: lock for @ctrl members * @ctrl.pgrp: process group of this tty (setpgrp(2)) * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both - * @ctrl.lock and legacy mutex, readers must use at least one of + * @ctrl.lock and @legacy_mutex, readers must use at least one of * them. 
- * @ctrl.pktstatus: packet mode status (bitwise OR of TIOCPKT_* constants) + * @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants) * @ctrl.packet: packet mode enabled + * @ctrl.unused: alignment for Alpha, see @flow.unused for explanation + * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS + * handling + * @receive_room: bytes permitted to feed to @ldisc without any being lost + * @flow_change: controls behavior of throttling, see tty_throttle_safe() and + * tty_unthrottle_safe() + * @link: link to another pty (master -> slave and vice versa) + * @fasync: state for %O_ASYNC (for %SIGIO); managed by fasync_helper() + * @write_wait: concurrent writers are waiting in this queue until they are + * allowed to write + * @read_wait: readers wait for data in this queue + * @hangup_work: normally a work to perform a hangup (do_tty_hangup()); while + * freeing the tty, (re)used to release_one_tty() + * @disc_data: pointer to @ldisc's private data (e.g. to &struct n_tty_data) + * @driver_data: pointer to @driver's private data (e.g. &struct uart_state) + * @files_lock: protects @tty_files list + * @tty_files: list of (re)openers of this tty (i.e. linked &struct + * tty_file_private) + * @closing: when set during close, n_tty processes only START & STOP chars + * @write_buf: temporary buffer used during tty_write() to copy user data to + * @write_cnt: count of bytes written in tty_write() to @write_buf + * @SAK_work: if the tty has a pending do_SAK, it is queued here + * @port: persistent storage for this device (i.e. &struct tty_port) * * All of the state associated with a tty while the tty is open. Persistent - * storage for tty devices is referenced here as @port in struct tty_port. + * storage for tty devices is referenced here as @port and is documented in + * &struct tty_port. */ struct tty_struct { int magic; struct kref kref; - struct device *dev; /* class device or NULL (e.g. 
ptys, serdev) */ + struct device *dev; struct tty_driver *driver; const struct tty_operations *ops; int index; - /* Protects ldisc changes: Lock tty not pty */ struct ld_semaphore ldisc_sem; struct tty_ldisc *ldisc; @@ -157,12 +208,11 @@ struct tty_struct { struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; - /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; char name[64]; unsigned long flags; int count; - struct winsize winsize; /* winsize_mutex */ + struct winsize winsize; struct { spinlock_t lock; @@ -181,7 +231,7 @@ struct tty_struct { } __aligned(sizeof(unsigned long)) ctrl; int hw_stopped; - unsigned int receive_room; /* Bytes free for queue */ + unsigned int receive_room; int flow_change; struct tty_struct *link; @@ -191,7 +241,7 @@ struct tty_struct { struct work_struct hangup_work; void *disc_data; void *driver_data; - spinlock_t files_lock; /* protects tty_files list */ + spinlock_t files_lock; struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 @@ -199,7 +249,6 @@ struct tty_struct { int closing; unsigned char *write_buf; int write_cnt; - /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; } __randomize_layout; @@ -214,26 +263,72 @@ struct tty_file_private { /* tty magic number */ #define TTY_MAGIC 0x5401 -/* - * These bits are used in the flags field of the tty structure. +/** + * DOC: TTY Struct Flags + * + * These bits are used in the :c:member:`tty_struct.flags` field. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. + * + * TTY_THROTTLED + * Driver input is throttled. The ldisc should call + * :c:member:`tty_driver.unthrottle()` in order to resume reception when + * it is ready to process more data (at threshold min). 
+ * + * TTY_IO_ERROR + * If set, causes all subsequent userspace read/write calls on the tty to + * fail, returning -%EIO. (May be no ldisc too.) + * + * TTY_OTHER_CLOSED + * Device is a pty and the other side has closed. + * + * TTY_EXCLUSIVE + * Exclusive open mode (a single opener). + * + * TTY_DO_WRITE_WAKEUP + * If set, causes the driver to call the + * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume + * transmission when it can accept more data to transmit. + * + * TTY_LDISC_OPEN + * Indicates that a line discipline is open. For debugging purposes only. + * + * TTY_PTY_LOCK + * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. + * + * TTY_NO_WRITE_SPLIT + * Prevent driver from splitting up writes into smaller chunks (preserve + * write boundaries to driver). + * + * TTY_HUPPED + * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. + * + * TTY_HUPPING + * The TTY is in the process of hanging up to abort potential readers. + * + * TTY_LDISC_CHANGING + * Line discipline for this TTY is being changed. I/O should not block + * when this is set. Use tty_io_nonblock() to check. + * + * TTY_LDISC_HALTED + * Line discipline for this TTY was stopped. No work should be queued to + * this ldisc. 
*/ -#define TTY_THROTTLED 0 /* Call unthrottle() at threshold min */ -#define TTY_IO_ERROR 1 /* Cause an I/O error (may be no ldisc too) */ -#define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ -#define TTY_EXCLUSIVE 3 /* Exclusive open mode */ -#define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ -#define TTY_LDISC_OPEN 11 /* Line discipline is open */ -#define TTY_PTY_LOCK 16 /* pty private */ -#define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ -#define TTY_HUPPED 18 /* Post driver->hangup() */ -#define TTY_HUPPING 19 /* Hangup in progress */ -#define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ -#define TTY_LDISC_HALTED 22 /* Line discipline is halted */ +#define TTY_THROTTLED 0 +#define TTY_IO_ERROR 1 +#define TTY_OTHER_CLOSED 2 +#define TTY_EXCLUSIVE 3 +#define TTY_DO_WRITE_WAKEUP 5 +#define TTY_LDISC_OPEN 11 +#define TTY_PTY_LOCK 16 +#define TTY_NO_WRITE_SPLIT 17 +#define TTY_HUPPED 18 +#define TTY_HUPPING 19 +#define TTY_LDISC_CHANGING 20 +#define TTY_LDISC_HALTED 22 static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { @@ -252,20 +347,20 @@ static inline bool tty_throttled(struct tty_struct *tty) } #ifdef CONFIG_TTY -extern void tty_kref_put(struct tty_struct *tty); -extern struct pid *tty_get_pgrp(struct tty_struct *tty); -extern void tty_vhangup_self(void); -extern void disassociate_ctty(int priv); -extern dev_t tty_devnum(struct tty_struct *tty); -extern void proc_clear_tty(struct task_struct *p); -extern struct tty_struct *get_current_tty(void); +void tty_kref_put(struct tty_struct *tty); +struct pid *tty_get_pgrp(struct tty_struct *tty); +void tty_vhangup_self(void); +void disassociate_ctty(int priv); +dev_t tty_devnum(struct tty_struct *tty); +void proc_clear_tty(struct task_struct *p); +struct tty_struct *get_current_tty(void); /* tty_io.c */ -extern int __init tty_init(void); -extern const char *tty_name(const struct tty_struct *tty); -extern struct tty_struct 
*tty_kopen_exclusive(dev_t device); -extern struct tty_struct *tty_kopen_shared(dev_t device); -extern void tty_kclose(struct tty_struct *tty); -extern int tty_dev_name_to_number(const char *name, dev_t *number); +int __init tty_init(void); +const char *tty_name(const struct tty_struct *tty); +struct tty_struct *tty_kopen_exclusive(dev_t device); +struct tty_struct *tty_kopen_shared(dev_t device); +void tty_kclose(struct tty_struct *tty); +int tty_dev_name_to_number(const char *name, dev_t *number); #else static inline void tty_kref_put(struct tty_struct *tty) { } @@ -296,7 +391,7 @@ static inline int tty_dev_name_to_number(const char *name, dev_t *number) extern struct ktermios tty_std_termios; -extern int vcs_init(void); +int vcs_init(void); extern struct class *tty_class; @@ -316,34 +411,34 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) return tty; } -extern const char *tty_driver_name(const struct tty_struct *tty); -extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); -extern void stop_tty(struct tty_struct *tty); -extern void start_tty(struct tty_struct *tty); -extern void tty_write_message(struct tty_struct *tty, char *msg); -extern int tty_send_xchar(struct tty_struct *tty, char ch); -extern int tty_put_char(struct tty_struct *tty, unsigned char c); -extern unsigned int tty_chars_in_buffer(struct tty_struct *tty); -extern unsigned int tty_write_room(struct tty_struct *tty); -extern void tty_driver_flush_buffer(struct tty_struct *tty); -extern void tty_unthrottle(struct tty_struct *tty); -extern int tty_throttle_safe(struct tty_struct *tty); -extern int tty_unthrottle_safe(struct tty_struct *tty); -extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); -extern int tty_get_icount(struct tty_struct *tty, - struct serial_icounter_struct *icount); -extern int is_current_pgrp_orphaned(void); -extern void tty_hangup(struct tty_struct *tty); -extern void tty_vhangup(struct tty_struct *tty); -extern int 
tty_hung_up_p(struct file *filp); -extern void do_SAK(struct tty_struct *tty); -extern void __do_SAK(struct tty_struct *tty); -extern void no_tty(void); -extern speed_t tty_termios_baud_rate(struct ktermios *termios); -extern void tty_termios_encode_baud_rate(struct ktermios *termios, - speed_t ibaud, speed_t obaud); -extern void tty_encode_baud_rate(struct tty_struct *tty, - speed_t ibaud, speed_t obaud); +const char *tty_driver_name(const struct tty_struct *tty); +void tty_wait_until_sent(struct tty_struct *tty, long timeout); +void stop_tty(struct tty_struct *tty); +void start_tty(struct tty_struct *tty); +void tty_write_message(struct tty_struct *tty, char *msg); +int tty_send_xchar(struct tty_struct *tty, char ch); +int tty_put_char(struct tty_struct *tty, unsigned char c); +unsigned int tty_chars_in_buffer(struct tty_struct *tty); +unsigned int tty_write_room(struct tty_struct *tty); +void tty_driver_flush_buffer(struct tty_struct *tty); +void tty_unthrottle(struct tty_struct *tty); +int tty_throttle_safe(struct tty_struct *tty); +int tty_unthrottle_safe(struct tty_struct *tty); +int tty_do_resize(struct tty_struct *tty, struct winsize *ws); +int tty_get_icount(struct tty_struct *tty, + struct serial_icounter_struct *icount); +int is_current_pgrp_orphaned(void); +void tty_hangup(struct tty_struct *tty); +void tty_vhangup(struct tty_struct *tty); +int tty_hung_up_p(struct file *filp); +void do_SAK(struct tty_struct *tty); +void __do_SAK(struct tty_struct *tty); +void no_tty(void); +speed_t tty_termios_baud_rate(struct ktermios *termios); +void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, + speed_t obaud); +void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, + speed_t obaud); /** * tty_get_baud_rate - get tty bit rates @@ -363,37 +458,36 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); -extern void 
tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); -extern int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); -extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); +void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); +int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); +int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); -extern void tty_wakeup(struct tty_struct *tty); +void tty_wakeup(struct tty_struct *tty); -extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg); -extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); -extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); -extern void tty_release_struct(struct tty_struct *tty, int idx); -extern void tty_init_termios(struct tty_struct *tty); -extern void tty_save_termios(struct tty_struct *tty); -extern int tty_standard_install(struct tty_driver *driver, +int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); +int tty_perform_flush(struct tty_struct *tty, unsigned long arg); +struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); +void tty_release_struct(struct tty_struct *tty, int idx); +void tty_init_termios(struct tty_struct *tty); +void tty_save_termios(struct tty_struct *tty); +int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; /* n_tty.c */ -extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); +void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY -extern void __init n_tty_init(void); +void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_exit(void); -extern void tty_audit_fork(struct signal_struct *sig); -extern int tty_audit_push(void); +void tty_audit_exit(void); +void tty_audit_fork(struct 
signal_struct *sig); +int tty_audit_push(void); #else static inline void tty_audit_exit(void) { @@ -408,24 +502,23 @@ static inline int tty_audit_push(void) #endif /* tty_ioctl.c */ -extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg); +int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd, + unsigned long arg); /* vt.c */ -extern int vt_ioctl(struct tty_struct *tty, - unsigned int cmd, unsigned long arg); +int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); -extern long vt_compat_ioctl(struct tty_struct *tty, - unsigned int cmd, unsigned long arg); +long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, + unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ -extern void tty_lock(struct tty_struct *tty); -extern int tty_lock_interruptible(struct tty_struct *tty); -extern void tty_unlock(struct tty_struct *tty); -extern void tty_lock_slave(struct tty_struct *tty); -extern void tty_unlock_slave(struct tty_struct *tty); -extern void tty_set_lock_subclass(struct tty_struct *tty); +void tty_lock(struct tty_struct *tty); +int tty_lock_interruptible(struct tty_struct *tty); +void tty_unlock(struct tty_struct *tty); +void tty_lock_slave(struct tty_struct *tty); +void tty_unlock_slave(struct tty_struct *tty); +void tty_set_lock_subclass(struct tty_struct *tty); #endif diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index c20431d8def8..4841d8069c07 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -2,248 +2,350 @@ #ifndef _LINUX_TTY_DRIVER_H #define _LINUX_TTY_DRIVER_H -/* - * This structure defines the interface between the low-level tty - * driver and the tty routines. The following routines can be - * defined; unless noted otherwise, they are optional, and can be - * filled in with a null pointer. 
+#include <linux/export.h> +#include <linux/fs.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/cdev.h> +#include <linux/termios.h> +#include <linux/seq_file.h> + +struct tty_struct; +struct tty_driver; +struct serial_icounter_struct; +struct serial_struct; + +/** + * struct tty_operations -- interface between driver and tty + * + * @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *, + * int idx)`` * - * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx) + * Return the tty device corresponding to @idx, %NULL if there is not + * one currently in use and an %ERR_PTR value on error. Called under + * %tty_mutex (for now!) * - * Return the tty device corresponding to idx, NULL if there is not - * one currently in use and an ERR_PTR value on error. Called under - * tty_mutex (for now!) + * Optional method. Default behaviour is to use the @self->ttys array. * - * Optional method. Default behaviour is to use the ttys array + * @install: ``int ()(struct tty_driver *self, struct tty_struct *tty)`` * - * int (*install)(struct tty_driver *self, struct tty_struct *tty) + * Install a new @tty into the @self's internal tables. Used in + * conjunction with @lookup and @remove methods. * - * Install a new tty into the tty driver internal tables. Used in - * conjunction with lookup and remove methods. + * Optional method. Default behaviour is to use the @self->ttys array. * - * Optional method. Default behaviour is to use the ttys array + * @remove: ``void ()(struct tty_driver *self, struct tty_struct *tty)`` * - * void (*remove)(struct tty_driver *self, struct tty_struct *tty) + * Remove a closed @tty from the @self's internal tables. Used in + * conjunction with @lookup and @install methods. * - * Remove a closed tty from the tty driver internal tables. Used in - * conjunction with lookup and remove methods. + * Optional method. Default behaviour is to use the @self->ttys array. * - * Optional method.
Default behaviour is to use the ttys array + * @open: ``int ()(struct tty_struct *tty, struct file *)`` * - * int (*open)(struct tty_struct * tty, struct file * filp); + * This routine is called when a particular @tty device is opened. This + * routine is mandatory; if this routine is not filled in, the attempted + * open will fail with %ENODEV. * - * This routine is called when a particular tty device is opened. - * This routine is mandatory; if this routine is not filled in, - * the attempted open will fail with ENODEV. + * Required method. Called with tty lock held. May sleep. * - * Required method. Called with tty lock held. + * @close: ``void ()(struct tty_struct *tty, struct file *)`` * - * void (*close)(struct tty_struct * tty, struct file * filp); + * This routine is called when a particular @tty device is closed. At the + * point of return from this call the driver must make no further ldisc + * calls of any kind. * - * This routine is called when a particular tty device is closed. - * Note: called even if the corresponding open() failed. + * Remark: called even if the corresponding @open() failed. * - * Required method. Called with tty lock held. + * Required method. Called with tty lock held. May sleep. * - * void (*shutdown)(struct tty_struct * tty); + * @shutdown: ``void ()(struct tty_struct *tty)`` * - * This routine is called under the tty lock when a particular tty device - * is closed for the last time. It executes before the tty resources - * are freed so may execute while another function holds a tty kref. + * This routine is called under the tty lock when a particular @tty device + * is closed for the last time. It executes before the @tty resources + * are freed so may execute while another function holds a @tty kref. 
* - * void (*cleanup)(struct tty_struct * tty); + * @cleanup: ``void ()(struct tty_struct *tty)`` * - * This routine is called asynchronously when a particular tty device + * This routine is called asynchronously when a particular @tty device * is closed for the last time freeing up the resources. This is * actually the second part of shutdown for routines that might sleep. * + * @write: ``int ()(struct tty_struct *tty, const unsigned char *buf, + * int count)`` * - * int (*write)(struct tty_struct * tty, - * const unsigned char *buf, int count); - * - * This routine is called by the kernel to write a series of - * characters to the tty device. The characters may come from - * user space or kernel space. This routine will return the + * This routine is called by the kernel to write a series (@count) of + * characters (@buf) to the @tty device. The characters may come from + * user space or kernel space. This routine will return the * number of characters actually accepted for writing. * - * Optional: Required for writable devices. + * May occur in parallel in special cases. Because this includes panic + * paths drivers generally shouldn't try and do clever locking here. * - * int (*put_char)(struct tty_struct *tty, unsigned char ch); + * Optional: Required for writable devices. May not sleep. * - * This routine is called by the kernel to write a single - * character to the tty device. If the kernel uses this routine, - * it must call the flush_chars() routine (if defined) when it is - * done stuffing characters into the driver. If there is no room - * in the queue, the character is ignored. + * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)`` * - * Optional: Kernel will use the write method if not provided. + * This routine is called by the kernel to write a single character @ch to + * the @tty device. If the kernel uses this routine, it must call the + * @flush_chars() routine (if defined) when it is done stuffing characters + * into the driver. 
If there is no room in the queue, the character is + * ignored. * - * Note: Do not call this function directly, call tty_put_char + * Optional: Kernel will use the @write method if not provided. Do not + * call this function directly, call tty_put_char(). * - * void (*flush_chars)(struct tty_struct *tty); + * @flush_chars: ``void ()(struct tty_struct *tty)`` * - * This routine is called by the kernel after it has written a - * series of characters to the tty device using put_char(). + * This routine is called by the kernel after it has written a + * series of characters to the tty device using @put_char(). * - * Optional: + * Optional. Do not call this function directly, call + * tty_driver_flush_chars(). * - * Note: Do not call this function directly, call tty_driver_flush_chars - * - * unsigned int (*write_room)(struct tty_struct *tty); + * @write_room: ``unsigned int ()(struct tty_struct *tty)`` * - * This routine returns the numbers of characters the tty driver - * will accept for queuing to be written. This number is subject - * to change as output buffers get emptied, or if the output flow + * This routine returns the number of characters the @tty driver + * will accept for queuing to be written. This number is subject + * to change as output buffers get emptied, or if the output flow * control is acted. * - * Required if write method is provided else not needed. + * The ldisc is responsible for being intelligent about multi-threading of + * write_room/write calls. + * + * Required if @write method is provided else not needed. Do not call this + * function directly, call tty_write_room(). + * + * @chars_in_buffer: ``unsigned int ()(struct tty_struct *tty)`` + * + * This routine returns the number of characters in the device private + * output queue. Used in tty_wait_until_sent() and for poll() + * implementation.
* - * Note: Do not call this function directly, call tty_write_room - * - * int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); + * Optional: if not provided, it is assumed there is no queue on the + * device. Do not call this function directly, call tty_chars_in_buffer(). * - * This routine allows the tty driver to implement - * device-specific ioctls. If the ioctl number passed in cmd - * is not recognized by the driver, it should return ENOIOCTLCMD. + * @ioctl: ``int ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` * - * Optional + * This routine allows the @tty driver to implement device-specific + * ioctls. If the ioctl number passed in @cmd is not recognized by the + * driver, it should return %ENOIOCTLCMD. * - * long (*compat_ioctl)(struct tty_struct *tty,, - * unsigned int cmd, unsigned long arg); + * Optional. * - * implement ioctl processing for 32 bit process on 64 bit system + * @compat_ioctl: ``long ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` * - * Optional - * - * void (*set_termios)(struct tty_struct *tty, struct ktermios * old); + * Implement ioctl processing for 32 bit process on 64 bit system. * - * This routine allows the tty driver to be notified when - * device's termios settings have changed. + * Optional. * - * Optional: Called under the termios lock + * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` * + * This routine allows the @tty driver to be notified when device's + * termios settings have changed. New settings are in @tty->termios. + * Previous settings are passed in the @old argument. * - * void (*set_ldisc)(struct tty_struct *tty); + * The API is defined such that the driver should return the actual modes + * selected. This means that the driver is responsible for modifying any + * bits in @tty->termios it cannot fulfill to indicate the actual modes + * being used. 
* - * This routine allows the tty driver to be notified when the - * device's termios settings have changed. + * Optional. Called under the @tty->termios_rwsem. May sleep. * - * Optional: Called under BKL (currently) - * - * void (*throttle)(struct tty_struct * tty); + * @set_ldisc: ``void ()(struct tty_struct *tty)`` * - * This routine notifies the tty driver that input buffers for - * the line discipline are close to full, and it should somehow - * signal that no more characters should be sent to the tty. + * This routine allows the @tty driver to be notified when the device's + * line discipline is being changed. At the point this is done the + * discipline is not yet usable. * - * Optional: Always invoke via tty_throttle_safe(), called under the - * termios lock. - * - * void (*unthrottle)(struct tty_struct * tty); + * Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem. * - * This routine notifies the tty drivers that it should signals - * that characters can now be sent to the tty without fear of - * overrunning the input buffers of the line disciplines. - * - * Optional: Always invoke via tty_unthrottle(), called under the - * termios lock. + * @throttle: ``void ()(struct tty_struct *tty)`` * - * void (*stop)(struct tty_struct *tty); + * This routine notifies the @tty driver that input buffers for the line + * discipline are close to full, and it should somehow signal that no more + * characters should be sent to the @tty. * - * This routine notifies the tty driver that it should stop - * outputting characters to the tty device. + * Serialization including with @unthrottle() is the job of the ldisc + * layer. * - * Called with ->flow.lock held. Serialized with start() method. + * Optional: Always invoke via tty_throttle_safe(). Called under the + * @tty->termios_rwsem. * - * Optional: + * @unthrottle: ``void ()(struct tty_struct *tty)`` * - * Note: Call stop_tty not this method. 
- * - * void (*start)(struct tty_struct *tty); + * This routine notifies the @tty driver that it should signal that + * characters can now be sent to the @tty without fear of overrunning the + * input buffers of the line disciplines. * - * This routine notifies the tty driver that it resume sending + * Optional. Always invoke via tty_unthrottle(). Called under the + * @tty->termios_rwsem. + * + * @stop: ``void ()(struct tty_struct *tty)`` + * + * This routine notifies the @tty driver that it should stop outputting * characters to the tty device. * - * Called with ->flow.lock held. Serialized with start() method. + * Called with @tty->flow.lock held. Serialized with @start() method. + * + * Optional. Always invoke via stop_tty(). + * + * @start: ``void ()(struct tty_struct *tty)`` + * + * This routine notifies the @tty driver that it should resume sending + * characters to the @tty device. + * + * Called with @tty->flow.lock held. Serialized with @stop() method. + * + * Optional. Always invoke via start_tty(). + * + * @hangup: ``void ()(struct tty_struct *tty)`` + * + * This routine notifies the @tty driver that it should hang up the @tty + * device. * - * Optional: + * Optional. Called with tty lock held. * - * Note: Call start_tty not this method. - * - * void (*hangup)(struct tty_struct *tty); + * @break_ctl: ``int ()(struct tty_struct *tty, int state)`` * - * This routine notifies the tty driver that it should hang up the - * tty device. + * This optional routine requests the @tty driver to turn on or off BREAK + * status on the RS-232 port. If @state is -1, then the BREAK status + * should be turned on; if @state is 0, then BREAK should be turned off. * - * Optional: + * If this routine is implemented, the high-level tty driver will handle + * the following ioctls: %TCSBRK, %TCSBRKP, %TIOCSBRK, %TIOCCBRK. * - * Called with tty lock held.
+ * If the driver sets %TTY_DRIVER_HARDWARE_BREAK in tty_alloc_driver(), + * then the interface will also be called with actual times and the + * hardware is expected to do the delay work itself. 0 and -1 are still + * used for on/off. * - * int (*break_ctl)(struct tty_struct *tty, int state); + * Optional: Required for %TCSBRK/%BRKP/etc. handling. May sleep. * - * This optional routine requests the tty driver to turn on or - * off BREAK status on the RS-232 port. If state is -1, - * then the BREAK status should be turned on; if state is 0, then - * BREAK should be turned off. + * @flush_buffer: ``void ()(struct tty_struct *tty)`` * - * If this routine is implemented, the high-level tty driver will - * handle the following ioctls: TCSBRK, TCSBRKP, TIOCSBRK, - * TIOCCBRK. + * This routine discards device private output buffer. Invoked on close, + * hangup, to implement %TCOFLUSH ioctl and similar. * - * If the driver sets TTY_DRIVER_HARDWARE_BREAK then the interface - * will also be called with actual times and the hardware is expected - * to do the delay work itself. 0 and -1 are still used for on/off. + * Optional: if not provided, it is assumed there is no queue on the + * device. Do not call this function directly, call + * tty_driver_flush_buffer(). * - * Optional: Required for TCSBRK/BRKP/etc handling. + * @wait_until_sent: ``void ()(struct tty_struct *tty, int timeout)`` * - * void (*wait_until_sent)(struct tty_struct *tty, int timeout); - * - * This routine waits until the device has written out all of the - * characters in its transmitter FIFO. + * This routine waits until the device has written out all of the + * characters in its transmitter FIFO. Or until @timeout (in jiffies) is + * reached. * - * Optional: If not provided the device is assumed to have no FIFO + * Optional: If not provided, the device is assumed to have no FIFO. + * Usually correct to invoke via tty_wait_until_sent(). May sleep. 
* - * Note: Usually correct to call tty_wait_until_sent + * @send_xchar: ``void ()(struct tty_struct *tty, char ch)`` * - * void (*send_xchar)(struct tty_struct *tty, char ch); + * This routine is used to send a high-priority XON/XOFF character (@ch) + * to the @tty device. * - * This routine is used to send a high-priority XON/XOFF - * character to the device. + * Optional: If not provided, then the @write method is called under + * the @tty->atomic_write_lock to keep it serialized with the ldisc. * - * Optional: If not provided then the write method is called under - * the atomic write lock to keep it serialized with the ldisc. + * @tiocmget: ``int ()(struct tty_struct *tty)`` * - * int (*resize)(struct tty_struct *tty, struct winsize *ws) + * This routine is used to obtain the modem status bits from the @tty + * driver. * - * Called when a termios request is issued which changes the - * requested terminal geometry. + * Optional: If not provided, then %ENOTTY is returned from the %TIOCMGET + * ioctl. Do not call this function directly, call tty_tiocmget(). + * + * @tiocmset: ``int ()(struct tty_struct *tty, + * unsigned int set, unsigned int clear)`` + * + * This routine is used to set the modem status bits to the @tty driver. + * First, @clear bits should be cleared, then @set bits set. + * + * Optional: If not provided, then %ENOTTY is returned from the %TIOCMSET + * ioctl. Do not call this function directly, call tty_tiocmset(). + * + * @resize: ``int ()(struct tty_struct *tty, struct winsize *ws)`` + * + * Called when a termios request is issued which changes the requested + * terminal geometry to @ws. * * Optional: the default action is to update the termios structure * without error. This is usually the correct behaviour. Drivers should - * not force errors here if they are not resizable objects (eg a serial + * not force errors here if they are not resizable objects (e.g. a serial * line). 
See tty_do_resize() if you need to wrap the standard method - * in your own logic - the usual case. + * in your own logic -- the usual case. + * + * @get_icount: ``int ()(struct tty_struct *tty, + * struct serial_icounter_struct *icount)`` + * + * Called when the @tty device receives a %TIOCGICOUNT ioctl. Passed a + * kernel structure @icount to complete. + * + * Optional: called only if provided, otherwise %ENOTTY will be returned. * - * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount); + * @get_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)`` * - * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel - * structure to complete. This method is optional and will only be called - * if provided (otherwise ENOTTY will be returned). + * Called when the @tty device receives a %TIOCGSERIAL ioctl. Passed a + * kernel structure @p (&struct serial_struct) to complete. + * + * Optional: called only if provided, otherwise %ENOTTY will be returned. + * Do not call this function directly, call tty_tiocgserial(). + * + * @set_serial: ``int ()(struct tty_struct *tty, struct serial_struct *p)`` + * + * Called when the @tty device receives a %TIOCSSERIAL ioctl. Passed a + * kernel structure @p (&struct serial_struct) to set the values from. + * + * Optional: called only if provided, otherwise %ENOTTY will be returned. + * Do not call this function directly, call tty_tiocsserial(). + * + * @show_fdinfo: ``void ()(struct tty_struct *tty, struct seq_file *m)`` + * + * Called when the @tty device file descriptor receives a fdinfo request + * from VFS (to show in /proc/<pid>/fdinfo/). @m should be filled with + * information. + * + * Optional: called only if provided, otherwise nothing is written to @m. + * Do not call this function directly, call tty_show_fdinfo(). + * + * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)`` + * + * kgdboc support (Documentation/dev-tools/kgdb.rst).
This routine is + * called to initialize the HW for later use by calling @poll_get_char or + * @poll_put_char. + * + * Optional: called only if provided, otherwise skipped as a non-polling + * driver. + * + * @poll_get_char: ``int ()(struct tty_driver *driver, int line)`` + * + * kgdboc support (see @poll_init). @driver should read a character from a + * tty identified by @line and return it. + * + * Optional: called only if @poll_init provided. + * + * @poll_put_char: ``void ()(struct tty_driver *driver, int line, char ch)`` + * + * kgdboc support (see @poll_init). @driver should write character @ch to + * a tty identified by @line. + * + * Optional: called only if @poll_init provided. + * + * @proc_show: ``int ()(struct seq_file *m, void *driver)`` + * + * Driver @driver (cast to &struct tty_driver) can show additional info in + * /proc/tty/driver/<driver_name>. It is enough to fill in the information + * into @m. + * + * Optional: called only if provided, otherwise no /proc entry created. + * + * This structure defines the interface between the low-level tty driver and + * the tty routines. These routines can be defined. Unless noted otherwise, + * they are optional, and can be filled in with a %NULL pointer. 
*/ - -#include <linux/export.h> -#include <linux/fs.h> -#include <linux/kref.h> -#include <linux/list.h> -#include <linux/cdev.h> -#include <linux/termios.h> -#include <linux/seq_file.h> - -struct tty_struct; -struct tty_driver; -struct serial_icounter_struct; -struct serial_struct; - struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, struct file *filp, int idx); @@ -288,26 +390,64 @@ struct tty_operations { int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif - int (*proc_show)(struct seq_file *, void *); + int (*proc_show)(struct seq_file *m, void *driver); } __randomize_layout; +/** + * struct tty_driver -- driver for TTY devices + * + * @magic: set to %TTY_DRIVER_MAGIC in __tty_alloc_driver() + * @kref: reference counting. Reaching zero frees all the internals and the + * driver. + * @cdevs: allocated/registered character /dev devices + * @owner: modules owning this driver. Used drivers cannot be rmmod'ed. + * Automatically set by tty_alloc_driver(). + * @driver_name: name of the driver used in /proc/tty + * @name: used for constructing /dev node name + * @name_base: used as a number base for constructing /dev node name + * @major: major /dev device number (zero for autoassignment) + * @minor_start: the first minor /dev device number + * @num: number of devices allocated + * @type: type of tty driver (%TTY_DRIVER_TYPE_) + * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_) + * @init_termios: termios to set to each tty initially (e.g. 
%tty_std_termios) + * @flags: tty driver flags (%TTY_DRIVER_) + * @proc_entry: proc fs entry, used internally + * @other: driver of the linked tty; only used for the PTY driver + * @ttys: array of active &struct tty_struct, set by tty_standard_install() + * @ports: array of &struct tty_port; can be set during initialization by + * tty_port_link_device() and similar + * @termios: storage for termios at each TTY close for the next open + * @driver_state: pointer to driver's arbitrary data + * @ops: driver hooks for TTYs. Set them using tty_set_operations(). Use &struct + * tty_port helpers in them as much as possible. + * @tty_drivers: used internally to link tty_drivers together + * + * The usual handling of &struct tty_driver is to allocate it by + * tty_alloc_driver(), set up all the necessary members, and register it by + * tty_register_driver(). At last, the driver is torn down by calling + * tty_unregister_driver() followed by tty_driver_kref_put(). + * + * The fields required to be set before calling tty_register_driver() include + * @driver_name, @name, @type, @subtype, @init_termios, and @ops. 
+ */ struct tty_driver { - int magic; /* magic number for this structure */ - struct kref kref; /* Reference management */ + int magic; + struct kref kref; struct cdev **cdevs; struct module *owner; const char *driver_name; const char *name; - int name_base; /* offset of printed name */ - int major; /* major device number */ - int minor_start; /* start of minor device number */ - unsigned int num; /* number of devices allocated */ - short type; /* type of tty driver */ - short subtype; /* subtype of tty driver */ - struct ktermios init_termios; /* Initial termios */ - unsigned long flags; /* tty driver flags */ - struct proc_dir_entry *proc_entry; /* /proc fs entry */ - struct tty_driver *other; /* only used for the PTY driver */ + int name_base; + int major; + int minor_start; + unsigned int num; + short type; + short subtype; + struct ktermios init_termios; + unsigned long flags; + struct proc_dir_entry *proc_entry; + struct tty_driver *other; /* * Pointer to the tty data structures @@ -327,11 +467,11 @@ struct tty_driver { extern struct list_head tty_drivers; -extern struct tty_driver *__tty_alloc_driver(unsigned int lines, - struct module *owner, unsigned long flags); -extern struct tty_driver *tty_find_polling_driver(char *name, int *line); +struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner, + unsigned long flags); +struct tty_driver *tty_find_polling_driver(char *name, int *line); -extern void tty_driver_kref_put(struct tty_driver *driver); +void tty_driver_kref_put(struct tty_driver *driver); /* Use TTY_DRIVER_* flags below */ #define tty_alloc_driver(lines, flags) \ @@ -352,49 +492,53 @@ static inline void tty_set_operations(struct tty_driver *driver, /* tty driver magic number */ #define TTY_DRIVER_MAGIC 0x5402 -/* - * tty driver flags - * - * TTY_DRIVER_RESET_TERMIOS --- requests the tty layer to reset the - * termios setting when the last process has closed the device. - * Used for PTY's, in particular. 
- * - * TTY_DRIVER_REAL_RAW --- if set, indicates that the driver will - * guarantee never not to set any special character handling - * flags if ((IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || - * !INPCK)). That is, if there is no reason for the driver to - * send notifications of parity and break characters up to the - * line driver, it won't do so. This allows the line driver to - * optimize for this case if this flag is set. (Note that there - * is also a promise, if the above case is true, not to signal - * overruns, either.) - * - * TTY_DRIVER_DYNAMIC_DEV --- if set, the individual tty devices need - * to be registered with a call to tty_register_device() when the - * device is found in the system and unregistered with a call to - * tty_unregister_device() so the devices will be show up - * properly in sysfs. If not set, driver->num entries will be - * created by the tty core in sysfs when tty_register_driver() is - * called. This is to be used by drivers that have tty devices - * that can appear and disappear while the main tty driver is - * registered with the tty core. - * - * TTY_DRIVER_DEVPTS_MEM -- don't use the standard arrays, instead - * use dynamic memory keyed through the devpts filesystem. This - * is only applicable to the pty driver. - * - * TTY_DRIVER_HARDWARE_BREAK -- hardware handles break signals. Pass - * the requested timeout to the caller instead of using a simple - * on/off interface. - * - * TTY_DRIVER_DYNAMIC_ALLOC -- do not allocate structures which are - * needed per line for this driver as it would waste memory. - * The driver will take care. - * - * TTY_DRIVER_UNNUMBERED_NODE -- do not create numbered /dev nodes. In - * other words create /dev/ttyprintk and not /dev/ttyprintk0. - * Applicable only when a driver for a single tty device is - * being allocated. +/** + * DOC: TTY Driver Flags + * + * TTY_DRIVER_RESET_TERMIOS + * Requests the tty layer to reset the termios setting when the last + * process has closed the device. 
Used for PTYs, in particular. + * + * TTY_DRIVER_REAL_RAW + * Indicates that the driver will guarantee not to set any special + * character handling flags if this is set for the tty: + * + * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` + * + * That is, if there is no reason for the driver to + * send notifications of parity and break characters up to the line + * driver, it won't do so. This allows the line driver to optimize for + * this case if this flag is set. (Note that there is also a promise, if + * the above case is true, not to signal overruns, either.) + * + * TTY_DRIVER_DYNAMIC_DEV + * The individual tty devices need to be registered with a call to + * tty_register_device() when the device is found in the system and + * unregistered with a call to tty_unregister_device() so the devices will + * show up properly in sysfs. If not set, all &tty_driver.num entries + * will be created by the tty core in sysfs when tty_register_driver() is + * called. This is to be used by drivers that have tty devices that can + * appear and disappear while the main tty driver is registered with the + * tty core. + * + * TTY_DRIVER_DEVPTS_MEM + * Don't use the standard arrays (&tty_driver.ttys and + * &tty_driver.termios), instead use dynamic memory keyed through the + * devpts filesystem. This is only applicable to the PTY driver. + * + * TTY_DRIVER_HARDWARE_BREAK + * Hardware handles break signals. Pass the requested timeout to the + * &tty_operations.break_ctl instead of using a simple on/off interface. + * + * TTY_DRIVER_DYNAMIC_ALLOC + * Do not allocate structures which are needed per line for this driver + * (&tty_driver.ports) as it would waste memory. The driver will take + * care. This is only applicable to the PTY driver. + * + * TTY_DRIVER_UNNUMBERED_NODE + * Do not create numbered ``/dev`` nodes. For example, create + * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a + * driver for a single tty device is being allocated. 
*/ #define TTY_DRIVER_INSTALLED 0x0001 #define TTY_DRIVER_RESET_TERMIOS 0x0002 diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 32284992b31a..483d41cbcbb7 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -7,17 +7,16 @@ struct tty_ldisc; -extern int tty_buffer_set_limit(struct tty_port *port, int limit); -extern unsigned int tty_buffer_space_avail(struct tty_port *port); -extern int tty_buffer_request_room(struct tty_port *port, size_t size); -extern int tty_insert_flip_string_flags(struct tty_port *port, +int tty_buffer_set_limit(struct tty_port *port, int limit); +unsigned int tty_buffer_space_avail(struct tty_port *port); +int tty_buffer_request_room(struct tty_port *port, size_t size); +int tty_insert_flip_string_flags(struct tty_port *port, const unsigned char *chars, const char *flags, size_t size); -extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, +int tty_insert_flip_string_fixed_flag(struct tty_port *port, const unsigned char *chars, char flag, size_t size); -extern int tty_prepare_flip_string(struct tty_port *port, - unsigned char **chars, size_t size); -extern void tty_flip_buffer_push(struct tty_port *port); -void tty_schedule_flip(struct tty_port *port); +int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, + size_t size); +void tty_flip_buffer_push(struct tty_port *port); int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, @@ -45,7 +44,7 @@ static inline int tty_insert_flip_string(struct tty_port *port, int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, const char *f, int count); -extern void tty_buffer_lock_exclusive(struct tty_port *port); -extern void tty_buffer_unlock_exclusive(struct tty_port *port); +void tty_buffer_lock_exclusive(struct tty_port *port); +void tty_buffer_unlock_exclusive(struct tty_port *port); #endif /* _LINUX_TTY_FLIP_H */ diff --git 
a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index b1d812e902aa..e85002b56752 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -4,127 +4,6 @@ struct tty_struct; -/* - * This structure defines the interface between the tty line discipline - * implementation and the tty routines. The following routines can be - * defined; unless noted otherwise, they are optional, and can be - * filled in with a null pointer. - * - * int (*open)(struct tty_struct *); - * - * This function is called when the line discipline is associated - * with the tty. The line discipline can use this as an - * opportunity to initialize any state needed by the ldisc routines. - * - * void (*close)(struct tty_struct *); - * - * This function is called when the line discipline is being - * shutdown, either because the tty is being closed or because - * the tty is being changed to use a new line discipline - * - * void (*flush_buffer)(struct tty_struct *tty); - * - * This function instructs the line discipline to clear its - * buffers of any input characters it may have queued to be - * delivered to the user mode process. - * - * ssize_t (*read)(struct tty_struct * tty, struct file * file, - * unsigned char * buf, size_t nr); - * - * This function is called when the user requests to read from - * the tty. The line discipline will return whatever characters - * it has buffered up for the user. If this function is not - * defined, the user will receive an EIO error. - * - * ssize_t (*write)(struct tty_struct * tty, struct file * file, - * const unsigned char * buf, size_t nr); - * - * This function is called when the user requests to write to the - * tty. The line discipline will deliver the characters to the - * low-level tty device for transmission, optionally performing - * some processing on the characters first. If this function is - * not defined, the user will receive an EIO error. 
- * - * int (*ioctl)(struct tty_struct * tty, struct file * file, - * unsigned int cmd, unsigned long arg); - * - * This function is called when the user requests an ioctl which - * is not handled by the tty layer or the low-level tty driver. - * It is intended for ioctls which affect line discpline - * operation. Note that the search order for ioctls is (1) tty - * layer, (2) tty low-level driver, (3) line discpline. So a - * low-level driver can "grab" an ioctl request before the line - * discpline has a chance to see it. - * - * int (*compat_ioctl)(struct tty_struct * tty, struct file * file, - * unsigned int cmd, unsigned long arg); - * - * Process ioctl calls from 32-bit process on 64-bit system - * - * NOTE: only ioctls that are neither "pointer to compatible - * structure" nor tty-generic. Something private that takes - * an integer or a pointer to wordsize-sensitive structure - * belongs here, but most of ldiscs will happily leave - * it NULL. - * - * void (*set_termios)(struct tty_struct *tty, struct ktermios * old); - * - * This function notifies the line discpline that a change has - * been made to the termios structure. - * - * int (*poll)(struct tty_struct * tty, struct file * file, - * poll_table *wait); - * - * This function is called when a user attempts to select/poll on a - * tty device. It is solely the responsibility of the line - * discipline to handle poll requests. - * - * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, - * char *fp, int count); - * - * This function is called by the low-level tty driver to send - * characters received by the hardware to the line discpline for - * processing. <cp> is a pointer to the buffer of input - * character received by the device. <fp> is a pointer to a - * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. <fp> may be NULL to indicate - * all data received is TTY_NORMAL. 
- * - * void (*write_wakeup)(struct tty_struct *); - * - * This function is called by the low-level tty driver to signal - * that line discpline should try to send more characters to the - * low-level driver for transmission. If the line discpline does - * not have any more data to send, it can just return. If the line - * discipline does have some data to send, please arise a tasklet - * or workqueue to do the real data transfer. Do not send data in - * this hook, it may leads to a deadlock. - * - * int (*hangup)(struct tty_struct *) - * - * Called on a hangup. Tells the discipline that it should - * cease I/O to the tty driver. Can sleep. The driver should - * seek to perform this action quickly but should wait until - * any pending driver I/O is completed. - * - * void (*dcd_change)(struct tty_struct *tty, unsigned int status) - * - * Tells the discipline that the DCD pin has changed its status. - * Used exclusively by the N_PPS (Pulse-Per-Second) line discipline. - * - * int (*receive_buf2)(struct tty_struct *, const unsigned char *cp, - * char *fp, int count); - * - * This function is called by the low-level tty driver to send - * characters received by the hardware to the line discpline for - * processing. <cp> is a pointer to the buffer of input - * character received by the device. <fp> is a pointer to a - * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. <fp> may be NULL to indicate - * all data received is TTY_NORMAL. - * If assigned, prefer this function for automatic flow control. 
- */ - #include <linux/fs.h> #include <linux/wait.h> #include <linux/atomic.h> @@ -146,7 +25,7 @@ struct ld_semaphore { #endif }; -extern void __init_ldsem(struct ld_semaphore *sem, const char *name, +void __init_ldsem(struct ld_semaphore *sem, const char *name, struct lock_class_key *key); #define init_ldsem(sem) \ @@ -157,18 +36,18 @@ do { \ } while (0) -extern int ldsem_down_read(struct ld_semaphore *sem, long timeout); -extern int ldsem_down_read_trylock(struct ld_semaphore *sem); -extern int ldsem_down_write(struct ld_semaphore *sem, long timeout); -extern int ldsem_down_write_trylock(struct ld_semaphore *sem); -extern void ldsem_up_read(struct ld_semaphore *sem); -extern void ldsem_up_write(struct ld_semaphore *sem); +int ldsem_down_read(struct ld_semaphore *sem, long timeout); +int ldsem_down_read_trylock(struct ld_semaphore *sem); +int ldsem_down_write(struct ld_semaphore *sem, long timeout); +int ldsem_down_write_trylock(struct ld_semaphore *sem); +void ldsem_up_read(struct ld_semaphore *sem); +void ldsem_up_write(struct ld_semaphore *sem); #ifdef CONFIG_DEBUG_LOCK_ALLOC -extern int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, - long timeout); -extern int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, - long timeout); +int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, + long timeout); +int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, + long timeout); #else # define ldsem_down_read_nested(sem, subclass, timeout) \ ldsem_down_read(sem, timeout) @@ -176,40 +55,179 @@ extern int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, ldsem_down_write(sem, timeout) #endif - +/** + * struct tty_ldisc_ops - ldisc operations + * + * @name: name of this ldisc rendered in /proc/tty/ldiscs + * @num: ``N_*`` number (%N_TTY, %N_HDLC, ...) 
reserved to this ldisc + * + * @open: [TTY] ``int ()(struct tty_struct *tty)`` + * + * This function is called when the line discipline is associated with the + * @tty. No other call into the line discipline for this tty will occur + * until it completes successfully. It should initialize any state needed + * by the ldisc, and set @tty->receive_room to the maximum amount of data + * the line discipline is willing to accept from the driver with a single + * call to @receive_buf(). Returning an error will prevent the ldisc from + * being attached. + * + * Can sleep. + * + * @close: [TTY] ``void ()(struct tty_struct *tty)`` + * + * This function is called when the line discipline is being shutdown, + * either because the @tty is being closed or because the @tty is being + * changed to use a new line discipline. At the point of execution no + * further users will enter the ldisc code for this tty. + * + * Can sleep. + * + * @flush_buffer: [TTY] ``void ()(struct tty_struct *tty)`` + * + * This function instructs the line discipline to clear its buffers of any + * input characters it may have queued to be delivered to the user mode + * process. It may be called at any point between open and close. + * + * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, + * unsigned char *buf, size_t nr)`` + * + * This function is called when the user requests to read from the @tty. + * The line discipline will return whatever characters it has buffered up + * for the user. If this function is not defined, the user will receive + * an %EIO error. Multiple read calls may occur in parallel and the ldisc + * must deal with serialization issues. + * + * Can sleep. + * + * @write: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, + * const unsigned char *buf, size_t nr)`` + * + * This function is called when the user requests to write to the @tty. 
+ * The line discipline will deliver the characters to the low-level tty + * device for transmission, optionally performing some processing on the + * characters first. If this function is not defined, the user will + * receive an %EIO error. + * + * Can sleep. + * + * @ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` + * + * This function is called when the user requests an ioctl which is not + * handled by the tty layer or the low-level tty driver. It is intended + * for ioctls which affect line discipline operation. Note that the search + * order for ioctls is (1) tty layer, (2) tty low-level driver, (3) line + * discipline. So a low-level driver can "grab" an ioctl request before + * the line discipline has a chance to see it. + * + * @compat_ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd, + * unsigned long arg)`` + * + * Process ioctl calls from 32-bit process on 64-bit system. + * + * Note that only ioctls that are neither "pointer to compatible + * structure" nor tty-generic. Something private that takes an integer or + * a pointer to wordsize-sensitive structure belongs here, but most of + * ldiscs will happily leave it %NULL. + * + * @set_termios: [TTY] ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * + * This function notifies the line discipline that a change has been made + * to the termios structure. + * + * @poll: [TTY] ``int ()(struct tty_struct *tty, struct file *file, + * struct poll_table_struct *wait)`` + * + * This function is called when a user attempts to select/poll on a @tty + * device. It is solely the responsibility of the line discipline to + * handle poll requests. + * + * @hangup: [TTY] ``void ()(struct tty_struct *tty)`` + * + * Called on a hangup. Tells the discipline that it should cease I/O to + * the tty driver. The driver should seek to perform this action quickly + * but should wait until any pending driver I/O is completed. 
No further + * calls into the ldisc code will occur. + * + * Can sleep. + * + * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, + * const unsigned char *cp, const char *fp, int count)`` + * + * This function is called by the low-level tty driver to send characters + * received by the hardware to the line discipline for processing. @cp is + * a pointer to the buffer of input character received by the device. @fp + * is a pointer to an array of flag bytes which indicate whether a + * character was received with a parity error, etc. @fp may be %NULL to + * indicate all data received is %TTY_NORMAL. + * + * @write_wakeup: [DRV] ``void ()(struct tty_struct *tty)`` + * + * This function is called by the low-level tty driver to signal that line + * discipline should try to send more characters to the low-level driver + * for transmission. If the line discipline does not have any more data to + * send, it can just return. If the line discipline does have some data to + * send, please raise a tasklet or workqueue to do the real data transfer. + * Do not send data in this hook, it may lead to a deadlock. + * + * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, unsigned int status)`` + * + * Tells the discipline that the DCD pin has changed its status. Used + * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. + * + * @receive_buf2: [DRV] ``int ()(struct tty_struct *tty, + * const unsigned char *cp, const char *fp, int count)`` + * + * This function is called by the low-level tty driver to send characters + * received by the hardware to the line discipline for processing. @cp is a + * pointer to the buffer of input character received by the device. @fp + * is a pointer to an array of flag bytes which indicate whether a + * character was received with a parity error, etc. @fp may be %NULL to + * indicate all data received is %TTY_NORMAL. If assigned, prefer this + * function for automatic flow control. 
+ * @owner: module containing this ldisc (for reference counting) + * + * This structure defines the interface between the tty line discipline + * implementation and the tty routines. The above routines can be defined. + * Unless noted otherwise, they are optional, and can be filled in with a %NULL + * pointer. + * + * Hooks marked [TTY] are invoked from the TTY core, the [DRV] ones from the + * tty_driver side. + */ struct tty_ldisc_ops { char *name; int num; - int flags; /* * The following routines are called from above. */ - int (*open)(struct tty_struct *); - void (*close)(struct tty_struct *); + int (*open)(struct tty_struct *tty); + void (*close)(struct tty_struct *tty); void (*flush_buffer)(struct tty_struct *tty); ssize_t (*read)(struct tty_struct *tty, struct file *file, unsigned char *buf, size_t nr, void **cookie, unsigned long offset); ssize_t (*write)(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr); - int (*ioctl)(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg); - int (*compat_ioctl)(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg); + int (*ioctl)(struct tty_struct *tty, unsigned int cmd, + unsigned long arg); + int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, + unsigned long arg); void (*set_termios)(struct tty_struct *tty, struct ktermios *old); - __poll_t (*poll)(struct tty_struct *, struct file *, - struct poll_table_struct *); - int (*hangup)(struct tty_struct *tty); + __poll_t (*poll)(struct tty_struct *tty, struct file *file, + struct poll_table_struct *wait); + void (*hangup)(struct tty_struct *tty); /* * The following routines are called from below. 
*/ - void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); - void (*write_wakeup)(struct tty_struct *); - void (*dcd_change)(struct tty_struct *, unsigned int); - int (*receive_buf2)(struct tty_struct *, const unsigned char *cp, + void (*write_wakeup)(struct tty_struct *tty); + void (*dcd_change)(struct tty_struct *tty, unsigned int status); + int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); struct module *owner; @@ -220,8 +238,6 @@ struct tty_ldisc { struct tty_struct *tty; }; -#define LDISC_FLAG_DEFINED 0x00000001 - #define MODULE_ALIAS_LDISC(ldisc) \ MODULE_ALIAS("tty-ldisc-" __stringify(ldisc)) diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 6e86e9e118b6..d3ea9ed0b98e 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -7,37 +7,33 @@ #include <linux/tty_buffer.h> #include <linux/wait.h> -/* - * Port level information. Each device keeps its own port level information - * so provide a common structure for those ports wanting to use common support - * routines. - * - * The tty port has a different lifetime to the tty so must be kept apart. - * In addition be careful as tty -> port mappings are valid for the life - * of the tty object but in many cases port -> tty mappings are valid only - * until a hangup so don't use the wrong path. - */ - struct attribute_group; struct tty_driver; struct tty_port; struct tty_struct; +/** + * struct tty_port_operations -- operations on tty_port + * @carrier_raised: return 1 if the carrier is raised on @port + * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR + * @shutdown: called when the last close completes or a hangup finishes IFF the + * port was initialized. Do not use to free resources. Turn off the device + * only. Called under the port mutex to serialize against @activate and + * @shutdown. 
+ * @activate: called under the port mutex from tty_port_open(), serialized using + * the port mutex. Supposed to turn on the device. + * + * FIXME: long term getting the tty argument *out* of this would be good + * for consoles. + * + * @destruct: called on the final put of a port. Free resources, possibly incl. + * the port itself. + */ struct tty_port_operations { - /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - /* Control the DTR line */ void (*dtr_rts)(struct tty_port *port, int raise); - /* Called when the last close completes or a hangup finishes - IFF the port was initialized. Do not use to free resources. Called - under the port mutex to serialize against activate/shutdowns */ void (*shutdown)(struct tty_port *port); - /* Called under the port mutex from tty_port_open, serialized using - the port mutex */ - /* FIXME: long term getting the tty argument *out* of this would be - good for consoles */ int (*activate)(struct tty_port *port, struct tty_struct *tty); - /* Called on the final put of a port */ void (*destruct)(struct tty_port *port); }; @@ -48,30 +44,77 @@ struct tty_port_client_operations { extern const struct tty_port_client_operations tty_port_default_client_ops; +/** + * struct tty_port -- port level information + * + * @buf: buffer for this port, locked internally + * @tty: back pointer to &struct tty_struct, valid only if the tty is open. Use + * tty_port_tty_get() to obtain it (and tty_kref_put() to release). + * @itty: internal back pointer to &struct tty_struct. Avoid this. It should be + * eliminated in the long term. + * @ops: tty port operations (like activate, shutdown), see &struct + * tty_port_operations + * @client_ops: tty port client operations (like receive_buf, write_wakeup). + * By default, tty_port_default_client_ops is used. 
+ * @lock: lock protecting @tty + * @blocked_open: # of procs waiting for open in tty_port_block_til_ready() + * @count: usage count + * @open_wait: open waiters queue (waiting e.g. for a carrier) + * @delta_msr_wait: modem status change queue (waiting for MSR changes) + * @flags: user TTY flags (%ASYNC_) + * @iflags: internal flags (%TTY_PORT_) + * @console: when set, the port is a console + * @mutex: locking, for open, shutdown and other port operations + * @buf_mutex: @xmit_buf alloc lock + * @xmit_buf: optional xmit buffer used by some drivers + * @close_delay: delay in jiffies to wait when closing the port + * @closing_wait: delay in jiffies for output to be sent before closing + * @drain_delay: set to zero if no pure time based drain is needed else set to + * size of fifo + * @kref: references counter. Reaching zero calls @ops->destruct() if non-%NULL + * or frees the port otherwise. + * @client_data: pointer to private data, for @client_ops + * + * Each device keeps its own port level information. &struct tty_port was + * introduced as a common structure for such information. As every TTY device + * shall have a backing tty_port structure, every driver can use these members. + * + * The tty port has a different lifetime to the tty so must be kept apart. + * In addition be careful as tty -> port mappings are valid for the life + * of the tty object but in many cases port -> tty mappings are valid only + * until a hangup so don't use the wrong path. + * + * Tty port shall be initialized by tty_port_init() and shut down either by + * tty_port_destroy() (refcounting not used), or tty_port_put() (refcounting). + * + * There are a lot of helpers around &struct tty_port too. To name the most + * significant ones: tty_port_open(), tty_port_close() (or + * tty_port_close_start() and tty_port_close_end() separately if need be), and + * tty_port_hangup(). These call @ops->activate() and @ops->shutdown() as + * needed. 
+ */ struct tty_port { - struct tty_bufhead buf; /* Locked internally */ - struct tty_struct *tty; /* Back pointer */ - struct tty_struct *itty; /* internal back ptr */ - const struct tty_port_operations *ops; /* Port operations */ - const struct tty_port_client_operations *client_ops; /* Port client operations */ - spinlock_t lock; /* Lock protecting tty field */ - int blocked_open; /* Waiting to open */ - int count; /* Usage count */ - wait_queue_head_t open_wait; /* Open waiters */ - wait_queue_head_t delta_msr_wait; /* Modem status change */ - unsigned long flags; /* User TTY flags ASYNC_ */ - unsigned long iflags; /* Internal flags TTY_PORT_ */ - unsigned char console:1; /* port is a console */ - struct mutex mutex; /* Locking */ - struct mutex buf_mutex; /* Buffer alloc lock */ - unsigned char *xmit_buf; /* Optional buffer */ - unsigned int close_delay; /* Close port delay */ - unsigned int closing_wait; /* Delay for output */ - int drain_delay; /* Set to zero if no pure time - based drain is needed else - set to size of fifo */ - struct kref kref; /* Ref counter */ - void *client_data; + struct tty_bufhead buf; + struct tty_struct *tty; + struct tty_struct *itty; + const struct tty_port_operations *ops; + const struct tty_port_client_operations *client_ops; + spinlock_t lock; + int blocked_open; + int count; + wait_queue_head_t open_wait; + wait_queue_head_t delta_msr_wait; + unsigned long flags; + unsigned long iflags; + unsigned char console:1; + struct mutex mutex; + struct mutex buf_mutex; + unsigned char *xmit_buf; + unsigned int close_delay; + unsigned int closing_wait; + int drain_delay; + struct kref kref; + void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index e81856c0ba13..6ad4e9032d53 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -66,7 +66,7 @@ #include <linux/seqlock.h> struct u64_stats_sync { -#if 
BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) seqcount_t seq; #endif }; @@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p) return local64_read(&p->v); } +static inline void u64_stats_set(u64_stats_t *p, u64 val) +{ + local64_set(&p->v, val); +} + static inline void u64_stats_add(u64_stats_t *p, unsigned long val) { local64_add(val, &p->v); @@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p) return p->v; } +static inline void u64_stats_set(u64_stats_t *p, u64 val) +{ + p->v = val; +} + static inline void u64_stats_add(u64_stats_t *p, unsigned long val) { p->v += val; @@ -115,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) #else static inline void u64_stats_init(struct u64_stats_sync *syncp) @@ -125,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); write_seqcount_begin(&syncp->seq); #endif } static inline void u64_stats_update_end(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); #endif } @@ -142,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) { unsigned long flags = 0; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - local_irq_save(flags); +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + if 
(IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_irq_save(flags); write_seqcount_begin(&syncp->seq); #endif return flags; @@ -153,15 +170,18 @@ static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, unsigned long flags) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - local_irq_restore(flags); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_irq_restore(flags); #endif } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); #else return 0; @@ -170,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -179,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_retry(&syncp->seq, start); #else return false; @@ -189,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) preempt_enable(); #endif return __u64_stats_fetch_retry(syncp, 
start); @@ -203,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_disable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_disable(); #endif return __u64_stats_fetch_begin(syncp); @@ -212,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) + preempt_enable(); +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) local_irq_enable(); #endif return __u64_stats_fetch_retry(syncp, start); diff --git a/include/linux/uio.h b/include/linux/uio.h index 207101a9c5c3..1198a2bfc9bf 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/thread_info.h> +#include <linux/mm_types.h> #include <uapi/linux/uio.h> struct page; @@ -35,6 +36,7 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; + bool nofault; bool data_source; size_t iov_offset; size_t count; @@ -133,7 +135,8 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes); +size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); +size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); @@ -144,6 +147,12 @@ size_t _copy_to_iter(const void *addr, 
size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i) +{ + return copy_page_to_iter(&folio->page, offset, bytes, i); +} + static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { @@ -194,7 +203,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /* * Note, users like pmem that depend on the stricter semantics of - * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for + * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the * destination is flushed from the cache on return. */ @@ -209,24 +218,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #define _copy_mc_to_iter _copy_to_iter #endif -static __always_inline __must_check -size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) -{ - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else - return _copy_from_iter_flushcache(addr, bytes, i); -} - -static __always_inline __must_check -size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i) -{ - if (unlikely(!check_copy_size(addr, bytes, true))) - return 0; - else - return _copy_mc_to_iter(addr, bytes, i); -} - size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 74484d44c755..4d39e6e11a95 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -5,9 +5,52 @@ #include <linux/init.h> #include 
<linux/dcache.h> +struct utf8data; +struct utf8data_table; + +#define UNICODE_MAJ_SHIFT 16 +#define UNICODE_MIN_SHIFT 8 + +#define UNICODE_AGE(MAJ, MIN, REV) \ + (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \ + ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ + ((unsigned int)(REV))) + +static inline u8 unicode_major(unsigned int age) +{ + return (age >> UNICODE_MAJ_SHIFT) & 0xff; +} + +static inline u8 unicode_minor(unsigned int age) +{ + return (age >> UNICODE_MIN_SHIFT) & 0xff; +} + +static inline u8 unicode_rev(unsigned int age) +{ + return age & 0xff; +} + +/* + * Two normalization forms are supported: + * 1) NFDI + * - Apply unicode normalization form NFD. + * - Remove any Default_Ignorable_Code_Point. + * 2) NFDICF + * - Apply unicode normalization form NFD. + * - Remove any Default_Ignorable_Code_Point. + * - Apply a full casefold (C + F). + */ +enum utf8_normalization { + UTF8_NFDI = 0, + UTF8_NFDICF, + UTF8_NMAX, +}; + struct unicode_map { - const char *charset; - int version; + unsigned int version; + const struct utf8data *ntab[UTF8_NMAX]; + const struct utf8data_table *tables; }; int utf8_validate(const struct unicode_map *um, const struct qstr *str); @@ -30,7 +73,7 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, int utf8_casefold_hash(const struct unicode_map *um, const void *salt, struct qstr *str); -struct unicode_map *utf8_load(const char *version); +struct unicode_map *utf8_load(unsigned int version); void utf8_unload(struct unicode_map *um); #endif /* _LINUX_UNICODE_H */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 7ccaa76a9a96..200b7b79acb5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -875,15 +875,6 @@ extern struct usb_host_interface *usb_find_alt_setting( unsigned int iface_num, unsigned int alt_num); -#if IS_REACHABLE(CONFIG_USB) -int usb_for_each_port(void *data, int (*fn)(struct device *, void *)); -#else -static inline int usb_for_each_port(void *data, int (*fn)(struct device *, 
void *)) -{ - return 0; -} -#endif - /* port claiming functions */ int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner); diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 1cffa34740b0..969e7dba6358 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -33,7 +33,6 @@ #ifndef __LINUX_USB_CH9_H #define __LINUX_USB_CH9_H -#include <linux/device.h> #include <uapi/linux/usb/ch9.h> /* USB 3.2 SuperSpeed Plus phy signaling rate generation and lane count */ @@ -45,6 +44,8 @@ enum usb_ssp_rate { USB_SSP_GEN_2x2, }; +struct device; + extern const char *usb_ep_type_string(int ep_type); extern const char *usb_speed_string(enum usb_device_speed speed); extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2c1fc9212cf2..548a028f2dab 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -124,7 +124,6 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ -#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). 
@@ -135,7 +134,6 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) -#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h index fd1c9f6a4e37..d3e65eb9e16f 100644 --- a/include/linux/usb/tegra_usb_phy.h +++ b/include/linux/usb/tegra_usb_phy.h @@ -18,6 +18,7 @@ #include <linux/clk.h> #include <linux/gpio.h> +#include <linux/regmap.h> #include <linux/reset.h> #include <linux/usb/otg.h> @@ -30,6 +31,7 @@ * enter host mode * requires_extra_tuning_parameters: true if xcvr_hsslew, hssquelch_level * and hsdiscon_level should be set for adequate signal quality + * requires_pmc_ao_power_up: true if USB AO is powered down by default */ struct tegra_phy_soc_config { @@ -37,6 +39,7 @@ struct tegra_phy_soc_config { bool has_hostpc; bool requires_usbmode_setup; bool requires_extra_tuning_parameters; + bool requires_pmc_ao_power_up; }; struct tegra_utmip_config { @@ -62,6 +65,7 @@ enum tegra_usb_phy_port_speed { struct tegra_xtal_freq; struct tegra_usb_phy { + int irq; int instance; const struct tegra_xtal_freq *freq; void __iomem *regs; @@ -70,6 +74,7 @@ struct tegra_usb_phy { struct clk *pll_u; struct clk *pad_clk; struct regulator *vbus; + struct regmap *pmc_regmap; enum usb_dr_mode mode; void *config; const struct tegra_phy_soc_config *soc_config; diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index e2e44bb1dad8..7ba45a97eeae 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -305,16 +305,4 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); -#if 
IS_REACHABLE(CONFIG_TYPEC) -int typec_link_port(struct device *port); -void typec_unlink_port(struct device *port); -#else -static inline int typec_link_port(struct device *port) -{ - return 0; -} - -static inline void typec_unlink_port(struct device *port) { } -#endif - #endif /* __LINUX_USB_TYPEC_H */ diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 3972ab765de1..c3011ccda430 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -6,6 +6,8 @@ #include <linux/device.h> #include <linux/interrupt.h> #include <linux/vhost_iotlb.h> +#include <linux/virtio_net.h> +#include <linux/if_ether.h> /** * struct vdpa_calllback - vDPA callback definition. @@ -63,6 +65,7 @@ struct vdpa_mgmt_dev; * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. + * @cf_mutex: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? for legacy guests * @use_va: indicate whether virtual address must be used by this device @@ -74,6 +77,7 @@ struct vdpa_device { struct device dev; struct device *dma_dev; const struct vdpa_config_ops *config; + struct mutex cf_mutex; /* Protects get/set config */ unsigned int index; bool features_valid; bool use_va; @@ -91,6 +95,14 @@ struct vdpa_iova_range { u64 last; }; +struct vdpa_dev_set_config { + struct { + u8 mac[ETH_ALEN]; + u16 mtu; + } net; + u64 mask; +}; + /** * Corresponding file area for device memory mapping * @file: vma->vm_file for the mapping @@ -171,6 +183,9 @@ struct vdpa_map_file { * @get_vq_num_max: Get the max size of virtqueue * @vdev: vdpa device * Returns u16: max size of virtqueue + * @get_vq_num_min: Get the min size of virtqueue (optional) + * @vdev: vdpa device + * Returns u16: min size of virtqueue * @get_device_id: Get virtio device id * @vdev: vdpa device * Returns u32: virtio device id @@ -257,7 +272,7 @@ struct vdpa_config_ops { struct vdpa_notification_area 
(*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ - int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); + int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); @@ -266,6 +281,7 @@ struct vdpa_config_ops { void (*set_config_cb)(struct vdpa_device *vdev, struct vdpa_callback *cb); u16 (*get_vq_num_max)(struct vdpa_device *vdev); + u16 (*get_vq_num_min)(struct vdpa_device *vdev); u32 (*get_device_id)(struct vdpa_device *vdev); u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); @@ -382,26 +398,16 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) return ops->set_features(vdev, features); } -static inline void vdpa_get_config(struct vdpa_device *vdev, - unsigned int offset, void *buf, - unsigned int len) -{ - const struct vdpa_config_ops *ops = vdev->config; - - /* - * Config accesses aren't supposed to trigger before features are set. - * If it does happen we assume a legacy guest. - */ - if (!vdev->features_valid) - vdpa_set_features(vdev, 0); - ops->get_config(vdev, offset, buf, len); -} - +void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len); +void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, + const void *buf, unsigned int length); /** * struct vdpa_mgmtdev_ops - vdpa device ops * @dev_add: Add a vdpa device using alloc and register * @mdev: parent device to use for device addition * @name: name of the new vdpa device + * @config: config attributes to apply to the device under creation * Driver need to add a new device using _vdpa_register_device() * after fully initializing the vdpa device. Driver must return 0 * on success or appropriate error code. @@ -412,14 +418,25 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, * _vdpa_unregister_device(). 
*/ struct vdpa_mgmtdev_ops { - int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); + int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); }; +/** + * struct vdpa_mgmt_dev - vdpa management device + * @device: Management parent device + * @ops: operations supported by management device + * @id_table: Pointer to device id table of supported ids + * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that + * management device support during dev_add callback + * @list: list entry + */ struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; - const struct virtio_device_id *id_table; /* supported ids */ + const struct virtio_device_id *id_table; + u64 config_attr_mask; struct list_head list; }; diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 1eaaa93c37bf..329d63babaeb 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -15,7 +15,7 @@ #else #define MODULE_VERMAGIC_SMP "" #endif -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPT_BUILD #define MODULE_VERMAGIC_PREEMPT "preempt " #elif defined(CONFIG_PREEMPT_RT) #define MODULE_VERMAGIC_PREEMPT "preempt_rt " diff --git a/include/linux/vfio.h b/include/linux/vfio.h index b53a9557884a..76191d7abed1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -71,68 +71,17 @@ struct vfio_device_ops { int (*match)(struct vfio_device *vdev, char *buf); }; -extern struct iommu_group *vfio_iommu_group_get(struct device *dev); -extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); - void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); void vfio_uninit_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device); +int vfio_register_emulated_iommu_dev(struct vfio_device *device); void 
vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); -/* events for the backend driver notify callback */ -enum vfio_iommu_notify_type { - VFIO_IOMMU_CONTAINER_CLOSE = 0, -}; - -/** - * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks - */ -struct vfio_iommu_driver_ops { - char *name; - struct module *owner; - void *(*open)(unsigned long arg); - void (*release)(void *iommu_data); - ssize_t (*read)(void *iommu_data, char __user *buf, - size_t count, loff_t *ppos); - ssize_t (*write)(void *iommu_data, const char __user *buf, - size_t count, loff_t *size); - long (*ioctl)(void *iommu_data, unsigned int cmd, - unsigned long arg); - int (*mmap)(void *iommu_data, struct vm_area_struct *vma); - int (*attach_group)(void *iommu_data, - struct iommu_group *group); - void (*detach_group)(void *iommu_data, - struct iommu_group *group); - int (*pin_pages)(void *iommu_data, - struct iommu_group *group, - unsigned long *user_pfn, - int npage, int prot, - unsigned long *phys_pfn); - int (*unpin_pages)(void *iommu_data, - unsigned long *user_pfn, int npage); - int (*register_notifier)(void *iommu_data, - unsigned long *events, - struct notifier_block *nb); - int (*unregister_notifier)(void *iommu_data, - struct notifier_block *nb); - int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, - void *data, size_t count, bool write); - struct iommu_domain *(*group_iommu_domain)(void *iommu_data, - struct iommu_group *group); - void (*notify)(void *iommu_data, - enum vfio_iommu_notify_type event); -}; - -extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); - -extern void vfio_unregister_iommu_driver( - const struct vfio_iommu_driver_ops *ops); - /* * External user API */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 
8519b3ae5d52..4d107ad31149 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -23,6 +23,8 @@ struct virtio_shm_region { * any of @get/@set, @get_status/@set_status, or @get_features/ * @finalize_features are NOT safe to be called from an atomic * context. + * @enable_cbs: enable the callbacks + * vdev: the virtio_device * @get: read the value of a configuration field * vdev: the virtio_device * offset: the offset of the configuration field @@ -75,6 +77,7 @@ struct virtio_shm_region { */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { + void (*enable_cbs)(struct virtio_device *vdev); void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, @@ -229,6 +232,9 @@ void virtio_device_ready(struct virtio_device *dev) { unsigned status = dev->config->get_status(dev); + if (dev->config->enable_cbs) + dev->config->enable_cbs(dev); + BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index b465f8f3e554..a960de68ac69 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,9 +7,27 @@ #include <uapi/linux/udp.h> #include <uapi/linux/virtio_net.h> +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: 
case VIRTIO_NET_HDR_GSO_UDP: @@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, @@ -120,10 +141,15 @@ retry: if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); + /* UFO may not include transport header in gso_size. */ + if (gso_type & SKB_GSO_UDP) + nh_off -= thlen; + /* Too small packets are not really GSO ones. */ - if (skb->len - p_off > gso_size) { + if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h new file mode 100644 index 000000000000..e5d665faf00e --- /dev/null +++ b/include/linux/virtio_pci_legacy.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_PCI_LEGACY_H +#define _LINUX_VIRTIO_PCI_LEGACY_H + +#include "linux/mod_devicetable.h" +#include <linux/pci.h> +#include <linux/virtio_pci.h> + +struct virtio_pci_legacy_device { + struct pci_dev *pci_dev; + + /* Where to read and clear interrupt */ + u8 __iomem *isr; + /* The IO mapping for the PCI config space (legacy mode only) */ + void __iomem *ioaddr; + + struct virtio_device_id id; +}; + +u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev); +u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev); +void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, + u32 features); +u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev); +void 
vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, + u8 status); +u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, + u16 idx, u16 vector); +u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, + u16 vector); +void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, + u16 index, u32 queue_pfn); +bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, + u16 idx); +void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, + u16 idx, u16 size); +u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, + u16 idx); +int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); +void vp_legacy_remove(struct virtio_pci_legacy_device *ldev); + +#endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index a185cc75ff52..7b2363388bfa 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -98,6 +98,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, + THP_SCAN_EXCEED_NONE_PTE, + THP_SCAN_EXCEED_SWAP_PTE, + THP_SCAN_EXCEED_SHARED_PTE, #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD THP_SPLIT_PUD, #endif diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 671d402c3778..880227b9f044 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -22,12 +22,19 @@ struct notifier_block; /* in notifier.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ -#define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 
0x00000200 /* put pages and free array in vfree */ #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) +#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ +#else +#define VM_DEFER_KMEMLEAK 0 +#endif + /* * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. * @@ -136,21 +143,21 @@ static inline void vmalloc_init(void) static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif -extern void *vmalloc(unsigned long size); -extern void *vzalloc(unsigned long size); -extern void *vmalloc_user(unsigned long size); -extern void *vmalloc_node(unsigned long size, int node); -extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_32(unsigned long size); -extern void *vmalloc_32_user(unsigned long size); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); +extern void *vmalloc(unsigned long size) __alloc_size(1); +extern void *vzalloc(unsigned long size) __alloc_size(1); +extern void *vmalloc_user(unsigned long size) __alloc_size(1); +extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1); +extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1); +extern void *vmalloc_32(unsigned long size) __alloc_size(1); +extern void *vmalloc_32_user(unsigned long size) __alloc_size(1); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, - const void *caller); + const void *caller) __alloc_size(1); void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, - int node, const void *caller); -void *vmalloc_no_huge(unsigned long size); + int node, const void *caller) __alloc_size(1); +void *vmalloc_no_huge(unsigned long size) 
__alloc_size(1); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d6a6cf53b127..bfe38869498d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ +static inline void __zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + __mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void __zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void __zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void __node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + __mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void __node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void __node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + +static inline void node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long 
nr) +{ + mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, int migratetype) { @@ -525,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page, #endif /* CONFIG_MEMCG */ -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - static inline void __inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { @@ -543,6 +609,24 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } +static inline void __lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + __mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void __lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void __lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} + static inline void inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { @@ -555,4 +639,21 @@ static inline void dec_lruvec_page_state(struct page *page, mod_lruvec_page_state(page, idx, -1); } +static inline void lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void 
lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} #endif /* _LINUX_VMSTAT_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 93dab0e9580f..851e07da2583 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up_interruptible_sync_poll_locked(x, m) \ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. + */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. 
+ * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ @@ -1160,6 +1186,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i (wait)->flags = 0; \ } while (0) -bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg); +typedef int (*task_call_f)(struct task_struct *p, void *arg); +extern int task_call_func(struct task_struct *p, task_call_f func, void *arg); #endif /* _LINUX_WAIT_H */ diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 2cb3913c1f50..b88d7b58e61e 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -35,6 +35,7 @@ extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; + bool no_notify_data; int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 74d3c1efd9bb..7fee9b6cfede 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -469,7 +469,8 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); -extern void show_workqueue_state(void); +extern void show_all_workqueues(void); +extern void show_one_workqueue(struct workqueue_struct *wq); extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); /** diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d1f65adf6a26..fec248ab1fec 100644 --- a/include/linux/writeback.h +++ 
b/include/linux/writeback.h @@ -11,7 +11,6 @@ #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> -#include <linux/blk-cgroup.h> struct bio; @@ -69,6 +68,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ + unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */ /* * When writeback IOs are bounced through async layers, only the @@ -109,15 +109,12 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc) return flags; } -static inline struct cgroup_subsys_state * -wbc_blkcg_css(struct writeback_control *wbc) -{ #ifdef CONFIG_CGROUP_WRITEBACK - if (wbc->wb) - return wbc->wb->blkcg_css; -#endif - return blkcg_root_css; -} +#define wbc_blkcg_css(wbc) \ + ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css) +#else +#define wbc_blkcg_css(wbc) (blkcg_root_css) +#endif /* CONFIG_CGROUP_WRITEBACK */ /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to @@ -393,7 +390,14 @@ void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); -void account_page_redirty(struct page *page); +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); +void folio_account_redirty(struct folio *folio); +static inline void account_page_redirty(struct page *page) +{ + folio_account_redirty(page_folio(page)); +} +bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); +bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 29db736af86d..bb763085479a 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -28,12 +28,10 @@ #ifndef 
CONFIG_PREEMPT_RT #define WW_MUTEX_BASE mutex #define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) -#define ww_mutex_base_trylock(l) mutex_trylock(l) #define ww_mutex_base_is_locked(b) mutex_is_locked((b)) #else #define WW_MUTEX_BASE rt_mutex #define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k) -#define ww_mutex_base_trylock(l) rt_mutex_trylock(l) #define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex) #endif @@ -339,17 +337,8 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, extern void ww_mutex_unlock(struct ww_mutex *lock); -/** - * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context - * @lock: mutex to lock - * - * Trylocks a mutex without acquire context, so no deadlock detection is - * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. - */ -static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) -{ - return ww_mutex_base_trylock(&lock->base); -} +extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /*** * ww_mutex_destroy - mark a w/w mutex unusable diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 9fac819f92e3..afb3334ec8c5 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -4,12 +4,9 @@ #ifndef __WWAN_H #define __WWAN_H -#include <linux/device.h> -#include <linux/kernel.h> #include <linux/poll.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/netdevice.h> +#include <linux/types.h> /** * enum wwan_port_type - WWAN port types @@ -37,6 +34,10 @@ enum wwan_port_type { WWAN_PORT_UNKNOWN, }; +struct device; +struct file; +struct netlink_ext_ack; +struct sk_buff; struct wwan_port; /** struct wwan_port_ops - The WWAN port operations @@ -171,4 +172,13 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops, void wwan_unregister_ops(struct device *parent); +#ifdef CONFIG_WWAN_DEBUGFS +struct dentry *wwan_get_debugfs_dir(struct device *parent); +#else +static 
inline struct dentry *wwan_get_debugfs_dir(struct device *parent) +{ + return ERR_PTR(-ENODEV); +} +#endif + #endif /* __WWAN_H */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index a91e3d90df8a..d6d5da6ed735 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1581,6 +1581,24 @@ static inline void xas_set(struct xa_state *xas, unsigned long index) } /** + * xas_advance() - Skip over sibling entries. + * @xas: XArray operation state. + * @index: Index of last sibling entry. + * + * Move the operation state to refer to the last sibling entry. + * This is useful for loops that normally want to see sibling + * entries but sometimes want to skip them. Use xas_set() if you + * want to move to an index which is not part of this entry. + */ +static inline void xas_advance(struct xa_state *xas, unsigned long index) +{ + unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0; + + xas->xa_index = index; + xas->xa_offset = (index >> shift) & XA_CHUNK_MASK; +} + +/** * xas_set_order() - Set up XArray operation state for a multislot entry. * @xas: XArray operation state. * @index: Target of the operation. diff --git a/include/linux/xz.h b/include/linux/xz.h index 9884c8440188..7285ca5d56e9 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -234,6 +234,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s); XZ_EXTERN void xz_dec_end(struct xz_dec *s); /* + * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. + * See xz_dec_microlzma_alloc() below for details. + * + * These functions aren't used or available in preboot code and thus aren't + * marked with XZ_EXTERN. This avoids warnings about static functions that + * are never defined. + */ +/** + * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state + */ +struct xz_dec_microlzma; + +/** + * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder + * @mode XZ_SINGLE or XZ_PREALLOC + * @dict_size LZMA dictionary size. 
This must be at least 4 KiB and + * at most 3 GiB. + * + * In contrast to xz_dec_init(), this function only allocates the memory + * and remembers the dictionary size. xz_dec_microlzma_reset() must be used + * before calling xz_dec_microlzma_run(). + * + * The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE. + * With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated. + * + * On success, xz_dec_microlzma_alloc() returns a pointer to + * struct xz_dec_microlzma. If memory allocation fails or + * dict_size is invalid, NULL is returned. + * + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. + */ +extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); + +/** + * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state + * @s Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size Compressed size of the input stream + * @uncomp_size Uncompressed size of the input stream. A value smaller + * than the real uncompressed size of the input stream can + * be specified if uncomp_size_is_exact is set to false. + * uncomp_size can never be set to a value larger than the + * expected real uncompressed size because it would eventually + * result in XZ_DATA_ERROR. + * @uncomp_size_is_exact This is an int instead of bool to avoid + * requiring stdbool.h. This should normally be set to true. + * When this is set to false, error detection is weaker. 
+ */ +extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, + uint32_t comp_size, uint32_t uncomp_size, + int uncomp_size_is_exact); + +/** + * xz_dec_microlzma_run() - Run the MicroLZMA decoder + * @s Decoder state initialized using xz_dec_microlzma_reset() + * @b: Input and output buffers + * + * This works similarly to xz_dec_run() with a few important differences. + * Only the differences are documented here. + * + * The only possible return values are XZ_OK, XZ_STREAM_END, and + * XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress + * is possible due to lack of input data or output space, this function will + * keep returning XZ_OK. Thus, the calling code must be written so that it + * will eventually provide input and output space matching (or exceeding) + * comp_size and uncomp_size arguments given to xz_dec_microlzma_reset(). + * If the caller cannot do this (for example, if the input file is truncated + * or otherwise corrupt), the caller must detect this error by itself to + * avoid an infinite loop. + * + * If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned. + * This can happen also when incorrect dictionary, uncompressed, or + * compressed sizes have been specified. + * + * With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over + * uncompressed data. This way the caller doesn't need to provide a temporary + * output buffer for the bytes that will be ignored. + * + * With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK + * is also possible and thus XZ_SINGLE is actually a limited multi-call mode. + * After XZ_OK the bytes decoded so far may be read from the output buffer. + * It is possible to continue decoding but the variables b->out and b->out_pos + * MUST NOT be changed by the caller. Increasing the value of b->out_size is + * allowed to make more output space available; one doesn't need to provide + * space for the whole uncompressed data on the first call. 
The input buffer + * may be changed normally like with XZ_PREALLOC. This way input data can be + * provided from non-contiguous memory. + */ +extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, + struct xz_buf *b); + +/** + * xz_dec_microlzma_end() - Free the memory allocated for the decoder state + * @s: Decoder state allocated using xz_dec_microlzma_alloc(). + * If s is NULL, this function does nothing. + */ +extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); + +/* * Standalone build (userspace build or in-kernel build for boot time use) * needs a CRC32 implementation. For normal in-kernel use, kernel's own * CRC32 module is used instead, and users of this module don't need to diff --git a/include/linux/zstd.h b/include/linux/zstd.h index e87f78c9b19c..113408eef6ec 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,138 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd) and + * the GPLv2 (found in the COPYING file in the root directory of + * https://github.com/facebook/zstd). You may select, at your option, one of the + * above-listed licenses. 
*/ -#ifndef ZSTD_H -#define ZSTD_H +#ifndef LINUX_ZSTD_H +#define LINUX_ZSTD_H -/* ====== Dependency ======*/ -#include <linux/types.h> /* size_t */ +/** + * This is a kernel-style API that wraps the upstream zstd API, which cannot be + * used directly because the symbols aren't exported. It exposes the minimal + * functionality which is currently required by users of zstd in the kernel. + * Expose extra functions from lib/zstd/zstd.h as needed. + */ +/* ====== Dependency ====== */ +#include <linux/types.h> +#include <linux/zstd_errors.h> +#include <linux/zstd_lib.h> -/*-***************************************************************************** - * Introduction +/* ====== Helper Functions ====== */ +/** + * zstd_compress_bound() - maximum compressed size in worst case scenario + * @src_size: The size of the data to compress. * - * zstd, short for Zstandard, is a fast lossless compression algorithm, - * targeting real-time compression scenarios at zlib-level and better - * compression ratios. The zstd compression library provides in-memory - * compression and decompression functions. The library supports compression - * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled - * ultra, should be used with caution, as they require more memory. - * Compression can be done in: - * - a single step, reusing a context (described as Explicit memory management) - * - unbounded multiple steps (described as Streaming compression) - * The compression ratio achievable on small data can be highly improved using - * compression with a dictionary in: - * - a single step (described as Simple dictionary API) - * - a single step, reusing a dictionary (described as Fast dictionary API) - ******************************************************************************/ - -/*====== Helper functions ======*/ + * Return: The maximum compressed size in the worst case scenario. 
+ */ +size_t zstd_compress_bound(size_t src_size); /** - * enum ZSTD_ErrorCode - zstd error codes + * zstd_is_error() - tells if a size_t function result is an error code + * @code: The function result to check for error. * - * Functions that return size_t can be checked for errors using ZSTD_isError() - * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). + * Return: Non-zero iff the code is an error. + */ +unsigned int zstd_is_error(size_t code); + +/** + * enum zstd_error_code - zstd error codes */ -typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_dictionaryCreation_failed, - ZSTD_error_maxCode -} ZSTD_ErrorCode; +typedef ZSTD_ErrorCode zstd_error_code; /** - * ZSTD_maxCLevel() - maximum compression level available + * zstd_get_error_code() - translates an error function result to an error code + * @code: The function result for which zstd_is_error(code) is true. * - * Return: Maximum compression level available. + * Return: A unique error code for this error. */ -int ZSTD_maxCLevel(void); +zstd_error_code zstd_get_error_code(size_t code); + /** - * ZSTD_compressBound() - maximum compressed size in worst case scenario - * @srcSize: The size of the data to compress. 
+ * zstd_get_error_name() - translates an error function result to a string + * @code: The function result for which zstd_is_error(code) is true. * - * Return: The maximum compressed size in the worst case scenario. + * Return: An error string corresponding to the error code. */ -size_t ZSTD_compressBound(size_t srcSize); +const char *zstd_get_error_name(size_t code); + /** - * ZSTD_isError() - tells if a size_t function result is an error code - * @code: The function result to check for error. + * zstd_min_clevel() - minimum allowed compression level * - * Return: Non-zero iff the code is an error. + * Return: The minimum allowed compression level. */ -static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) -{ - return code > (size_t)-ZSTD_error_maxCode; -} +int zstd_min_clevel(void); + /** - * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode - * @functionResult: The result of a function for which ZSTD_isError() is true. + * zstd_max_clevel() - maximum allowed compression level * - * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 - * if the functionResult isn't an error. + * Return: The maximum allowed compression level. */ -static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( - size_t functionResult) -{ - if (!ZSTD_isError(functionResult)) - return (ZSTD_ErrorCode)0; - return (ZSTD_ErrorCode)(0 - functionResult); -} +int zstd_max_clevel(void); + +/* ====== Parameter Selection ====== */ /** - * enum ZSTD_strategy - zstd compression search strategy + * enum zstd_strategy - zstd compression search strategy * - * From faster to stronger. + * From faster to stronger. See zstd_lib.h. 
*/ -typedef enum { - ZSTD_fast, - ZSTD_dfast, - ZSTD_greedy, - ZSTD_lazy, - ZSTD_lazy2, - ZSTD_btlazy2, - ZSTD_btopt, - ZSTD_btopt2 -} ZSTD_strategy; +typedef ZSTD_strategy zstd_strategy; /** - * struct ZSTD_compressionParameters - zstd compression parameters + * struct zstd_compression_parameters - zstd compression parameters * @windowLog: Log of the largest match distance. Larger means more * compression, and more memory needed during decompression. - * @chainLog: Fully searched segment. Larger means more compression, slower, - * and more memory (useless for fast). + * @chainLog: Fully searched segment. Larger means more compression, + * slower, and more memory (useless for fast). * @hashLog: Dispatch table. Larger means more compression, * slower, and more memory. * @searchLog: Number of searches. Larger means more compression and slower. @@ -141,1017 +100,348 @@ typedef enum { * @targetLength: Acceptable match size for optimal parser (only). Larger means * more compression, and slower. * @strategy: The zstd compression strategy. + * + * See zstd_lib.h. */ -typedef struct { - unsigned int windowLog; - unsigned int chainLog; - unsigned int hashLog; - unsigned int searchLog; - unsigned int searchLength; - unsigned int targetLength; - ZSTD_strategy strategy; -} ZSTD_compressionParameters; +typedef ZSTD_compressionParameters zstd_compression_parameters; /** - * struct ZSTD_frameParameters - zstd frame parameters - * @contentSizeFlag: Controls whether content size will be present in the frame - * header (when known). - * @checksumFlag: Controls whether a 32-bit checksum is generated at the end - * of the frame for error detection. - * @noDictIDFlag: Controls whether dictID will be saved into the frame header - * when using dictionary compression. + * struct zstd_frame_parameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the + * frame header (when known). 
+ * @checksumFlag: Controls whether a 32-bit checksum is generated at the + * end of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame + * header when using dictionary compression. * - * The default value is all fields set to 0. + * The default value is all fields set to 0. See zstd_lib.h. */ -typedef struct { - unsigned int contentSizeFlag; - unsigned int checksumFlag; - unsigned int noDictIDFlag; -} ZSTD_frameParameters; +typedef ZSTD_frameParameters zstd_frame_parameters; /** - * struct ZSTD_parameters - zstd parameters + * struct zstd_parameters - zstd parameters * @cParams: The compression parameters. * @fParams: The frame parameters. */ -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; +typedef ZSTD_parameters zstd_parameters; /** - * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * zstd_get_params() - returns zstd_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. * - * Return: The selected ZSTD_compressionParameters. + * Return: The selected zstd_parameters. */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, - unsigned long long estimatedSrcSize, size_t dictSize); +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size); -/** - * ZSTD_getParams() - returns ZSTD_parameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. 
- * - * The same as ZSTD_getCParams() except also selects the default frame - * parameters (all zero). - * - * Return: The selected ZSTD_parameters. - */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, - unsigned long long estimatedSrcSize, size_t dictSize); +/* ====== Single-pass Compression ====== */ -/*-************************************* - * Explicit memory management - **************************************/ +typedef ZSTD_CCtx zstd_cctx; /** - * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx - * @cParams: The compression parameters to be used for compression. + * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx + * @parameters: The compression parameters to be used. * * If multiple compression parameters might be used, the caller must call - * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * zstd_cctx_workspace_bound() for each set of parameters and use the maximum * size. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCCtx(). + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). */ -size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); /** - * struct ZSTD_CCtx - the zstd compression context - * - * When compressing many times it is recommended to allocate a context just once - * and reuse it for each successive compression operation. - */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -/** - * ZSTD_initCCtx() - initialize a zstd compression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to - * determine how large the workspace must be. - * - * Return: A compression context emplaced into workspace. 
- */ -ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_compressCCtx() - compress src into dst - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, ZSTD_parameters params); - -/** - * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx - * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDCtx(). - */ -size_t ZSTD_DCtxWorkspaceBound(void); - -/** - * struct ZSTD_DCtx - the zstd decompression context - * - * When decompressing many times it is recommended to allocate a context just - * once and reuse it for each successive decompression operation. - */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -/** - * ZSTD_initDCtx() - initialize a zstd decompression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to - * determine how large the workspace must be. - * - * Return: A decompression context emplaced into workspace. - */ -ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_decompressDCtx() - decompress zstd compressed src into dst - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. 
Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - -/*-************************ - * Simple dictionary API - **************************/ - -/** - * ZSTD_compress_usingDict() - compress src into dst using a dictionary - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @dict: The dictionary to use for compression. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Compression using a predefined dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params); - -/** - * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. 
If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @dict: The dictionary to use for decompression. The same dictionary - * must've been used to compress the data. - * @dictSize: The size of the dictionary. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize); - -/*-************************** - * Fast dictionary API - ***************************/ - -/** - * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict - * @cParams: The compression parameters to be used for compression. + * zstd_init_cctx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to + * determine how large the workspace must be. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCDict(). - */ -size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CDict - a digested dictionary to be used for compression + * Return: A zstd compression context or NULL on error. */ -typedef struct ZSTD_CDict_s ZSTD_CDict; +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); /** - * ZSTD_initCDict() - initialize a digested dictionary for compression - * @dictBuffer: The dictionary to digest. The buffer is referenced by the - * ZSTD_CDict so it must outlive the returned ZSTD_CDict. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * @workspace: The workspace. 
It must outlive the returned ZSTD_CDict. - * @workspaceSize: The workspace size. Must be at least - * ZSTD_CDictWorkspaceBound(params.cParams). + * zstd_compress_cctx() - compress src into dst with the initialized parameters + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @parameters: The compression parameters to be used. * - * When compressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_CDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_CDict. - * - * Return: The digested dictionary emplaced into workspace. + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). */ -ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize, - ZSTD_parameters params, void *workspace, size_t workspaceSize); +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters); -/** - * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(cParams) where - * cParams are the compression parameters used to initialize the - * cdict. - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @cdict: The digested dictionary to use for compression. - * @params: The parameters to use for compression. See ZSTD_getParams(). 
- * - * Compression using a digested dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const ZSTD_CDict *cdict); +/* ====== Single-pass Decompression ====== */ +typedef ZSTD_DCtx zstd_dctx; /** - * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict + * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDDict(). - */ -size_t ZSTD_DDictWorkspaceBound(void); - -/** - * struct ZSTD_DDict - a digested dictionary to be used for decompression + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_dctx(). */ -typedef struct ZSTD_DDict_s ZSTD_DDict; +size_t zstd_dctx_workspace_bound(void); /** - * ZSTD_initDDict() - initialize a digested dictionary for decompression - * @dictBuffer: The dictionary to digest. The buffer is referenced by the - * ZSTD_DDict so it must outlive the returned ZSTD_DDict. - * @dictSize: The size of the dictionary. - * @workspace: The workspace. It must outlive the returned ZSTD_DDict. - * @workspaceSize: The workspace size. Must be at least - * ZSTD_DDictWorkspaceBound(). - * - * When decompressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_DDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_DDict. + * zstd_init_dctx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to + * determine how large the workspace must be. * - * Return: The digested dictionary emplaced into workspace. 
+ * Return: A zstd decompression context or NULL on error. */ -ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize, - void *workspace, size_t workspaceSize); +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); /** - * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @ddict: The digested dictionary to use for decompression. The same - * dictionary must've been used to compress the data. + * zstd_decompress_dctx() - decompress zstd compressed src into dst + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). 
*/ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, - size_t dstCapacity, const void *src, size_t srcSize, - const ZSTD_DDict *ddict); +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size); - -/*-************************** - * Streaming - ***************************/ +/* ====== Streaming Buffers ====== */ /** - * struct ZSTD_inBuffer - input buffer for streaming + * struct zstd_in_buffer - input buffer for streaming * @src: Start of the input buffer. * @size: Size of the input buffer. * @pos: Position where reading stopped. Will be updated. * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. */ -typedef struct ZSTD_inBuffer_s { - const void *src; - size_t size; - size_t pos; -} ZSTD_inBuffer; +typedef ZSTD_inBuffer zstd_in_buffer; /** - * struct ZSTD_outBuffer - output buffer for streaming + * struct zstd_out_buffer - output buffer for streaming * @dst: Start of the output buffer. * @size: Size of the output buffer. * @pos: Position where writing stopped. Will be updated. * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. */ -typedef struct ZSTD_outBuffer_s { - void *dst; - size_t size; - size_t pos; -} ZSTD_outBuffer; +typedef ZSTD_outBuffer zstd_out_buffer; +/* ====== Streaming Compression ====== */ - -/*-***************************************************************************** - * Streaming compression - HowTo - * - * A ZSTD_CStream object is required to track streaming operation. - * Use ZSTD_initCStream() to initialize a ZSTD_CStream object. - * ZSTD_CStream objects can be reused multiple times on consecutive compression - * operations. It is recommended to re-use ZSTD_CStream in situations where many - * streaming operations will be achieved consecutively. Use one separate - * ZSTD_CStream per thread for parallel execution. - * - * Use ZSTD_compressStream() repetitively to consume input stream. - * The function will automatically update both `pos` fields. 
- * Note that it may not consume the entire input, in which case `pos < size`, - * and it's up to the caller to present again remaining data. - * It returns a hint for the preferred number of bytes to use as an input for - * the next function call. - * - * At any moment, it's possible to flush whatever data remains within internal - * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might - * still be some content left within the internal buffer if `output->size` is - * too small. It returns the number of bytes left in the internal buffer and - * must be called until it returns 0. - * - * ZSTD_endStream() instructs to finish a frame. It will perform a flush and - * write frame epilogue. The epilogue is required for decoders to consider a - * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush - * the full content if `output->size` is too small. In which case, call again - * ZSTD_endStream() to complete the flush. It returns the number of bytes left - * in the internal buffer and must be called until it returns 0. - ******************************************************************************/ +typedef ZSTD_CStream zstd_cstream; /** - * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream - * @cParams: The compression parameters to be used for compression. + * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream + * @cparams: The compression parameters to be used for compression. * * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCStream() and ZSTD_initCStream_usingCDict(). - */ -size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CStream - the zstd streaming compression context - */ -typedef struct ZSTD_CStream_s ZSTD_CStream; - -/*===== ZSTD_CStream management functions =====*/ -/** - * ZSTD_initCStream() - initialize a zstd streaming compression context - * @params: The zstd compression parameters. 
- * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must - * pass the source size (zero means empty source). Otherwise, - * the caller may optionally pass the source size, or zero if - * unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine - * how large the workspace must be. - * - * Return: The zstd streaming compression context. + * zstd_init_cstream(). */ -ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); /** - * ZSTD_initCStream_usingCDict() - initialize a streaming compression context - * @cdict: The digested dictionary to use for compression. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() - * with the cParams used to initialize the cdict to determine - * how large the workspace must be. + * zstd_init_cstream() - initialize a zstd streaming compression context + * @parameters: The zstd parameters to use for compression. + * @pledged_src_size: If parameters->fParams.contentSizeFlag == 1 then the caller + * must pass the source size (zero means empty source). + * Otherwise, the caller may optionally pass the source + * size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_cstream_workspace_bound(&parameters->cParams) to + * determine how large the workspace must be. * - * Return: The zstd streaming compression context. + * Return: The zstd streaming compression context or NULL on error.
*/ -ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size); -/*===== Streaming compression functions =====*/ /** - * ZSTD_resetCStream() - reset the context using parameters from creation - * @zcs: The zstd streaming compression context to reset. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * zstd_reset_cstream() - reset the context using parameters from creation + * @cstream: The zstd streaming compression context to reset. + * @pledged_src_size: Optionally the source size, or zero if unknown. * * Resets the context using the parameters from creation. Skips dictionary - * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * loading, since it can be reused. If `pledged_src_size` is non-zero the frame * content size is always written into the frame header. * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: Zero or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size); + /** - * ZSTD_compressStream() - streaming compress some of input into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * @input: Source buffer. `input->pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input->pos < input->size`, and it's up to the caller to present - * remaining data again. + * zstd_compress_stream() - streaming compress some of input into output + * @cstream: The zstd streaming compression context. 
+ * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data + * was read. Note that it may not consume the entire input, in which + * case `input->pos < input->size`, and it's up to the caller to + * present remaining data again. * * The `input` and `output` buffers may be any size. Guaranteed to make some * forward progress if `input` and `output` are not empty. * - * Return: A hint for the number of bytes to use as the input for the next - * function call or an error, which can be checked using - * ZSTD_isError(). + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, - ZSTD_inBuffer *input); +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input); + /** - * ZSTD_flushStream() - flush internal buffers into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. + * zstd_flush_stream() - flush internal buffers into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. * - * ZSTD_flushStream() must be called until it returns 0, meaning all the data - * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * zstd_flush_stream() must be called until it returns 0, meaning all the data + * has been flushed. Since zstd_flush_stream() causes a block to be ended, * calling it too often will degrade the compression ratio. * - * Return: The number of bytes still present within internal buffers or an - * error, which can be checked using ZSTD_isError(). 
+ * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). */ -size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -/** - * ZSTD_endStream() - flush internal buffers into output and end the frame - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * - * ZSTD_endStream() must be called until it returns 0, meaning all the data has - * been flushed and the frame epilogue has been written. - * - * Return: The number of bytes still present within internal buffers or an - * error, which can be checked using ZSTD_isError(). - */ -size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output); /** - * ZSTD_CStreamInSize() - recommended size for the input buffer - * - * Return: The recommended size for the input buffer. - */ -size_t ZSTD_CStreamInSize(void); -/** - * ZSTD_CStreamOutSize() - recommended size for the output buffer + * zstd_end_stream() - flush internal buffers into output and end the frame + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. * - * When the output buffer is at least this large, it is guaranteed to be large - * enough to flush at least one complete compressed block. + * zstd_end_stream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. * - * Return: The recommended size for the output buffer. + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). 
*/ -size_t ZSTD_CStreamOutSize(void); +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output); +/* ====== Streaming Decompression ====== */ - -/*-***************************************************************************** - * Streaming decompression - HowTo - * - * A ZSTD_DStream object is required to track streaming operations. - * Use ZSTD_initDStream() to initialize a ZSTD_DStream object. - * ZSTD_DStream objects can be re-used multiple times. - * - * Use ZSTD_decompressStream() repetitively to consume your input. - * The function will update both `pos` fields. - * If `input->pos < input->size`, some input has not been consumed. - * It's up to the caller to present again remaining data. - * If `output->pos < output->size`, decoder has flushed everything it could. - * Returns 0 iff a frame is completely decoded and fully flushed. - * Otherwise it returns a suggested next input size that will never load more - * than the current frame. - ******************************************************************************/ +typedef ZSTD_DStream zstd_dstream; /** - * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream - * @maxWindowSize: The maximum window size allowed for compressed frames. + * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream + * @max_window_size: The maximum window size allowed for compressed frames. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDStream() and ZSTD_initDStream_usingDDict(). + * Return: A lower bound on the size of the workspace that is passed + * to zstd_init_dstream(). 
*/ -size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); +size_t zstd_dstream_workspace_bound(size_t max_window_size); /** - * struct ZSTD_DStream - the zstd streaming decompression context - */ -typedef struct ZSTD_DStream_s ZSTD_DStream; -/*===== ZSTD_DStream management functions =====*/ -/** - * ZSTD_initDStream() - initialize a zstd streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. - * - * Return: The zstd streaming decompression context. - */ -ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, - size_t workspaceSize); -/** - * ZSTD_initDStream_usingDDict() - initialize streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @ddict: The digested dictionary to use for decompression. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. + * zstd_init_dstream() - initialize a zstd streaming decompression context + * @max_window_size: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_dstream_workspace_bound(max_window_size) to + * determine how large the workspace must be. * - * Return: The zstd streaming decompression context. + * Return: The zstd streaming decompression context.
*/ -ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, - const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size); -/*===== Streaming decompression functions =====*/ /** - * ZSTD_resetDStream() - reset the context using parameters from creation - * @zds: The zstd streaming decompression context to reset. + * zstd_reset_dstream() - reset the context using parameters from creation + * @dstream: The zstd streaming decompression context to reset. * * Resets the context using the parameters from creation. Skips dictionary * loading, since it can be reused. * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: Zero or an error, which can be checked using zstd_is_error(). */ -size_t ZSTD_resetDStream(ZSTD_DStream *zds); +size_t zstd_reset_dstream(zstd_dstream *dstream); + /** - * ZSTD_decompressStream() - streaming decompress some of input into output - * @zds: The zstd streaming decompression context. - * @output: Destination buffer. `output.pos` is updated to indicate how much - * decompressed data was written. - * @input: Source buffer. `input.pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input.pos < input.size`, and it's up to the caller to present - * remaining data again. + * zstd_decompress_stream() - streaming decompress some of input into output + * @dstream: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. * * The `input` and `output` buffers may be any size. 
Guaranteed to make some * forward progress if `input` and `output` are not empty. - * ZSTD_decompressStream() will not consume the last byte of the frame until + * zstd_decompress_stream() will not consume the last byte of the frame until * the entire frame is flushed. * - * Return: Returns 0 iff a frame is completely decoded and fully flushed. - * Otherwise returns a hint for the number of bytes to use as the input - * for the next function call or an error, which can be checked using - * ZSTD_isError(). The size hint will never load more than the frame. + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the + * input for the next function call or an error, which can be checked + * using zstd_is_error(). The size hint will never load more than the + * frame. */ -size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, - ZSTD_inBuffer *input); +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input); -/** - * ZSTD_DStreamInSize() - recommended size for the input buffer - * - * Return: The recommended size for the input buffer. - */ -size_t ZSTD_DStreamInSize(void); -/** - * ZSTD_DStreamOutSize() - recommended size for the output buffer - * - * When the output buffer is at least this large, it is guaranteed to be large - * enough to flush at least one complete decompressed block. - * - * Return: The recommended size for the output buffer. - */ -size_t ZSTD_DStreamOutSize(void); - - -/* --- Constants ---*/ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U - -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) - -#define ZSTD_WINDOWLOG_MAX_32 27 -#define ZSTD_WINDOWLOG_MAX_64 27 -#define ZSTD_WINDOWLOG_MAX \ - ((unsigned int)(sizeof(size_t) == 4 \ - ? 
ZSTD_WINDOWLOG_MAX_32 \ - : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_HASHLOG3_MAX 17 -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -/* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_SEARCHLENGTH_MAX 7 -/* only for ZSTD_btopt, other strategies are limited to 4 */ -#define ZSTD_SEARCHLENGTH_MIN 3 -#define ZSTD_TARGETLENGTH_MIN 4 -#define ZSTD_TARGETLENGTH_MAX 999 - -/* for static allocation */ -#define ZSTD_FRAMEHEADERSIZE_MAX 18 -#define ZSTD_FRAMEHEADERSIZE_MIN 6 -#define ZSTD_frameHeaderSize_prefix 5 -#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN -#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX -/* magic number + skippable frame length */ -#define ZSTD_skippableHeaderSize 8 - - -/*-************************************* - * Compressed size functions - **************************************/ - -/** - * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame - * @src: Source buffer. It should point to the start of a zstd encoded frame - * or a skippable frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * size of the frame. - * - * Return: The compressed size of the frame pointed to by `src` or an error, - * which can be check with ZSTD_isError(). - * Suitable to pass to ZSTD_decompress() or similar functions. - */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Decompressed size functions - **************************************/ -/** - * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header - * @src: It should point to the start of a zstd encoded frame. - * @srcSize: The size of the source buffer. 
It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. - * - * Return: The frame content size stored in the frame header if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the - * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. - */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/* ====== Frame Inspection Functions ====== */ /** - * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames - * @src: It should point to the start of a series of zstd encoded and/or - * skippable frames. - * @srcSize: The exact size of the series of frames. + * zstd_find_frame_compressed_size() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded + * frame or a skippable frame. + * @src_size: The size of the source buffer. It must be at least as large as the + * size of the frame. * - * If any zstd encoded frame in the series doesn't have the frame content size - * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always - * set when using ZSTD_compress(). The decompressed size can be very large. - * If the source is untrusted, the decompressed size could be wrong or - * intentionally modified. Always ensure the result fits within the - * application's authorized limits. ZSTD_findDecompressedSize() handles multiple - * frames, and so it must traverse the input to read each frame header. This is - * efficient as most of the data is skipped, however it does mean that all frame - * data must be present and valid. - * - * Return: Decompressed size of all the data contained in the frames if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. - * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. 
- */ -unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Advanced compression functions - **************************************/ -/** - * ZSTD_checkCParams() - ensure parameter values remain within authorized range - * @cParams: The zstd compression parameters. - * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be checked with zstd_is_error(). + * Suitable to pass to ZSTD_decompress() or similar functions. */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); /** - * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize - * @srcSize: Optionally the estimated source size, or zero if unknown. - * @dictSize: Optionally the estimated dictionary size, or zero if unknown. - * - * Return: The optimized parameters. - */ -ZSTD_compressionParameters ZSTD_adjustCParams( - ZSTD_compressionParameters cParams, unsigned long long srcSize, - size_t dictSize); - -/*--- Advanced decompression functions ---*/ - -/** - * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame - * @buffer: The source buffer to check. - * @size: The size of the source buffer, must be at least 4 bytes. - * - * Return: True iff the buffer starts with a zstd or skippable frame identifier. - */ -unsigned int ZSTD_isFrame(const void *buffer, size_t size); - -/** - * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary - * @dict: The dictionary buffer. - * @dictSize: The size of the dictionary buffer. - * - * Return: The dictionary id stored within the dictionary or 0 if the - * dictionary is not a zstd dictionary. If it returns 0 the - * dictionary can still be loaded as a content-only dictionary.
- */ -unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); - -/** - * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict - * @ddict: The ddict to find the id of. - * - * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not - * a zstd dictionary. If it returns 0 `ddict` will be loaded as a - * content-only dictionary. - */ -unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); - -/** - * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame - * @src: Source buffer. It must be a zstd encoded frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. - * - * Return: The dictionary id required to decompress the frame stored within - * `src` or 0 if the dictionary id could not be decoded. It can return - * 0 if the frame does not require a dictionary, the dictionary id - * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` - * is too small. - */ -unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); - -/** - * struct ZSTD_frameParams - zstd frame parameters stored in the frame header - * @frameContentSize: The frame content size, or 0 if not present. + * struct zstd_frame_header - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not + * present. * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @blockSizeMax: The maximum block size. + * @frameType: The frame type (zstd or skippable) + * @headerSize: The size of the frame header. * @dictID: The dictionary id, or 0 if not present. * @checksumFlag: Whether a checksum was used. + * + * See zstd_lib.h.
*/ -typedef struct { - unsigned long long frameContentSize; - unsigned int windowSize; - unsigned int dictID; - unsigned int checksumFlag; -} ZSTD_frameParams; +typedef ZSTD_frameHeader zstd_frame_header; /** - * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame - * @fparamsPtr: On success the frame parameters are written here. - * @src: The source buffer. It must point to a zstd or skippable frame. - * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is - * always large enough to succeed. + * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame + * @params: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @src_size: The size of the source buffer. * - * Return: 0 on success. If more data is required it returns how many bytes - * must be provided to make forward progress. Otherwise it returns - * an error, which can be checked using ZSTD_isError(). + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using zstd_is_error(). */ -size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, - size_t srcSize); - -/*-***************************************************************************** - * Buffer-less and synchronous inner streaming functions - * - * This is an advanced API, giving full control over buffer management, for - * users which need direct control over memory. - * But it's also a complex one, with many restrictions (documented below). 
- * Prefer using normal streaming API for an easier experience - ******************************************************************************/ - -/*-***************************************************************************** - * Buffer-less streaming compression (synchronous mode) - * - * A ZSTD_CCtx object is required to track streaming operations. - * Use ZSTD_initCCtx() to initialize a context. - * ZSTD_CCtx object can be re-used multiple times within successive compression - * operations. - * - * Start by initializing a context. - * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary - * compression, - * or ZSTD_compressBegin_advanced(), for finer parameter control. - * It's also possible to duplicate a reference context which has already been - * initialized, using ZSTD_copyCCtx() - * - * Then, consume your input using ZSTD_compressContinue(). - * There are some important considerations to keep in mind when using this - * advanced function : - * - ZSTD_compressContinue() has no internal buffer. It uses externally provided - * buffer only. - * - Interface is synchronous : input is consumed entirely and produce 1+ - * (or more) compressed blocks. - * - Caller must ensure there is enough space in `dst` to store compressed data - * under worst case scenario. Worst case evaluation is provided by - * ZSTD_compressBound(). - * ZSTD_compressContinue() doesn't guarantee recover after a failed - * compression. - * - ZSTD_compressContinue() presumes prior input ***is still accessible and - * unmodified*** (up to maximum distance size, see WindowLog). - * It remembers all previous contiguous blocks, plus one separated memory - * segment (which can itself consists of multiple contiguous blocks) - * - ZSTD_compressContinue() detects that prior input has been overwritten when - * `src` buffer overlaps. In which case, it will "discard" the relevant memory - * section from its history. 
- * - * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) - * and optional checksum. It's possible to use srcSize==0, in which case, it - * will write a final empty block to end the frame. Without last block mark, - * frames will be considered unfinished (corrupted) by decoders. - * - * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new - * frame. - ******************************************************************************/ - -/*===== Buffer-less streaming compression functions =====*/ -size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, int compressionLevel); -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, ZSTD_parameters params, - unsigned long long pledgedSrcSize); -size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - - - -/*-***************************************************************************** - * Buffer-less streaming decompression (synchronous mode) - * - * A ZSTD_DCtx object is required to track streaming operations. - * Use ZSTD_initDCtx() to initialize a context. - * A ZSTD_DCtx object can be re-used multiple times. - * - * First typical operation is to retrieve frame parameters, using - * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide - * important information to correctly decode the frame, such as the minimum - * rolling buffer size to allocate to decompress data (`windowSize`), and the - * dictionary ID used. 
- * Note: content size is optional, it may not be present. 0 means unknown. - * Note that these values could be wrong, either because of data malformation, - * or because an attacker is spoofing deliberate false information. As a - * consequence, check that values remain within valid application range, - * especially `windowSize`, before allocation. Each application can set its own - * limit, depending on local restrictions. For extended interoperability, it is - * recommended to support at least 8 MB. - * Frame parameters are extracted from the beginning of the compressed frame. - * Data fragment must be large enough to ensure successful decoding, typically - * `ZSTD_frameHeaderSize_max` bytes. - * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. - * >0: `srcSize` is too small, provide at least this many bytes. - * errorCode, which can be tested using ZSTD_isError(). - * - * Start decompression, with ZSTD_decompressBegin() or - * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared - * context, using ZSTD_copyDCtx(). - * - * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() - * alternatively. - * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' - * to ZSTD_decompressContinue(). - * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will - * fail. - * - * The result of ZSTD_decompressContinue() is the number of bytes regenerated - * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an - * error; it just means ZSTD_decompressContinue() has decoded some metadata - * item. It can also be an error code, which can be tested with ZSTD_isError(). - * - * ZSTD_decompressContinue() needs previous data blocks during decompression, up - * to `windowSize`. They should preferably be located contiguously, prior to - * current block. Alternatively, a round buffer of sufficient size is also - * possible. 
Sufficient size is determined by frame parameters. - * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't - * follow each other, make sure that either the compressor breaks contiguity at - * the same place, or that previous contiguous segment is large enough to - * properly handle maximum back-reference. - * - * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - * Context can then be reset to start a new decompression. - * - * Note: it's possible to know if next input to present is a header or a block, - * using ZSTD_nextInputType(). This information is not required to properly - * decode a frame. - * - * == Special case: skippable frames == - * - * Skippable frames allow integration of user-defined data into a flow of - * concatenated frames. Skippable frames will be ignored (skipped) by a - * decompressor. The format of skippable frames is as follows: - * a) Skippable frame ID - 4 Bytes, Little endian format, any value from - * 0x184D2A50 to 0x184D2A5F - * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - * c) Frame Content - any content (User Data) of length equal to Frame Size - * For skippable frames ZSTD_decompressContinue() always returns 0. - * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 - * what means that a frame is skippable. - * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might - * actually be a zstd encoded frame with no content. For purposes of - * decompression, it is valid in both cases to skip the frame using - * ZSTD_findFrameCompressedSize() to find its size in bytes. - * It also returns frame size as fparamsPtr->frameContentSize. 
- ******************************************************************************/ - -/*===== Buffer-less streaming decompression functions =====*/ -size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, - size_t dictSize); -void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); -size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -typedef enum { - ZSTDnit_frameHeader, - ZSTDnit_blockHeader, - ZSTDnit_block, - ZSTDnit_lastBlock, - ZSTDnit_checksum, - ZSTDnit_skippableFrame -} ZSTD_nextInputType_e; -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); - -/*-***************************************************************************** - * Block functions - * - * Block functions produce and decode raw zstd blocks, without frame metadata. - * Frame metadata cost is typically ~18 bytes, which can be non-negligible for - * very small blocks (< 100 bytes). User will have to take in charge required - * information to regenerate data, such as compressed and content sizes. - * - * A few rules to respect: - * - Compressing and decompressing require a context structure - * + Use ZSTD_initCCtx() and ZSTD_initDCtx() - * - It is necessary to init context before starting - * + compression : ZSTD_compressBegin() - * + decompression : ZSTD_decompressBegin() - * + variants _usingDict() are also allowed - * + copyCCtx() and copyDCtx() work too - * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - * + If you need to compress more, cut data into multiple blocks - * + Consider using the regular ZSTD_compress() instead, as frame metadata - * costs become negligible when source size is large. - * - When a block is considered not compressible enough, ZSTD_compressBlock() - * result will be zero. In which case, nothing is produced into `dst`. 
- * + User must test for such outcome and deal directly with uncompressed data - * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! - * + In case of multiple successive blocks, decoder must be informed of - * uncompressed block existence to follow proper history. Use - * ZSTD_insertBlock() in such a case. - ******************************************************************************/ - -/* Define for static allocation */ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) -/*===== Raw zstd block functions =====*/ -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); -size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, - size_t blockSize); +size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, + size_t src_size); -#endif /* ZSTD_H */ +#endif /* LINUX_ZSTD_H */ diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h new file mode 100644 index 000000000000..58b6dd45a969 --- /dev/null +++ b/include/linux/zstd_errors.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + + +/*===== dependency =====*/ +#include <linux/types.h> /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + 
ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h new file mode 100644 index 000000000000..b8c7dbf98390 --- /dev/null +++ b/include/linux/zstd_lib.h @@ -0,0 +1,2432 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include <linux/limits.h> /* INT_MAX */ +#include <linux/types.h> /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#define ZSTDLIB_VISIBILITY +#define ZSTDLIB_API ZSTDLIB_VISIBILITY + + +/* ***************************************************************************** + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. 
+ + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 10 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! 
ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. */ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) + + + +/* ************************************* +* Simple API +***************************************/ +/*! ZSTD_compress() : + * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity`), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()).
*/ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx(), ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits.
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/* ************************************* +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. 
+ */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive decompression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/* ************************************* +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supersedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future.
+ Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. 
+ * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. 
+ * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. 
+ * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. 
+ * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. 
+ * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). 
+ * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. 
+ * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity`), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/* ************************************* +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one decompression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/* ************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /*< start of input buffer */ + size_t size; /*< size of input buffer */ + size_t pos; /*< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /*< start of output buffer */ + size_t size; /*< size of output buffer */ + size_t pos; /*< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. 
+* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. 
+* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /*< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + * note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. 
+ * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends up being spent more in the interface than in compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /*< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * The following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error).
+ */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repeatedly to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers. +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /*< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/* ************************ +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. 
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/* ********************************* + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. 
*/ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/* ****************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. 
*/ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could be for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/* ***************************************************************************** + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and + * only reset when the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. 
+ * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. 
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. 
+ * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/* ************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size, + * to preserve host's memory from unreasonable requirements. + * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,). + * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */ + + +/* LDM parameter bounds */ +#define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX +#define ZSTD_LDM_MINMATCH_MIN 4 +#define ZSTD_LDM_MINMATCH_MAX 4096 +#define ZSTD_LDM_BUCKETSIZELOG_MIN 1 +#define ZSTD_LDM_BUCKETSIZELOG_MAX 8 +#define ZSTD_LDM_HASHRATELOG_MIN 0 +#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) + +/* Advanced parameter bounds */ +#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_SRCSIZEHINT_MIN 0 +#define ZSTD_SRCSIZEHINT_MAX INT_MAX + +/* internal */ +#define ZSTD_HASHLOG3_MAX 17 + + +/* --- Advanced types --- */ + +typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; + +typedef struct { + unsigned int offset; /* The offset of the match. 
(NOT the same as the offset code) + * If offset == 0 and matchLength == 0, this sequence represents the last + * literals in the block of litLength size. + */ + + unsigned int litLength; /* Literal length of the sequence. */ + unsigned int matchLength; /* Match length of the sequence. */ + + /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0. + * In this case, we will treat the sequence as a marker for a block boundary. + */ + + unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'. + * Ranges from [0, 3]. + * + * Repeat offsets are essentially previous offsets from previous sequences sorted in + * recency order. For more detail, see doc/zstd_compression_format.md + * + * If rep == 0, then 'offset' does not contain a repeat offset. + * If rep > 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /*< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /*< dispatch table : larger == faster, more memory */ + unsigned searchLog; /*< nb of searches : larger == more compression, slower */ + unsigned minMatch; /*< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /*< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /*< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /*< 1: content size will be in frame header (when known) */ + int checksumFlag; /*< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /*< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /*< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /*< Reference dictionary content -- the dictionary buffer must outlive its users. 
*/ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. 
+ * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /*< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /*< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /*< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/* ************************************* +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. 
+ * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into the literals of the next sequence. 
+ * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. 
It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. 
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/* ************************************* +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, whose additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. 
+ */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /*< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones. 
+ */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +__attribute__((__unused__)) +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/* ************************************* +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_CCtx_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary.
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior!
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. 
+ * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). 
+ * + * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, compression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST + * not be modified during compression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_inBuffer to find + * matches. Normally zstd maintains its own window buffer for this purpose, + * but passing this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 + +/* ZSTD_c_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_outBuffer will not be resized between + * calls. Specifically: (out.size - out.pos) will never grow. This gives the + * compressor the freedom to say: If the compressed data doesn't fit in the + * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to + * always compress directly into the output buffer, instead of compressing + * into an internal buffer and copying to the output buffer. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer. It will still allocate the + * input window buffer (see ZSTD_c_stableInBuffer). + * + * Zstd will check that (out.size - out.pos) never grows and return an error + * if it does. While not strictly necessary, this should prevent surprises. + */ +#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10 + +/* ZSTD_c_blockDelimiters + * Default is 0 == ZSTD_sf_noBlockDelimiters. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * + * Designates whether or not the given array of ZSTD_Sequence contains block delimiters + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics. + */ +#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 + +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression.
+ */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. 
+ * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/* ************************************* +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around. + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer.
+ */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers. + * + * Param has values of type ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such as ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()).
*/ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/* ****************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! 
ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*!
ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_getFrameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*!
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/* ******************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/* + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). 
+ ZSTD_compressContinue() doesn't guarantee recovery after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. 
If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/* + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. 
+ + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + whose maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are set up, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 
+ ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! 
ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/* Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
+ But users will have to take charge of the metadata needed to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up the statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. 
+*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); /*< block size limit : each block must be <= ZSTD_getBlockSize(), itself <= ZSTD_BLOCKSIZE_MAX */ +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /*< result is 0 when the block is considered not compressible enough : nothing is produced into `dst`, and the caller must deal with the uncompressed data directly */ +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /*< doesn't accept uncompressed data as input : use ZSTD_insertBlock() for uncompressed blocks */ +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + |