From fd8be27e50e04f6e80af0f3e327cced525558256 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 25 Feb 2022 21:51:35 +0100 Subject: efifb: Remove redundant efifb_setup_from_dmi stub efifb is the only user of efifb_setup_from_dmi, which is provided by sysfb, which in turn is selected by efifb. That makes the stub redundant. Signed-off-by: Michal Suchanek Reviewed-by: Javier Martinez Canillas Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/7416c439d68e9e96068ea5c77e05c99c7df41750.1645822213.git.msuchanek@suse.de --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index ccd4d3f91c98..0cbbc4103632 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1329,10 +1329,6 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( } #endif -#ifdef CONFIG_SYSFB extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); -#else -static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } -#endif #endif /* _LINUX_EFI_H */ -- cgit From dc55e35f9e810f23dd69cfdc91a3d636023f57a2 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 14 Feb 2022 19:18:14 +0100 Subject: ipc: Store mqueue sysctls in the ipc namespace Right now, the mqueue sysctls take ipc namespaces into account in a rather hacky way. This works in most cases, but does not respect the user namespace. Within the user namespace, the user cannot change the /proc/sys/fs/mqueue/* parameters. This poses a problem for rootless containers. To solve this, I changed the implementation of the mqueue sysctls to follow the approach already used by some other sysctls. So far, the changes do not provide additional access to files. This will be done in a future patch. v3: * Don't implement set_permissions to keep the current behavior. v2: * Fixed compilation problem if CONFIG_POSIX_MQUEUE_SYSCTL is not specified. Reported-by: kernel test robot Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/b0ccbb2489119f1f20c737cf1930c3a9c4e4243a.1644862280.git.legion@kernel.org Signed-off-by: Eric W.
Biederman --- include/linux/ipc_namespace.h | 16 ++++-- ipc/mq_sysctl.c | 121 +++++++++++++++++++++++------------------- ipc/mqueue.c | 10 ++-- ipc/namespace.c | 6 +++ 4 files changed, 88 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index b75395ec8d52..fa787d97d60a 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -10,6 +10,7 @@ #include #include #include +#include struct user_namespace; @@ -63,6 +64,9 @@ struct ipc_namespace { unsigned int mq_msg_default; unsigned int mq_msgsize_default; + struct ctl_table_set mq_set; + struct ctl_table_header *mq_sysctls; + /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; struct ucounts *ucounts; @@ -169,14 +173,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) #ifdef CONFIG_POSIX_MQUEUE_SYSCTL -struct ctl_table_header; -extern struct ctl_table_header *mq_register_sysctl_table(void); +void retire_mq_sysctls(struct ipc_namespace *ns); +bool setup_mq_sysctls(struct ipc_namespace *ns); #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ -static inline struct ctl_table_header *mq_register_sysctl_table(void) +static inline void retire_mq_sysctls(struct ipc_namespace *ns) { - return NULL; +} + +static inline bool setup_mq_sysctls(struct ipc_namespace *ns) +{ + return true; } #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 72a92a08c848..fbf6a8b93a26 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -9,39 +9,9 @@ #include #include -#ifdef CONFIG_PROC_SYSCTL -static void *get_mq(struct ctl_table *table) -{ - char *which = table->data; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; - return which; -} - -static int proc_mq_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table mq_table; - memcpy(&mq_table, table, sizeof(mq_table)); - mq_table.data = get_mq(table); - - return proc_dointvec(&mq_table, write, buffer, lenp, ppos); -} - -static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table mq_table; - memcpy(&mq_table, table, sizeof(mq_table)); - mq_table.data = get_mq(table); - - return proc_dointvec_minmax(&mq_table, write, buffer, - lenp, ppos); -} -#else -#define proc_mq_dointvec NULL -#define proc_mq_dointvec_minmax NULL -#endif +#include +#include +#include static int msg_max_limit_min = MIN_MSGMAX; static int msg_max_limit_max = HARD_MSGMAX; @@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = { .data = &init_ipc_ns.mq_queues_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "msg_max", .data = &init_ipc_ns.mq_msg_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &msg_max_limit_min, .extra2 = &msg_max_limit_max, }, @@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = { .data = &init_ipc_ns.mq_msgsize_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, }, @@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = { .data = &init_ipc_ns.mq_msg_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec_minmax, + .proc_handler = 
proc_dointvec_minmax, .extra1 = &msg_max_limit_min, .extra2 = &msg_max_limit_max, }, @@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = { .data = &init_ipc_ns.mq_msgsize_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, }, {} }; -static struct ctl_table mq_sysctl_dir[] = { - { - .procname = "mqueue", - .mode = 0555, - .child = mq_sysctls, - }, - {} -}; +static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +{ + return &current->nsproxy->ipc_ns->mq_set; +} -static struct ctl_table mq_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = mq_sysctl_dir, - }, - {} +static int set_is_seen(struct ctl_table_set *set) +{ + return &current->nsproxy->ipc_ns->mq_set == set; +} + +static struct ctl_table_root set_root = { + .lookup = set_lookup, }; -struct ctl_table_header *mq_register_sysctl_table(void) +bool setup_mq_sysctls(struct ipc_namespace *ns) { - return register_sysctl_table(mq_sysctl_root); + struct ctl_table *tbl; + + setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); + + tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); + if (tbl) { + int i; + + for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) { + if (tbl[i].data == &init_ipc_ns.mq_queues_max) + tbl[i].data = &ns->mq_queues_max; + + else if (tbl[i].data == &init_ipc_ns.mq_msg_max) + tbl[i].data = &ns->mq_msg_max; + + else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max) + tbl[i].data = &ns->mq_msgsize_max; + + else if (tbl[i].data == &init_ipc_ns.mq_msg_default) + tbl[i].data = &ns->mq_msg_default; + + else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default) + tbl[i].data = &ns->mq_msgsize_default; + else + tbl[i].data = NULL; + } + + ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); + } + if (!ns->mq_sysctls) { + kfree(tbl); + retire_sysctl_set(&ns->mq_set); + return false; + } + + return true; +} + +void retire_mq_sysctls(struct ipc_namespace *ns) +{ + struct ctl_table *tbl; + + tbl = ns->mq_sysctls->ctl_table_arg; + unregister_sysctl_table(ns->mq_sysctls); + retire_sysctl_set(&ns->mq_set); + kfree(tbl); } diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5becca9be867..1b4a3be71636 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -163,8 +163,6 @@ static void remove_notification(struct mqueue_inode_info *info); static struct kmem_cache *mqueue_inode_cachep; -static struct ctl_table_header *mq_sysctl_table; - static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) { return container_of(inode, struct mqueue_inode_info, vfs_inode); } @@ -1713,8 +1711,10 @@ static int __init init_mqueue_fs(void) if (mqueue_inode_cachep == NULL) return -ENOMEM; - /* ignore failures - they are not fatal */ - mq_sysctl_table = mq_register_sysctl_table(); + if (!setup_mq_sysctls(&init_ipc_ns)) { + pr_warn("sysctl registration failed\n"); + return -ENOMEM; + } error = register_filesystem(&mqueue_fs_type); if (error) @@ -1731,8 +1731,6 @@ static int __init init_mqueue_fs(void) out_filesystem: unregister_filesystem(&mqueue_fs_type); out_sysctl: - if (mq_sysctl_table) - unregister_sysctl_table(mq_sysctl_table); kmem_cache_destroy(mqueue_inode_cachep); return error; } diff --git a/ipc/namespace.c b/ipc/namespace.c index ae83f0f2651b..f760243ca685 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -59,6 +59,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (err) goto fail_put; + err = -ENOMEM; + if (!setup_mq_sysctls(ns)) +
goto fail_put; + sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -125,6 +129,8 @@ static void free_ipc_ns(struct ipc_namespace *ns) msg_exit_ns(ns); shm_exit_ns(ns); + retire_mq_sysctls(ns); + dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); -- cgit From 1f5c135ee509e89e0cc274333a65f73c62cb16e5 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 14 Feb 2022 19:18:15 +0100 Subject: ipc: Store ipc sysctls in the ipc namespace The ipc sysctls are not available for modification inside the user namespace. Following the mqueue sysctls, we changed the implementation to be more userns friendly. So far, the changes do not provide additional access to files. This will be done in a future patch. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/be6f9d014276f4dddd0c3aa05a86052856c1c555.1644862280.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- include/linux/ipc_namespace.h | 21 +++++ ipc/ipc_sysctl.c | 189 +++++++++++++++++++++++++++--------------- ipc/namespace.c | 4 + 3 files changed, 147 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index fa787d97d60a..e3e8c8662b49 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -67,6 +67,9 @@ struct ipc_namespace { struct ctl_table_set mq_set; struct ctl_table_header *mq_sysctls; + struct ctl_table_set ipc_set; + struct ctl_table_header *ipc_sysctls; + /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; struct ucounts *ucounts; @@ -188,4 +191,22 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns) } #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ + +#ifdef CONFIG_SYSVIPC_SYSCTL + +bool setup_ipc_sysctls(struct ipc_namespace *ns); +void retire_ipc_sysctls(struct ipc_namespace *ns); + +#else /* CONFIG_SYSVIPC_SYSCTL */ + +static inline void retire_ipc_sysctls(struct ipc_namespace *ns) +{ +} + +static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) +{ + return true; +} + +#endif /* CONFIG_SYSVIPC_SYSCTL */ #endif diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index f101c171753f..15210ac47e9e 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -13,43 +13,22 @@ #include #include #include +#include #include "util.h" -static void *get_ipc(struct ctl_table *table) -{ - char *which = table->data; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; - return which; -} - -static int proc_ipc_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); -} - -static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, +static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct ipc_namespace *ns = table->extra1; struct ctl_table ipc_table; + int err; memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); -} + ipc_table.extra1 = SYSCTL_ZERO; + ipc_table.extra2 = SYSCTL_ONE; -static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - int err = proc_ipc_dointvec_minmax(table, write, 
buffer, lenp, ppos); + err = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (err < 0) return err; @@ -58,17 +37,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, return err; } -static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table ipc_table; - memcpy(&ipc_table, table, sizeof(ipc_table)); - ipc_table.data = get_ipc(table); - - return proc_doulongvec_minmax(&ipc_table, write, buffer, - lenp, ppos); -} - static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -87,11 +55,17 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct ipc_namespace *ns = table->extra1; + struct ctl_table ipc_table; int ret, semmni; - struct ipc_namespace *ns = current->nsproxy->ipc_ns; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + + ipc_table.extra1 = NULL; + ipc_table.extra2 = NULL; semmni = ns->sem_ctls[3]; - ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret) ret = sem_check_semmni(current->nsproxy->ipc_ns); @@ -108,12 +82,18 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; + struct ipc_namespace *ns = table->extra1; + struct ctl_table ipc_table; - if (write && !checkpoint_restore_ns_capable(user_ns)) + if (write && !checkpoint_restore_ns_capable(ns->user_ns)) return -EPERM; - return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + memcpy(&ipc_table, table, sizeof(ipc_table)); + + ipc_table.extra1 = SYSCTL_ZERO; + ipc_table.extra2 = SYSCTL_INT_MAX; + + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); } #endif @@ -121,27 +101,27 @@ int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -static struct ctl_table ipc_kern_table[] = { +static struct ctl_table ipc_sysctls[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, .maxlen = sizeof(init_ipc_ns.shm_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, .maxlen = sizeof(init_ipc_ns.shm_ctlall), .mode = 0644, - .proc_handler = proc_ipc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, .maxlen = sizeof(init_ipc_ns.shm_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, @@ -151,15 +131,13 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), .mode = 0644, .proc_handler = proc_ipc_dointvec_minmax_orphans, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, }, { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof(init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -168,7 +146,7 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmni, .maxlen = 
sizeof(init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &ipc_mni, }, @@ -186,7 +164,7 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -204,8 +182,6 @@ static struct ctl_table ipc_kern_table[] = { .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), .mode = 0666, .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, }, { .procname = "msg_next_id", - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, }, { .procname = "shm_next_id", - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, }, #endif {} }; -static struct ctl_table ipc_root_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = ipc_kern_table, - }, - {} +static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +{ + return &current->nsproxy->ipc_ns->ipc_set; +} + +static int set_is_seen(struct ctl_table_set *set) +{ + return &current->nsproxy->ipc_ns->ipc_set == set; +} + +static struct ctl_table_root set_root = { + .lookup = set_lookup, }; +bool setup_ipc_sysctls(struct ipc_namespace *ns) +{ + struct ctl_table *tbl; + + setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); + + tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); + if (tbl) { + int i; + + for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { + if (tbl[i].data == &init_ipc_ns.shm_ctlmax) { + tbl[i].data = &ns->shm_ctlmax; + + } else if (tbl[i].data == &init_ipc_ns.shm_ctlall) { + tbl[i].data = &ns->shm_ctlall; + + } else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) { + tbl[i].data = &ns->shm_ctlmni; + + } else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) { + tbl[i].data = &ns->shm_rmid_forced; + tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) { + tbl[i].data = &ns->msg_ctlmax; + + } else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) { + tbl[i].data = &ns->msg_ctlmni; + + } else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) { + tbl[i].data = &ns->msg_ctlmnb; + + } else if (tbl[i].data == &init_ipc_ns.sem_ctls) { + tbl[i].data = &ns->sem_ctls; + tbl[i].extra1 = ns; +#ifdef CONFIG_CHECKPOINT_RESTORE + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; + tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; + tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; + tbl[i].extra1 = ns; +#endif + } else { + tbl[i].data = NULL; + } + } + + ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); + } + if (!ns->ipc_sysctls) { + kfree(tbl); + retire_sysctl_set(&ns->ipc_set); + return false; + } + + return true; +} + +void retire_ipc_sysctls(struct ipc_namespace *ns) +{ + struct ctl_table *tbl; + + tbl =
ns->ipc_sysctls->ctl_table_arg; + unregister_sysctl_table(ns->ipc_sysctls); + retire_sysctl_set(&ns->ipc_set); + kfree(tbl); +} + static int __init ipc_sysctl_init(void) { - register_sysctl_table(ipc_root_table); + if (!setup_ipc_sysctls(&init_ipc_ns)) { + pr_warn("ipc sysctl registration failed\n"); + return -ENOMEM; + } return 0; } diff --git a/ipc/namespace.c b/ipc/namespace.c index f760243ca685..754f3237194a 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -63,6 +63,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (!setup_mq_sysctls(ns)) goto fail_put; + if (!setup_ipc_sysctls(ns)) + goto fail_put; + sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -130,6 +133,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) shm_exit_ns(ns); retire_mq_sysctls(ns); + retire_ipc_sysctls(ns); dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); -- cgit From 548e7432dc2da475a18077b612e8d55b8ff51891 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 24 Sep 2021 10:55:45 +0200 Subject: dma-buf: add dma_resv_replace_fences v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function allows replacing fences in the shared fence list when we can guarantee that the operation represented by the original fence has finished, or that there are no more accesses to the resources protected by the dma_resv object, by the time the new fence finishes. Then use this function in the amdkfd code when BOs are unmapped from the process. v2: add an example of when this is useful. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220321135856.1331-1-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 45 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 49 ++++-------------------- include/linux/dma-resv.h | 2 + 3 files changed, 54 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b51416405e86..509060861cf3 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -289,6 +289,51 @@ replace: } EXPORT_SYMBOL(dma_resv_add_shared_fence); +/** + * dma_resv_replace_fences - replace fences in the dma_resv obj + * @obj: the reservation object + * @context: the context of the fences to replace + * @replacement: the new fence to use instead + * + * Replace fences with a specified context with a new fence. Only valid if the + * operation represented by the original fence no longer has access to the + * resources represented by the dma_resv object when the new fence completes. + * + * An example for using this is replacing a preemption fence with a page table + * update fence which makes the resource inaccessible.
+ */ +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, + struct dma_fence *replacement) +{ + struct dma_resv_list *list; + struct dma_fence *old; + unsigned int i; + + dma_resv_assert_held(obj); + + write_seqcount_begin(&obj->seq); + + old = dma_resv_excl_fence(obj); + if (old->context == context) { + RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement)); + dma_fence_put(old); + } + + list = dma_resv_shared_list(obj); + for (i = 0; list && i < list->shared_count; ++i) { + old = rcu_dereference_protected(list->shared[i], + dma_resv_held(obj)); + if (old->context != context) + continue; + + rcu_assign_pointer(list->shared[i], dma_fence_get(replacement)); + dma_fence_put(old); + } + + write_seqcount_end(&obj->seq); +} +EXPORT_SYMBOL(dma_resv_replace_fences); + /** * dma_resv_add_excl_fence - Add an exclusive fence. * @obj: the reservation object diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f9bab963a948..b6f266f612ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct amdgpu_amdkfd_fence *ef) { - struct dma_resv *resv = bo->tbo.base.resv; - struct dma_resv_list *old, *new; - unsigned int i, j, k; + struct dma_fence *replacement; if (!ef) return -EINVAL; - old = dma_resv_shared_list(resv); - if (!old) - return 0; - - new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL); - if (!new) - return -ENOMEM; - - /* Go through all the shared fences in the resevation object and sort - * the interesting ones to the end of the list. + /* TODO: Instead of block before we should use the fence of the page + * table update and TLB flush here directly. 
*/ - for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) { - struct dma_fence *f; - - f = rcu_dereference_protected(old->shared[i], - dma_resv_held(resv)); - - if (f->context == ef->base.context) - RCU_INIT_POINTER(new->shared[--j], f); - else - RCU_INIT_POINTER(new->shared[k++], f); - } - new->shared_max = old->shared_max; - new->shared_count = k; - - /* Install the new fence list, seqcount provides the barriers */ - write_seqcount_begin(&resv->seq); - RCU_INIT_POINTER(resv->fence, new); - write_seqcount_end(&resv->seq); - - /* Drop the references to the removed fences or move them to ef_list */ - for (i = j; i < old->shared_count; ++i) { - struct dma_fence *f; - - f = rcu_dereference_protected(new->shared[i], - dma_resv_held(resv)); - dma_fence_put(f); - } - kfree_rcu(old, rcu); - + replacement = dma_fence_get_stub(); + dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, + replacement); + dma_fence_put(replacement); return 0; } diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index afdfdfac729f..3f53177bdb46 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -468,6 +468,8 @@ void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, + struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); int dma_resv_get_fences(struct dma_resv *obj, bool write, unsigned int *num_fences, struct dma_fence ***fences); -- cgit From 8938d48451f5d7cb565dfa68aa0bd0e81985da09 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 24 Sep 2021 14:19:22 +0200 Subject: dma-buf: finally make the dma_resv_list private v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers should never touch this directly. v2: drop kerneldoc for now internal handling Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220321135856.1331-2-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 11 +++++++++++ include/linux/dma-resv.h | 26 +------------------------- 2 files changed, 12 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 509060861cf3..5001e9b4420a 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -56,6 +56,12 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); +struct dma_resv_list { + struct rcu_head rcu; + u32 shared_count, shared_max; + struct dma_fence __rcu *shared[]; +}; + /** * dma_resv_list_alloc - allocate fence list * @shared_max: number of fences we need space for @@ -133,6 +139,11 @@ void dma_resv_fini(struct dma_resv *obj) } EXPORT_SYMBOL(dma_resv_fini); +static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) +{ + return rcu_dereference_check(obj->fence, dma_resv_held(obj)); +} + /** * dma_resv_reserve_shared - Reserve space to add shared fences to * a dma_resv. 
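With struct dma_resv_list private, drivers can only reach the shared fences through the public helpers. A minimal sketch of driver-side code under that constraint, using only the dma_resv_get_fences() declaration visible in the header hunk below; the function name and the wait policy are illustrative assumptions, not part of the patch:

/* Sketch: collect references to all fences through the public API and
 * wait on them, instead of touching the now-private dma_resv_list.
 */
static int example_wait_all_fences(struct dma_resv *resv)
{
	struct dma_fence **fences;
	unsigned int i, count;
	int r;

	/* write == true asks for the shared fences as well as the exclusive one */
	r = dma_resv_get_fences(resv, true, &count, &fences);
	if (r)
		return r;

	for (i = 0; i < count; i++) {
		dma_fence_wait(fences[i], false);
		dma_fence_put(fences[i]);
	}
	kfree(fences);
	return 0;
}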
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 3f53177bdb46..202cc65d0621 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -47,18 +47,7 @@ extern struct ww_class reservation_ww_class; -/** - * struct dma_resv_list - a list of shared fences - * @rcu: for internal use - * @shared_count: table of shared fences - * @shared_max: for growing shared fence table - * @shared: shared fence table - */ -struct dma_resv_list { - struct rcu_head rcu; - u32 shared_count, shared_max; - struct dma_fence __rcu *shared[]; -}; +struct dma_resv_list; /** * struct dma_resv - a reservation object manages fences for a buffer @@ -451,19 +440,6 @@ dma_resv_excl_fence(struct dma_resv *obj) return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); } -/** - * dma_resv_shared_list - get the reservation object's shared fence list - * @obj: the reservation object - * - * Returns the shared fence list. Caller must either hold the objects - * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(), - * or one of the variants of each - */ -static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) -{ - return rcu_dereference_check(obj->fence, dma_resv_held(obj)); -} - void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); -- cgit From 15325e3c1013035c2e3e266ba79a0c3bef905f25 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 11 Nov 2021 15:18:34 +0100 Subject: dma-buf: drop the DAG approach for the dma_resv object v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we had the approach of using a directed acyclic graph with the dma_resv obj. This turned out to have many downsides; in particular, it means that every single driver and user of this interface needs to be aware of this restriction when adding fences. If the rules for the DAG are not followed, we end up with potentially hard-to-debug memory corruption, information leaks or even elephant big security holes because we allow userspace to access freed up memory. Since we already took a step back from that by always looking at all fences, we now go a step further and stop dropping the shared fences when a new exclusive one is added. v2: Drop some now superfluous documentation v3: Add some more documentation for the new handling. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220321135856.1331-11-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 16 +--------------- include/linux/dma-buf.h | 4 +--- include/linux/dma-resv.h | 22 +++++----------------- 3 files changed, 7 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5001e9b4420a..be65522f0f47 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -351,35 +351,21 @@ EXPORT_SYMBOL(dma_resv_replace_fences); * @fence: the exclusive fence to add * * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock(). - * Note that this function replaces all fences attached to @obj, see also - * &dma_resv.fence_excl for a discussion of the semantics. + * See also &dma_resv.fence_excl for a discussion of the semantics.
*/ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) { struct dma_fence *old_fence = dma_resv_excl_fence(obj); - struct dma_resv_list *old; - u32 i = 0; dma_resv_assert_held(obj); - old = dma_resv_shared_list(obj); - if (old) - i = old->shared_count; - dma_fence_get(fence); write_seqcount_begin(&obj->seq); /* write_seqcount_begin provides the necessary memory barrier */ RCU_INIT_POINTER(obj->fence_excl, fence); - if (old) - old->shared_count = 0; write_seqcount_end(&obj->seq); - /* inplace update, no shared fences */ - while (i--) - dma_fence_put(rcu_dereference_protected(old->shared[i], - dma_resv_held(obj))); - dma_fence_put(old_fence); } EXPORT_SYMBOL(dma_resv_add_excl_fence); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 7ab50076e7a6..85ab5554425e 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -424,9 +424,7 @@ struct dma_buf { * IMPORTANT: * * All drivers must obey the struct dma_resv rules, specifically the - * rules for updating fences, see &dma_resv.fence_excl and - * &dma_resv.fence. If these dependency rules are broken access tracking - * can be lost resulting in use after free issues. + * rules for updating and obeying fences. */ struct dma_resv *resv; diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 202cc65d0621..dccaf7b1663e 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -93,23 +93,11 @@ struct dma_resv { * * The exclusive fence, if there is one currently. * - * There are two ways to update this fence: - * - * - First by calling dma_resv_add_excl_fence(), which replaces all - * fences attached to the reservation object. To guarantee that no - * fences are lost, this new fence must signal only after all previous - * fences, both shared and exclusive, have signalled. In some cases it - * is convenient to achieve that by attaching a struct dma_fence_array - * with all the new and old fences. - * - * - Alternatively the fence can be set directly, which leaves the - * shared fences unchanged. To guarantee that no fences are lost, this - * new fence must signal only after the previous exclusive fence has - * signalled. Since the shared fences are staying intact, it is not - * necessary to maintain any ordering against those. If semantically - * only a new access is added without actually treating the previous - * one as a dependency the exclusive fences can be strung together - * using struct dma_fence_chain. + * To guarantee that no fences are lost, this new fence must signal + * only after the previous exclusive fence has signalled. If + * semantically only a new access is added without actually treating the + * previous one as a dependency the exclusive fences can be strung + * together using struct dma_fence_chain. * * Note that actual semantics of what an exclusive or shared fence mean * is defined by the user, for reservation objects shared across drivers -- cgit From 8733068b9bdbc7a54f02dcc59eb0e4789cd60942 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:17 +0000 Subject: KVM: x86/xen: Make kvm_xen_set_evtchn() reusable from other places Clean it up to return -errno on error consistently, while still being compatible with the return conventions for kvm_arch_set_irq_inatomic() and the kvm_set_irq() callback. We use -ENOTCONN to indicate when the port is masked. No existing users care, except that it's negative. Also allow it to optimise the vCPU lookup. 
Unless we abuse the lapic map, there is no quick lookup from APIC ID to a vCPU; the logic in kvm_get_vcpu_by_id() will just iterate over all vCPUs till it finds the one it wants. So do that just once and stash the result in the struct kvm_xen_evtchn for next time. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-8-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 2 +- arch/x86/kvm/xen.c | 83 +++++++++++++++++++++++++++++++++--------------- arch/x86/kvm/xen.h | 2 +- include/linux/kvm_host.h | 3 +- 4 files changed, 62 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 6e0dab04320e..0687162c4f22 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -181,7 +181,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, if (!level) return -1; - return kvm_xen_set_evtchn_fast(e, kvm); + return kvm_xen_set_evtchn_fast(&e->xen_evtchn, kvm); #endif default: break; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 65ffba89441a..9c87263a5be2 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -859,13 +859,16 @@ static inline int max_evtchn_port(struct kvm *kvm) } /* - * This follows the kvm_set_irq() API, so it returns: + * The return value from this function is propagated to kvm_set_irq() API, + * so it returns: * < 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) * > 0 Number of CPUs interrupt was delivered to + * + * It is also called directly from kvm_arch_set_irq_inatomic(), where the + * only check on its return value is a comparison with -EWOULDBLOCK'. */ -int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm) +int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) { struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; struct kvm_vcpu *vcpu; @@ -873,18 +876,23 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e, unsigned long flags; int port_word_bit; bool kick_vcpu = false; - int idx; - int rc; + int vcpu_idx, idx, rc; - vcpu = kvm_get_vcpu_by_id(kvm, e->xen_evtchn.vcpu); - if (!vcpu) - return -1; + vcpu_idx = READ_ONCE(xe->vcpu_idx); + if (vcpu_idx >= 0) + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + else { + vcpu = kvm_get_vcpu_by_id(kvm, xe->vcpu_id); + if (!vcpu) + return -EINVAL; + WRITE_ONCE(xe->vcpu_idx, kvm_vcpu_get_idx(vcpu)); + } if (!vcpu->arch.xen.vcpu_info_cache.active) - return -1; + return -EINVAL; - if (e->xen_evtchn.port >= max_evtchn_port(kvm)) - return -1; + if (xe->port >= max_evtchn_port(kvm)) + return -EINVAL; rc = -EWOULDBLOCK; @@ -898,12 +906,12 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e, struct shared_info *shinfo = gpc->khva; pending_bits = (unsigned long *)&shinfo->evtchn_pending; mask_bits = (unsigned long *)&shinfo->evtchn_mask; - port_word_bit = e->xen_evtchn.port / 64; + port_word_bit = xe->port / 64; } else { struct compat_shared_info *shinfo = gpc->khva; pending_bits = (unsigned long *)&shinfo->evtchn_pending; mask_bits = (unsigned long *)&shinfo->evtchn_mask; - port_word_bit = e->xen_evtchn.port / 32; + port_word_bit = xe->port / 32; } /* @@ -913,10 +921,10 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e, * already set, then we kick the vCPU in question to write to the * *real* evtchn_pending_sel in its own guest vcpu_info struct. 
*/ - if (test_and_set_bit(e->xen_evtchn.port, pending_bits)) { + if (test_and_set_bit(xe->port, pending_bits)) { rc = 0; /* It was already raised */ - } else if (test_bit(e->xen_evtchn.port, mask_bits)) { - rc = -1; /* Masked */ + } else if (test_bit(xe->port, mask_bits)) { + rc = -ENOTCONN; /* Masked */ } else { rc = 1; /* Delivered to the bitmap in shared_info. */ /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */ @@ -962,17 +970,12 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e, return rc; } -/* This is the version called from kvm_set_irq() as the .set function */ -static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, - int irq_source_id, int level, bool line_status) +static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) { bool mm_borrowed = false; int rc; - if (!level) - return -1; - - rc = kvm_xen_set_evtchn_fast(e, kvm); + rc = kvm_xen_set_evtchn_fast(xe, kvm); if (rc != -EWOULDBLOCK) return rc; @@ -1016,7 +1019,7 @@ static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; int idx; - rc = kvm_xen_set_evtchn_fast(e, kvm); + rc = kvm_xen_set_evtchn_fast(xe, kvm); if (rc != -EWOULDBLOCK) break; @@ -1033,11 +1036,27 @@ static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm return rc; } +/* This is the version called from kvm_set_irq() as the .set function */ +static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + if (!level) + return -EINVAL; + + return kvm_xen_set_evtchn(&e->xen_evtchn, kvm); +} + +/* + * Set up an event channel interrupt from the KVM IRQ routing table. + * Used for e.g. PIRQ from passed through physical devices. + */ int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { + struct kvm_vcpu *vcpu; + if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) return -EINVAL; @@ -1045,8 +1064,22 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) return -EINVAL; + /* + * Xen gives us interesting mappings from vCPU index to APIC ID, + * which means kvm_get_vcpu_by_id() has to iterate over all vCPUs + * to find it. Do that once at setup time, instead of every time. + * But beware that on live update / live migration, the routing + * table might be reinstated before the vCPU threads have finished + * recreating their vCPUs. 
+ */ + vcpu = kvm_get_vcpu_by_id(kvm, ue->u.xen_evtchn.vcpu); + if (vcpu) + e->xen_evtchn.vcpu_idx = kvm_vcpu_get_idx(vcpu); + else + e->xen_evtchn.vcpu_idx = -1; + e->xen_evtchn.port = ue->u.xen_evtchn.port; - e->xen_evtchn.vcpu = ue->u.xen_evtchn.vcpu; + e->xen_evtchn.vcpu_id = ue->u.xen_evtchn.vcpu; e->xen_evtchn.priority = ue->u.xen_evtchn.priority; e->set = evtchn_set_fn; diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 7dd0590f93e1..e28feb32add6 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -25,7 +25,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); void kvm_xen_init_vm(struct kvm *kvm); void kvm_xen_destroy_vm(struct kvm *kvm); void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu); -int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e, +int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm); int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3f9b22c4983a..252ee4a61b58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -611,7 +611,8 @@ struct kvm_hv_sint { struct kvm_xen_evtchn { u32 port; - u32 vcpu; + u32 vcpu_id; + int vcpu_idx; u32 priority; }; -- cgit From 92cedee6a6a3e6fcc3ffc0e3866baae5f6f76ac1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 3 Nov 2021 10:02:08 +0100 Subject: dma-buf: add dma_resv_get_singleton v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function to simplify getting a single fence for all the fences in the dma_resv object. v2: fix ref leak in error handling Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220321135856.1331-3-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 2 ++ 2 files changed, 56 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index be65522f0f47..ff16da0a54ec 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -34,6 +34,7 @@ */ #include +#include #include #include #include @@ -636,6 +637,59 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, } EXPORT_SYMBOL_GPL(dma_resv_get_fences); +/** + * dma_resv_get_singleton - Get a single fence for all the fences + * @obj: the reservation object + * @write: true if we should return all fences + * @fence: the resulting fence + * + * Get a single fence representing all the fences inside the resv object. + * Returns either 0 for success or -ENOMEM. + * + * Warning: This can't be used like this when adding the fence back to the resv + * object since that can lead to stack corruption when finalizing the + * dma_fence_array. + * + * Returns 0 on success and negative error values on failure. 
+ */ +int dma_resv_get_singleton(struct dma_resv *obj, bool write, + struct dma_fence **fence) +{ + struct dma_fence_array *array; + struct dma_fence **fences; + unsigned count; + int r; + + r = dma_resv_get_fences(obj, write, &count, &fences); + if (r) + return r; + + if (count == 0) { + *fence = NULL; + return 0; + } + + if (count == 1) { + *fence = fences[0]; + kfree(fences); + return 0; + } + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) { + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; + } + + *fence = &array->base; + return 0; +} +EXPORT_SYMBOL_GPL(dma_resv_get_singleton); + /** * dma_resv_wait_timeout - Wait on reservation's objects * shared and/or exclusive fences. diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index dccaf7b1663e..233ed4f14d9e 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -437,6 +437,8 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); int dma_resv_get_fences(struct dma_resv *obj, bool write, unsigned int *num_fences, struct dma_fence ***fences); +int dma_resv_get_singleton(struct dma_resv *obj, bool write, + struct dma_fence **fence); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout); -- cgit From 8bea9af887de4c99a95f93f2ce400ef63e8b4e9b Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 22 Mar 2022 12:50:27 +0200 Subject: iio: adc: ad_sigma_delta: Add sequencer support Some sigma-delta chips support sampling of multiple channels in continuous mode. When operating with more than one channel enabled, the channel sequencer cycles through the enabled channels in sequential order, from the first channel to the last. If a channel is disabled, it is skipped by the sequencer. If more than one channel is used in continuous mode, instruct the device to append the status to the SPI transfer (1 extra byte) every time we receive a sample. All sigma-delta chips possessing a sampling sequencer have this ability. Inside the status register there will be the number of the converted channel. In this way, even if the CPU cannot keep up with the sampling rate, wrong channel samples won't be sent to userspace. When multiple channels are enabled in continuous mode, the device needs to perform a measurement on all slots before we can push the sample to userspace. If, during sequencing and data reading, a channel measurement is lost, a desync has occurred. In this case, ad_sigma_delta drops the incomplete sample and waits for the device to send the measurement on the first active slot.
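For a driver, opting into the sequencer mostly means filling in the new ad_sigma_delta_info fields and providing the two new callbacks. A hedged sketch for a hypothetical chip follows; every register address, bit mask and the slot count are invented for illustration, and only the ad_sigma_delta_info fields and ad_sd_write_reg() come from the existing API:

#define MYCHIP_REG_IFACE	0x02	/* hypothetical interface register */
#define MYCHIP_IFACE_DATA_STAT	BIT(6)	/* hypothetical "append status" bit */
#define MYCHIP_REG_CHANNEL_EN	0x09	/* hypothetical channel-enable register */

static int mychip_append_status(struct ad_sigma_delta *sd, bool append)
{
	return ad_sd_write_reg(sd, MYCHIP_REG_IFACE, 2,
			       append ? MYCHIP_IFACE_DATA_STAT : 0);
}

static int mychip_disable_all(struct ad_sigma_delta *sd)
{
	/* clear every channel-enable bit so the sequencer stops cycling */
	return ad_sd_write_reg(sd, MYCHIP_REG_CHANNEL_EN, 2, 0);
}

static const struct ad_sigma_delta_info mychip_sigma_delta_info = {
	.append_status	= mychip_append_status,
	.disable_all	= mychip_disable_all,
	.has_registers	= true,
	.status_ch_mask	= GENMASK(3, 0),	/* converted channel number in status */
	.num_slots	= 8,			/* sequencer depth */
};

Leaving num_slots unset keeps the single-slot behaviour; with more than one slot, ad_sd_init() below additionally requires the driver to implement update_scan_mode() and disable_all().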
Co-developed-by: Alexandru Tachici Signed-off-by: Alexandru Tachici Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20220322105029.86389-5-alexandru.tachici@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 143 ++++++++++++++++++++++++++++++--- include/linux/iio/adc/ad_sigma_delta.h | 38 +++++++++ 2 files changed, 171 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index ebcd52526cac..261a9a6b45e1 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -6,6 +6,7 @@ * Author: Lars-Peter Clausen */ +#include #include #include #include @@ -342,15 +343,49 @@ EXPORT_SYMBOL_NS_GPL(ad_sigma_delta_single_conversion, IIO_AD_SIGMA_DELTA); static int ad_sd_buffer_postenable(struct iio_dev *indio_dev) { struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); + unsigned int i, slot, samples_buf_size; unsigned int channel; + uint8_t *samples_buf; int ret; - channel = find_first_bit(indio_dev->active_scan_mask, - indio_dev->masklength); - ret = ad_sigma_delta_set_channel(sigma_delta, - indio_dev->channels[channel].address); - if (ret) - return ret; + if (sigma_delta->num_slots == 1) { + channel = find_first_bit(indio_dev->active_scan_mask, + indio_dev->masklength); + ret = ad_sigma_delta_set_channel(sigma_delta, + indio_dev->channels[channel].address); + if (ret) + return ret; + slot = 1; + } else { + /* + * At this point update_scan_mode already enabled the required channels. + * For sigma-delta sequencer drivers with multiple slots, an update_scan_mode + * implementation is mandatory. + */ + slot = 0; + for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) { + sigma_delta->slots[slot] = indio_dev->channels[i].address; + slot++; + } + } + + sigma_delta->active_slots = slot; + sigma_delta->current_slot = 0; + + if (sigma_delta->active_slots > 1) { + ret = ad_sigma_delta_append_status(sigma_delta, true); + if (ret) + return ret; + } + + samples_buf_size = ALIGN(slot * indio_dev->channels[0].scan_type.storagebits, 8); + samples_buf_size += sizeof(int64_t); + samples_buf = devm_krealloc(&sigma_delta->spi->dev, sigma_delta->samples_buf, + samples_buf_size, GFP_KERNEL); + if (!samples_buf) + return -ENOMEM; + + sigma_delta->samples_buf = samples_buf; spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; @@ -386,6 +421,10 @@ static int ad_sd_buffer_postdisable(struct iio_dev *indio_dev) sigma_delta->keep_cs_asserted = false; ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); + if (sigma_delta->status_appended) + ad_sigma_delta_append_status(sigma_delta, false); + + ad_sigma_delta_disable_all(sigma_delta); sigma_delta->bus_locked = false; return spi_bus_unlock(sigma_delta->spi->master); } @@ -396,6 +435,10 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); uint8_t *data = sigma_delta->rx_buf; + unsigned int transfer_size; + unsigned int sample_size; + unsigned int sample_pos; + unsigned int status_pos; unsigned int reg_size; unsigned int data_reg; @@ -408,21 +451,69 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) else data_reg = AD_SD_REG_DATA; + /* Status word will be appended to the sample during transfer */ + if (sigma_delta->status_appended) + transfer_size = reg_size + 1; + else + transfer_size = reg_size; + switch (reg_size) { case 4: case 
2: case 1: - ad_sd_read_reg_raw(sigma_delta, data_reg, reg_size, &data[0]); + status_pos = reg_size; + ad_sd_read_reg_raw(sigma_delta, data_reg, transfer_size, &data[0]); break; case 3: + /* + * Data array after transfer will look like (if status is appended): + * data[] = { [0][sample][sample][sample][status] } + * Keeping the first byte 0 shifts the status postion by 1 byte to the right. + */ + status_pos = reg_size + 1; + /* We store 24 bit samples in a 32 bit word. Keep the upper * byte set to zero. */ - ad_sd_read_reg_raw(sigma_delta, data_reg, reg_size, &data[1]); + ad_sd_read_reg_raw(sigma_delta, data_reg, transfer_size, &data[1]); break; } - iio_push_to_buffers_with_timestamp(indio_dev, data, pf->timestamp); + /* + * For devices sampling only one channel at + * once, there is no need for sample number tracking. + */ + if (sigma_delta->active_slots == 1) { + iio_push_to_buffers_with_timestamp(indio_dev, data, pf->timestamp); + goto irq_handled; + } + + if (sigma_delta->status_appended) { + u8 converted_channel; + + converted_channel = data[status_pos] & sigma_delta->info->status_ch_mask; + if (converted_channel != sigma_delta->slots[sigma_delta->current_slot]) { + /* + * Desync occurred during continuous sampling of multiple channels. + * Drop this incomplete sample and start from first channel again. + */ + + sigma_delta->current_slot = 0; + goto irq_handled; + } + } + + sample_size = indio_dev->channels[0].scan_type.storagebits / 8; + sample_pos = sample_size * sigma_delta->current_slot; + memcpy(&sigma_delta->samples_buf[sample_pos], data, sample_size); + sigma_delta->current_slot++; + if (sigma_delta->current_slot == sigma_delta->active_slots) { + sigma_delta->current_slot = 0; + iio_push_to_buffers_with_timestamp(indio_dev, sigma_delta->samples_buf, + pf->timestamp); + } + +irq_handled: iio_trigger_notify_done(indio_dev->trig); sigma_delta->irq_dis = false; enable_irq(sigma_delta->spi->irq); @@ -430,10 +521,17 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) return IRQ_HANDLED; } +static bool ad_sd_validate_scan_mask(struct iio_dev *indio_dev, const unsigned long *mask) +{ + struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); + + return bitmap_weight(mask, indio_dev->masklength) <= sigma_delta->num_slots; +} + static const struct iio_buffer_setup_ops ad_sd_buffer_setup_ops = { .postenable = &ad_sd_buffer_postenable, .postdisable = &ad_sd_buffer_postdisable, - .validate_scan_mask = &iio_validate_scan_mask_onehot, + .validate_scan_mask = &ad_sd_validate_scan_mask, }; static irqreturn_t ad_sd_data_rdy_trig_poll(int irq, void *private) @@ -513,8 +611,14 @@ static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_de */ int devm_ad_sd_setup_buffer_and_trigger(struct device *dev, struct iio_dev *indio_dev) { + struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); int ret; + sigma_delta->slots = devm_kcalloc(dev, sigma_delta->num_slots, + sizeof(*sigma_delta->slots), GFP_KERNEL); + if (!sigma_delta->slots) + return -ENOMEM; + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, &iio_pollfunc_store_time, &ad_sd_trigger_handler, @@ -541,6 +645,25 @@ int ad_sd_init(struct ad_sigma_delta *sigma_delta, struct iio_dev *indio_dev, { sigma_delta->spi = spi; sigma_delta->info = info; + + /* If the field is unset in ad_sigma_delta_info, asume there can only be 1 slot. 
*/ + if (!info->num_slots) + sigma_delta->num_slots = 1; + else + sigma_delta->num_slots = info->num_slots; + + if (sigma_delta->num_slots > 1) { + if (!indio_dev->info->update_scan_mode) { + dev_err(&spi->dev, "iio_dev lacks update_scan_mode().\n"); + return -EINVAL; + } + + if (!info->disable_all) { + dev_err(&spi->dev, "ad_sigma_delta_info lacks disable_all().\n"); + return -EINVAL; + } + } + iio_device_set_drvdata(indio_dev, sigma_delta); return 0; diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index c525fd51652f..7852f6c9a714 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -32,26 +32,34 @@ struct iio_dev; /** * struct ad_sigma_delta_info - Sigma Delta driver specific callbacks and options * @set_channel: Will be called to select the current channel, may be NULL. + * @append_status: Will be called to enable status append at the end of the sample, may be NULL. * @set_mode: Will be called to select the current mode, may be NULL. + * @disable_all: Will be called to disable all channels, may be NULL. * @postprocess_sample: Is called for each sampled data word, can be used to * modify or drop the sample data, it, may be NULL. * @has_registers: true if the device has writable and readable registers, false * if there is just one read-only sample data shift register. * @addr_shift: Shift of the register address in the communications register. * @read_mask: Mask for the communications register having the read bit set. + * @status_ch_mask: Mask for the channel number stored in status register. * @data_reg: Address of the data register, if 0 the default address of 0x3 will * be used. * @irq_flags: flags for the interrupt used by the triggered buffer + * @num_slots: Number of sequencer slots */ struct ad_sigma_delta_info { int (*set_channel)(struct ad_sigma_delta *, unsigned int channel); + int (*append_status)(struct ad_sigma_delta *, bool append); int (*set_mode)(struct ad_sigma_delta *, enum ad_sigma_delta_mode mode); + int (*disable_all)(struct ad_sigma_delta *); int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample); bool has_registers; unsigned int addr_shift; unsigned int read_mask; + unsigned int status_ch_mask; unsigned int data_reg; unsigned long irq_flags; + unsigned int num_slots; }; /** @@ -76,6 +84,13 @@ struct ad_sigma_delta { uint8_t comm; const struct ad_sigma_delta_info *info; + unsigned int active_slots; + unsigned int current_slot; + unsigned int num_slots; + bool status_appended; + /* map slots to channels in order to know what to expect from devices */ + unsigned int *slots; + uint8_t *samples_buf; /* * DMA (thus cache coherency maintenance) requires the @@ -97,6 +112,29 @@ static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, return 0; } +static inline int ad_sigma_delta_append_status(struct ad_sigma_delta *sd, bool append) +{ + int ret; + + if (sd->info->append_status) { + ret = sd->info->append_status(sd, append); + if (ret < 0) + return ret; + + sd->status_appended = append; + } + + return 0; +} + +static inline int ad_sigma_delta_disable_all(struct ad_sigma_delta *sd) +{ + if (sd->info->disable_all) + return sd->info->disable_all(sd); + + return 0; +} + static inline int ad_sigma_delta_set_mode(struct ad_sigma_delta *sd, unsigned int mode) { -- cgit From 71d637823cac7748079a912e0373476c7cf6f985 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 3 Nov 2021 13:35:14 +0100 Subject: dma-buf: finally make dma_resv_excl_fence private 
v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers should never touch this directly. v2: fix rebase clash Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220321135856.1331-10-christian.koenig@amd.com Reviewed-by: Daniel Vetter --- drivers/dma-buf/dma-resv.c | 6 ++++++ include/linux/dma-resv.h | 17 ----------------- 2 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index ff16da0a54ec..15ffac35439d 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -140,6 +140,12 @@ void dma_resv_fini(struct dma_resv *obj) } EXPORT_SYMBOL(dma_resv_fini); +static inline struct dma_fence * +dma_resv_excl_fence(struct dma_resv *obj) +{ + return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); +} + static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) { return rcu_dereference_check(obj->fence, dma_resv_held(obj)); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 233ed4f14d9e..ecb697d4d861 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -411,23 +411,6 @@ static inline void dma_resv_unlock(struct dma_resv *obj) ww_mutex_unlock(&obj->lock); } -/** - * dma_resv_excl_fence - return the object's exclusive fence - * @obj: the reservation object - * - * Returns the exclusive fence (if any). Caller must either hold the objects - * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(), - * or one of the variants of each - * - * RETURNS - * The exclusive fence or NULL - */ -static inline struct dma_fence * -dma_resv_excl_fence(struct dma_resv *obj) -{ - return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); -} - void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); -- cgit From 1be9473e31ab87ad1b6ecf9fd11df461930ede85 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Tue, 22 Mar 2022 14:03:34 +0000 Subject: module: Move livepatch support to a separate file No functional change. This patch migrates livepatch support (i.e. used during module add/or load and remove/or deletion) from core module code into kernel/module/livepatch.c. At the moment it contains code to persist Elf information about a given livepatch module, only. The new file was added to MAINTAINERS. 
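As a rough illustration of why the !CONFIG_LIVEPATCH stubs are worth carrying next to the declarations: the calling side in the module loader needs no preprocessor guard. A sketch, simplified from load_module(); the error label is illustrative:

	if (is_livepatch_module(mod)) {
		err = copy_module_elf(mod, info);
		if (err < 0)
			goto free_modinfo;	/* unwind as for any other load error */
	}

With the stubs in place this call site compiles unchanged whether or not CONFIG_LIVEPATCH is set; in the disabled case the call collapses to a no-op.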
Reviewed-by: Petr Mladek Tested-by: Petr Mladek Signed-off-by: Aaron Tomlin Signed-off-by: Luis Chamberlain --- MAINTAINERS | 1 + include/linux/module.h | 9 ++-- kernel/module/Makefile | 1 + kernel/module/internal.h | 22 ++++++++++ kernel/module/livepatch.c | 74 +++++++++++++++++++++++++++++++++ kernel/module/main.c | 102 +++++----------------------------------------- 6 files changed, 111 insertions(+), 98 deletions(-) create mode 100644 kernel/module/livepatch.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 5e7778cd437f..6dcd93fb3a96 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11360,6 +11360,7 @@ F: arch/s390/include/asm/livepatch.h F: arch/x86/include/asm/livepatch.h F: include/linux/livepatch.h F: kernel/livepatch/ +F: kernel/module/livepatch.c F: lib/livepatch/ F: samples/livepatch/ F: tools/testing/selftests/livepatch/ diff --git a/include/linux/module.h b/include/linux/module.h index 1e135fd5c076..7ec9715de7dc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -663,17 +663,14 @@ static inline bool module_requested_async_probing(struct module *module) return module && module->async_probe_requested; } -#ifdef CONFIG_LIVEPATCH static inline bool is_livepatch_module(struct module *mod) { +#ifdef CONFIG_LIVEPATCH return mod->klp; -} -#else /* !CONFIG_LIVEPATCH */ -static inline bool is_livepatch_module(struct module *mod) -{ +#else return false; +#endif } -#endif /* CONFIG_LIVEPATCH */ bool is_module_sig_enforced(void); void set_module_sig_enforced(void); diff --git a/kernel/module/Makefile b/kernel/module/Makefile index cdd5c61b8c7f..ed3aacb04f17 100644 --- a/kernel/module/Makefile +++ b/kernel/module/Makefile @@ -10,3 +10,4 @@ KCOV_INSTRUMENT_module.o := n obj-y += main.o obj-$(CONFIG_MODULE_DECOMPRESS) += decompress.o obj-$(CONFIG_MODULE_SIG) += signing.o +obj-$(CONFIG_LIVEPATCH) += livepatch.o diff --git a/kernel/module/internal.h b/kernel/module/internal.h index e0775e66bcf7..ad7a444253ed 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -57,6 +57,28 @@ struct load_info { int mod_verify_sig(const void *mod, struct load_info *info); +#ifdef CONFIG_LIVEPATCH +int copy_module_elf(struct module *mod, struct load_info *info); +void free_module_elf(struct module *mod); +#else /* !CONFIG_LIVEPATCH */ +static inline int copy_module_elf(struct module *mod, struct load_info *info) +{ + return 0; +} + +static inline void free_module_elf(struct module *mod) { } +#endif /* CONFIG_LIVEPATCH */ + +static inline bool set_livepatch_module(struct module *mod) +{ +#ifdef CONFIG_LIVEPATCH + mod->klp = true; + return true; +#else + return false; +#endif +} + #ifdef CONFIG_MODULE_DECOMPRESS int module_decompress(struct load_info *info, const void *buf, size_t size); void module_decompress_cleanup(struct load_info *info); diff --git a/kernel/module/livepatch.c b/kernel/module/livepatch.c new file mode 100644 index 000000000000..486d4ff92719 --- /dev/null +++ b/kernel/module/livepatch.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Module livepatch support + * + * Copyright (C) 2016 Jessica Yu + */ + +#include +#include +#include +#include "internal.h" + +/* + * Persist Elf information about a module. Copy the Elf header, + * section header table, section string table, and symtab section + * index from info to mod->klp_info. 
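+ * The copies are released again by free_module_elf() when the module is freed.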
+ */ +int copy_module_elf(struct module *mod, struct load_info *info) +{ + unsigned int size, symndx; + int ret; + + size = sizeof(*mod->klp_info); + mod->klp_info = kmalloc(size, GFP_KERNEL); + if (!mod->klp_info) + return -ENOMEM; + + /* Elf header */ + size = sizeof(mod->klp_info->hdr); + memcpy(&mod->klp_info->hdr, info->hdr, size); + + /* Elf section header table */ + size = sizeof(*info->sechdrs) * info->hdr->e_shnum; + mod->klp_info->sechdrs = kmemdup(info->sechdrs, size, GFP_KERNEL); + if (!mod->klp_info->sechdrs) { + ret = -ENOMEM; + goto free_info; + } + + /* Elf section name string table */ + size = info->sechdrs[info->hdr->e_shstrndx].sh_size; + mod->klp_info->secstrings = kmemdup(info->secstrings, size, GFP_KERNEL); + if (!mod->klp_info->secstrings) { + ret = -ENOMEM; + goto free_sechdrs; + } + + /* Elf symbol section index */ + symndx = info->index.sym; + mod->klp_info->symndx = symndx; + + /* + * For livepatch modules, core_kallsyms.symtab is a complete + * copy of the original symbol table. Adjust sh_addr to point + * to core_kallsyms.symtab since the copy of the symtab in module + * init memory is freed at the end of do_init_module(). + */ + mod->klp_info->sechdrs[symndx].sh_addr = (unsigned long)mod->core_kallsyms.symtab; + + return 0; + +free_sechdrs: + kfree(mod->klp_info->sechdrs); +free_info: + kfree(mod->klp_info); + return ret; +} + +void free_module_elf(struct module *mod) +{ + kfree(mod->klp_info->sechdrs); + kfree(mod->klp_info->secstrings); + kfree(mod->klp_info); +} diff --git a/kernel/module/main.c b/kernel/module/main.c index 5898a1af41a9..915143827069 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2043,81 +2043,6 @@ static int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, } #endif /* CONFIG_STRICT_MODULE_RWX */ -#ifdef CONFIG_LIVEPATCH -/* - * Persist Elf information about a module. Copy the Elf header, - * section header table, section string table, and symtab section - * index from info to mod->klp_info. - */ -static int copy_module_elf(struct module *mod, struct load_info *info) -{ - unsigned int size, symndx; - int ret; - - size = sizeof(*mod->klp_info); - mod->klp_info = kmalloc(size, GFP_KERNEL); - if (mod->klp_info == NULL) - return -ENOMEM; - - /* Elf header */ - size = sizeof(mod->klp_info->hdr); - memcpy(&mod->klp_info->hdr, info->hdr, size); - - /* Elf section header table */ - size = sizeof(*info->sechdrs) * info->hdr->e_shnum; - mod->klp_info->sechdrs = kmemdup(info->sechdrs, size, GFP_KERNEL); - if (mod->klp_info->sechdrs == NULL) { - ret = -ENOMEM; - goto free_info; - } - - /* Elf section name string table */ - size = info->sechdrs[info->hdr->e_shstrndx].sh_size; - mod->klp_info->secstrings = kmemdup(info->secstrings, size, GFP_KERNEL); - if (mod->klp_info->secstrings == NULL) { - ret = -ENOMEM; - goto free_sechdrs; - } - - /* Elf symbol section index */ - symndx = info->index.sym; - mod->klp_info->symndx = symndx; - - /* - * For livepatch modules, core_kallsyms.symtab is a complete - * copy of the original symbol table. Adjust sh_addr to point - * to core_kallsyms.symtab since the copy of the symtab in module - * init memory is freed at the end of do_init_module(). 
- */ - mod->klp_info->sechdrs[symndx].sh_addr = \ - (unsigned long) mod->core_kallsyms.symtab; - - return 0; - -free_sechdrs: - kfree(mod->klp_info->sechdrs); -free_info: - kfree(mod->klp_info); - return ret; -} - -static void free_module_elf(struct module *mod) -{ - kfree(mod->klp_info->sechdrs); - kfree(mod->klp_info->secstrings); - kfree(mod->klp_info); -} -#else /* !CONFIG_LIVEPATCH */ -static int copy_module_elf(struct module *mod, struct load_info *info) -{ - return 0; -} - -static void free_module_elf(struct module *mod) -{ -} -#endif /* CONFIG_LIVEPATCH */ - void __weak module_memfree(void *module_region) { /* @@ -3092,30 +3017,23 @@ static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned l return 0; } -#ifdef CONFIG_LIVEPATCH static int check_modinfo_livepatch(struct module *mod, struct load_info *info) { - if (get_modinfo(info, "livepatch")) { - mod->klp = true; + if (!get_modinfo(info, "livepatch")) + /* Nothing more to do */ + return 0; + + if (set_livepatch_module(mod)) { add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK); pr_notice_once("%s: tainting kernel with TAINT_LIVEPATCH\n", - mod->name); - } - - return 0; -} -#else /* !CONFIG_LIVEPATCH */ -static int check_modinfo_livepatch(struct module *mod, struct load_info *info) -{ - if (get_modinfo(info, "livepatch")) { - pr_err("%s: module is marked as livepatch module, but livepatch support is disabled", - mod->name); - return -ENOEXEC; + mod->name); + return 0; } - return 0; + pr_err("%s: module is marked as livepatch module, but livepatch support is disabled", + mod->name); + return -ENOEXEC; } -#endif /* CONFIG_LIVEPATCH */ static void check_modinfo_retpoline(struct module *mod, struct load_info *info) { -- cgit From 0c1e42805c25c87eb7a6f3b18bdbf3b3b7840aff Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Tue, 22 Mar 2022 14:03:37 +0000 Subject: module: Move extra signature support out of core code No functional change. This patch migrates additional module signature check code from core module code into kernel/module/signing.c. Reviewed-by: Christophe Leroy Signed-off-by: Aaron Tomlin Signed-off-by: Luis Chamberlain --- include/linux/module.h | 12 ++++--- kernel/module/internal.h | 9 +++++ kernel/module/main.c | 87 ------------------------------------------------ kernel/module/signing.c | 77 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7ec9715de7dc..5e2059f3afc7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -672,7 +672,6 @@ static inline bool is_livepatch_module(struct module *mod) #endif } -bool is_module_sig_enforced(void); void set_module_sig_enforced(void); #else /* !CONFIG_MODULES... 
*/ @@ -799,10 +798,6 @@ static inline bool module_requested_async_probing(struct module *module) return false; } -static inline bool is_module_sig_enforced(void) -{ - return false; -} static inline void set_module_sig_enforced(void) { @@ -854,11 +849,18 @@ static inline bool retpoline_module_ok(bool has_retpoline) #endif #ifdef CONFIG_MODULE_SIG +bool is_module_sig_enforced(void); + static inline bool module_sig_ok(struct module *module) { return module->sig_ok; } #else /* !CONFIG_MODULE_SIG */ +static inline bool is_module_sig_enforced(void) +{ + return false; +} + static inline bool module_sig_ok(struct module *module) { return true; diff --git a/kernel/module/internal.h b/kernel/module/internal.h index a6895bb5598a..d6f646a5da41 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -158,3 +158,12 @@ static inline int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, return 0; } #endif /* CONFIG_STRICT_MODULE_RWX */ + +#ifdef CONFIG_MODULE_SIG +int module_sig_check(struct load_info *info, int flags); +#else /* !CONFIG_MODULE_SIG */ +static inline int module_sig_check(struct load_info *info, int flags) +{ + return 0; +} +#endif /* !CONFIG_MODULE_SIG */ diff --git a/kernel/module/main.c b/kernel/module/main.c index d55a2a8338a1..c349c91e53fa 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -127,28 +126,6 @@ static void module_assert_mutex_or_preempt(void) #endif } -#ifdef CONFIG_MODULE_SIG -static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); -module_param(sig_enforce, bool_enable_only, 0644); - -void set_module_sig_enforced(void) -{ - sig_enforce = true; -} -#else -#define sig_enforce false -#endif - -/* - * Export sig_enforce kernel cmdline parameter to allow other subsystems rely - * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. - */ -bool is_module_sig_enforced(void) -{ - return sig_enforce; -} -EXPORT_SYMBOL(is_module_sig_enforced); - /* Block module loading/unloading? */ int modules_disabled = 0; core_param(nomodule, modules_disabled, bint, 0); @@ -2569,70 +2546,6 @@ static inline void kmemleak_load_module(const struct module *mod, } #endif -#ifdef CONFIG_MODULE_SIG -static int module_sig_check(struct load_info *info, int flags) -{ - int err = -ENODATA; - const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; - const char *reason; - const void *mod = info->hdr; - bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | - MODULE_INIT_IGNORE_VERMAGIC); - /* - * Do not allow mangled modules as a module with version information - * removed is no longer the module that was signed. - */ - if (!mangled_module && - info->len > markerlen && - memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { - /* We truncate the module to discard the signature */ - info->len -= markerlen; - err = mod_verify_sig(mod, info); - if (!err) { - info->sig_ok = true; - return 0; - } - } - - /* - * We don't permit modules to be loaded into the trusted kernels - * without a valid signature on them, but if we're not enforcing, - * certain errors are non-fatal. 
- */ - switch (err) { - case -ENODATA: - reason = "unsigned module"; - break; - case -ENOPKG: - reason = "module with unsupported crypto"; - break; - case -ENOKEY: - reason = "module with unavailable key"; - break; - - default: - /* - * All other errors are fatal, including lack of memory, - * unparseable signatures, and signature check failures -- - * even if signatures aren't required. - */ - return err; - } - - if (is_module_sig_enforced()) { - pr_notice("Loading of %s is rejected\n", reason); - return -EKEYREJECTED; - } - - return security_locked_down(LOCKDOWN_MODULE_SIGNATURE); -} -#else /* !CONFIG_MODULE_SIG */ -static int module_sig_check(struct load_info *info, int flags) -{ - return 0; -} -#endif /* !CONFIG_MODULE_SIG */ - static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) { #if defined(CONFIG_64BIT) diff --git a/kernel/module/signing.c b/kernel/module/signing.c index 8aeb6d2ee94b..85c8999dfecf 100644 --- a/kernel/module/signing.c +++ b/kernel/module/signing.c @@ -11,9 +11,29 @@ #include #include #include +#include #include +#include #include "internal.h" +static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); +module_param(sig_enforce, bool_enable_only, 0644); + +/* + * Export sig_enforce kernel cmdline parameter to allow other subsystems rely + * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. + */ +bool is_module_sig_enforced(void) +{ + return sig_enforce; +} +EXPORT_SYMBOL(is_module_sig_enforced); + +void set_module_sig_enforced(void) +{ + sig_enforce = true; +} + /* * Verify the signature on a module. */ @@ -43,3 +63,60 @@ int mod_verify_sig(const void *mod, struct load_info *info) VERIFYING_MODULE_SIGNATURE, NULL, NULL); } + +int module_sig_check(struct load_info *info, int flags) +{ + int err = -ENODATA; + const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; + const char *reason; + const void *mod = info->hdr; + bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | + MODULE_INIT_IGNORE_VERMAGIC); + /* + * Do not allow mangled modules as a module with version information + * removed is no longer the module that was signed. + */ + if (!mangled_module && + info->len > markerlen && + memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { + /* We truncate the module to discard the signature */ + info->len -= markerlen; + err = mod_verify_sig(mod, info); + if (!err) { + info->sig_ok = true; + return 0; + } + } + + /* + * We don't permit modules to be loaded into the trusted kernels + * without a valid signature on them, but if we're not enforcing, + * certain errors are non-fatal. + */ + switch (err) { + case -ENODATA: + reason = "unsigned module"; + break; + case -ENOPKG: + reason = "module with unsupported crypto"; + break; + case -ENOKEY: + reason = "module with unavailable key"; + break; + + default: + /* + * All other errors are fatal, including lack of memory, + * unparseable signatures, and signature check failures -- + * even if signatures aren't required. + */ + return err; + } + + if (is_module_sig_enforced()) { + pr_notice("Loading of %s is rejected\n", reason); + return -EKEYREJECTED; + } + + return security_locked_down(LOCKDOWN_MODULE_SIGNATURE); +} -- cgit From f64205a42046d3802c423fa2059e7fca39af127c Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Tue, 22 Mar 2022 14:03:43 +0000 Subject: module: Move kdb module related code out of main kdb code No functional change. This patch migrates the kdb 'lsmod' command support out of main kdb code into its own file under kernel/module. 
In addition to the above, a minor style warning i.e. missing a blank line after declarations, was resolved too. The new file was added to MAINTAINERS. Finally we remove linux/module.h as it is entirely redundant. Reviewed-by: Daniel Thompson Acked-by: Daniel Thompson Signed-off-by: Aaron Tomlin Signed-off-by: Luis Chamberlain --- MAINTAINERS | 1 + include/linux/kdb.h | 1 + kernel/debug/kdb/kdb_io.c | 1 - kernel/debug/kdb/kdb_keyboard.c | 1 - kernel/debug/kdb/kdb_main.c | 49 ------------------------------------ kernel/debug/kdb/kdb_private.h | 4 --- kernel/debug/kdb/kdb_support.c | 1 - kernel/module/Makefile | 1 + kernel/module/kdb.c | 56 +++++++++++++++++++++++++++++++++++++++++ kernel/module/main.c | 4 --- 10 files changed, 59 insertions(+), 60 deletions(-) create mode 100644 kernel/module/kdb.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 6dcd93fb3a96..87cc05c6b462 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10907,6 +10907,7 @@ F: drivers/tty/serial/kgdboc.c F: include/linux/kdb.h F: include/linux/kgdb.h F: kernel/debug/ +F: kernel/module/kdb.c KHADAS MCU MFD DRIVER M: Neil Armstrong diff --git a/include/linux/kdb.h b/include/linux/kdb.h index ea0f5e580fac..07dfb6a20a1c 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -222,5 +222,6 @@ enum { extern int kdbgetintenv(const char *, int *); extern int kdb_set(int, const char **); +int kdb_lsmod(int argc, const char **argv); #endif /* !_KDB_H */ diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 6735ac36b718..67d3c48a1522 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -9,7 +9,6 @@ * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved. */ -#include #include #include #include diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c index f877a0a0d7cf..f87c750d3eb3 100644 --- a/kernel/debug/kdb/kdb_keyboard.c +++ b/kernel/debug/kdb/kdb_keyboard.c @@ -11,7 +11,6 @@ #include #include #include -#include #include /* Keyboard Controller Registers on normal PCs. */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 0852a537dad4..f3a30cd5037f 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -2004,54 +2003,6 @@ static int kdb_ef(int argc, const char **argv) return 0; } -#if defined(CONFIG_MODULES) -/* - * kdb_lsmod - This function implements the 'lsmod' command. Lists - * currently loaded kernel modules. - * Mostly taken from userland lsmod. 
- */ -static int kdb_lsmod(int argc, const char **argv) -{ - struct module *mod; - - if (argc != 0) - return KDB_ARGCOUNT; - - kdb_printf("Module Size modstruct Used by\n"); - list_for_each_entry(mod, kdb_modules, list) { - if (mod->state == MODULE_STATE_UNFORMED) - continue; - - kdb_printf("%-20s%8u 0x%px ", mod->name, - mod->core_layout.size, (void *)mod); -#ifdef CONFIG_MODULE_UNLOAD - kdb_printf("%4d ", module_refcount(mod)); -#endif - if (mod->state == MODULE_STATE_GOING) - kdb_printf(" (Unloading)"); - else if (mod->state == MODULE_STATE_COMING) - kdb_printf(" (Loading)"); - else - kdb_printf(" (Live)"); - kdb_printf(" 0x%px", mod->core_layout.base); - -#ifdef CONFIG_MODULE_UNLOAD - { - struct module_use *use; - kdb_printf(" [ "); - list_for_each_entry(use, &mod->source_list, - source_list) - kdb_printf("%s ", use->target->name); - kdb_printf("]\n"); - } -#endif - } - - return 0; -} - -#endif /* CONFIG_MODULES */ - /* * kdb_env - This function implements the 'env' command. Display the * current environment variables. diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 0d2f9feea0a4..1f8c519a5f81 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -226,10 +226,6 @@ extern void kdb_kbd_cleanup_state(void); #define kdb_kbd_cleanup_state() #endif /* ! CONFIG_KDB_KEYBOARD */ -#ifdef CONFIG_MODULES -extern struct list_head *kdb_modules; -#endif /* CONFIG_MODULES */ - extern char kdb_prompt_str[]; #define KDB_WORD_SIZE ((int)sizeof(unsigned long)) diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 85cb51c4a17e..0a39497140bf 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/module/Makefile b/kernel/module/Makefile index cf8dcdc6b55f..88f5cdcdb067 100644 --- a/kernel/module/Makefile +++ b/kernel/module/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += debug_kmemleak.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PROC_FS) += procfs.o obj-$(CONFIG_SYSFS) += sysfs.o +obj-$(CONFIG_KGDB_KDB) += kdb.o diff --git a/kernel/module/kdb.c b/kernel/module/kdb.c new file mode 100644 index 000000000000..a446c699db0a --- /dev/null +++ b/kernel/module/kdb.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Module kdb support + * + * Copyright (C) 2010 Jason Wessel + */ + +#include +#include +#include "internal.h" + +/* + * kdb_lsmod - This function implements the 'lsmod' command. Lists + * currently loaded kernel modules. + * Mostly taken from userland lsmod. 
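+ * kdb runs with the machine stopped, so the module list is walked without
+ * taking module_mutex.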
+ */ +int kdb_lsmod(int argc, const char **argv) +{ + struct module *mod; + + if (argc != 0) + return KDB_ARGCOUNT; + + kdb_printf("Module Size modstruct Used by\n"); + list_for_each_entry(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + + kdb_printf("%-20s%8u 0x%px ", mod->name, + mod->core_layout.size, (void *)mod); +#ifdef CONFIG_MODULE_UNLOAD + kdb_printf("%4d ", module_refcount(mod)); +#endif + if (mod->state == MODULE_STATE_GOING) + kdb_printf(" (Unloading)"); + else if (mod->state == MODULE_STATE_COMING) + kdb_printf(" (Loading)"); + else + kdb_printf(" (Live)"); + kdb_printf(" 0x%px", mod->core_layout.base); + +#ifdef CONFIG_MODULE_UNLOAD + { + struct module_use *use; + + kdb_printf(" [ "); + list_for_each_entry(use, &mod->source_list, + source_list) + kdb_printf("%s ", use->target->name); + kdb_printf("]\n"); + } +#endif + } + + return 0; +} diff --git a/kernel/module/main.c b/kernel/module/main.c index 0cd0590dd411..a2dc54726621 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -108,10 +108,6 @@ static void mod_update_bounds(struct module *mod) __mod_update_bounds(mod->init_layout.base, mod->init_layout.size); } -#ifdef CONFIG_KGDB_KDB -struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ -#endif /* CONFIG_KGDB_KDB */ - static void module_assert_mutex_or_preempt(void) { #ifdef CONFIG_LOCKDEP -- cgit From 01dc0386efb769056257410ba5754558384006a7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Feb 2022 13:02:14 +0100 Subject: module: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC to allow architectures to request having modules data in vmalloc area instead of module area. This is required on powerpc book3s/32 in order to set data non executable, because it is not possible to set executability on page basis, this is done per 256 Mbytes segments. The module area has exec right, vmalloc area has noexec. This can also be useful on other powerpc/32 in order to maximize the chance of code being close enough to kernel core to avoid branch trampolines. Cc: Jason Wessel Acked-by: Daniel Thompson Cc: Douglas Anderson Signed-off-by: Christophe Leroy [mcgrof: rebased in light of kernel/module/kdb.c move] Signed-off-by: Luis Chamberlain --- arch/Kconfig | 6 +++++ include/linux/module.h | 8 +++++++ kernel/module/internal.h | 3 +++ kernel/module/kdb.c | 10 ++++++-- kernel/module/main.c | 58 +++++++++++++++++++++++++++++++++++++++++++-- kernel/module/procfs.c | 8 +++++-- kernel/module/strict_rwx.c | 1 + kernel/module/tree_lookup.c | 8 +++++++ 8 files changed, 96 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 29b0167c088b..24945cee808b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -888,6 +888,12 @@ config MODULES_USE_ELF_REL Modules only use ELF REL relocations. Modules with ELF RELA relocations will give an error. +config ARCH_WANTS_MODULES_DATA_IN_VMALLOC + bool + help + For architectures like powerpc/32 which have constraints on module + allocation and need to allocate module data outside of module area. + config HAVE_IRQ_EXIT_ON_IRQ_STACK bool help diff --git a/include/linux/module.h b/include/linux/module.h index 5e2059f3afc7..46d4d5f2516e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -422,6 +422,9 @@ struct module { /* Core layout: rbtree is accessed frequently, so keep together. 
*/ struct module_layout core_layout __module_layout_align; struct module_layout init_layout; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + struct module_layout data_layout; +#endif /* Arch-specific module values */ struct mod_arch_specific arch; @@ -569,6 +572,11 @@ bool is_module_text_address(unsigned long addr); static inline bool within_module_core(unsigned long addr, const struct module *mod) { +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + if ((unsigned long)mod->data_layout.base <= addr && + addr < (unsigned long)mod->data_layout.base + mod->data_layout.size) + return true; +#endif return (unsigned long)mod->core_layout.base <= addr && addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; } diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 0aabbf5cbcd1..3e23bef5884d 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -20,7 +20,9 @@ /* Maximum number of characters written by module_flags() */ #define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4) +#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC #define data_layout core_layout +#endif /* * Modules' sections will be aligned on page boundaries @@ -154,6 +156,7 @@ struct mod_tree_root { }; extern struct mod_tree_root mod_tree; +extern struct mod_tree_root mod_data_tree; #ifdef CONFIG_MODULES_TREE_LOOKUP void mod_tree_insert(struct module *mod); diff --git a/kernel/module/kdb.c b/kernel/module/kdb.c index a446c699db0a..f4317f92e189 100644 --- a/kernel/module/kdb.c +++ b/kernel/module/kdb.c @@ -26,8 +26,11 @@ int kdb_lsmod(int argc, const char **argv) if (mod->state == MODULE_STATE_UNFORMED) continue; - kdb_printf("%-20s%8u 0x%px ", mod->name, - mod->core_layout.size, (void *)mod); + kdb_printf("%-20s%8u", mod->name, mod->core_layout.size); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + kdb_printf("/%8u", mod->data_layout.size); +#endif + kdb_printf(" 0x%px ", (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); #endif @@ -38,6 +41,9 @@ int kdb_lsmod(int argc, const char **argv) else kdb_printf(" (Live)"); kdb_printf(" 0x%px", mod->core_layout.base); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + kdb_printf("/0x%px", mod->data_layout.base); +#endif #ifdef CONFIG_MODULE_UNLOAD { diff --git a/kernel/module/main.c b/kernel/module/main.c index 78658283408d..84b828431dcb 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -78,6 +78,12 @@ struct mod_tree_root mod_tree __cacheline_aligned = { .addr_min = -1UL, }; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC +struct mod_tree_root mod_data_tree __cacheline_aligned = { + .addr_min = -1UL, +}; +#endif + #define module_addr_min mod_tree.addr_min #define module_addr_max mod_tree.addr_max @@ -107,6 +113,9 @@ static void mod_update_bounds(struct module *mod) __mod_update_bounds(mod->core_layout.base, mod->core_layout.size, &mod_tree); if (mod->init_layout.size) __mod_update_bounds(mod->init_layout.base, mod->init_layout.size, &mod_tree); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + __mod_update_bounds(mod->data_layout.base, mod->data_layout.size, &mod_data_tree); +#endif } static void module_assert_mutex_or_preempt(void) @@ -940,6 +949,17 @@ static ssize_t show_coresize(struct module_attribute *mattr, static struct module_attribute modinfo_coresize = __ATTR(coresize, 0444, show_coresize, NULL); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC +static ssize_t show_datasize(struct module_attribute *mattr, + struct module_kobject *mk, char *buffer) +{ + return sprintf(buffer, 
"%u\n", mk->mod->data_layout.size); +} + +static struct module_attribute modinfo_datasize = + __ATTR(datasize, 0444, show_datasize, NULL); +#endif + static ssize_t show_initsize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { @@ -968,6 +988,9 @@ struct module_attribute *modinfo_attrs[] = { &modinfo_srcversion, &modinfo_initstate, &modinfo_coresize, +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + &modinfo_datasize, +#endif &modinfo_initsize, &modinfo_taint, #ifdef CONFIG_MODULE_UNLOAD @@ -1194,6 +1217,9 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ module_memfree(mod->core_layout.base); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + vfree(mod->data_layout.base); +#endif } void *__symbol_get(const char *symbol) @@ -2124,6 +2150,24 @@ static int move_module(struct module *mod, struct load_info *info) } else mod->init_layout.base = NULL; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + /* Do the allocs. */ + ptr = vmalloc(mod->data_layout.size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. Just mark it as not being a + * leak. + */ + kmemleak_not_leak(ptr); + if (!ptr) { + module_memfree(mod->core_layout.base); + module_memfree(mod->init_layout.base); + return -ENOMEM; + } + + memset(ptr, 0, mod->data_layout.size); + mod->data_layout.base = ptr; +#endif /* Transfer each section which specifies SHF_ALLOC */ pr_debug("final section addresses:\n"); for (i = 0; i < info->hdr->e_shnum; i++) { @@ -2299,6 +2343,9 @@ static void module_deallocate(struct module *mod, struct load_info *info) module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); module_memfree(mod->core_layout.base); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + vfree(mod->data_layout.base); +#endif } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3015,13 +3062,20 @@ bool is_module_address(unsigned long addr) struct module *__module_address(unsigned long addr) { struct module *mod; + struct mod_tree_root *tree; - if (addr < module_addr_min || addr > module_addr_max) + if (addr >= mod_tree.addr_min && addr <= mod_tree.addr_max) + tree = &mod_tree; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + else if (addr >= mod_data_tree.addr_min && addr <= mod_data_tree.addr_max) + tree = &mod_data_tree; +#endif + else return NULL; module_assert_mutex_or_preempt(); - mod = mod_find(addr, &mod_tree); + mod = mod_find(addr, tree); if (mod) { BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED) diff --git a/kernel/module/procfs.c b/kernel/module/procfs.c index 2717e130788e..9a8f4f0f6329 100644 --- a/kernel/module/procfs.c +++ b/kernel/module/procfs.c @@ -67,13 +67,17 @@ static int m_show(struct seq_file *m, void *p) struct module *mod = list_entry(p, struct module, list); char buf[MODULE_FLAGS_BUF_SIZE]; void *value; + unsigned int size; /* We always ignore unformed modules. */ if (mod->state == MODULE_STATE_UNFORMED) return 0; - seq_printf(m, "%s %u", - mod->name, mod->init_layout.size + mod->core_layout.size); + size = mod->init_layout.size + mod->core_layout.size; +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + size += mod->data_layout.size; +#endif + seq_printf(m, "%s %u", mod->name, size); print_unload_info(m, mod); /* Informative for users. 
*/ diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index fe3c10891407..14fbea66f12f 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -75,6 +75,7 @@ bool module_check_misalignment(const struct module *mod) return false; return layout_check_misalignment(&mod->core_layout) || + layout_check_misalignment(&mod->data_layout) || layout_check_misalignment(&mod->init_layout); } diff --git a/kernel/module/tree_lookup.c b/kernel/module/tree_lookup.c index 995fe68059db..8ec5cfd60496 100644 --- a/kernel/module/tree_lookup.c +++ b/kernel/module/tree_lookup.c @@ -83,6 +83,11 @@ void mod_tree_insert(struct module *mod) __mod_tree_insert(&mod->core_layout.mtn, &mod_tree); if (mod->init_layout.size) __mod_tree_insert(&mod->init_layout.mtn, &mod_tree); + +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + mod->data_layout.mtn.mod = mod; + __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree); +#endif } void mod_tree_remove_init(struct module *mod) @@ -95,6 +100,9 @@ void mod_tree_remove(struct module *mod) { __mod_tree_remove(&mod->core_layout.mtn, &mod_tree); mod_tree_remove_init(mod); +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC + __mod_tree_remove(&mod->data_layout.mtn, &mod_data_tree); +#endif } struct module *mod_find(unsigned long addr, struct mod_tree_root *tree) -- cgit From baa914cd81f51f4e4f3bae5bb59764b32ad8c353 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 5 Apr 2022 16:22:20 +0900 Subject: firewire: add kernel API to access CYCLE_TIME register The 1394 OHCI specification defines the Isochronous Cycle Timer Register to expose the value of the CYCLE_TIME register that IEEE 1394 defines for the CSR architecture of ISO/IEC 13213. A unit driver can calculate packet time by computing with the value of CYCLE_TIME and the timeStamp field in the descriptor of each isochronous and asynchronous context. The resolution of CYCLE_TIME is 24.576 MHz, while that of timeStamp is 8,000 Hz. The current implementation of the Linux FireWire subsystem allows the driver to get the value of the CYCLE_TIME CSR register by the transaction service. The transaction service has overhead in regard to access to the MMIO register. This commit adds a kernel API for unit drivers to access the register directly. Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220405072221.226217-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-card.c | 28 ++++++++++++++++++++++++++++ drivers/firewire/core-cdev.c | 6 ++++-- include/linux/firewire.h | 2 ++ 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index d994da6cf465..cd09de61bc4f 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -702,3 +702,31 @@ void fw_core_remove_card(struct fw_card *card) WARN_ON(!list_empty(&card->transaction_list)); } EXPORT_SYMBOL(fw_core_remove_card); + +/** + * fw_card_read_cycle_time() - read from the Isochronous Cycle Timer Register of 1394 OHCI in the + * MMIO region of the controller card. + * @card: The instance of card for the 1394 OHCI controller. + * @cycle_time: Pointer used to store the value of cycle time for the read operation. + * + * Read the value of the Isochronous Cycle Timer Register of 1394 OHCI in the MMIO region of the + * given controller card. This function accesses the region without any lock primitives or IRQ mask. 
 + * On success, the content of the @cycle_time argument is aligned to host endianness and
 + * formatted like the CYCLE_TIME CSR register of the IEEE 1394 standard.
 + *
 + * Context: Any context.
 + * Return:
 + * * 0 - Read successfully.
 + * * -ENODEV - The controller is unavailable due to being removed or unbound.
 + */ +int fw_card_read_cycle_time(struct fw_card *card, u32 *cycle_time) +{ + if (card->driver->read_csr == dummy_read_csr) + return -ENODEV; + + // It's possible to switch to the dummy driver between the check above and the read below. + // This is a best effort to return -ENODEV. + *cycle_time = card->driver->read_csr(card, CSR_CYCLE_TIME); + return 0; +} +EXPORT_SYMBOL_GPL(fw_card_read_cycle_time); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 9f89c17730b1..8e9670036e5c 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1216,7 +1216,9 @@ static int ioctl_get_cycle_timer2(struct client *client, union ioctl_arg *arg) local_irq_disable(); - cycle_time = card->driver->read_csr(card, CSR_CYCLE_TIME); + ret = fw_card_read_cycle_time(card, &cycle_time); + if (ret < 0) + goto end; switch (a->clk_id) { case CLOCK_REALTIME: ktime_get_real_ts64(&ts); break; @@ -1225,7 +1227,7 @@ static int ioctl_get_cycle_timer2(struct client *client, union ioctl_arg *arg) default: ret = -EINVAL; } - +end: local_irq_enable(); a->tv_sec = ts.tv_sec; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 07967a450eaa..2f467c52bdec 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -150,6 +150,8 @@ static inline void fw_card_put(struct fw_card *card) kref_put(&card->kref, fw_card_release); } +int fw_card_read_cycle_time(struct fw_card *card, u32 *cycle_time); + struct fw_attribute_group { struct attribute_group *groups[2]; struct attribute_group group; -- cgit From b2405aa948b95afc5246fa56fc05c3512cd6185c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 5 Apr 2022 16:22:21 +0900 Subject: firewire: add kernel API to access packet structure in request structure for AR context In the 1394 OHCI specification, the descriptor of an Asynchronous Receive DMA context has a timeStamp field in its trailer quadlet. The field is written by the host controller with the time, in isochronous cycle time, at which the asynchronous request subaction was received. In the Linux FireWire subsystem, the value of the field is stored in the fw_packet structure and copied into the fw_request structure as part of it. The fw_request structure is hidden from unit drivers and passed as an opaque pointer when calling a registered handler, which is inconvenient for a unit driver that needs the timestamp of the packet. This commit adds a kernel API to pick up the timestamp from the opaque pointer to the fw_request structure. 
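As a rough usage sketch (not part of this series), a unit driver's address handler could combine the helper added below, fw_request_get_timestamp(), with fw_card_read_cycle_time() from the previous patch; the handler name and the delay estimate are made up for illustration:

	static void handle_request(struct fw_card *card, struct fw_request *request,
				   int tcode, int destination, int source,
				   int generation, unsigned long long offset,
				   void *payload, size_t length, void *callback_data)
	{
		/* Low order 3 bits of the second field, full 13 bits of cycle count. */
		u32 tstamp = fw_request_get_timestamp(request);
		u32 cycle_time;

		/* Compare against the current CYCLE_TIME to estimate queueing delay. */
		if (fw_card_read_cycle_time(card, &cycle_time) >= 0) {
			/* ... use tstamp and the matching bits of cycle_time ... */
		}

		fw_send_response(card, request, RCODE_COMPLETE);
	}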
Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220405072221.226217-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- drivers/firewire/core-transaction.c | 18 ++++++++++++++++++ include/linux/firewire.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index ac487c96bb71..e12a0a4c33f7 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -619,6 +619,7 @@ struct fw_request { struct fw_packet response; u32 request_header[4]; int ack; + u32 timestamp; u32 length; u32 data[]; }; @@ -788,6 +789,7 @@ static struct fw_request *allocate_request(struct fw_card *card, request->response.ack = 0; request->response.callback = free_response_callback; request->ack = p->ack; + request->timestamp = p->timestamp; request->length = length; if (data) memcpy(request->data, data, length); @@ -832,6 +834,22 @@ int fw_get_request_speed(struct fw_request *request) } EXPORT_SYMBOL(fw_get_request_speed); +/** + * fw_request_get_timestamp() - Get timestamp of the request. + * @request: The opaque pointer to request structure. + * + * Get the timestamp at which the 1394 OHCI controller received the asynchronous request subaction. + * The timestamp consists of the low order 3 bits of the second field and the full 13 bits of the + * cycle count field of the isochronous cycle time register. + * + * Returns: timestamp of the request. + */ +u32 fw_request_get_timestamp(const struct fw_request *request) +{ + return request->timestamp; +} +EXPORT_SYMBOL_GPL(fw_request_get_timestamp); + static void handle_exclusive_region_request(struct fw_card *card, struct fw_packet *p, struct fw_request *request, diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 2f467c52bdec..980019053e54 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -354,6 +354,7 @@ void fw_core_remove_address_handler(struct fw_address_handler *handler); void fw_send_response(struct fw_card *card, struct fw_request *request, int rcode); int fw_get_request_speed(struct fw_request *request); +u32 fw_request_get_timestamp(const struct fw_request *request); void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, unsigned long long offset, void *payload, size_t length, -- cgit From 0b5c21bbc01e92745ca1ca4f6fd87d878fa3ea5e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 Apr 2022 11:38:47 +0200 Subject: net: ensure net_todo_list is processed quickly In [1], Will raised a potential issue that the cfg80211 code, which does (from a locking perspective) rtnl_lock() wiphy_lock() rtnl_unlock() might be susceptible to ABBA deadlocks, because rtnl_unlock() calls netdev_run_todo(), which might end up calling rtnl_lock() again, which could then deadlock (see the comment in the code added here for the scenario). Some back and forth and thinking ensued, but clearly this can't happen if the net_todo_list is empty at the rtnl_unlock() here. Clearly, the code here cannot actually put an entry on it, and all other users of rtnl_unlock() will empty it since that will always go through netdev_run_todo(), emptying the list. So the only other way to get there would be to add to the list and then unlock the RTNL without going through rtnl_unlock(), which is only possible through __rtnl_unlock(). However, this isn't exported and is used in only a few places, and none of them seem to be able to unregister before using it. 
Therefore, add a WARN_ON() in the code to ensure this invariant won't be broken, so that the cfg80211 (or any similar) code stays safe. [1] https://lore.kernel.org/r/Yjzpo3TfZxtKPMAG@google.com Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20220404113847.0ee02e4a70da.Ic73d206e217db20fd22dcec14fe5442ca732804b@changeid Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 ++- net/core/dev.c | 2 +- net/core/rtnetlink.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59e27a2b7bf0..b6a1e7f643da 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3894,7 +3894,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; extern unsigned int netdev_budget_usecs; -/* Called by rtnetlink.c:rtnl_unlock() */ +/* Used by rtnetlink.c:__rtnl_unlock()/rtnl_unlock() */ +extern struct list_head net_todo_list; void netdev_run_todo(void); static inline void __dev_put(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 8c6c08446556..2ec17358d7b4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9431,7 +9431,7 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static LIST_HEAD(net_todo_list); +LIST_HEAD(net_todo_list); DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 159c9c61e6af..0e4502d641eb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -95,6 +95,39 @@ void __rtnl_unlock(void) defer_kfree_skb_list = NULL; + /* Ensure that we didn't actually add any TODO item when __rtnl_unlock() + * is used. In some places, e.g. in cfg80211, we have code that will do + * something like + * rtnl_lock() + * wiphy_lock() + * ... + * rtnl_unlock() + * + * and because netdev_run_todo() acquires the RTNL for items on the list + * we could cause a situation such as this: + * Thread 1 Thread 2 + * rtnl_lock() + * unregister_netdevice() + * __rtnl_unlock() + * rtnl_lock() + * wiphy_lock() + * rtnl_unlock() + * netdev_run_todo() + * __rtnl_unlock() + * + * // list not empty now + * // because of thread 2 + * rtnl_lock() + * while (!list_empty(...)) + * rtnl_lock() + * wiphy_lock() + * **** DEADLOCK **** + * + * However, usage of __rtnl_unlock() is rare, and so we can ensure that + * it's not used in cases where something is added to do the list. + */ + WARN_ON(!list_empty(&net_todo_list)); + mutex_unlock(&rtnl_mutex); while (head) { -- cgit From 40379a0084c2f65eb62c102f5bbf5cdc14a50410 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 4 Apr 2022 15:08:15 +0300 Subject: net/mlx5_fpga: Drop INNOVA TLS support Mellanox INNOVA TLS cards are EOL in May, 2018 [1]. As such, the code is unmaintained, untested and not in-use by any upstream/distro oriented customers. In order to reduce code complexity, drop the kernel code. 
[1] https://network.nvidia.com/related-docs/eol/LCR-000286.pdf Link: https://lore.kernel.org/r/b88add368def721ea9d054cb69def72d9e3f67aa.1649073691.git.leonro@nvidia.com Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 14 - drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 - .../net/ethernet/mellanox/mlx5/core/accel/tls.c | 47 -- .../net/ethernet/mellanox/mlx5/core/accel/tls.h | 56 -- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - .../net/ethernet/mellanox/mlx5/core/en_accel/tls.c | 180 +----- .../net/ethernet/mellanox/mlx5/core/en_accel/tls.h | 48 +- .../mellanox/mlx5/core/en_accel/tls_rxtx.c | 324 +---------- .../mellanox/mlx5/core/en_accel/tls_rxtx.h | 8 +- .../mellanox/mlx5/core/en_accel/tls_stats.c | 17 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 - .../net/ethernet/mellanox/mlx5/core/fpga/core.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 622 --------------------- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h | 74 --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 - include/linux/mlx5/mlx5_ifc_fpga.h | 63 --- 16 files changed, 10 insertions(+), 1459 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 4ba1a78c6515..21df10bb14c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -177,20 +177,6 @@ config MLX5_EN_IPSEC Note: Support for hardware with this capability needs to be selected for this option to become available. -config MLX5_FPGA_TLS - bool "Mellanox Technologies TLS Innova support" - depends on TLS_DEVICE - depends on TLS=y || MLX5_CORE=m - depends on MLX5_CORE_EN - depends on MLX5_FPGA - select MLX5_EN_TLS - help - Build TLS support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers. 
- config MLX5_TLS bool "Mellanox Technologies TLS Connect-X support" depends on TLS_DEVICE diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4bc666714a35..33525f7d0aa0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -90,7 +90,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c index 6c2b86a26863..fe82c4140d85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -37,53 +37,6 @@ #include "mlx5_core.h" #include "lib/mlx5.h" -#ifdef CONFIG_MLX5_FPGA_TLS -#include "fpga/tls.h" - -int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) -{ - return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, - start_offload_tcp_sn, p_swid, - direction_sx); -} - -void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx) -{ - mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx); -} - -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) -{ - return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn); -} - -bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_is_tls_device(mdev) || - mlx5_accel_is_ktls_device(mdev); -} - -u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_tls_device_caps(mdev); -} - -int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_tls_init(mdev); -} - -void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) -{ - mlx5_fpga_tls_cleanup(mdev); -} -#endif - #ifdef CONFIG_MLX5_TLS int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, struct tls_crypto_info *crypto_info, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index fd874f0c380a..6f92ebe3cc82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -97,60 +97,4 @@ static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, struct tls_crypto_info *crypto_info) { return false; } #endif - -enum { - MLX5_ACCEL_TLS_TX = BIT(0), - MLX5_ACCEL_TLS_RX = BIT(1), - MLX5_ACCEL_TLS_V12 = BIT(2), - MLX5_ACCEL_TLS_V13 = BIT(3), - MLX5_ACCEL_TLS_LRO = BIT(4), - MLX5_ACCEL_TLS_IPV6 = BIT(5), - MLX5_ACCEL_TLS_AES_GCM128 = BIT(30), - MLX5_ACCEL_TLS_AES_GCM256 = BIT(31), -}; - -struct mlx5_ifc_tls_flow_bits { - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 reserved_at_2[0x1e]; -}; - -#ifdef CONFIG_MLX5_FPGA_TLS -int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx); -void 
mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx); -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn); -bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev); -u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev); -int mlx5_accel_tls_init(struct mlx5_core_dev *mdev); -void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev); - -#else - -static inline int -mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) { return -ENOTSUPP; } -static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx) { } -static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return mlx5_accel_is_ktls_device(mdev); -} -static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } -static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } -static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } -#endif - #endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8653ac0fd865..50818081bdc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -354,7 +354,6 @@ enum { MLX5E_RQ_STATE_AM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ - MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index b8fc863aa68d..0c6e165c154e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -36,188 +36,12 @@ #include "en_accel/tls.h" #include "accel/tls.h" -static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - MLX5_SET(tls_flow, flow, ipv6, 0); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); -} - -#if IS_ENABLED(CONFIG_IPV6) -static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - MLX5_SET(tls_flow, flow, ipv6, 1); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); -} -#endif - -static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport, - MLX5_FLD_SZ_BYTES(tls_flow, src_port)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport, - MLX5_FLD_SZ_BYTES(tls_flow, dst_port)); -} - -static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps) -{ - switch (sk->sk_family) { - case 
AF_INET: - mlx5e_tls_set_ipv4_flow(flow, sk); - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - if (!sk->sk_ipv6only && - ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { - mlx5e_tls_set_ipv4_flow(flow, sk); - break; - } - if (!(caps & MLX5_ACCEL_TLS_IPV6)) - goto error_out; - - mlx5e_tls_set_ipv6_flow(flow, sk); - break; -#endif - default: - goto error_out; - } - - mlx5e_tls_set_flow_tcp_ports(flow, sk); - return 0; -error_out: - return -EINVAL; -} - -static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk, - enum tls_offload_ctx_dir direction, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct mlx5_core_dev *mdev = priv->mdev; - u32 caps = mlx5_accel_tls_device_caps(mdev); - int ret = -ENOMEM; - void *flow; - u32 swid; - - flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL); - if (!flow) - return ret; - - ret = mlx5e_tls_set_flow(flow, sk, caps); - if (ret) - goto free_flow; - - ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info, - start_offload_tcp_sn, &swid, - direction == TLS_OFFLOAD_CTX_DIR_TX); - if (ret < 0) - goto free_flow; - - if (direction == TLS_OFFLOAD_CTX_DIR_TX) { - struct mlx5e_tls_offload_context_tx *tx_ctx = - mlx5e_get_tls_tx_context(tls_ctx); - - tx_ctx->swid = htonl(swid); - tx_ctx->expected_seq = start_offload_tcp_sn; - } else { - struct mlx5e_tls_offload_context_rx *rx_ctx = - mlx5e_get_tls_rx_context(tls_ctx); - - rx_ctx->handle = htonl(swid); - } - - return 0; -free_flow: - kfree(flow); - return ret; -} - -static void mlx5e_tls_del(struct net_device *netdev, - struct tls_context *tls_ctx, - enum tls_offload_ctx_dir direction) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - unsigned int handle; - - handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ? 
- mlx5e_get_tls_tx_context(tls_ctx)->swid : - mlx5e_get_tls_rx_context(tls_ctx)->handle); - - mlx5_accel_tls_del_flow(priv->mdev, handle, - direction == TLS_OFFLOAD_CTX_DIR_TX); -} - -static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, - u32 seq, u8 *rcd_sn_data, - enum tls_offload_ctx_dir direction) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_tls_offload_context_rx *rx_ctx; - __be64 rcd_sn = *(__be64 *)rcd_sn_data; - - if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) - return -EINVAL; - rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); - - netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, - be64_to_cpu(rcd_sn)); - mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); - atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); - - return 0; -} - -static const struct tlsdev_ops mlx5e_tls_ops = { - .tls_dev_add = mlx5e_tls_add, - .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync = mlx5e_tls_resync, -}; - void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { - struct net_device *netdev = priv->netdev; - u32 caps; - - if (mlx5e_accel_is_ktls_device(priv->mdev)) { - mlx5e_ktls_build_netdev(priv); + if (!mlx5e_accel_is_ktls_device(priv->mdev)) return; - } - - /* FPGA */ - if (!mlx5e_accel_is_tls_device(priv->mdev)) - return; - - caps = mlx5_accel_tls_device_caps(priv->mdev); - if (caps & MLX5_ACCEL_TLS_TX) { - netdev->features |= NETIF_F_HW_TLS_TX; - netdev->hw_features |= NETIF_F_HW_TLS_TX; - } - - if (caps & MLX5_ACCEL_TLS_RX) { - netdev->features |= NETIF_F_HW_TLS_RX; - netdev->hw_features |= NETIF_F_HW_TLS_RX; - } - - if (!(caps & MLX5_ACCEL_TLS_LRO)) { - netdev->features &= ~NETIF_F_LRO; - netdev->hw_features &= ~NETIF_F_LRO; - } - netdev->tlsdev_ops = &mlx5e_tls_ops; + mlx5e_ktls_build_netdev(priv); } int mlx5e_tls_init(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 62ecf14bf86a..fc13b2ebe6b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -43,16 +43,8 @@ struct mlx5e_tls_sw_stats { atomic64_t tx_tls_ctx; atomic64_t tx_tls_del; - atomic64_t tx_tls_drop_metadata; - atomic64_t tx_tls_drop_resync_alloc; - atomic64_t tx_tls_drop_no_sync_data; - atomic64_t tx_tls_drop_bypass_required; atomic64_t rx_tls_ctx; atomic64_t rx_tls_del; - atomic64_t rx_tls_drop_resync_request; - atomic64_t rx_tls_resync_request; - atomic64_t rx_tls_resync_reply; - atomic64_t rx_tls_auth_fail; }; struct mlx5e_tls { @@ -60,42 +52,6 @@ struct mlx5e_tls { struct workqueue_struct *rx_wq; }; -struct mlx5e_tls_offload_context_tx { - struct tls_offload_context_tx base; - u32 expected_seq; - __be32 swid; -}; - -static inline struct mlx5e_tls_offload_context_tx * -mlx5e_get_tls_tx_context(struct tls_context *tls_ctx) -{ - BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); - return container_of(tls_offload_ctx_tx(tls_ctx), - struct mlx5e_tls_offload_context_tx, - base); -} - -struct mlx5e_tls_offload_context_rx { - struct tls_offload_context_rx base; - __be32 handle; -}; - -static inline struct mlx5e_tls_offload_context_rx * -mlx5e_get_tls_rx_context(struct tls_context *tls_ctx) -{ - BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); - return container_of(tls_offload_ctx_rx(tls_ctx), - struct mlx5e_tls_offload_context_rx, - base); -} - -static inline bool 
mlx5e_is_tls_on(struct mlx5e_priv *priv) -{ - return priv->tls; -} - void mlx5e_tls_build_netdev(struct mlx5e_priv *priv); int mlx5e_tls_init(struct mlx5e_priv *priv); void mlx5e_tls_cleanup(struct mlx5e_priv *priv); @@ -106,8 +62,7 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) { - return !is_kdump_kernel() && - mlx5_accel_is_tls_device(mdev); + return !is_kdump_kernel() && mlx5_accel_is_ktls_device(mdev); } #else @@ -119,7 +74,6 @@ static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) mlx5e_ktls_build_netdev(priv); } -static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index a05580cea481..39412fafa860 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -38,230 +38,11 @@ #include #include -#define SYNDROM_DECRYPTED 0x30 -#define SYNDROM_RESYNC_REQUEST 0x31 -#define SYNDROM_AUTH_FAILED 0x32 - -#define SYNDROME_OFFLOAD_REQUIRED 32 -#define SYNDROME_SYNC 33 - -struct sync_info { - u64 rcd_sn; - s32 sync_len; - int nr_frags; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -struct recv_metadata_content { - u8 syndrome; - u8 reserved; - __be32 sync_seq; -} __packed; - -struct send_metadata_content { - /* One byte of syndrome followed by 3 bytes of swid */ - __be32 syndrome_swid; - __be16 first_seq; -} __packed; - -struct mlx5e_tls_metadata { - union { - /* from fpga to host */ - struct recv_metadata_content recv; - /* from host to fpga */ - struct send_metadata_content send; - unsigned char raw[6]; - } __packed content; - /* packet type ID field */ - __be16 ethertype; -} __packed; - -static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid) -{ - struct mlx5e_tls_metadata *pet; - struct ethhdr *eth; - - if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata))) - return -ENOMEM; - - eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata)); - skb->mac_header -= sizeof(struct mlx5e_tls_metadata); - pet = (struct mlx5e_tls_metadata *)(eth + 1); - - memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata), - 2 * ETH_ALEN); - - eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); - pet->content.send.syndrome_swid = - htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid; - - return 0; -} - -static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context, - u32 tcp_seq, struct sync_info *info) -{ - int remaining, i = 0, ret = -EINVAL; - struct tls_record_info *record; - unsigned long flags; - s32 sync_size; - - spin_lock_irqsave(&context->base.lock, flags); - record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn); - - if (unlikely(!record)) - goto out; - - sync_size = tcp_seq - tls_record_start_seq(record); - info->sync_len = sync_size; - if (unlikely(sync_size < 0)) { - if (tls_record_is_start_marker(record)) - goto done; - - goto out; - } - - remaining = sync_size; - while (remaining > 0) { - info->frags[i] = record->frags[i]; - __skb_frag_ref(&info->frags[i]); - remaining -= skb_frag_size(&info->frags[i]); - - if (remaining < 0) - skb_frag_size_add(&info->frags[i], remaining); - - i++; - } - 
info->nr_frags = i; -done: - ret = 0; -out: - spin_unlock_irqrestore(&context->base.lock, flags); - return ret; -} - -static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb, - struct sk_buff *nskb, u32 tcp_seq, - int headln, __be64 rcd_sn) -{ - struct mlx5e_tls_metadata *pet; - u8 syndrome = SYNDROME_SYNC; - struct iphdr *iph; - struct tcphdr *th; - int data_len, mss; - - nskb->dev = skb->dev; - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb_network_offset(skb)); - skb_set_transport_header(nskb, skb_transport_offset(skb)); - memcpy(nskb->data, skb->data, headln); - memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn)); - - iph = ip_hdr(nskb); - iph->tot_len = htons(nskb->len - skb_network_offset(nskb)); - th = tcp_hdr(nskb); - data_len = nskb->len - headln; - tcp_seq -= data_len; - th->seq = htonl(tcp_seq); - - mss = nskb->dev->mtu - (headln - skb_network_offset(nskb)); - skb_shinfo(nskb)->gso_size = 0; - if (data_len > mss) { - skb_shinfo(nskb)->gso_size = mss; - skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss); - } - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; - - pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr)); - memcpy(pet, &syndrome, sizeof(syndrome)); - pet->content.send.first_seq = htons(tcp_seq); - - /* MLX5 devices don't care about the checksum partial start, offset - * and pseudo header - */ - nskb->ip_summed = CHECKSUM_PARTIAL; - - nskb->queue_mapping = skb->queue_mapping; -} - -static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, - struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tls *tls) -{ - u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); - struct sync_info info; - struct sk_buff *nskb; - int linear_len = 0; - int headln; - int i; - - sq->stats->tls_ooo++; - - if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) { - /* We might get here if a retransmission reaches the driver - * after the relevant record is acked. 
- * It should be safe to drop the packet in this case - */ - atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data); - goto err_out; - } - - if (unlikely(info.sync_len < 0)) { - u32 payload; - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - payload = skb->len - headln; - if (likely(payload <= -info.sync_len)) - /* SKB payload doesn't require offload - */ - return true; - - atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required); - goto err_out; - } - - if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { - atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata); - goto err_out; - } - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - linear_len += headln + sizeof(info.rcd_sn); - nskb = alloc_skb(linear_len, GFP_ATOMIC); - if (unlikely(!nskb)) { - atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc); - goto err_out; - } - - context->expected_seq = tcp_seq + skb->len - headln; - skb_put(nskb, linear_len); - for (i = 0; i < info.nr_frags; i++) - skb_shinfo(nskb)->frags[i] = info.frags[i]; - - skb_shinfo(nskb)->nr_frags = info.nr_frags; - nskb->data_len = info.sync_len; - nskb->len += info.sync_len; - sq->stats->tls_resync_bytes += nskb->len; - mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, - cpu_to_be64(info.rcd_sn)); - mlx5e_sq_xmit_simple(sq, nskb, true); - - return true; - -err_out: - dev_kfree_skb_any(skb); - return false; -} - bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state) { - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_tls_offload_context_tx *context; struct tls_context *tls_ctx; - u32 expected_seq; int datalen; - u32 skb_seq; datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); if (!datalen) @@ -273,118 +54,17 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) goto err_out; - if (mlx5e_accel_is_ktls_tx(sq->mdev)) - return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state); - - /* FPGA */ - skb_seq = ntohl(tcp_hdr(skb)->seq); - context = mlx5e_get_tls_tx_context(tls_ctx); - expected_seq = context->expected_seq; - - if (unlikely(expected_seq != skb_seq)) - return mlx5e_tls_handle_ooo(context, sq, skb, priv->tls); - - if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { - atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata); - dev_kfree_skb_any(skb); - return false; - } - - context->expected_seq = skb_seq + datalen; - return true; + return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state); err_out: dev_kfree_skb_any(skb); return false; } -static int tls_update_resync_sn(struct net_device *netdev, - struct sk_buff *skb, - struct mlx5e_tls_metadata *mdata) -{ - struct sock *sk = NULL; - struct iphdr *iph; - struct tcphdr *th; - __be32 seq; - - if (mdata->ethertype != htons(ETH_P_IP)) - return -EINVAL; - - iph = (struct iphdr *)(mdata + 1); - - th = ((void *)iph) + iph->ihl * 4; - - if (iph->version == 4) { - sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); -#if IS_ENABLED(CONFIG_IPV6) - } else { - struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; - - sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, - &ipv6h->saddr, th->source, - &ipv6h->daddr, ntohs(th->dest), - netdev->ifindex, 0); -#endif - } - if (!sk || sk->sk_state == TCP_TIME_WAIT) { - struct mlx5e_priv *priv = netdev_priv(netdev); - - 
atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request); - goto out; - } - - skb->sk = sk; - skb->destructor = sock_edemux; - - memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq)); - tls_offload_rx_resync_request(sk, seq); -out: - return 0; -} - -/* FPGA tls rx handler */ -void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, - u32 *cqe_bcnt) -{ - struct mlx5e_tls_metadata *mdata; - struct mlx5e_priv *priv; - - /* Use the metadata */ - mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN); - switch (mdata->content.recv.syndrome) { - case SYNDROM_DECRYPTED: - skb->decrypted = 1; - break; - case SYNDROM_RESYNC_REQUEST: - tls_update_resync_sn(rq->netdev, skb, mdata); - priv = netdev_priv(rq->netdev); - atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request); - break; - case SYNDROM_AUTH_FAILED: - /* Authentication failure will be observed and verified by kTLS */ - priv = netdev_priv(rq->netdev); - atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail); - break; - default: - /* Bypass the metadata header to others */ - return; - } - - remove_metadata_hdr(skb); - *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; -} - u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { if (!mlx5e_accel_is_tls_device(mdev)) return 0; - if (mlx5e_accel_is_ktls_device(mdev)) - return mlx5e_ktls_get_stop_room(mdev, params); - - /* FPGA */ - /* Resync SKB. */ - return mlx5e_stop_room_for_max_wqe(mdev); + return mlx5e_ktls_get_stop_room(mdev, params); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 0ca0a023fb8d..168acceb0d28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -60,18 +60,12 @@ mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); } -void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, - u32 *cqe_bcnt); - static inline void mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) { if (unlikely(get_cqe_tls_offload(cqe))) /* cqe bit indicates a TLS device */ - return mlx5e_ktls_handle_rx_skb(rq, skb, cqe, cqe_bcnt); - - if (unlikely(test_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state) && is_metadata_hdr_valid(skb))) - return mlx5e_tls_handle_rx_skb_metadata(rq, skb, cqe_bcnt); + mlx5e_ktls_handle_rx_skb(rq, skb, cqe, cqe_bcnt); } #else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c index 56e7b2aee85f..c25e8742bbbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -38,13 +38,6 @@ #include "fpga/sdk.h" #include "en_accel/tls.h" -static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, -}; - static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) }, @@ -59,18 +52,16 @@ static const struct counter_desc 
*get_tls_atomic_stats(struct mlx5e_priv *priv) { if (!priv->tls) return NULL; - if (mlx5e_accel_is_ktls_device(priv->mdev)) - return mlx5e_ktls_sw_stats_desc; - return mlx5e_tls_sw_stats_desc; + + return mlx5e_ktls_sw_stats_desc; } int mlx5e_tls_get_count(struct mlx5e_priv *priv) { if (!priv->tls) return 0; - if (mlx5e_accel_is_ktls_device(priv->mdev)) - return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); - return ARRAY_SIZE(mlx5e_tls_sw_stats_desc); + + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); } int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2f1dedc721d1..7423c6830c4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1036,9 +1036,6 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, if (err) goto err_destroy_rq; - if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */ - if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 2a984e82ae16..e9e72d260681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -59,7 +59,6 @@ struct mlx5_fpga_device { } conn_res; struct mlx5_fpga_ipsec *ipsec; - struct mlx5_fpga_tls *tls; }; #define mlx5_fpga_dbg(__adev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c deleted file mode 100644 index 29b7339ebfa3..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ /dev/null @@ -1,622 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#include -#include "fpga/tls.h" -#include "fpga/cmd.h" -#include "fpga/sdk.h" -#include "fpga/core.h" -#include "accel/tls.h" - -struct mlx5_fpga_tls_command_context; - -typedef void (*mlx5_fpga_tls_command_complete) - (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *ctx, - struct mlx5_fpga_dma_buf *resp); - -struct mlx5_fpga_tls_command_context { - struct list_head list; - /* There is no guarantee on the order between the TX completion - * and the command response. - * The TX completion is going to touch cmd->buf even in - * the case of successful transmission. - * So instead of requiring separate allocations for cmd - * and cmd->buf we've decided to use a reference counter - */ - refcount_t ref; - struct mlx5_fpga_dma_buf buf; - mlx5_fpga_tls_command_complete complete; -}; - -static void -mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx) -{ - if (refcount_dec_and_test(&ctx->ref)) - kfree(ctx); -} - -static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *resp) -{ - struct mlx5_fpga_conn *conn = fdev->tls->conn; - struct mlx5_fpga_tls_command_context *ctx; - struct mlx5_fpga_tls *tls = fdev->tls; - unsigned long flags; - - spin_lock_irqsave(&tls->pending_cmds_lock, flags); - ctx = list_first_entry(&tls->pending_cmds, - struct mlx5_fpga_tls_command_context, list); - list_del(&ctx->list); - spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); - ctx->complete(conn, fdev, ctx, resp); -} - -static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, - u8 status) -{ - struct mlx5_fpga_tls_command_context *ctx = - container_of(buf, struct mlx5_fpga_tls_command_context, buf); - - mlx5_fpga_tls_put_command_ctx(ctx); - - if (unlikely(status)) - mlx5_fpga_tls_cmd_complete(fdev, NULL); -} - -static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - mlx5_fpga_tls_command_complete complete) -{ - struct mlx5_fpga_tls *tls = fdev->tls; - unsigned long flags; - int ret; - - refcount_set(&cmd->ref, 2); - cmd->complete = complete; - cmd->buf.complete = mlx5_fpga_cmd_send_complete; - - spin_lock_irqsave(&tls->pending_cmds_lock, flags); - /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock - * to make sure commands are inserted to the tls->pending_cmds list - * and the command QP in the same order. - */ - ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf); - if (likely(!ret)) - list_add_tail(&cmd->list, &tls->pending_cmds); - else - complete(tls->conn, fdev, cmd, NULL); - spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); -} - -/* Start of context identifiers range (inclusive) */ -#define SWID_START 0 -/* End of context identifiers range (exclusive) */ -#define SWID_END BIT(24) - -static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, - void *ptr) -{ - unsigned long flags; - int ret; - - /* TLS metadata format is 1 byte for syndrome followed - * by 3 bytes of swid (software ID) - * swid must not exceed 3 bytes. 
- * See tls_rxtx.c:insert_pet() for details - */ - BUILD_BUG_ON((SWID_END - 1) & 0xFF000000); - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(idr_spinlock, flags); - ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC); - spin_unlock_irqrestore(idr_spinlock, flags); - idr_preload_end(); - - return ret; -} - -static void *mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) -{ - unsigned long flags; - void *ptr; - - spin_lock_irqsave(idr_spinlock, flags); - ptr = idr_remove(idr, swid); - spin_unlock_irqrestore(idr_spinlock, flags); - return ptr; -} - -static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, u8 status) -{ - kfree(buf); -} - -static void -mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - struct mlx5_fpga_dma_buf *resp) -{ - if (resp) { - u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); - - if (syndrome) - mlx5_fpga_err(fdev, - "Teardown stream failed with syndrome = %d", - syndrome); - } - mlx5_fpga_tls_put_command_ctx(cmd); -} - -static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd) -{ - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow, - MLX5_BYTE_OFF(tls_flow, ipv6)); - - MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6)); - MLX5_SET(tls_cmd, cmd, direction_sx, - MLX5_GET(tls_flow, flow, direction_sx)); -} - -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) -{ - struct mlx5_fpga_dma_buf *buf; - int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE; - void *flow; - void *cmd; - int ret; - - buf = kzalloc(size, GFP_ATOMIC); - if (!buf) - return -ENOMEM; - - cmd = (buf + 1); - - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - if (unlikely(!flow)) { - rcu_read_unlock(); - WARN_ONCE(1, "Received NULL pointer for handle\n"); - kfree(buf); - return -EINVAL; - } - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - rcu_read_unlock(); - - MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); - MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); - MLX5_SET(tls_cmd, cmd, tcp_sn, seq); - MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX); - - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - buf->complete = mlx_tls_kfree_complete; - - ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); - if (ret < 0) - kfree(buf); - - return ret; -} - -static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, - void *flow, u32 swid, gfp_t flags) -{ - struct mlx5_fpga_tls_command_context *ctx; - struct mlx5_fpga_dma_buf *buf; - void *cmd; - - ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags); - if (!ctx) - return; - - buf = &ctx->buf; - cmd = (ctx + 1); - MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); - MLX5_SET(tls_cmd, cmd, swid, swid); - - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - kfree(flow); - - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - - mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, - mlx5_fpga_tls_teardown_completion); -} - -void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - gfp_t flags, bool direction_sx) -{ - struct mlx5_fpga_tls *tls = mdev->fpga->tls; - void *flow; - - if (direction_sx) - flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, - swid); - else - flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, - swid); - - if (!flow) { - mlx5_fpga_err(mdev->fpga, "No 
flow information for swid %u\n", - swid); - return; - } - - synchronize_rcu(); /* before kfree(flow) */ - mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); -} - -enum mlx5_fpga_setup_stream_status { - MLX5_FPGA_CMD_PENDING, - MLX5_FPGA_CMD_SEND_FAILED, - MLX5_FPGA_CMD_RESPONSE_RECEIVED, - MLX5_FPGA_CMD_ABANDONED, -}; - -struct mlx5_setup_stream_context { - struct mlx5_fpga_tls_command_context cmd; - atomic_t status; - u32 syndrome; - struct completion comp; -}; - -static void -mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - struct mlx5_fpga_dma_buf *resp) -{ - struct mlx5_setup_stream_context *ctx = - container_of(cmd, struct mlx5_setup_stream_context, cmd); - int status = MLX5_FPGA_CMD_SEND_FAILED; - void *tls_cmd = ctx + 1; - - /* If we failed to send to command resp == NULL */ - if (resp) { - ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); - status = MLX5_FPGA_CMD_RESPONSE_RECEIVED; - } - - status = atomic_xchg_release(&ctx->status, status); - if (likely(status != MLX5_FPGA_CMD_ABANDONED)) { - complete(&ctx->comp); - return; - } - - mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n", - ctx->syndrome); - - if (!ctx->syndrome) { - /* The process was killed while waiting for the context to be - * added, and the add completed successfully. - * We need to destroy the HW context, and we can't can't reuse - * the command context because we might not have received - * the tx completion yet. - */ - mlx5_fpga_tls_del_flow(fdev->mdev, - MLX5_GET(tls_cmd, tls_cmd, swid), - GFP_ATOMIC, - MLX5_GET(tls_cmd, tls_cmd, - direction_sx)); - } - - mlx5_fpga_tls_put_command_ctx(cmd); -} - -static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev, - struct mlx5_setup_stream_context *ctx) -{ - struct mlx5_fpga_dma_buf *buf; - void *cmd = ctx + 1; - int status, ret = 0; - - buf = &ctx->cmd.buf; - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM); - - init_completion(&ctx->comp); - atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING); - ctx->syndrome = -1; - - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, - mlx5_fpga_tls_setup_completion); - wait_for_completion_killable(&ctx->comp); - - status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED); - if (unlikely(status == MLX5_FPGA_CMD_PENDING)) - /* ctx is going to be released in mlx5_fpga_tls_setup_completion */ - return -EINTR; - - if (unlikely(ctx->syndrome)) - ret = -ENOMEM; - - mlx5_fpga_tls_put_command_ctx(&ctx->cmd); - return ret; -} - -static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg, - struct mlx5_fpga_dma_buf *buf) -{ - struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg; - - mlx5_fpga_tls_cmd_complete(fdev, buf); -} - -bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev) -{ - if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) - return false; - - if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0) - return false; - - return true; -} - -static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev, - u32 *p_caps) -{ - int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap); - u32 caps = 0; - void *buf; - - buf = kzalloc(cap_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - err = mlx5_fpga_get_sbu_caps(fdev, 
cap_size, buf); - if (err) - goto out; - - if (MLX5_GET(tls_extended_cap, buf, tx)) - caps |= MLX5_ACCEL_TLS_TX; - if (MLX5_GET(tls_extended_cap, buf, rx)) - caps |= MLX5_ACCEL_TLS_RX; - if (MLX5_GET(tls_extended_cap, buf, tls_v12)) - caps |= MLX5_ACCEL_TLS_V12; - if (MLX5_GET(tls_extended_cap, buf, tls_v13)) - caps |= MLX5_ACCEL_TLS_V13; - if (MLX5_GET(tls_extended_cap, buf, lro)) - caps |= MLX5_ACCEL_TLS_LRO; - if (MLX5_GET(tls_extended_cap, buf, ipv6)) - caps |= MLX5_ACCEL_TLS_IPV6; - - if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128)) - caps |= MLX5_ACCEL_TLS_AES_GCM128; - if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256)) - caps |= MLX5_ACCEL_TLS_AES_GCM256; - - *p_caps = caps; - err = 0; -out: - kfree(buf); - return err; -} - -int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_conn_attr init_attr = {0}; - struct mlx5_fpga_conn *conn; - struct mlx5_fpga_tls *tls; - int err = 0; - - if (!mlx5_fpga_is_tls_device(mdev) || !fdev) - return 0; - - tls = kzalloc(sizeof(*tls), GFP_KERNEL); - if (!tls) - return -ENOMEM; - - err = mlx5_fpga_tls_get_caps(fdev, &tls->caps); - if (err) - goto error; - - if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) { - err = -ENOTSUPP; - goto error; - } - - init_attr.rx_size = SBU_QP_QUEUE_SIZE; - init_attr.tx_size = SBU_QP_QUEUE_SIZE; - init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb; - init_attr.cb_arg = fdev; - conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); - if (IS_ERR(conn)) { - err = PTR_ERR(conn); - mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n", - err); - goto error; - } - - tls->conn = conn; - spin_lock_init(&tls->pending_cmds_lock); - INIT_LIST_HEAD(&tls->pending_cmds); - - idr_init(&tls->tx_idr); - idr_init(&tls->rx_idr); - spin_lock_init(&tls->tx_idr_spinlock); - spin_lock_init(&tls->rx_idr_spinlock); - fdev->tls = tls; - return 0; - -error: - kfree(tls); - return err; -} - -void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!fdev || !fdev->tls) - return; - - mlx5_fpga_sbu_conn_destroy(fdev->tls->conn); - kfree(fdev->tls); - fdev->tls = NULL; -} - -static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd, - struct tls_crypto_info *info, - __be64 *rcd_sn) -{ - struct tls12_crypto_info_aes_gcm_128 *crypto_info = - (struct tls12_crypto_info_aes_gcm_128 *)info; - - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq, - TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv), - crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key), - crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - /* in AES-GCM 128 we need to write the key twice */ - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) + - TLS_CIPHER_AES_GCM_128_KEY_SIZE, - crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128); -} - -static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps, - struct tls_crypto_info *crypto_info) -{ - __be64 rcd_sn; - - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - if (!(caps & MLX5_ACCEL_TLS_AES_GCM128)) - return -EINVAL; - mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 swid, u32 tcp_sn) -{ - u32 caps = 
mlx5_fpga_tls_device_caps(mdev); - struct mlx5_setup_stream_context *ctx; - int ret = -ENOMEM; - size_t cmd_size; - void *cmd; - - cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx); - ctx = kzalloc(cmd_size, GFP_KERNEL); - if (!ctx) - goto out; - - cmd = ctx + 1; - ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info); - if (ret) - goto free_ctx; - - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - - MLX5_SET(tls_cmd, cmd, swid, swid); - MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn); - - return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx); - -free_ctx: - kfree(ctx); -out: - return ret; -} - -int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) -{ - struct mlx5_fpga_tls *tls = mdev->fpga->tls; - int ret = -ENOMEM; - u32 swid; - - if (direction_sx) - ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, flow); - else - ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, flow); - - if (ret < 0) - return ret; - - swid = ret; - MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0); - - ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid, - start_offload_tcp_sn); - if (ret && ret != -EINTR) - goto free_swid; - - *p_swid = swid; - return 0; -free_swid: - if (direction_sx) - mlx5_fpga_tls_release_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, swid); - else - mlx5_fpga_tls_release_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, swid); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h deleted file mode 100644 index 5714cf391d1b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#ifndef __MLX5_FPGA_TLS_H__ -#define __MLX5_FPGA_TLS_H__ - -#include - -#include -#include "fpga/core.h" - -struct mlx5_fpga_tls { - struct list_head pending_cmds; - spinlock_t pending_cmds_lock; /* Protects pending_cmds */ - u32 caps; - struct mlx5_fpga_conn *conn; - - struct idr tx_idr; - struct idr rx_idr; - spinlock_t tx_idr_spinlock; /* protects the IDR */ - spinlock_t rx_idr_spinlock; /* protects the IDR */ -}; - -int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx); - -void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - gfp_t flags, bool direction_sx); - -bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev); -int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev); -void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev); - -static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev) -{ - return mdev->fpga->tls->caps; -} - -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn); - -#endif /* __MLX5_FPGA_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2589e39eb9c7..7f287e300fb4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -64,7 +64,6 @@ #include "fpga/core.h" #include "fpga/ipsec.h" #include "accel/ipsec.h" -#include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" #include "lib/geneve.h" @@ -1185,12 +1184,6 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_accel_ipsec_init(dev); - err = mlx5_accel_tls_init(dev); - if (err) { - mlx5_core_err(dev, "TLS device start failed %d\n", err); - goto err_tls_start; - } - err = mlx5_init_fs(dev); if (err) { mlx5_core_err(dev, "Failed to init flow steering\n"); @@ -1238,8 +1231,6 @@ err_vhca: err_set_hca: mlx5_cleanup_fs(dev); err_fs: - mlx5_accel_tls_cleanup(dev); -err_tls_start: mlx5_accel_ipsec_cleanup(dev); mlx5_fpga_device_stop(dev); err_fpga_start: @@ -1267,7 +1258,6 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); - mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 07d77323f78a..e3d824f6a309 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -54,7 +54,6 @@ enum { enum { MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3, }; struct mlx5_ifc_fpga_shell_caps_bits { @@ -387,27 +386,6 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_tls_extended_cap_bits { - u8 aes_gcm_128[0x1]; - u8 aes_gcm_256[0x1]; - u8 reserved_at_2[0x1e]; - u8 reserved_at_20[0x20]; - u8 context_capacity_total[0x20]; - u8 context_capacity_rx[0x20]; - u8 context_capacity_tx[0x20]; - u8 reserved_at_a0[0x10]; - u8 tls_counter_size[0x10]; - u8 tls_counters_addr_low[0x20]; - u8 tls_counters_addr_high[0x20]; - u8 rx[0x1]; - u8 tx[0x1]; - u8 tls_v12[0x1]; - u8 tls_v13[0x1]; - u8 lro[0x1]; - u8 ipv6[0x1]; - u8 reserved_at_106[0x1a]; -}; - struct mlx5_ifc_ipsec_extended_cap_bits { u8 encapsulation[0x20]; @@ -572,45 +550,4 @@ struct mlx5_ifc_fpga_ipsec_sa { __be16 vid; /* only 12 bits, rest is reserved */ __be16 reserved2; } __packed; - -enum fpga_tls_cmds { - 
CMD_SETUP_STREAM = 0x1001, - CMD_TEARDOWN_STREAM = 0x1002, - CMD_RESYNC_RX = 0x1003, -}; - -#define MLX5_TLS_1_2 (0) - -#define MLX5_TLS_ALG_AES_GCM_128 (0) -#define MLX5_TLS_ALG_AES_GCM_256 (1) - -struct mlx5_ifc_tls_cmd_bits { - u8 command_type[0x20]; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 tls_version[0x2]; - u8 reserved[0x1c]; - u8 swid[0x20]; - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 tls_rcd_sn[0x40]; - u8 tcp_sn[0x20]; - u8 tls_implicit_iv[0x20]; - u8 tls_xor_iv[0x40]; - u8 encryption_key[0x100]; - u8 alg[4]; - u8 reserved2[0x1c]; - u8 reserved3[0x4a0]; -}; - -struct mlx5_ifc_tls_resp_bits { - u8 syndrome[0x20]; - u8 stream_id[0x20]; - u8 reserved[0x40]; -}; - -#define MLX5_TLS_COMMAND_SIZE (0x100) - #endif /* MLX5_IFC_FPGA_H */ -- cgit From 0276bd3a94c072de3f69b5afe6224e488cc76635 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Apr 2022 17:15:16 +0200 Subject: IB/mlx5: Fix undefined behavior due to shift overflowing the constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: drivers/infiniband/hw/mlx5/main.c: In function ‘translate_eth_legacy_proto_oper’: drivers/infiniband/hw/mlx5/main.c:370:2: error: case label does not reduce to an integer constant case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2): ^~~~ See https://lore.kernel.org/r/YkwQ6%2BtIH8GQpuct@zn.tnic for the gory details as to why it triggers with older gccs only. Link: https://lore.kernel.org/all/20220405151517.29753-11-bp@alien8.de Signed-off-by: Borislav Petkov Cc: Leon Romanovsky Cc: Saeed Mahameed Cc: linux-rdma@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 28a928b0684b..e96ee1e348cb 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,7 +141,7 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ MLX5_GET(reg, out, field)) -- cgit From a285909f471d6703a04b2b3942c352e27131c92b Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 6 Apr 2022 15:00:03 +0900 Subject: mm/slub, kunit: Make slub_kunit unaffected by user specified flags slub_kunit does not expect other debugging flags to be set when running tests. When the SLAB_RED_ZONE flag is set globally, the test fails because the flag affects the number of errors reported. To make slub_kunit unaffected by user-specified debugging flags, introduce SLAB_NO_USER_FLAGS to ignore them. With this flag, only flags specified in the code are used and others are ignored.
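As a rough illustration (an editorial sketch, not part of this patch): the intended semantics can be modelled in plain C. resolve_cache_flags() below is a stand-in name for the kmem_cache_flags() hunk in mm/slub.c, slub_debug stands in for the global debug request, and the flag values are copies of the ones in include/linux/slab.h and in this patch.

  #include <stdio.h>

  #define SLAB_RED_ZONE      0x00000400U  /* as if requested via slub_debug= */
  #define SLAB_POISON        0x00000800U
  #define SLAB_NO_USER_FLAGS 0x10000000U  /* new: ignore user specified flags */

  static unsigned int slub_debug = SLAB_RED_ZONE;  /* global user request */

  /* stand-in for kmem_cache_flags(): merge global debug flags unless opted out */
  static unsigned int resolve_cache_flags(unsigned int flags)
  {
          if (flags & SLAB_NO_USER_FLAGS)
                  return flags;
          return flags | slub_debug;
  }

  int main(void)
  {
          /* a slub_kunit test cache ignores the global SLAB_RED_ZONE request... */
          printf("kunit cache:  %#010x\n",
                 resolve_cache_flags(SLAB_POISON | SLAB_NO_USER_FLAGS));
          /* ...while an ordinary cache still inherits it */
          printf("normal cache: %#010x\n",
                 resolve_cache_flags(SLAB_POISON));
          return 0;
  }

With SLAB_RED_ZONE requested globally, only the ordinary cache picks it up, which is why the error counts checked by slub_kunit stay stable.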
Suggested-by: Vlastimil Babka Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/Yk0sY9yoJhFEXWOg@hyeyoo --- include/linux/slab.h | 7 +++++++ lib/slub_kunit.c | 10 +++++----- mm/slab.h | 5 +++-- mm/slub.c | 3 +++ 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 373b3ef99f4e..11ceddcae9f4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -112,6 +112,13 @@ #define SLAB_KASAN 0 #endif +/* + * Ignore user specified debugging flags. + * Intended for caches created for self-tests so they have only flags + * specified in the code and other flags are ignored. + */ +#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) + /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 8662dc6cb509..7a0564d7cb7a 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -12,7 +12,7 @@ static int slab_errors; static void test_clobber_zone(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0, - SLAB_RED_ZONE, NULL); + SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kasan_disable_current(); @@ -30,7 +30,7 @@ static void test_clobber_zone(struct kunit *test) static void test_next_pointer(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0, - SLAB_POISON, NULL); + SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); unsigned long tmp; unsigned long *ptr_addr; @@ -75,7 +75,7 @@ static void test_next_pointer(struct kunit *test) static void test_first_word(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0, - SLAB_POISON, NULL); + SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kmem_cache_free(s, p); @@ -90,7 +90,7 @@ static void test_first_word(struct kunit *test) static void test_clobber_50th_byte(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0, - SLAB_POISON, NULL); + SLAB_POISON|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kmem_cache_free(s, p); @@ -106,7 +106,7 @@ static void test_clobber_50th_byte(struct kunit *test) static void test_clobber_redzone_free(struct kunit *test) { struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0, - SLAB_RED_ZONE, NULL); + SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL); u8 *p = kmem_cache_alloc(s, GFP_KERNEL); kasan_disable_current(); diff --git a/mm/slab.h b/mm/slab.h index fd7ae2024897..f7d018100994 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -331,7 +331,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, SLAB_ACCOUNT) #elif defined(CONFIG_SLUB) #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ - SLAB_TEMPORARY | SLAB_ACCOUNT) + SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS) #else #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE) #endif @@ -350,7 +350,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, SLAB_NOLEAKTRACE | \ SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | \ - SLAB_ACCOUNT) + SLAB_ACCOUNT | \ + SLAB_NO_USER_FLAGS) bool __kmem_cache_empty(struct kmem_cache *); int __kmem_cache_shutdown(struct kmem_cache *); diff --git a/mm/slub.c b/mm/slub.c index 74d92aa4a3a2..4c78f5919356 
100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1584,6 +1584,9 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, slab_flags_t block_flags; slab_flags_t slub_debug_local = slub_debug; + if (flags & SLAB_NO_USER_FLAGS) + return flags; + /* * If the slab cache is for debugging (e.g. kmemleak) then * don't store user (stack trace) information by default, -- cgit From a5f1783be29adae15666fd803efd7d2979130869 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 2 Mar 2022 12:02:22 +0100 Subject: lib/stackdepot: allow requesting early initialization dynamically In a later patch we want to add stackdepot support for object owner tracking in slub caches, which is enabled by the slub_debug boot parameter. This creates a bootstrap problem as some caches are created early in boot when slab_is_available() is false and thus stack_depot_init() tries to use memblock. But, as reported by Hyeonggon Yoo [1], we are already beyond memblock_free_all(). Ideally memblock allocation should fail, yet it succeeds, but later the system crashes, which is a separately handled issue. To resolve this bootstrap issue in a robust way, this patch adds another way to request stack_depot_early_init(), which happens at a well-defined point in time. In addition to build-time CONFIG_STACKDEPOT_ALWAYS_INIT, code that's e.g. processing boot parameters (which happens early enough) can call a new function stack_depot_want_early_init(), which sets a flag that stack_depot_early_init() will check. In this patch we also convert page_owner to this approach. While it doesn't have the same bootstrap issue as slub, it's also a functionality enabled by a boot param and can thus request stack_depot_early_init() with memblock allocation instead of later initialization with kvmalloc(). As suggested by Mike, make stack_depot_early_init() only attempt memblock allocation and stack_depot_init() only attempt kvmalloc(). Also change the latter to kvcalloc(). In both cases we can lose the explicit array zeroing, which the allocations do already. As suggested by Marco, provide empty implementations of the init functions for !CONFIG_STACKDEPOT builds to simplify the callers. [1] https://lore.kernel.org/all/YhnUcqyeMgCrWZbd@ip-172-31-19-208.ap-northeast-1.compute.internal/ Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Suggested-by: Mike Rapoport Suggested-by: Marco Elver Signed-off-by: Vlastimil Babka Reviewed-by: Marco Elver Reviewed-and-tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Mike Rapoport Acked-by: David Rientjes --- include/linux/stackdepot.h | 26 +++++++++++++++--- lib/stackdepot.c | 67 +++++++++++++++++++++++++++++++--------------- mm/page_owner.c | 9 ++++--- 3 files changed, 73 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 17f992fe6355..bc2797955de9 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -20,18 +20,36 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, gfp_t gfp_flags, bool can_alloc); /* - * Every user of stack depot has to call this during its own init when it's - * decided that it will be calling stack_depot_save() later. + * Every user of stack depot has to call stack_depot_init() during its own init + * when it's decided that it will be calling stack_depot_save() later. This is + * recommended for e.g. modules initialized later in the boot process, when + * slab_is_available() is true. 
* * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot * enabled as part of mm_init(), for subsystems where it's known at compile time * that stack depot will be used. + * + * Another alternative is to call stack_depot_want_early_init(), when the + * decision to use stack depot is taken e.g. when evaluating kernel boot + * parameters, which precedes the enablement point in mm_init(). + * + * stack_depot_init() and stack_depot_want_early_init() can be called regardless + * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print + * functions should only be called from code that makes sure CONFIG_STACKDEPOT + * is enabled. */ +#ifdef CONFIG_STACKDEPOT int stack_depot_init(void); -#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT -static inline int stack_depot_early_init(void) { return stack_depot_init(); } +void __init stack_depot_want_early_init(void); + +/* This is supposed to be called only from mm_init() */ +int __init stack_depot_early_init(void); #else +static inline int stack_depot_init(void) { return 0; } + +static inline void stack_depot_want_early_init(void) { } + static inline int stack_depot_early_init(void) { return 0; } #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index bf5ba9af0500..5ca0d086ef4a 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -66,6 +66,9 @@ struct stack_record { unsigned long entries[]; /* Variable-sized array of entries. */ }; +static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); +static bool __stack_depot_early_init_passed __initdata; + static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; static int depot_index; @@ -162,38 +165,58 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -/* - * __ref because of memblock_alloc(), which will not be actually called after - * the __init code is gone, because at that point slab_is_available() is true - */ -__ref int stack_depot_init(void) +void __init stack_depot_want_early_init(void) +{ + /* Too late to request early init now */ + WARN_ON(__stack_depot_early_init_passed); + + __stack_depot_want_early_init = true; +} + +int __init stack_depot_early_init(void) +{ + size_t size; + + /* This is supposed to be called only once, from mm_init() */ + if (WARN_ON(__stack_depot_early_init_passed)) + return 0; + + __stack_depot_early_init_passed = true; + + if (!__stack_depot_want_early_init || stack_depot_disable) + return 0; + + size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); + pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n", + size); + stack_table = memblock_alloc(size, SMP_CACHE_BYTES); + + if (!stack_table) { + pr_err("Stack Depot hash table allocation failed, disabling\n"); + stack_depot_disable = true; + return -ENOMEM; + } + + return 0; +} + +int stack_depot_init(void) { static DEFINE_MUTEX(stack_depot_init_mutex); + int ret = 0; mutex_lock(&stack_depot_init_mutex); if (!stack_depot_disable && !stack_table) { - size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); - int i; - - if (slab_is_available()) { - pr_info("Stack Depot allocating hash table with kvmalloc\n"); - stack_table = kvmalloc(size, GFP_KERNEL); - } else { - pr_info("Stack Depot allocating hash table with memblock_alloc\n"); - stack_table = memblock_alloc(size, SMP_CACHE_BYTES); - } - if (stack_table) { - for (i = 0; i < STACK_HASH_SIZE; i++) - stack_table[i] = NULL; - } else { + pr_info("Stack Depot allocating hash table with kvcalloc\n"); + 
stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL); + if (!stack_table) { pr_err("Stack Depot hash table allocation failed, disabling\n"); stack_depot_disable = true; - mutex_unlock(&stack_depot_init_mutex); - return -ENOMEM; + ret = -ENOMEM; } } mutex_unlock(&stack_depot_init_mutex); - return 0; + return ret; } EXPORT_SYMBOL_GPL(stack_depot_init); diff --git a/mm/page_owner.c b/mm/page_owner.c index fb3a05fdebdb..2743062e92c2 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -45,7 +45,12 @@ static void init_early_allocated_pages(void); static int __init early_page_owner_param(char *buf) { - return kstrtobool(buf, &page_owner_enabled); + int ret = kstrtobool(buf, &page_owner_enabled); + + if (page_owner_enabled) + stack_depot_want_early_init(); + + return ret; } early_param("page_owner", early_page_owner_param); @@ -83,8 +88,6 @@ static __init void init_page_owner(void) if (!page_owner_enabled) return; - stack_depot_init(); - register_dummy_stack(); register_failure_stack(); register_early_stack(); -- cgit From f4b41f062c424209e3939a81e6da022e049a45f2 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 4 Apr 2022 18:30:22 +0200 Subject: net: remove noblock parameter from skb_recv_datagram() skb_recv_datagram() has two parameters 'flags' and 'noblock' that are merged inside skb_recv_datagram() by 'flags | (noblock ? MSG_DONTWAIT : 0)' As 'flags' may contain MSG_DONTWAIT as a value, most callers split 'flags' into 'flags' and 'noblock', which leads to obsolete bit operations like this: skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); And this is not even done consistently with the 'flags' parameter. This patch removes the obsolete and costly splitting into two parameters and only performs bit operations when really needed on the caller side. One missing conversion was thankfully reported by the kernel test robot; I had missed enabling the kunit tests that build the mctp code. Reported-by: kernel test robot Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- drivers/isdn/mISDN/socket.c | 2 +- drivers/net/ppp/pppoe.c | 3 +-- include/linux/skbuff.h | 3 +-- net/appletalk/ddp.c | 3 +-- net/atm/common.c | 2 +- net/ax25/af_ax25.c | 3 +-- net/bluetooth/af_bluetooth.c | 3 +-- net/bluetooth/hci_sock.c | 3 +-- net/caif/caif_socket.c | 2 +- net/can/bcm.c | 5 +---- net/can/isotp.c | 4 +--- net/can/j1939/socket.c | 2 +- net/can/raw.c | 6 +----- net/core/datagram.c | 5 ++--- net/ieee802154/socket.c | 6 ++++-- net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 3 ++- net/ipv6/raw.c | 3 ++- net/iucv/af_iucv.c | 3 +-- net/key/af_key.c | 2 +- net/l2tp/l2tp_ip.c | 3 ++- net/l2tp/l2tp_ip6.c | 3 ++- net/l2tp/l2tp_ppp.c | 3 +-- net/mctp/af_mctp.c | 2 +- net/mctp/test/route-test.c | 8 ++++---- net/netlink/af_netlink.c | 3 +-- net/netrom/af_netrom.c | 3 ++- net/nfc/llcp_sock.c | 3 +-- net/nfc/rawsock.c | 3 +-- net/packet/af_packet.c | 2 +- net/phonet/datagram.c | 3 ++- net/phonet/pep.c | 6 ++++-- net/qrtr/af_qrtr.c | 3 +-- net/rose/af_rose.c | 3 ++- net/unix/af_unix.c | 5 +++-- net/vmw_vsock/vmci_transport.c | 5 +---- net/x25/af_x25.c | 3 +-- 37 files changed, 57 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index a6606736d8c5..2776ca5fc33f 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -121,7 +121,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (sk->sk_state == MISDN_CLOSED) return 0; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 3619520340b7..1b41cd9732d7 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1011,8 +1011,7 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, goto end; } - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &error); + skb = skb_recv_datagram(sk, flags, &error); if (error < 0) goto end; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a30cae8b0a5..2394441fa3dd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3836,8 +3836,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); -struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, - int *err); +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index bf5736c1d458..a06f4d4a6f47 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int err = 0; struct sk_buff *skb; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); lock_sock(sk); if (!skb) diff --git a/net/atm/common.c b/net/atm/common.c index 1cfa9bf1d187..d0c8ab7ff8f6 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, !test_bit(ATM_VF_READY, &vcc->flags)) return 0; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error); + skb = skb_recv_datagram(sk, flags, &error); 
if (!skb) return error; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 363d47f94532..116481e4da82 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1669,8 +1669,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } /* Now we can treat all alike */ - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a0cb2e3da8d4..62705734343b 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -251,7 +251,6 @@ EXPORT_SYMBOL(bt_accept_dequeue); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; size_t copied; @@ -263,7 +262,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 33b3c0ffc339..189e3115c8c6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1453,7 +1453,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; @@ -1470,7 +1469,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state == BT_CLOSED) return 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 2b8892d502f7..251e666ba9a2 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -282,7 +282,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, if (flags & MSG_OOB) goto read_error; - skb = skb_recv_datagram(sk, flags, 0 , &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) goto read_error; copylen = skb->len; diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d209b52e6a..64c07e650bb4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1632,12 +1632,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; - int noblock; int err; - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &error); + skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; diff --git a/net/can/isotp.c b/net/can/isotp.c index bafb0fb5f0e0..02d81effaa54 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1047,7 +1047,6 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; struct isotp_sock *so = isotp_sk(sk); - int noblock = flags & MSG_DONTWAIT; int ret = 0; if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) @@ -1056,8 +1055,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (!so->bound) return -EADDRNOTAVAIL; - flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) return ret; diff --git a/net/can/j1939/socket.c 
b/net/can/j1939/socket.c index 6dff4510687a..0bb4fd3f6264 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -802,7 +802,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939, SCM_J1939_ERRQUEUE); - skb = skb_recv_datagram(sk, flags, 0, &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) return ret; diff --git a/net/can/raw.c b/net/can/raw.c index 7105fa4824e4..0cf728dcff36 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -846,16 +846,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; int err = 0; - int noblock; - - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; if (flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/net/core/datagram.c b/net/core/datagram.c index ee290776c661..70126d15ca6e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -310,12 +310,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_datagram(sk, &sk->sk_receive_queue, - flags | (noblock ? MSG_DONTWAIT : 0), + return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 3b2366a88c3c..a725dd9bbda8 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -314,7 +314,8 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err = -EOPNOTSUPP; struct sk_buff *skb; - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -703,7 +704,8 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct dgram_sock *ro = dgram_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3ee947557b88..550dc5c795c0 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -861,7 +861,8 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, if (flags & MSG_ERRQUEUE) return inet_recv_error(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9f97b9cbf7b3..c9dd9603f2e7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -769,7 +769,8 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? 
MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c51d5ce3711c..8bb41f3b246a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -477,7 +477,8 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (np->rxpmtu && np->rxopt.bits.rxpmtu) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a1760add5bf1..a0385ddbffcf 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1223,7 +1223,6 @@ static void iucv_process_message_q(struct sock *sk) static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int copied, rlen; @@ -1242,7 +1241,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, /* receive/dequeue next skb: * the function understands MSG_PEEK and, thus, does not dequeue skb */ - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; diff --git a/net/key/af_key.c b/net/key/af_key.c index fd51db3be91c..d09ec26b1081 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3696,7 +3696,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b3edafa5fba4..c6a5cc2d88e7 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -526,7 +526,8 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 96f975777438..97fde8a9209b 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -671,7 +671,8 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? 
MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index bf35710127dd..8be1fdc68a0b 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -191,8 +191,7 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, goto end; err = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto end; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index f0702d920d8d..5f204eb8abd2 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -196,7 +196,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) return rc; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 61205cf40074..24df29e135ed 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -352,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test) if (params->deliver) { KUNIT_EXPECT_EQ(test, rc, 0); - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); KUNIT_EXPECT_EQ(test, skb->len, 1); @@ -360,7 +360,7 @@ static void mctp_test_route_input_sk(struct kunit *test) } else { KUNIT_EXPECT_NE(test, rc, 0); - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); } @@ -423,7 +423,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) rc = mctp_route_input(&rt->rt, skb); } - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); if (params->rx_len) { KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); @@ -582,7 +582,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) rc = mctp_route_input(&rt->rt, skb); /* (potentially) receive message */ - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); if (params->deliver) KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 47a876ccd288..9fa85bb36c0e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1931,7 +1931,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - int noblock = flags & MSG_DONTWAIT; size_t copied; struct sk_buff *skb, *data_skb; int err, ret; @@ -1941,7 +1940,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa9dc2ba3941..6f7f4392cffb 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1159,7 +1159,8 @@ static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } /* Now we can treat all alike */ - if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) { + skb = skb_recv_datagram(sk, flags, &er); + if (!skb) { release_sock(sk); return er; } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 
4ca35791c93b..77642d18a3b4 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -821,7 +821,6 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; unsigned int copied, rlen; struct sk_buff *skb, *cskb; @@ -842,7 +841,7 @@ static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { pr_err("Recv datagram failed state %d %d %d", sk->sk_state, err, sock_error(sk)); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 0ca214ab5aef..8dd569765f96 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -238,7 +238,6 @@ static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied; @@ -246,7 +245,7 @@ static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags); - skb = skb_recv_datagram(sk, flags, noblock, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) return rc; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c39c09899fd0..d3caaf4d4b3e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3421,7 +3421,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, * but then it will block. */ - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); /* * An error occurred so return it. Because skb_recv_datagram() diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 393e6aa7a592..3f2e62b63dd4 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -123,7 +123,8 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, MSG_CMSG_COMPAT)) goto out_nofree; - skb = skb_recv_datagram(sk, flags, noblock, &rval); + flags |= (noblock ? MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 65d463ad8770..441a26706592 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -772,7 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, u8 pipe_handle, enabled, n_sb; u8 aligned = 0; - skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + errp); if (!skb) return NULL; @@ -1267,7 +1268,8 @@ static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EINVAL; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + flags |= (noblock ? 
MSG_DONTWAIT : 0); + skb = skb_recv_datagram(sk, flags, &err); lock_sock(sk); if (skb == NULL) { if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index ec2322529727..5c2fb992803b 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -1035,8 +1035,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, return -EADDRNOTAVAIL; } - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) { release_sock(sk); return rc; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 30a1cf4c16c6..bf2d986a6bc3 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1230,7 +1230,8 @@ static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return -ENOTCONN; /* Now we can treat all alike */ - if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) + skb = skb_recv_datagram(sk, flags, &er); + if (!skb) return er; qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e71a312faa1e..fecbd95da918 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1643,7 +1643,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &err); if (!skb) { /* This means receive shutdown. */ if (err == 0) @@ -2500,7 +2501,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, int used, err; mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); mutex_unlock(&u->iolock); if (!skb) return err; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b17dc9745188..b14f0ed7427b 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1732,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, int flags) { int err; - int noblock; struct vmci_datagram *dg; size_t payload_len; struct sk_buff *skb; - noblock = flags & MSG_DONTWAIT; - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + skb = skb_recv_datagram(&vsk->sk, flags, &err); if (!skb) return err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3a171828638b..6bc2ac8d8146 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1315,8 +1315,7 @@ static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } else { /* Now we can treat all alike */ release_sock(sk); - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); lock_sock(sk); if (!skb) goto out; -- cgit From fe696ccb277d332dc4e625b5b20b988b04d16c04 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 3 Apr 2022 15:53:54 -0700 Subject: gpu: host1x: Fix a kernel-doc warning Add @cache description to eliminate a kernel-doc warning. 
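For context (a general note, not from the original log): kernel-doc derives structure documentation from the comment block preceding the struct and expects each member to have a line of the form

	 * @member: short description

so an undocumented member produces a warning like the one below: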
include/linux/host1x.h:104: warning: Function parameter or member 'cache' not described in 'host1x_client' Fixes: 1f39b1dfa53c ("drm/tegra: Implement buffer object cache") Signed-off-by: Randy Dunlap Cc: Thierry Reding Cc: linux-tegra@vger.kernel.org Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index e8dc5bc41f79..00278853eadf 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -81,6 +81,7 @@ struct host1x_client_ops { * @parent: pointer to parent structure * @usecount: reference count for this structure * @lock: mutex for mutually exclusive concurrency + * @cache: host1x buffer object cache */ struct host1x_client { struct list_head list; -- cgit From 804775dfc2885e93a0a4b35db1914c2cc25172b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 5 Apr 2022 21:57:47 +0200 Subject: net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED) The Wireless Ethernet Dispatch subsystem on the MT7622 SoC can be configured to intercept and handle access to the DMA queues and PCIe interrupts for a MT7615/MT7915 wireless card. It can manage the internal WDMA (Wireless DMA) controller, which allows ethernet packets to be passed from the packet switch engine (PSE) to the wireless card, bypassing the CPU entirely. This can be used to implement hardware flow offloading from ethernet to WLAN. Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/Kconfig | 4 + drivers/net/ethernet/mediatek/Makefile | 5 + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 3 + drivers/net/ethernet/mediatek/mtk_wed.c | 875 ++++++++++++++++++++++++ drivers/net/ethernet/mediatek/mtk_wed.h | 128 ++++ drivers/net/ethernet/mediatek/mtk_wed_debugfs.c | 175 +++++ drivers/net/ethernet/mediatek/mtk_wed_ops.c | 8 + drivers/net/ethernet/mediatek/mtk_wed_regs.h | 251 +++++++ include/linux/soc/mediatek/mtk_wed.h | 131 ++++ 10 files changed, 1597 insertions(+) create mode 100644 drivers/net/ethernet/mediatek/mtk_wed.c create mode 100644 drivers/net/ethernet/mediatek/mtk_wed.h create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_debugfs.c create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_ops.c create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_regs.h create mode 100644 include/linux/soc/mediatek/mtk_wed.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 86d356b4388d..da4ec235d146 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -7,6 +7,10 @@ config NET_VENDOR_MEDIATEK if NET_VENDOR_MEDIATEK +config NET_MEDIATEK_SOC_WED + depends on ARCH_MEDIATEK || COMPILE_TEST + def_bool NET_MEDIATEK_SOC != n + config NET_MEDIATEK_SOC tristate "MediaTek SoC Gigabit Ethernet support" depends on NET_DSA || !NET_DSA diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile index 79d4cdbbcbf5..45ba0970504a 100644 --- a/drivers/net/ethernet/mediatek/Makefile +++ b/drivers/net/ethernet/mediatek/Makefile @@ -5,4 +5,9 @@ obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o +mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed.o +ifdef CONFIG_DEBUG_FS 
+mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_debugfs.o +endif +obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 70db217ed831..4d7c542d89fb 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -24,6 +24,7 @@ #include #include "mtk_eth_soc.h" +#include "mtk_wed.h" static int mtk_msg_level = -1; module_param_named(msg_level, mtk_msg_level, int, 0); @@ -3170,6 +3171,22 @@ static int mtk_probe(struct platform_device *pdev) } } + for (i = 0;; i++) { + struct device_node *np = of_parse_phandle(pdev->dev.of_node, + "mediatek,wed", i); + static const u32 wdma_regs[] = { + MTK_WDMA0_BASE, + MTK_WDMA1_BASE + }; + void __iomem *wdma; + + if (!np || i >= ARRAY_SIZE(wdma_regs)) + break; + + wdma = eth->base + wdma_regs[i]; + mtk_wed_add_hw(np, eth, wdma, i); + } + for (i = 0; i < 3; i++) { if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0) eth->irq[i] = eth->irq[0]; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index e701544c4287..74661682fd92 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -295,6 +295,9 @@ #define MTK_GDM1_TX_GPCNT 0x2438 #define MTK_STAT_OFFSET 0x40 +#define MTK_WDMA0_BASE 0x2800 +#define MTK_WDMA1_BASE 0x2c00 + /* QDMA descriptor txd4 */ #define TX_DMA_CHKSUM (0x7 << 29) #define TX_DMA_TSO BIT(28) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c new file mode 100644 index 000000000000..f0eacf819cd9 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Felix Fietkau */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mtk_eth_soc.h" +#include "mtk_wed_regs.h" +#include "mtk_wed.h" +#include "mtk_ppe.h" + +#define MTK_PCIE_BASE(n) (0x1a143000 + (n) * 0x2000) + +#define MTK_WED_PKT_SIZE 1900 +#define MTK_WED_BUF_SIZE 2048 +#define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048) + +#define MTK_WED_TX_RING_SIZE 2048 +#define MTK_WED_WDMA_RING_SIZE 1024 + +static struct mtk_wed_hw *hw_list[2]; +static DEFINE_MUTEX(hw_lock); + +static void +wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val) +{ + regmap_update_bits(dev->hw->regs, reg, mask | val, val); +} + +static void +wed_set(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + return wed_m32(dev, reg, 0, mask); +} + +static void +wed_clr(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + return wed_m32(dev, reg, mask, 0); +} + +static void +wdma_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val) +{ + wdma_w32(dev, reg, (wdma_r32(dev, reg) & ~mask) | val); +} + +static void +wdma_set(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + wdma_m32(dev, reg, 0, mask); +} + +static u32 +mtk_wed_read_reset(struct mtk_wed_device *dev) +{ + return wed_r32(dev, MTK_WED_RESET); +} + +static void +mtk_wed_reset(struct mtk_wed_device *dev, u32 mask) +{ + u32 status; + + wed_w32(dev, MTK_WED_RESET, mask); + if (readx_poll_timeout(mtk_wed_read_reset, dev, status, + !(status & mask), 0, 1000)) + WARN_ON_ONCE(1); +} + +static struct mtk_wed_hw * +mtk_wed_assign(struct mtk_wed_device *dev) +{ + struct mtk_wed_hw *hw; + + hw = hw_list[pci_domain_nr(dev->wlan.pci_dev->bus)]; + if (!hw || 
hw->wed_dev) + return NULL; + + hw->wed_dev = dev; + return hw; +} + +static int +mtk_wed_buffer_alloc(struct mtk_wed_device *dev) +{ + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + void **page_list; + int token = dev->wlan.token_start; + int ring_size; + int n_pages; + int i, page_idx; + + ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); + n_pages = ring_size / MTK_WED_BUF_PER_PAGE; + + page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + dev->buf_ring.size = ring_size; + dev->buf_ring.pages = page_list; + + desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc), + &desc_phys, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + dev->buf_ring.desc = desc; + dev->buf_ring.desc_phys = desc_phys; + + for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) { + dma_addr_t page_phys, buf_phys; + struct page *page; + void *buf; + int s; + + page = __dev_alloc_pages(GFP_KERNEL, 0); + if (!page) + return -ENOMEM; + + page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev->hw->dev, page_phys)) { + __free_page(page); + return -ENOMEM; + } + + page_list[page_idx++] = page; + dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + buf = page_to_virt(page); + buf_phys = page_phys; + + for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) { + u32 txd_size; + + txd_size = dev->wlan.init_buf(buf, buf_phys, token++); + + desc->buf0 = buf_phys; + desc->buf1 = buf_phys + txd_size; + desc->ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, + txd_size) | + FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1, + MTK_WED_BUF_SIZE - txd_size) | + MTK_WDMA_DESC_CTRL_LAST_SEG1; + desc->info = 0; + desc++; + + buf += MTK_WED_BUF_SIZE; + buf_phys += MTK_WED_BUF_SIZE; + } + + dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + + return 0; +} + +static void +mtk_wed_free_buffer(struct mtk_wed_device *dev) +{ + struct mtk_wdma_desc *desc = dev->buf_ring.desc; + void **page_list = dev->buf_ring.pages; + int page_idx; + int i; + + if (!page_list) + return; + + if (!desc) + goto free_pagelist; + + for (i = 0, page_idx = 0; i < dev->buf_ring.size; i += MTK_WED_BUF_PER_PAGE) { + void *page = page_list[page_idx++]; + + if (!page) + break; + + dma_unmap_page(dev->hw->dev, desc[i].buf0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + } + + dma_free_coherent(dev->hw->dev, dev->buf_ring.size * sizeof(*desc), + desc, dev->buf_ring.desc_phys); + +free_pagelist: + kfree(page_list); +} + +static void +mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring) +{ + if (!ring->desc) + return; + + dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc), + ring->desc, ring->desc_phys); +} + +static void +mtk_wed_free_tx_rings(struct mtk_wed_device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) + mtk_wed_free_ring(dev, &dev->tx_ring[i]); + for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++) + mtk_wed_free_ring(dev, &dev->tx_wdma[i]); +} + +static void +mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en) +{ + u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; + + if (!dev->hw->num_flows) + mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; + + wed_w32(dev, MTK_WED_EXT_INT_MASK, en ? 
mask : 0); + wed_r32(dev, MTK_WED_EXT_INT_MASK); +} + +static void +mtk_wed_stop(struct mtk_wed_device *dev) +{ + regmap_write(dev->hw->mirror, dev->hw->index * 4, 0); + mtk_wed_set_ext_int(dev, false); + + wed_clr(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_EN | + MTK_WED_CTRL_WPDMA_INT_AGENT_EN | + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0); + wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0); + wdma_w32(dev, MTK_WDMA_INT_MASK, 0); + wdma_w32(dev, MTK_WDMA_INT_GRP2, 0); + wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0); + + wed_clr(dev, MTK_WED_GLO_CFG, + MTK_WED_GLO_CFG_TX_DMA_EN | + MTK_WED_GLO_CFG_RX_DMA_EN); + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); +} + +static void +mtk_wed_detach(struct mtk_wed_device *dev) +{ + struct device_node *wlan_node = dev->wlan.pci_dev->dev.of_node; + struct mtk_wed_hw *hw = dev->hw; + + mutex_lock(&hw_lock); + + mtk_wed_stop(dev); + + wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); + wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + + mtk_wed_reset(dev, MTK_WED_RESET_WED); + + mtk_wed_free_buffer(dev); + mtk_wed_free_tx_rings(dev); + + if (of_dma_is_coherent(wlan_node)) + regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, + BIT(hw->index), BIT(hw->index)); + + if (!hw_list[!hw->index]->wed_dev && + hw->eth->dma_dev != hw->eth->dev) + mtk_eth_set_dma_device(hw->eth, hw->eth->dev); + + memset(dev, 0, sizeof(*dev)); + module_put(THIS_MODULE); + + hw->wed_dev = NULL; + mutex_unlock(&hw_lock); +} + +static void +mtk_wed_hw_init_early(struct mtk_wed_device *dev) +{ + u32 mask, set; + u32 offset; + + mtk_wed_stop(dev); + mtk_wed_reset(dev, MTK_WED_RESET_WED); + + mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE | + MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE | + MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE; + set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) | + MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP | + MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY; + wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set); + + wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO_PRERES); + + offset = dev->hw->index ? 
0x04000400 : 0; + wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset); + wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset); + + wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index)); + wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys); +} + +static void +mtk_wed_hw_init(struct mtk_wed_device *dev) +{ + if (dev->init_done) + return; + + dev->init_done = true; + mtk_wed_set_ext_int(dev, false); + wed_w32(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, + dev->buf_ring.size / 128) | + FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, + MTK_WED_TX_RING_SIZE / 256)); + + wed_w32(dev, MTK_WED_TX_BM_BASE, dev->buf_ring.desc_phys); + + wed_w32(dev, MTK_WED_TX_BM_TKID, + FIELD_PREP(MTK_WED_TX_BM_TKID_START, + dev->wlan.token_start) | + FIELD_PREP(MTK_WED_TX_BM_TKID_END, + dev->wlan.token_start + dev->wlan.nbuf - 1)); + + wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE); + + wed_w32(dev, MTK_WED_TX_BM_DYN_THR, + FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) | + MTK_WED_TX_BM_DYN_THR_HI); + + mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); + + wed_set(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + + wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE); +} + +static void +mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size) +{ + int i; + + for (i = 0; i < size; i++) { + desc[i].buf0 = 0; + desc[i].ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE); + desc[i].buf1 = 0; + desc[i].info = 0; + } +} + +static u32 +mtk_wed_check_busy(struct mtk_wed_device *dev) +{ + if (wed_r32(dev, MTK_WED_GLO_CFG) & MTK_WED_GLO_CFG_TX_DMA_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_WPDMA_GLO_CFG) & + MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_CTRL) & MTK_WED_CTRL_WDMA_INT_AGENT_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_WDMA_GLO_CFG) & + MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY) + return true; + + if (wdma_r32(dev, MTK_WDMA_GLO_CFG) & + MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_CTRL) & + (MTK_WED_CTRL_WED_TX_BM_BUSY | MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY)) + return true; + + return false; +} + +static int +mtk_wed_poll_busy(struct mtk_wed_device *dev) +{ + int sleep = 15000; + int timeout = 100 * sleep; + u32 val; + + return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, + timeout, false, dev); +} + +static void +mtk_wed_reset_dma(struct mtk_wed_device *dev) +{ + bool busy = false; + u32 val; + int i; + + for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) { + struct mtk_wdma_desc *desc = dev->tx_ring[i].desc; + + if (!desc) + continue; + + mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE); + } + + if (mtk_wed_poll_busy(dev)) + busy = mtk_wed_check_busy(dev); + + if (busy) { + mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA); + } else { + wed_w32(dev, MTK_WED_RESET_IDX, + MTK_WED_RESET_IDX_TX | + MTK_WED_RESET_IDX_RX); + wed_w32(dev, MTK_WED_RESET_IDX, 0); + } + + wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); + wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + + if (busy) { + mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV); + } else { + wed_w32(dev, MTK_WED_WDMA_RESET_IDX, + MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV); + wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0); + + wed_set(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE); + + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE); + } + + for (i = 0; i < 100; i++) { + val = 
wed_r32(dev, MTK_WED_TX_BM_INTF); + if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40) + break; + } + + mtk_wed_reset(dev, MTK_WED_RESET_TX_FREE_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); + + if (busy) { + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV); + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV); + } else { + wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, + MTK_WED_WPDMA_RESET_IDX_TX | + MTK_WED_WPDMA_RESET_IDX_RX); + wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, 0); + } + +} + +static int +mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring, + int size) +{ + ring->desc = dma_alloc_coherent(dev->hw->dev, + size * sizeof(*ring->desc), + &ring->desc_phys, GFP_KERNEL); + if (!ring->desc) + return -ENOMEM; + + ring->size = size; + mtk_wed_ring_reset(ring->desc, size); + + return 0; +} + +static int +mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size) +{ + struct mtk_wed_ring *wdma = &dev->tx_wdma[idx]; + + if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE)) + return -ENOMEM; + + wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE, + wdma->desc_phys); + wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT, + size); + wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0); + + wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE, + wdma->desc_phys); + wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT, + size); + + return 0; +} + +static void +mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) +{ + u32 wdma_mask; + u32 val; + int i; + + for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++) + if (!dev->tx_wdma[i].desc) + mtk_wed_wdma_ring_setup(dev, i, 16); + + wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0)); + + mtk_wed_hw_init(dev); + + wed_set(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_EN | + MTK_WED_CTRL_WPDMA_INT_AGENT_EN | + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + + wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS); + + wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, + MTK_WED_WPDMA_INT_TRIGGER_RX_DONE | + MTK_WED_WPDMA_INT_TRIGGER_TX_DONE); + + wed_set(dev, MTK_WED_WPDMA_INT_CTRL, + MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV); + + wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask); + wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask); + + wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask); + wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask); + + wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask); + wed_w32(dev, MTK_WED_INT_MASK, irq_mask); + + wed_set(dev, MTK_WED_GLO_CFG, + MTK_WED_GLO_CFG_TX_DMA_EN | + MTK_WED_GLO_CFG_RX_DMA_EN); + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wed_set(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); + + mtk_wed_set_ext_int(dev, true); + val = dev->wlan.wpdma_phys | + MTK_PCIE_MIRROR_MAP_EN | + FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index); + + if (dev->hw->index) + val |= BIT(1); + val |= BIT(0); + regmap_write(dev->hw->mirror, dev->hw->index * 4, val); + + dev->running = true; +} + +static int +mtk_wed_attach(struct mtk_wed_device *dev) + __releases(RCU) +{ + struct mtk_wed_hw *hw; + int ret = 0; + + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "mtk_wed_attach without holding the RCU read lock"); + + if (pci_domain_nr(dev->wlan.pci_dev->bus) > 1 || + !try_module_get(THIS_MODULE)) + ret = -ENODEV; + + rcu_read_unlock(); + + if (ret) + return ret; + + mutex_lock(&hw_lock); + + hw = 
mtk_wed_assign(dev); + if (!hw) { + module_put(THIS_MODULE); + ret = -ENODEV; + goto out; + } + + dev_info(&dev->wlan.pci_dev->dev, "attaching wed device %d\n", hw->index); + + dev->hw = hw; + dev->dev = hw->dev; + dev->irq = hw->irq; + dev->wdma_idx = hw->index; + + if (hw->eth->dma_dev == hw->eth->dev && + of_dma_is_coherent(hw->eth->dev->of_node)) + mtk_eth_set_dma_device(hw->eth, hw->dev); + + ret = mtk_wed_buffer_alloc(dev); + if (ret) { + mtk_wed_detach(dev); + goto out; + } + + mtk_wed_hw_init_early(dev); + regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0); + +out: + mutex_unlock(&hw_lock); + + return ret; +} + +static int +mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->tx_ring[idx]; + + /* + * Tx ring redirection: + * Instead of configuring the WLAN PDMA TX ring directly, the WLAN + * driver allocated DMA ring gets configured into WED MTK_WED_RING_TX(n) + * registers. + * + * WED driver posts its own DMA ring as WLAN PDMA TX and configures it + * into MTK_WED_WPDMA_RING_TX(n) registers. + * It gets filled with packets picked up from WED TX ring and from + * WDMA RX. + */ + + BUG_ON(idx > ARRAY_SIZE(dev->tx_ring)); + + if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE)) + return -ENOMEM; + + if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE)) + return -ENOMEM; + + ring->reg_base = MTK_WED_RING_TX(idx); + ring->wpdma = regs; + + /* WED -> WPDMA */ + wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys); + wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE); + wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_CPU_IDX, 0); + + wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE, + ring->desc_phys); + wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT, + MTK_WED_TX_RING_SIZE); + wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0); + + return 0; +} + +static int +mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->txfree_ring; + int i; + + /* + * For txfree event handling, the same DMA ring is shared between WED + * and WLAN. 
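+ * (The 12-byte loop below copies the ring's base, count and CPU index
+ * registers into both the WED and WED WPDMA register views, presumably
+ * so that WED and the WLAN driver observe the same ring state.)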
The WLAN driver accesses the ring index registers through + * WED + */ + ring->reg_base = MTK_WED_RING_RX(1); + ring->wpdma = regs; + + for (i = 0; i < 12; i += 4) { + u32 val = readl(regs + i); + + wed_w32(dev, MTK_WED_RING_RX(1) + i, val); + wed_w32(dev, MTK_WED_WPDMA_RING_RX(1) + i, val); + } + + return 0; +} + +static u32 +mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask) +{ + u32 val; + + val = wed_r32(dev, MTK_WED_EXT_INT_STATUS); + wed_w32(dev, MTK_WED_EXT_INT_STATUS, val); + val &= MTK_WED_EXT_INT_STATUS_ERROR_MASK; + if (!dev->hw->num_flows) + val &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; + if (val && net_ratelimit()) + pr_err("mtk_wed%d: error status=%08x\n", dev->hw->index, val); + + val = wed_r32(dev, MTK_WED_INT_STATUS); + val &= mask; + wed_w32(dev, MTK_WED_INT_STATUS, val); /* ACK */ + + return val; +} + +static void +mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask) +{ + if (!dev->running) + return; + + mtk_wed_set_ext_int(dev, !!mask); + wed_w32(dev, MTK_WED_INT_MASK, mask); +} + +int mtk_wed_flow_add(int index) +{ + struct mtk_wed_hw *hw = hw_list[index]; + int ret; + + if (!hw || !hw->wed_dev) + return -ENODEV; + + if (hw->num_flows) { + hw->num_flows++; + return 0; + } + + mutex_lock(&hw_lock); + if (!hw->wed_dev) { + ret = -ENODEV; + goto out; + } + + ret = hw->wed_dev->wlan.offload_enable(hw->wed_dev); + if (!ret) + hw->num_flows++; + mtk_wed_set_ext_int(hw->wed_dev, true); + +out: + mutex_unlock(&hw_lock); + + return ret; +} + +void mtk_wed_flow_remove(int index) +{ + struct mtk_wed_hw *hw = hw_list[index]; + + if (!hw) + return; + + if (--hw->num_flows) + return; + + mutex_lock(&hw_lock); + if (!hw->wed_dev) + goto out; + + hw->wed_dev->wlan.offload_disable(hw->wed_dev); + mtk_wed_set_ext_int(hw->wed_dev, true); + +out: + mutex_unlock(&hw_lock); +} + +void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, + void __iomem *wdma, int index) +{ + static const struct mtk_wed_ops wed_ops = { + .attach = mtk_wed_attach, + .tx_ring_setup = mtk_wed_tx_ring_setup, + .txfree_ring_setup = mtk_wed_txfree_ring_setup, + .start = mtk_wed_start, + .stop = mtk_wed_stop, + .reset_dma = mtk_wed_reset_dma, + .reg_read = wed_r32, + .reg_write = wed_w32, + .irq_get = mtk_wed_irq_get, + .irq_set_mask = mtk_wed_irq_set_mask, + .detach = mtk_wed_detach, + }; + struct device_node *eth_np = eth->dev->of_node; + struct platform_device *pdev; + struct mtk_wed_hw *hw; + struct regmap *regs; + int irq; + + if (!np) + return; + + pdev = of_find_device_by_node(np); + if (!pdev) + return; + + get_device(&pdev->dev); + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return; + + regs = syscon_regmap_lookup_by_phandle(np, NULL); + if (!regs) + return; + + rcu_assign_pointer(mtk_soc_wed_ops, &wed_ops); + + mutex_lock(&hw_lock); + + if (WARN_ON(hw_list[index])) + goto unlock; + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + hw->node = np; + hw->regs = regs; + hw->eth = eth; + hw->dev = &pdev->dev; + hw->wdma = wdma; + hw->index = index; + hw->irq = irq; + hw->mirror = syscon_regmap_lookup_by_phandle(eth_np, + "mediatek,pcie-mirror"); + hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np, + "mediatek,hifsys"); + if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) { + kfree(hw); + goto unlock; + } + + if (!index) { + regmap_write(hw->mirror, 0, 0); + regmap_write(hw->mirror, 4, 0); + } + mtk_wed_hw_add_debugfs(hw); + + hw_list[index] = hw; + +unlock: + mutex_unlock(&hw_lock); +} + +void mtk_wed_exit(void) +{ + int i; + + rcu_assign_pointer(mtk_soc_wed_ops, NULL); + + synchronize_rcu(); + + for 
(i = 0; i < ARRAY_SIZE(hw_list); i++) { + struct mtk_wed_hw *hw; + + hw = hw_list[i]; + if (!hw) + continue; + + hw_list[i] = NULL; + debugfs_remove(hw->debugfs_dir); + put_device(hw->dev); + kfree(hw); + } +} diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h new file mode 100644 index 000000000000..404c9a9b130d --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Felix Fietkau */ + +#ifndef __MTK_WED_PRIV_H +#define __MTK_WED_PRIV_H + +#include +#include +#include + +struct mtk_eth; + +struct mtk_wed_hw { + struct device_node *node; + struct mtk_eth *eth; + struct regmap *regs; + struct regmap *hifsys; + struct device *dev; + void __iomem *wdma; + struct regmap *mirror; + struct dentry *debugfs_dir; + struct mtk_wed_device *wed_dev; + u32 debugfs_reg; + u32 num_flows; + char dirname[5]; + int irq; + int index; +}; + + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED +static inline void +wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val) +{ + regmap_write(dev->hw->regs, reg, val); +} + +static inline u32 +wed_r32(struct mtk_wed_device *dev, u32 reg) +{ + unsigned int val; + + regmap_read(dev->hw->regs, reg, &val); + + return val; +} + +static inline void +wdma_w32(struct mtk_wed_device *dev, u32 reg, u32 val) +{ + writel(val, dev->hw->wdma + reg); +} + +static inline u32 +wdma_r32(struct mtk_wed_device *dev, u32 reg) +{ + return readl(dev->hw->wdma + reg); +} + +static inline u32 +wpdma_tx_r32(struct mtk_wed_device *dev, int ring, u32 reg) +{ + if (!dev->tx_ring[ring].wpdma) + return 0; + + return readl(dev->tx_ring[ring].wpdma + reg); +} + +static inline void +wpdma_tx_w32(struct mtk_wed_device *dev, int ring, u32 reg, u32 val) +{ + if (!dev->tx_ring[ring].wpdma) + return; + + writel(val, dev->tx_ring[ring].wpdma + reg); +} + +static inline u32 +wpdma_txfree_r32(struct mtk_wed_device *dev, u32 reg) +{ + if (!dev->txfree_ring.wpdma) + return 0; + + return readl(dev->txfree_ring.wpdma + reg); +} + +static inline void +wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val) +{ + if (!dev->txfree_ring.wpdma) + return; + + writel(val, dev->txfree_ring.wpdma + reg); +} + +void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, + void __iomem *wdma, int index); +void mtk_wed_exit(void); +int mtk_wed_flow_add(int index); +void mtk_wed_flow_remove(int index); +#else +static inline void +mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, + void __iomem *wdma, int index) +{ +} +static inline void +mtk_wed_exit(void) +{ +} +static inline int mtk_wed_flow_add(int index) +{ + return -EINVAL; +} +static inline void mtk_wed_flow_remove(int index) +{ +} +#endif + +#ifdef CONFIG_DEBUG_FS +void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw); +#else +static inline void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw) +{ +} +#endif + +#endif diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c new file mode 100644 index 000000000000..a81d3fd1a439 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Felix Fietkau */ + +#include +#include "mtk_wed.h" +#include "mtk_wed_regs.h" + +struct reg_dump { + const char *name; + u16 offset; + u8 type; + u8 base; +}; + +enum { + DUMP_TYPE_STRING, + DUMP_TYPE_WED, + DUMP_TYPE_WDMA, + DUMP_TYPE_WPDMA_TX, + DUMP_TYPE_WPDMA_TXFREE, +}; + +#define DUMP_STR(_str) { _str, 0, 
DUMP_TYPE_STRING } +#define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ } +#define DUMP_RING(_prefix, _base, ...) \ + { _prefix " BASE", _base, __VA_ARGS__ }, \ + { _prefix " CNT", _base + 0x4, __VA_ARGS__ }, \ + { _prefix " CIDX", _base + 0x8, __VA_ARGS__ }, \ + { _prefix " DIDX", _base + 0xc, __VA_ARGS__ } + +#define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED) +#define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED) + +#define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA) +#define DUMP_WDMA_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WDMA) + +#define DUMP_WPDMA_TX_RING(_n) DUMP_RING("WPDMA_TX" #_n, 0, DUMP_TYPE_WPDMA_TX, _n) +#define DUMP_WPDMA_TXFREE_RING DUMP_RING("WPDMA_RX1", 0, DUMP_TYPE_WPDMA_TXFREE) + +static void +print_reg_val(struct seq_file *s, const char *name, u32 val) +{ + seq_printf(s, "%-32s %08x\n", name, val); +} + +static void +dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev, + const struct reg_dump *regs, int n_regs) +{ + const struct reg_dump *cur; + u32 val; + + for (cur = regs; cur < ®s[n_regs]; cur++) { + switch (cur->type) { + case DUMP_TYPE_STRING: + seq_printf(s, "%s======== %s:\n", + cur > regs ? "\n" : "", + cur->name); + continue; + case DUMP_TYPE_WED: + val = wed_r32(dev, cur->offset); + break; + case DUMP_TYPE_WDMA: + val = wdma_r32(dev, cur->offset); + break; + case DUMP_TYPE_WPDMA_TX: + val = wpdma_tx_r32(dev, cur->base, cur->offset); + break; + case DUMP_TYPE_WPDMA_TXFREE: + val = wpdma_txfree_r32(dev, cur->offset); + break; + } + print_reg_val(s, cur->name, val); + } +} + + +static int +wed_txinfo_show(struct seq_file *s, void *data) +{ + static const struct reg_dump regs[] = { + DUMP_STR("WED TX"), + DUMP_WED(WED_TX_MIB(0)), + DUMP_WED_RING(WED_RING_TX(0)), + + DUMP_WED(WED_TX_MIB(1)), + DUMP_WED_RING(WED_RING_TX(1)), + + DUMP_STR("WPDMA TX"), + DUMP_WED(WED_WPDMA_TX_MIB(0)), + DUMP_WED_RING(WED_WPDMA_RING_TX(0)), + DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(0)), + + DUMP_WED(WED_WPDMA_TX_MIB(1)), + DUMP_WED_RING(WED_WPDMA_RING_TX(1)), + DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(1)), + + DUMP_STR("WPDMA TX"), + DUMP_WPDMA_TX_RING(0), + DUMP_WPDMA_TX_RING(1), + + DUMP_STR("WED WDMA RX"), + DUMP_WED(WED_WDMA_RX_MIB(0)), + DUMP_WED_RING(WED_WDMA_RING_RX(0)), + DUMP_WED(WED_WDMA_RX_THRES(0)), + DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(0)), + DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(0)), + + DUMP_WED(WED_WDMA_RX_MIB(1)), + DUMP_WED_RING(WED_WDMA_RING_RX(1)), + DUMP_WED(WED_WDMA_RX_THRES(1)), + DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(1)), + DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(1)), + + DUMP_STR("WDMA RX"), + DUMP_WDMA(WDMA_GLO_CFG), + DUMP_WDMA_RING(WDMA_RING_RX(0)), + DUMP_WDMA_RING(WDMA_RING_RX(1)), + }; + struct mtk_wed_hw *hw = s->private; + struct mtk_wed_device *dev = hw->wed_dev; + + if (!dev) + return 0; + + dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(wed_txinfo); + + +static int +mtk_wed_reg_set(void *data, u64 val) +{ + struct mtk_wed_hw *hw = data; + + regmap_write(hw->regs, hw->debugfs_reg, val); + + return 0; +} + +static int +mtk_wed_reg_get(void *data, u64 *val) +{ + struct mtk_wed_hw *hw = data; + unsigned int regval; + int ret; + + ret = regmap_read(hw->regs, hw->debugfs_reg, ®val); + if (ret) + return ret; + + *val = regval; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mtk_wed_reg_get, mtk_wed_reg_set, + "0x%08llx\n"); + +void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw) +{ + struct dentry *dir; + + snprintf(hw->dirname, sizeof(hw->dirname), "wed%d", hw->index); + dir = 
debugfs_create_dir(hw->dirname, NULL); + if (!dir) + return; + + hw->debugfs_dir = dir; + debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg); + debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval); + debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops); +} diff --git a/drivers/net/ethernet/mediatek/mtk_wed_ops.c b/drivers/net/ethernet/mediatek/mtk_wed_ops.c new file mode 100644 index 000000000000..a5d9d8a5bce2 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed_ops.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau */ + +#include +#include + +const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; +EXPORT_SYMBOL_GPL(mtk_soc_wed_ops); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h new file mode 100644 index 000000000000..0a0465ea58b4 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau */ + +#ifndef __MTK_WED_REGS_H +#define __MTK_WED_REGS_H + +#define MTK_WDMA_DESC_CTRL_LEN1 GENMASK(14, 0) +#define MTK_WDMA_DESC_CTRL_LAST_SEG1 BIT(15) +#define MTK_WDMA_DESC_CTRL_BURST BIT(16) +#define MTK_WDMA_DESC_CTRL_LEN0 GENMASK(29, 16) +#define MTK_WDMA_DESC_CTRL_LAST_SEG0 BIT(30) +#define MTK_WDMA_DESC_CTRL_DMA_DONE BIT(31) + +struct mtk_wdma_desc { + __le32 buf0; + __le32 ctrl; + __le32 buf1; + __le32 info; +} __packed __aligned(4); + +#define MTK_WED_RESET 0x008 +#define MTK_WED_RESET_TX_BM BIT(0) +#define MTK_WED_RESET_TX_FREE_AGENT BIT(4) +#define MTK_WED_RESET_WPDMA_TX_DRV BIT(8) +#define MTK_WED_RESET_WPDMA_RX_DRV BIT(9) +#define MTK_WED_RESET_WPDMA_INT_AGENT BIT(11) +#define MTK_WED_RESET_WED_TX_DMA BIT(12) +#define MTK_WED_RESET_WDMA_RX_DRV BIT(17) +#define MTK_WED_RESET_WDMA_INT_AGENT BIT(19) +#define MTK_WED_RESET_WED BIT(31) + +#define MTK_WED_CTRL 0x00c +#define MTK_WED_CTRL_WPDMA_INT_AGENT_EN BIT(0) +#define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY BIT(1) +#define MTK_WED_CTRL_WDMA_INT_AGENT_EN BIT(2) +#define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY BIT(3) +#define MTK_WED_CTRL_WED_TX_BM_EN BIT(8) +#define MTK_WED_CTRL_WED_TX_BM_BUSY BIT(9) +#define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN BIT(10) +#define MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY BIT(11) +#define MTK_WED_CTRL_RESERVE_EN BIT(12) +#define MTK_WED_CTRL_RESERVE_BUSY BIT(13) +#define MTK_WED_CTRL_FINAL_DIDX_READ BIT(24) +#define MTK_WED_CTRL_MIB_READ_CLEAR BIT(28) + +#define MTK_WED_EXT_INT_STATUS 0x020 +#define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR BIT(0) +#define MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD BIT(1) +#define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID BIT(4) +#define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH BIT(8) +#define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH BIT(9) +#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(12) +#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(13) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR BIT(16) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR BIT(17) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT BIT(18) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN BIT(19) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_BM_DMAD_COHERENT BIT(20) +#define MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR BIT(21) +#define MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR BIT(22) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_DMA_RECYCLE BIT(24) +#define MTK_WED_EXT_INT_STATUS_ERROR_MASK (MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \ + MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \ + MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \ + 
MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \ + MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \ + MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN | \ + MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR | \ + MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR) + +#define MTK_WED_EXT_INT_MASK 0x028 + +#define MTK_WED_STATUS 0x060 +#define MTK_WED_STATUS_TX GENMASK(15, 8) + +#define MTK_WED_TX_BM_CTRL 0x080 +#define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM GENMASK(6, 0) +#define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM GENMASK(22, 16) +#define MTK_WED_TX_BM_CTRL_PAUSE BIT(28) + +#define MTK_WED_TX_BM_BASE 0x084 + +#define MTK_WED_TX_BM_TKID 0x088 +#define MTK_WED_TX_BM_TKID_START GENMASK(15, 0) +#define MTK_WED_TX_BM_TKID_END GENMASK(31, 16) + +#define MTK_WED_TX_BM_BUF_LEN 0x08c + +#define MTK_WED_TX_BM_INTF 0x09c +#define MTK_WED_TX_BM_INTF_TKID GENMASK(15, 0) +#define MTK_WED_TX_BM_INTF_TKFIFO_FDEP GENMASK(23, 16) +#define MTK_WED_TX_BM_INTF_TKID_VALID BIT(28) +#define MTK_WED_TX_BM_INTF_TKID_READ BIT(29) + +#define MTK_WED_TX_BM_DYN_THR 0x0a0 +#define MTK_WED_TX_BM_DYN_THR_LO GENMASK(6, 0) +#define MTK_WED_TX_BM_DYN_THR_HI GENMASK(22, 16) + +#define MTK_WED_INT_STATUS 0x200 +#define MTK_WED_INT_MASK 0x204 + +#define MTK_WED_GLO_CFG 0x208 +#define MTK_WED_GLO_CFG_TX_DMA_EN BIT(0) +#define MTK_WED_GLO_CFG_TX_DMA_BUSY BIT(1) +#define MTK_WED_GLO_CFG_RX_DMA_EN BIT(2) +#define MTK_WED_GLO_CFG_RX_DMA_BUSY BIT(3) +#define MTK_WED_GLO_CFG_RX_BT_SIZE GENMASK(5, 4) +#define MTK_WED_GLO_CFG_TX_WB_DDONE BIT(6) +#define MTK_WED_GLO_CFG_BIG_ENDIAN BIT(7) +#define MTK_WED_GLO_CFG_DIS_BT_SIZE_ALIGN BIT(8) +#define MTK_WED_GLO_CFG_TX_BT_SIZE_LO BIT(9) +#define MTK_WED_GLO_CFG_MULTI_DMA_EN GENMASK(11, 10) +#define MTK_WED_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12) +#define MTK_WED_GLO_CFG_MI_DEPTH_RD GENMASK(21, 13) +#define MTK_WED_GLO_CFG_TX_BT_SIZE_HI GENMASK(23, 22) +#define MTK_WED_GLO_CFG_SW_RESET BIT(24) +#define MTK_WED_GLO_CFG_FIRST_TOKEN_ONLY BIT(26) +#define MTK_WED_GLO_CFG_OMIT_RX_INFO BIT(27) +#define MTK_WED_GLO_CFG_OMIT_TX_INFO BIT(28) +#define MTK_WED_GLO_CFG_BYTE_SWAP BIT(29) +#define MTK_WED_GLO_CFG_RX_2B_OFFSET BIT(31) + +#define MTK_WED_RESET_IDX 0x20c +#define MTK_WED_RESET_IDX_TX GENMASK(3, 0) +#define MTK_WED_RESET_IDX_RX GENMASK(17, 16) + +#define MTK_WED_TX_MIB(_n) (0x2a0 + (_n) * 4) + +#define MTK_WED_RING_TX(_n) (0x300 + (_n) * 0x10) + +#define MTK_WED_RING_RX(_n) (0x400 + (_n) * 0x10) + +#define MTK_WED_WPDMA_INT_TRIGGER 0x504 +#define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE BIT(1) +#define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE GENMASK(5, 4) + +#define MTK_WED_WPDMA_GLO_CFG 0x508 +#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN BIT(0) +#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY BIT(1) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN BIT(2) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY BIT(3) +#define MTK_WED_WPDMA_GLO_CFG_RX_BT_SIZE GENMASK(5, 4) +#define MTK_WED_WPDMA_GLO_CFG_TX_WB_DDONE BIT(6) +#define MTK_WED_WPDMA_GLO_CFG_BIG_ENDIAN BIT(7) +#define MTK_WED_WPDMA_GLO_CFG_DIS_BT_SIZE_ALIGN BIT(8) +#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_LO BIT(9) +#define MTK_WED_WPDMA_GLO_CFG_MULTI_DMA_EN GENMASK(11, 10) +#define MTK_WED_WPDMA_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12) +#define MTK_WED_WPDMA_GLO_CFG_MI_DEPTH_RD GENMASK(21, 13) +#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_HI GENMASK(23, 22) +#define MTK_WED_WPDMA_GLO_CFG_SW_RESET BIT(24) +#define MTK_WED_WPDMA_GLO_CFG_FIRST_TOKEN_ONLY BIT(26) +#define MTK_WED_WPDMA_GLO_CFG_OMIT_RX_INFO BIT(27) +#define MTK_WED_WPDMA_GLO_CFG_OMIT_TX_INFO BIT(28) +#define MTK_WED_WPDMA_GLO_CFG_BYTE_SWAP BIT(29) +#define 
MTK_WED_WPDMA_GLO_CFG_RX_2B_OFFSET BIT(31) + +#define MTK_WED_WPDMA_RESET_IDX 0x50c +#define MTK_WED_WPDMA_RESET_IDX_TX GENMASK(3, 0) +#define MTK_WED_WPDMA_RESET_IDX_RX GENMASK(17, 16) + +#define MTK_WED_WPDMA_INT_CTRL 0x520 +#define MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV BIT(21) + +#define MTK_WED_WPDMA_INT_MASK 0x524 + +#define MTK_WED_PCIE_CFG_BASE 0x560 + +#define MTK_WED_PCIE_INT_TRIGGER 0x570 +#define MTK_WED_PCIE_INT_TRIGGER_STATUS BIT(16) + +#define MTK_WED_WPDMA_CFG_BASE 0x580 + +#define MTK_WED_WPDMA_TX_MIB(_n) (0x5a0 + (_n) * 4) +#define MTK_WED_WPDMA_TX_COHERENT_MIB(_n) (0x5d0 + (_n) * 4) + +#define MTK_WED_WPDMA_RING_TX(_n) (0x600 + (_n) * 0x10) +#define MTK_WED_WPDMA_RING_RX(_n) (0x700 + (_n) * 0x10) +#define MTK_WED_WDMA_RING_RX(_n) (0x900 + (_n) * 0x10) +#define MTK_WED_WDMA_RX_THRES(_n) (0x940 + (_n) * 0x4) + +#define MTK_WED_WDMA_GLO_CFG 0xa04 +#define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN BIT(0) +#define MTK_WED_WDMA_GLO_CFG_RX_DRV_EN BIT(2) +#define MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY BIT(3) +#define MTK_WED_WDMA_GLO_CFG_BT_SIZE GENMASK(5, 4) +#define MTK_WED_WDMA_GLO_CFG_TX_WB_DDONE BIT(6) +#define MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE BIT(13) +#define MTK_WED_WDMA_GLO_CFG_WCOMPLETE_SEL BIT(16) +#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_RXDMA_BYPASS BIT(17) +#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_BYPASS BIT(18) +#define MTK_WED_WDMA_GLO_CFG_FSM_RETURN_IDLE BIT(19) +#define MTK_WED_WDMA_GLO_CFG_WAIT_COHERENT BIT(20) +#define MTK_WED_WDMA_GLO_CFG_AXI_W_AFTER_AW BIT(21) +#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY_SINGLE_W BIT(22) +#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY BIT(23) +#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP BIT(24) +#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE BIT(25) +#define MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE BIT(26) +#define MTK_WED_WDMA_GLO_CFG_RXDRV_CLKGATE_BYPASS BIT(30) + +#define MTK_WED_WDMA_RESET_IDX 0xa08 +#define MTK_WED_WDMA_RESET_IDX_RX GENMASK(17, 16) +#define MTK_WED_WDMA_RESET_IDX_DRV GENMASK(25, 24) + +#define MTK_WED_WDMA_INT_TRIGGER 0xa28 +#define MTK_WED_WDMA_INT_TRIGGER_RX_DONE GENMASK(17, 16) + +#define MTK_WED_WDMA_INT_CTRL 0xa2c +#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL GENMASK(17, 16) + +#define MTK_WED_WDMA_OFFSET0 0xaa4 +#define MTK_WED_WDMA_OFFSET1 0xaa8 + +#define MTK_WED_WDMA_RX_MIB(_n) (0xae0 + (_n) * 4) +#define MTK_WED_WDMA_RX_RECYCLE_MIB(_n) (0xae8 + (_n) * 4) +#define MTK_WED_WDMA_RX_PROCESSED_MIB(_n) (0xaf0 + (_n) * 4) + +#define MTK_WED_RING_OFS_BASE 0x00 +#define MTK_WED_RING_OFS_COUNT 0x04 +#define MTK_WED_RING_OFS_CPU_IDX 0x08 +#define MTK_WED_RING_OFS_DMA_IDX 0x0c + +#define MTK_WDMA_RING_RX(_n) (0x100 + (_n) * 0x10) + +#define MTK_WDMA_GLO_CFG 0x204 +#define MTK_WDMA_GLO_CFG_RX_INFO_PRERES GENMASK(28, 26) + +#define MTK_WDMA_RESET_IDX 0x208 +#define MTK_WDMA_RESET_IDX_TX GENMASK(3, 0) +#define MTK_WDMA_RESET_IDX_RX GENMASK(17, 16) + +#define MTK_WDMA_INT_MASK 0x228 +#define MTK_WDMA_INT_MASK_TX_DONE GENMASK(3, 0) +#define MTK_WDMA_INT_MASK_RX_DONE GENMASK(17, 16) +#define MTK_WDMA_INT_MASK_TX_DELAY BIT(28) +#define MTK_WDMA_INT_MASK_TX_COHERENT BIT(29) +#define MTK_WDMA_INT_MASK_RX_DELAY BIT(30) +#define MTK_WDMA_INT_MASK_RX_COHERENT BIT(31) + +#define MTK_WDMA_INT_GRP1 0x250 +#define MTK_WDMA_INT_GRP2 0x254 + +#define MTK_PCIE_MIRROR_MAP(n) ((n) ? 
0x4 : 0x0) +#define MTK_PCIE_MIRROR_MAP_EN BIT(0) +#define MTK_PCIE_MIRROR_MAP_WED_ID BIT(1) + +/* DMA channel mapping */ +#define HIFSYS_DMA_AG_MAP 0x008 + +#endif diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h new file mode 100644 index 000000000000..7e00cca06709 --- /dev/null +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -0,0 +1,131 @@ +#ifndef __MTK_WED_H +#define __MTK_WED_H + +#include +#include +#include +#include + +#define MTK_WED_TX_QUEUES 2 + +struct mtk_wed_hw; +struct mtk_wdma_desc; + +struct mtk_wed_ring { + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + int size; + + u32 reg_base; + void __iomem *wpdma; +}; + +struct mtk_wed_device { +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + const struct mtk_wed_ops *ops; + struct device *dev; + struct mtk_wed_hw *hw; + bool init_done, running; + int wdma_idx; + int irq; + + struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring txfree_ring; + struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + + struct { + int size; + void **pages; + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + } buf_ring; + + /* filled by driver: */ + struct { + struct pci_dev *pci_dev; + + u32 wpdma_phys; + + u16 token_start; + unsigned int nbuf; + + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); + int (*offload_enable)(struct mtk_wed_device *wed); + void (*offload_disable)(struct mtk_wed_device *wed); + } wlan; +#endif +}; + +struct mtk_wed_ops { + int (*attach)(struct mtk_wed_device *dev); + int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*txfree_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); + void (*detach)(struct mtk_wed_device *dev); + + void (*stop)(struct mtk_wed_device *dev); + void (*start)(struct mtk_wed_device *dev, u32 irq_mask); + void (*reset_dma)(struct mtk_wed_device *dev); + + u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg); + void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val); + + u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask); + void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); +}; + +extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; + +static inline int +mtk_wed_device_attach(struct mtk_wed_device *dev) +{ + int ret = -ENODEV; + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + rcu_read_lock(); + dev->ops = rcu_dereference(mtk_soc_wed_ops); + if (dev->ops) + ret = dev->ops->attach(dev); + else + rcu_read_unlock(); + + if (ret) + dev->ops = NULL; +#endif + + return ret; +} + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED +#define mtk_wed_device_active(_dev) !!(_dev)->ops +#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) +#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ + (_dev)->ops->txfree_ring_setup(_dev, _regs) +#define mtk_wed_device_reg_read(_dev, _reg) \ + (_dev)->ops->reg_read(_dev, _reg) +#define mtk_wed_device_reg_write(_dev, _reg, _val) \ + (_dev)->ops->reg_write(_dev, _reg, _val) +#define mtk_wed_device_irq_get(_dev, _mask) \ + (_dev)->ops->irq_get(_dev, _mask) +#define mtk_wed_device_irq_set_mask(_dev, _mask) \ + (_dev)->ops->irq_set_mask(_dev, _mask) +#else +static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) +{ + return false; +} +#define mtk_wed_device_detach(_dev) do {} while (0) +#define mtk_wed_device_start(_dev, _mask) do {} while (0) +#define 
mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_txfree_ring_setup(_dev, _regs) -ENODEV +#define mtk_wed_device_reg_read(_dev, _reg) 0 +#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) +#define mtk_wed_device_irq_get(_dev, _mask) 0 +#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#endif + +#endif -- cgit From a333215e10cb5d3b1e0685ca117f0e9452215485 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 5 Apr 2022 21:57:48 +0200 Subject: net: ethernet: mtk_eth_soc: implement flow offloading to WED devices This allows hardware flow offloading from Ethernet to WLAN on the MT7622 SoC. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_ppe.c | 18 ++++++++ drivers/net/ethernet/mediatek/mtk_ppe.h | 14 ++++--- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 56 ++++++++++++++++++++++++- drivers/net/ethernet/mediatek/mtk_wed.h | 7 ++++ include/linux/netdevice.h | 7 ++++ net/core/dev.c | 4 ++ 6 files changed, 98 insertions(+), 8 deletions(-) (limited to 'include/linux')
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 3ad10c793308..472bcd3269a7 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -329,6 +329,24 @@ int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid) return 0; } +int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq, + int bss, int wcid) +{ + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); + u32 *ib2 = mtk_foe_entry_ib2(entry); + + *ib2 &= ~MTK_FOE_IB2_PORT_MG; + *ib2 |= MTK_FOE_IB2_WDMA_WINFO; + if (wdma_idx) + *ib2 |= MTK_FOE_IB2_WDMA_DEVIDX; + + l2->vlan2 = FIELD_PREP(MTK_FOE_VLAN2_WINFO_BSS, bss) | + FIELD_PREP(MTK_FOE_VLAN2_WINFO_WCID, wcid) | + FIELD_PREP(MTK_FOE_VLAN2_WINFO_RING, txq); + + return 0; +} + static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry) { return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 242fb8f2ae65..df8ccaf48171 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -48,9 +48,9 @@ enum { #define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5) #define MTK_FOE_IB2_MULTICAST BIT(8) -#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12) -#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16) -#define MTK_FOE_IB2_WHNAT_NAT BIT(17) +#define MTK_FOE_IB2_WDMA_QID2 GENMASK(13, 12) +#define MTK_FOE_IB2_WDMA_DEVIDX BIT(16) +#define MTK_FOE_IB2_WDMA_WINFO BIT(17) #define MTK_FOE_IB2_PORT_MG GENMASK(17, 12) @@ -58,9 +58,9 @@ enum { #define MTK_FOE_IB2_DSCP GENMASK(31, 24) -#define MTK_FOE_VLAN2_WHNAT_BSS GEMMASK(5, 0) -#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6) -#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14) +#define MTK_FOE_VLAN2_WINFO_BSS GENMASK(5, 0) +#define MTK_FOE_VLAN2_WINFO_WCID GENMASK(13, 6) +#define MTK_FOE_VLAN2_WINFO_RING GENMASK(15, 14) enum { MTK_FOE_STATE_INVALID, @@ -281,6 +281,8 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry, int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port); int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid); int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid); +int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq, + int bss, int wcid); int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct 
mtk_foe_entry *entry, u16 timestamp); int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 7bb1f20002b5..bcf342bb9051 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -10,6 +10,7 @@ #include #include #include "mtk_eth_soc.h" +#include "mtk_wed.h" struct mtk_flow_data { struct ethhdr eth; @@ -39,6 +40,7 @@ struct mtk_flow_entry { struct rhash_head node; unsigned long cookie; u16 hash; + s8 wed_index; }; static const struct rhashtable_params mtk_flow_ht_params = { @@ -80,6 +82,35 @@ mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth) memcpy(dest, src, act->mangle.mask ? 2 : 4); } +static int +mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_info *info) +{ + struct net_device_path_ctx ctx = { + .dev = dev, + .daddr = addr, + }; + struct net_device_path path = {}; + + if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED)) + return -1; + + if (!dev->netdev_ops->ndo_fill_forward_path) + return -1; + + if (dev->netdev_ops->ndo_fill_forward_path(&ctx, &path)) + return -1; + + if (path.type != DEV_PATH_MTK_WDMA) + return -1; + + info->wdma_idx = path.mtk_wdma.wdma_idx; + info->queue = path.mtk_wdma.queue; + info->bss = path.mtk_wdma.bss; + info->wcid = path.mtk_wdma.wcid; + + return 0; +} + static int mtk_flow_mangle_ports(const struct flow_action_entry *act, @@ -149,10 +180,20 @@ mtk_flow_get_dsa_port(struct net_device **dev) static int mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, - struct net_device *dev) + struct net_device *dev, const u8 *dest_mac, + int *wed_index) { + struct mtk_wdma_info info = {}; int pse_port, dsa_port; + if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) { + mtk_foe_entry_set_wdma(foe, info.wdma_idx, info.queue, info.bss, + info.wcid); + pse_port = 3; + *wed_index = info.wdma_idx; + goto out; + } + dsa_port = mtk_flow_get_dsa_port(&dev); if (dsa_port >= 0) mtk_foe_entry_set_dsa(foe, dsa_port); @@ -164,6 +205,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, else return -EOPNOTSUPP; +out: mtk_foe_entry_set_pse_port(foe, pse_port); return 0; @@ -179,6 +221,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) struct net_device *odev = NULL; struct mtk_flow_entry *entry; int offload_type = 0; + int wed_index = -1; u16 addr_type = 0; u32 timestamp; u8 l4proto = 0; @@ -326,10 +369,14 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) if (data.pppoe.num == 1) mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid); - err = mtk_flow_set_output_device(eth, &foe, odev); + err = mtk_flow_set_output_device(eth, &foe, odev, data.eth.h_dest, + &wed_index); if (err) return err; + if (wed_index >= 0 && (err = mtk_wed_flow_add(wed_index)) < 0) + return err; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; @@ -343,6 +390,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) } entry->hash = hash; + entry->wed_index = wed_index; err = rhashtable_insert_fast(&eth->flow_table, &entry->node, mtk_flow_ht_params); if (err < 0) @@ -353,6 +401,8 @@ clear_flow: mtk_foe_entry_clear(&eth->ppe, hash); free: kfree(entry); + if (wed_index >= 0) + mtk_wed_flow_remove(wed_index); return err; } @@ -369,6 +419,8 @@ mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f) mtk_foe_entry_clear(&eth->ppe, entry->hash); 
rhashtable_remove_fast(&eth->flow_table, &entry->node, mtk_flow_ht_params); + if (entry->wed_index >= 0) + mtk_wed_flow_remove(entry->wed_index); kfree(entry); return 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index 404c9a9b130d..981ec613f4b0 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -7,6 +7,7 @@ #include #include #include +#include struct mtk_eth; @@ -27,6 +28,12 @@ struct mtk_wed_hw { int index; }; +struct mtk_wdma_info { + u8 wdma_idx; + u8 queue; + u16 wcid; + u8 bss; +}; #ifdef CONFIG_NET_MEDIATEK_SOC_WED static inline void
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b6a1e7f643da..7b2a0b739684 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -862,6 +862,7 @@ enum net_device_path_type { DEV_PATH_BRIDGE, DEV_PATH_PPPOE, DEV_PATH_DSA, + DEV_PATH_MTK_WDMA, }; struct net_device_path { @@ -887,6 +888,12 @@ struct net_device_path { int port; u16 proto; } dsa; + struct { + u8 wdma_idx; + u8 queue; + u16 wcid; + u8 bss; + } mtk_wdma; }; };
diff --git a/net/core/dev.c b/net/core/dev.c index 2ec17358d7b4..d5a362d53b34 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -701,6 +701,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, if (WARN_ON_ONCE(last_dev == ctx.dev)) return -1; } + + if (!ctx.dev) + return ret; + path = dev_fwd_path(stack); if (!path) return -1; -- cgit From 3e9c4584336149146fe15cb5703fc10a2ca2d2a0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 24 Mar 2022 11:30:25 +0100 Subject: gpu: host1x: Do not use mapping cache for job submissions Buffer mappings used in job submissions are usually small and not rapidly reused, as opposed to framebuffers (which are usually large and rapidly reused, for example when page-flipping between double-buffered framebuffers). Avoid going through the mapping cache for these buffers since the cache would also lead to leaks if nobody ever releases the cache's last reference. For DRM/KMS these last references are dropped when the framebuffers are removed and therefore no longer needed. While at it, also add a note about the need to explicitly remove the final reference to the mapping in the cache. 
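In code terms the change is just the final cache argument at the two pin sites (minimal before/after sketch lifted from the hunks below):

	/* before: job buffer mappings were kept in the mapping cache */
	map = host1x_bo_pin(dev, bo, direction, &client->cache);

	/* after: pass NULL so the mapping stays private to the job and is
	 * released when the job completes, instead of lingering in the cache
	 */
	map = host1x_bo_pin(dev, bo, direction, NULL);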
Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 4 ++-- include/linux/host1x.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 5e8c183167b7..b2761aa03b95 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -175,7 +175,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - map = host1x_bo_pin(dev, bo, direction, &client->cache); + map = host1x_bo_pin(dev, bo, direction, NULL); if (IS_ERR(map)) { err = PTR_ERR(map); goto unpin; @@ -222,7 +222,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache); + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL); if (IS_ERR(map)) { err = PTR_ERR(map); goto unpin; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 00278853eadf..c0bf4e581fe9 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -31,6 +31,11 @@ u64 host1x_get_dma_mask(struct host1x *host1x); * struct host1x_bo_cache - host1x buffer object cache * @mappings: list of mappings * @lock: synchronizes accesses to the list of mappings + * + * Note that entries are not periodically evicted from this cache and instead need to be + * explicitly released. This is used primarily for DRM/KMS where the cache's reference is + * released when the last reference to a buffer object represented by a mapping in this + * cache is dropped. */ struct host1x_bo_cache { struct list_head mappings; -- cgit From c8d4c18bfbc4ab467188dbe45cc8155759f49d9e Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Nov 2021 15:20:45 +0100 Subject: dma-buf/drivers: make reserving a shared slot mandatory v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit all the users of dma_resv_add_excl_fence() and make sure they reserve a shared slot also when only trying to add an exclusive fence. This is the next step towards handling the exclusive fence like a shared one. 
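All of the conversions below reduce to the same shape: reserve the slot first, then add whichever fence type is needed (condensed sketch of the common pattern, not any single driver verbatim; a few callers such as amdgpu and radeon instead fall back to dma_fence_wait() when the reservation fails under OOM):

	ret = dma_resv_reserve_fences(resv, 1);
	if (ret)
		return ret;

	if (write)
		dma_resv_add_excl_fence(resv, fence);
	else
		dma_resv_add_shared_fence(resv, fence);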
v2: fix missed case in amdgpu v3: and two more radeon, rename function v4: add one more case to TTM, fix i915 after rebase Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220406075132.3263-2-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 10 ++-- drivers/dma-buf/st-dma-resv.c | 64 ++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 8 +-- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 ++-- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 6 +- .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 5 +- drivers/gpu/drm/i915/i915_vma.c | 10 +++- .../gpu/drm/i915/selftests/intel_memory_region.c | 7 +++ drivers/gpu/drm/lima/lima_gem.c | 10 ++-- drivers/gpu/drm/msm/msm_gem_submit.c | 18 +++--- drivers/gpu/drm/nouveau/nouveau_fence.c | 8 +-- drivers/gpu/drm/panfrost/panfrost_job.c | 4 ++ drivers/gpu/drm/qxl/qxl_release.c | 2 +- drivers/gpu/drm/radeon/radeon_cs.c | 4 ++ drivers/gpu/drm/radeon/radeon_object.c | 8 +++ drivers/gpu/drm/radeon/radeon_vm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 8 ++- drivers/gpu/drm/ttm/ttm_bo_util.c | 12 +++- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 15 +++-- drivers/gpu/drm/v3d/v3d_gem.c | 15 +++-- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 12 ++-- drivers/gpu/drm/virtio/virtgpu_gem.c | 9 +++ drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 16 ++++-- include/linux/dma-resv.h | 4 +- 30 files changed, 176 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 15ffac35439d..8c650b96357a 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -152,7 +152,7 @@ static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) } /** - * dma_resv_reserve_shared - Reserve space to add shared fences to + * dma_resv_reserve_fences - Reserve space to add shared fences to * a dma_resv. * @obj: reservation object * @num_fences: number of fences we want to add @@ -167,7 +167,7 @@ static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) * RETURNS * Zero for success, or -errno */ -int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) +int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) { struct dma_resv_list *old, *new; unsigned int i, j, k, max; @@ -230,7 +230,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) return 0; } -EXPORT_SYMBOL(dma_resv_reserve_shared); +EXPORT_SYMBOL(dma_resv_reserve_fences); #ifdef CONFIG_DEBUG_MUTEXES /** @@ -238,7 +238,7 @@ EXPORT_SYMBOL(dma_resv_reserve_shared); * @obj: the dma_resv object to reset * * Reset the number of pre-reserved shared slots to test that drivers do - * correct slot allocation using dma_resv_reserve_shared(). See also + * correct slot allocation using dma_resv_reserve_fences(). See also * &dma_resv_list.shared_max. */ void dma_resv_reset_shared_max(struct dma_resv *obj) @@ -260,7 +260,7 @@ EXPORT_SYMBOL(dma_resv_reset_shared_max); * @fence: the shared fence to add * * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and - * dma_resv_reserve_shared() has been called. + * dma_resv_reserve_fences() has been called. 
* * See also &dma_resv.fence for a discussion of the semantics. */ diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index cbe999c6e7a6..d2e61f6ae989 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -75,17 +75,16 @@ static int test_signaling(void *arg, bool shared) goto err_free; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - goto err_unlock; - } + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + goto err_unlock; + } + if (shared) dma_resv_add_shared_fence(&resv, f); - } else { + else dma_resv_add_excl_fence(&resv, f); - } if (dma_resv_test_signaled(&resv, shared)) { pr_err("Resv unexpectedly signaled\n"); @@ -134,17 +133,16 @@ static int test_for_each(void *arg, bool shared) goto err_free; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - goto err_unlock; - } + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + goto err_unlock; + } + if (shared) dma_resv_add_shared_fence(&resv, f); - } else { + else dma_resv_add_excl_fence(&resv, f); - } r = -ENOENT; dma_resv_for_each_fence(&cursor, &resv, shared, fence) { @@ -206,18 +204,17 @@ static int test_for_each_unlocked(void *arg, bool shared) goto err_free; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - dma_resv_unlock(&resv); - goto err_free; - } + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + dma_resv_unlock(&resv); + goto err_free; + } + if (shared) dma_resv_add_shared_fence(&resv, f); - } else { + else dma_resv_add_excl_fence(&resv, f); - } dma_resv_unlock(&resv); r = -ENOENT; @@ -290,18 +287,17 @@ static int test_get_fences(void *arg, bool shared) goto err_resv; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - dma_resv_unlock(&resv); - goto err_resv; - } + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + dma_resv_unlock(&resv); + goto err_resv; + } + if (shared) dma_resv_add_shared_fence(&resv, f); - } else { + else dma_resv_add_excl_fence(&resv, f); - } dma_resv_unlock(&resv); r = dma_resv_get_fences(&resv, shared, &i, &fences); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 900ed2a7483b..98b1736bb221 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1233,7 +1233,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(vm->root.bo, @@ -2571,7 +2571,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem * Add process eviction fence to bo so they can * evict each other. 
*/ - ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25731719c627..6f57a2fd5fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1388,6 +1388,14 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { struct dma_resv *resv = bo->tbo.base.resv; + int r; + + r = dma_resv_reserve_fences(resv, 1); + if (r) { + /* As last resort on OOM we block for the fence */ + dma_fence_wait(fence, false); + return; + } if (shared) dma_resv_add_shared_fence(resv, fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5d11978c162e..b13451255e8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2926,7 +2926,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) goto error_free_root; - r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) goto error_unreserve; @@ -3369,7 +3369,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } - r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + r = dma_resv_reserve_fences(root->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve fence slot\n", r); goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3b8856b4cece..b3fc3e958227 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -548,7 +548,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, goto reserve_bo_failed; } - r = dma_resv_reserve_shared(bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(bo->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve bo\n", r); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 5f502c49aec2..53f7c78628a4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -179,11 +179,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) struct etnaviv_gem_submit_bo *bo = &submit->bos[i]; struct dma_resv *robj = bo->obj->base.resv; - if (!(bo->flags & ETNA_SUBMIT_BO_WRITE)) { - ret = dma_resv_reserve_shared(robj, 1); - if (ret) - return ret; - } + ret = dma_resv_reserve_fences(robj, 1); + if (ret) + return ret; if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT) continue; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index ce91b23385cf..1fd0cc9ca213 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -108,7 +108,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, trace_i915_gem_object_clflush(obj); clflush = NULL; - if (!(flags & I915_CLFLUSH_SYNC)) + if (!(flags & I915_CLFLUSH_SYNC) && + dma_resv_reserve_fences(obj->base.resv, 1) == 0) clflush = clflush_work_create(obj); if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d42f437149c9..78f8797853ce 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -998,11 +998,9 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } } - if (!(ev->flags & EXEC_OBJECT_WRITE)) { - err = dma_resv_reserve_shared(vma->obj->base.resv, 1); - if (err) - return err; - } + err = dma_resv_reserve_fences(vma->obj->base.resv, 1); + if (err) + return err; GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); @@ -2303,7 +2301,7 @@ static int eb_parse(struct i915_execbuffer *eb) if (IS_ERR(batch)) return PTR_ERR(batch); - err = dma_resv_reserve_shared(shadow->obj->base.resv, 1); + err = dma_resv_reserve_fences(shadow->obj->base.resv, 1); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 1ebe6e4086a1..432ac74ff225 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -611,7 +611,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, assert_object_held(src); i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); - ret = dma_resv_reserve_shared(src_bo->base.resv, 1); + ret = dma_resv_reserve_fences(src_bo->base.resv, 1); + if (ret) + return ret; + + ret = dma_resv_reserve_fences(dst_bo->base.resv, 1); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index d534141b2cf7..0e52eb87cd55 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -216,7 +216,10 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, i915_gem_object_is_lmem(obj), 0xdeadbeaf, &rq); if (rq) { - dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + err = dma_resv_reserve_fences(obj->base.resv, 1); + if (!err) + dma_resv_add_excl_fence(obj->base.resv, + &rq->fence); i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 94fcdb7bd21d..bae3423f58e8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1819,6 +1819,12 @@ int _i915_vma_move_to_active(struct i915_vma *vma, intel_frontbuffer_put(front); } + if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { + err = dma_resv_reserve_fences(vma->obj->base.resv, 1); + if (unlikely(err)) + return err; + } + if (fence) { dma_resv_add_excl_fence(vma->obj->base.resv, fence); obj->write_domain = I915_GEM_DOMAIN_RENDER; @@ -1826,7 +1832,7 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } } else { if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { - err = dma_resv_reserve_shared(vma->obj->base.resv, 1); + err = dma_resv_reserve_fences(vma->obj->base.resv, 1); if (unlikely(err)) return err; } @@ -2044,7 +2050,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) if (!obj->mm.rsgt) return -EBUSY; - err = dma_resv_reserve_shared(obj->base.resv, 1); + err = dma_resv_reserve_fences(obj->base.resv, 1); if (err) return -EBUSY; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ba32893e0873..6114e013092b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -1043,6 +1043,13 @@ static int igt_lmem_write_cpu(void *arg) } i915_gem_object_lock(obj, NULL); + + err = dma_resv_reserve_fences(obj->base.resv, 1); + 
if (err) { + i915_gem_object_unlock(obj); + goto out_put; + } + /* Put the pages into a known state -- from the gpu for added fun */ intel_engine_pm_get(engine); err = intel_context_migrate_clear(engine->gt->migrate.context, NULL, diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 55bb1ec3c4f7..e0a11ee0e86d 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -257,13 +257,11 @@ int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset) static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo, bool write, bool explicit) { - int err = 0; + int err; - if (!write) { - err = dma_resv_reserve_shared(lima_bo_resv(bo), 1); - if (err) - return err; - } + err = dma_resv_reserve_fences(lima_bo_resv(bo), 1); + if (err) + return err; /* explicit sync use user passed dep fence */ if (explicit) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index c6d60c8d286d..3164db8be893 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -320,16 +320,14 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) struct drm_gem_object *obj = &submit->bos[i].obj->base; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; - if (!write) { - /* NOTE: _reserve_shared() must happen before - * _add_shared_fence(), which makes this a slightly - * strange place to call it. OTOH this is a - * convenient can-fail point to hook it in. - */ - ret = dma_resv_reserve_shared(obj->resv, 1); - if (ret) - return ret; - } + /* NOTE: _reserve_shared() must happen before + * _add_shared_fence(), which makes this a slightly + * strange place to call it. OTOH this is a + * convenient can-fail point to hook it in. + */ + ret = dma_resv_reserve_fences(obj->resv, 1); + if (ret) + return ret; /* exclusive fences must be ordered */ if (no_implicit && !write) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index a3a04e0d76ec..0268259e97eb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -346,11 +346,9 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, struct dma_resv *resv = nvbo->bo.base.resv; int i, ret; - if (!exclusive) { - ret = dma_resv_reserve_shared(resv, 1); - if (ret) - return ret; - } + ret = dma_resv_reserve_fences(resv, 1); + if (ret) + return ret; /* Waiting for the exclusive fence first causes performance regressions * under some circumstances. So manually wait for the shared ones first. 
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index a6925dbb6224..c34114560e49 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -247,6 +247,10 @@ static int panfrost_acquire_object_fences(struct drm_gem_object **bos, int i, ret; for (i = 0; i < bo_count; i++) { + ret = dma_resv_reserve_fences(bos[i]->resv, 1); + if (ret) + return ret; + /* panfrost always uses write mode in its current uapi */ ret = drm_sched_job_add_implicit_dependencies(job, bos[i], true); diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 469979cd0341..cde1e8ddaeaa 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -200,7 +200,7 @@ static int qxl_release_validate_bo(struct qxl_bo *bo) return ret; } - ret = dma_resv_reserve_shared(bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 9ed2b2700e0a..446f7bae54c4 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -535,6 +535,10 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, return r; radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update); + + r = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + if (r) + return r; } return radeon_vm_clear_invalids(rdev, vm); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 91a72cd14304..7ffd2e90f325 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -782,6 +782,14 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, bool shared) { struct dma_resv *resv = bo->tbo.base.resv; + int r; + + r = dma_resv_reserve_fences(resv, 1); + if (r) { + /* As last resort on OOM we block for the fence */ + dma_fence_wait(&fence->base, false); + return; + } if (shared) dma_resv_add_shared_fence(resv, &fence->base); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index bb53016f3138..987cabbf1318 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -831,7 +831,7 @@ static int radeon_vm_update_ptes(struct radeon_device *rdev, int r; radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true); - r = dma_resv_reserve_shared(pt->tbo.base.resv, 1); + r = dma_resv_reserve_fences(pt->tbo.base.resv, 1); if (r) return r; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e5fd0f2c0299..c49996cf25d0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -151,6 +151,10 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, } } + ret = dma_resv_reserve_fences(bo->base.resv, 1); + if (ret) + goto out_err; + ret = bdev->funcs->move(bo, evict, ctx, mem, hop); if (ret) { if (ret == -EMULTIHOP) @@ -735,7 +739,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, dma_resv_add_shared_fence(bo->base.resv, fence); - ret = dma_resv_reserve_shared(bo->base.resv, 1); + ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) { dma_fence_put(fence); return ret; @@ -794,7 +798,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, bool type_found = false; int i, ret; - ret = dma_resv_reserve_shared(bo->base.resv, 1); + ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) return ret; diff --git 
a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 219dd81bbeab..1b96b91bf81b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -221,9 +221,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base = *bo; - ttm_bo_get(bo); - fbo->bo = bo; - /** * Fix up members that we shouldn't copy directly: * TODO: Explicit member copy would probably be better here. @@ -250,6 +247,15 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + kfree(fbo); + return ret; + } + + ttm_bo_get(bo); + fbo->bo = bo; + ttm_bo_move_to_lru_tail_unlocked(&fbo->base); *new_obj = &fbo->base;
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 071c48d672c6..789c645f004e 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -90,6 +90,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; + unsigned int num_fences; ret = ttm_bo_reserve(bo, intr, (ticket == NULL), ticket); if (ret == -EALREADY && dups) { @@ -100,12 +101,10 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, continue; } + num_fences = max(entry->num_shared, 1u); if (!ret) { - if (!entry->num_shared) - continue; - - ret = dma_resv_reserve_shared(bo->base.resv, - entry->num_shared); + ret = dma_resv_reserve_fences(bo->base.resv, + num_fences); if (!ret) continue; } @@ -120,9 +119,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, ret = ttm_bo_reserve_slowpath(bo, intr, ticket); } - if (!ret && entry->num_shared) - ret = dma_resv_reserve_shared(bo->base.resv, - entry->num_shared); + if (!ret) + ret = dma_resv_reserve_fences(bo->base.resv, + num_fences); if (unlikely(ret != 0)) { if (ticket) {
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 92bc0faee84f..961812d33827 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -259,16 +259,21 @@ v3d_lock_bo_reservations(struct v3d_job *job, return ret; for (i = 0; i < job->bo_count; i++) { + ret = dma_resv_reserve_fences(job->bo[i]->resv, 1); + if (ret) + goto fail; + ret = drm_sched_job_add_implicit_dependencies(&job->base, job->bo[i], true); - if (ret) { - drm_gem_unlock_reservations(job->bo, job->bo_count, - acquire_ctx); - return ret; - } + if (ret) + goto fail; } return 0; + +fail: + drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); + return ret; } /**
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 4abf10b66fe8..594bd6bb00d2 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -644,7 +644,7 @@ retry: for (i = 0; i < exec->bo_count; i++) { bo = &exec->bo[i]->base; - ret = dma_resv_reserve_shared(bo->resv, 1); + ret = dma_resv_reserve_fences(bo->resv, 1); if (ret) { vc4_unlock_bo_reservations(dev, exec, acquire_ctx); return ret;
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index bd6f75285fd9..2ddbebca87d9 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -157,12 +157,14 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, } /* Expose the fence via the dma-buf */ - ret = 0; dma_resv_lock(resv, NULL); - if (arg->flags & VGEM_FENCE_WRITE) - 
dma_resv_add_excl_fence(resv, fence); - else if ((ret = dma_resv_reserve_shared(resv, 1)) == 0) - dma_resv_add_shared_fence(resv, fence); + ret = dma_resv_reserve_fences(resv, 1); + if (!ret) { + if (arg->flags & VGEM_FENCE_WRITE) + dma_resv_add_excl_fence(resv, fence); + else + dma_resv_add_shared_fence(resv, fence); + } dma_resv_unlock(resv); /* Record the fence in our idr for later signaling */ diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 48d3c9955f0d..1820ca6cf673 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -214,6 +214,7 @@ void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs, int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs) { + unsigned int i; int ret; if (objs->nents == 1) { @@ -222,6 +223,14 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs) ret = drm_gem_lock_reservations(objs->objs, objs->nents, &objs->ticket); } + if (ret) + return ret; + + for (i = 0; i < objs->nents; ++i) { + ret = dma_resv_reserve_fences(objs->objs[i]->resv, 1); + if (ret) + return ret; + } return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 31aecc46624b..fe13aa8b4a64 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -747,16 +747,22 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence) { struct ttm_device *bdev = bo->bdev; - struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, bdev); + int ret; - if (fence == NULL) { + if (fence == NULL) vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL); + else + dma_fence_get(&fence->base); + + ret = dma_resv_reserve_fences(bo->base.resv, 1); + if (!ret) dma_resv_add_excl_fence(bo->base.resv, &fence->base); - dma_fence_put(&fence->base); - } else - dma_resv_add_excl_fence(bo->base.resv, &fence->base); + else + /* Last resort fallback when we are OOM */ + dma_fence_wait(&fence->base, false); + dma_fence_put(&fence->base); } diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index ecb697d4d861..5fa04d0fccad 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -117,7 +117,7 @@ struct dma_resv { * A new fence is added by calling dma_resv_add_shared_fence(). Since * this often needs to be done past the point of no return in command * submission it cannot fail, and therefore sufficient slots need to be - * reserved by calling dma_resv_reserve_shared(). + * reserved by calling dma_resv_reserve_fences(). * * Note that actual semantics of what an exclusive or shared fence mean * is defined by the user, for reservation objects shared across drivers @@ -413,7 +413,7 @@ static inline void dma_resv_unlock(struct dma_resv *obj) void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); -int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); +int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, struct dma_fence *fence); -- cgit From a60707d74bd1d119cf7bcc5101cda912fc46d5e3 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:45:57 +0800 Subject: sched: Move child_runs_first sysctls to fair.c move child_runs_first sysctls to fair.c and use the new register_sysctl_init() to register the sysctl interface. 
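The recipe, repeated throughout this series, is a static table next to the code it tunes plus a late initcall (condensed from the fair.c hunk below; the knob keeps its old path, /proc/sys/kernel/sched_child_runs_first, because the table is registered under "kernel"):

	static struct ctl_table sched_child_runs_first_sysctls[] = {
		{
			.procname	= "sched_child_runs_first",
			.data		= &sysctl_sched_child_runs_first,
			.maxlen		= sizeof(unsigned int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{}	/* empty entry terminates the table */
	};

	static int __init sched_child_runs_first_sysctl_init(void)
	{
		register_sysctl_init("kernel", sched_child_runs_first_sysctls);
		return 0;
	}
	late_initcall(sched_child_runs_first_sysctl_init);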
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 2 -- kernel/sched/fair.c | 19 +++++++++++++++++++ kernel/sched/sched.h | 2 ++ kernel/sysctl.c | 7 ------- 4 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c1076b5e17fb..1d2ff3cd1728 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -14,8 +14,6 @@ extern unsigned long sysctl_hung_task_timeout_secs; enum { sysctl_hung_task_timeout_secs = 0 }; #endif -extern unsigned int sysctl_sched_child_runs_first; - enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, SCHED_TUNABLESCALING_LOG, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d4bd299d67ab..788b1d6a3248 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -109,6 +109,25 @@ static unsigned int sched_nr_latency = 8; * parent will (try to) run first. */ unsigned int sysctl_sched_child_runs_first __read_mostly; +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_child_runs_first_sysctls[] = { + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +static int __init sched_child_runs_first_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_child_runs_first_sysctls); + return 0; +} +late_initcall(sched_child_runs_first_sysctl_init); +#endif /* * SCHED_OTHER wake-up granularity. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 58263f90c559..767fc1de9646 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -108,6 +108,8 @@ extern __read_mostly int scheduler_running; extern unsigned long calc_load_update; extern atomic_long_t calc_load_tasks; +extern unsigned int sysctl_sched_child_runs_first; + extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 830aaf8ca08e..6bbb8e1af675 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1659,13 +1659,6 @@ int proc_do_static_key(struct ctl_table *table, int write, } static struct ctl_table kern_table[] = { - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #ifdef CONFIG_SCHEDSTATS { .procname = "sched_schedstats", -- cgit From f5ef06d58be8311a9425e6a54a053ecb350952f3 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:45:58 +0800 Subject: sched: Move schedstats sysctls to core.c move schedstats sysctls to core.c and use the new register_sysctl_init() to register the sysctl interface. 
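Same recipe as the previous patch, with two details visible in the diff: sysctl_schedstats() loses its declaration in sched/sysctl.h and becomes static, since kernel/sysctl.c no longer references it, and the entry keeps its 0/1 bounds (fragment of the table as moved):

	{
		.procname	= "sched_schedstats",
		.data		= NULL,	/* the handler reads the schedstats state itself */
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sysctl_schedstats,
		.extra1		= SYSCTL_ZERO,	/* writes clamped to 0..1 */
		.extra2		= SYSCTL_ONE,
	},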
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 2 -- kernel/sched/core.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 11 ----------- 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 1d2ff3cd1728..6c7a6850559b 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -64,8 +64,6 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern unsigned int sysctl_sched_energy_aware; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d575b4914925..04440da5955f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4430,7 +4430,7 @@ out: __setup("schedstats=", setup_schedstats); #ifdef CONFIG_PROC_SYSCTL -int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, +static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -4449,6 +4449,26 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, set_schedstats(state); return err; } + +static struct ctl_table sched_schedstats_sysctls[] = { + { + .procname = "sched_schedstats", + .data = NULL, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_schedstats, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init sched_schedstats_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_schedstats_sysctls); + return 0; +} +late_initcall(sched_schedstats_sysctl_init); #endif /* CONFIG_PROC_SYSCTL */ #endif /* CONFIG_SCHEDSTATS */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6bbb8e1af675..fc0eeca20718 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1659,17 +1659,6 @@ int proc_do_static_key(struct ctl_table *table, int write, } static struct ctl_table kern_table[] = { -#ifdef CONFIG_SCHEDSTATS - { - .procname = "sched_schedstats", - .data = NULL, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_schedstats, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_TASK_DELAY_ACCT { .procname = "task_delayacct", -- cgit From d9ab0e63fa7f8405fbb19e28c5191e0880a7f2db Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:45:59 +0800 Subject: sched: Move rt_period/runtime sysctls to rt.c move rt_period/runtime sysctls to rt.c and use the new register_sysctl_init() to register the sysctl interface. 
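Both knobs share sched_rt_handler(), which is why the handler can become static with only a forward declaration above the table. The defaults move verbatim: with sysctl_sched_rt_period = 1000000 us and sysctl_sched_rt_runtime = 950000 us, realtime tasks may consume at most 950000/1000000 = 95% of each one-second period (writing -1 to sched_rt_runtime_us lifts the limit entirely). The extern for sysctl_sched_rt_runtime lands in kernel/rcu/rcu.h because RCU also consults the value and can no longer pick the declaration up from sched/sysctl.h.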
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 11 ----------- kernel/rcu/rcu.h | 2 ++ kernel/sched/core.c | 13 ------------- kernel/sched/rt.c | 43 ++++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 4 ++++ kernel/sysctl.c | 14 -------------- 6 files changed, 48 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 6c7a6850559b..4391c1307945 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,15 +31,6 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -/* - * control realtime throttling: - * - * /proc/sys/kernel/sched_rt_period_us - * /proc/sys/kernel/sched_rt_runtime_us - */ -extern unsigned int sysctl_sched_rt_period; -extern int sysctl_sched_rt_runtime; - extern unsigned int sysctl_sched_dl_period_max; extern unsigned int sysctl_sched_dl_period_min; @@ -58,8 +49,6 @@ extern int sched_rr_timeslice; int sched_rr_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int sched_rt_handler(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 24b5f2c2de87..7812c740b3bf 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -23,6 +23,8 @@ #define RCU_SEQ_CTR_SHIFT 2 #define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) +extern int sysctl_sched_rt_runtime; + /* * Return the counter portion of a sequence number previously returned * by rcu_seq_snap() or rcu_seq_current(). diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 04440da5955f..774f3229db37 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -145,12 +145,6 @@ const_debug unsigned int sysctl_sched_nr_migrate = 8; const_debug unsigned int sysctl_sched_nr_migrate = 32; #endif -/* - * period over which we measure -rt task CPU usage in us. - * default: 1s - */ -unsigned int sysctl_sched_rt_period = 1000000; - __read_mostly int scheduler_running; #ifdef CONFIG_SCHED_CORE @@ -444,13 +438,6 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { } #endif /* CONFIG_SCHED_CORE */ -/* - * part of the period that we allow rt tasks to run in us. - * default: 0.95s - */ -int sysctl_sched_rt_runtime = 950000; - - /* * Serialization rules: * diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a32c46889af8..5663bb5ff890 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -13,6 +13,47 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; +/* + * period over which we measure -rt task CPU usage in us. + * default: 1s + */ +unsigned int sysctl_sched_rt_period = 1000000; + +/* + * part of the period that we allow rt tasks to run in us. 
+ * default: 0.95s + */ +int sysctl_sched_rt_runtime = 950000; + +static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_rt_sysctls[] = { + { + .procname = "sched_rt_period_us", + .data = &sysctl_sched_rt_period, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_rt_handler, + }, + { + .procname = "sched_rt_runtime_us", + .data = &sysctl_sched_rt_runtime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rt_handler, + }, + {} +}; + +static int __init sched_rt_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_rt_sysctls); + return 0; +} +late_initcall(sched_rt_sysctl_init); +#endif + static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) { struct rt_bandwidth *rt_b = @@ -2925,7 +2966,7 @@ static void sched_rt_do_global(void) raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } -int sched_rt_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_period, old_runtime; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 767fc1de9646..3b406c78a8e9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -114,6 +114,10 @@ extern void calc_global_load_tick(struct rq *this_rq); extern long calc_load_fold_active(struct rq *this_rq, long adjust); extern void call_trace_sched_update_nr_running(struct rq *rq, int count); + +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + /* * Helpers for converting nanosecond timing to jiffy resolution */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fc0eeca20718..029bfe06c68d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,20 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ - { - .procname = "sched_rt_period_us", - .data = &sysctl_sched_rt_period, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_rt_handler, - }, - { - .procname = "sched_rt_runtime_us", - .data = &sysctl_sched_rt_runtime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sched_rt_handler, - }, { .procname = "sched_deadline_period_max_us", .data = &sysctl_sched_dl_period_max, -- cgit From 84227c12888b1105725cd2de14705b029bcbb4b2 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:00 +0800 Subject: sched: Move deadline_period sysctls to deadline.c move deadline_period sysctls to deadline.c and use the new register_sysctl_init() to register the sysctl interface. 
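The limits move along unchanged but gain static linkage, since kernel/sysctl.c no longer needs the externs (defaults with the arithmetic spelled out):

	/* 1 << 22 us = 4194304 us, i.e. roughly 4.19 seconds */
	static unsigned int sysctl_sched_dl_period_max = 1 << 22;

	/* 100 us floor, guarding against timer DoS through absurdly short periods */
	static unsigned int sysctl_sched_dl_period_min = 100;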
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 3 --- kernel/sched/deadline.c | 42 ++++++++++++++++++++++++++++++++++-------- kernel/sysctl.c | 14 -------------- 3 files changed, 34 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 4391c1307945..7da9b94c5e1c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,9 +31,6 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -extern unsigned int sysctl_sched_dl_period_max; -extern unsigned int sysctl_sched_dl_period_min; - #ifdef CONFIG_UCLAMP_TASK extern unsigned int sysctl_sched_uclamp_util_min; extern unsigned int sysctl_sched_uclamp_util_max; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fb4255ae0b2c..82e10b74a7b2 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -16,6 +16,40 @@ * Fabio Checconi */ +/* + * Default limits for DL period; on the top end we guard against small util + * tasks still getting ridiculously long effective runtimes, on the bottom end we + * guard against timer DoS. + */ +static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ +static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_dl_sysctls[] = { + { + .procname = "sched_deadline_period_max_us", + .data = &sysctl_sched_dl_period_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_deadline_period_min_us", + .data = &sysctl_sched_dl_period_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +static int __init sched_dl_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_dl_sysctls); + return 0; +} +late_initcall(sched_dl_sysctl_init); +#endif + static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) { return container_of(dl_se, struct task_struct, dl); @@ -2879,14 +2913,6 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr) attr->sched_flags |= dl_se->flags; } -/* - * Default limits for DL period; on the top end we guard against small util - * tasks still getting ridiculously long effective runtimes, on the bottom end we - * guard against timer DoS. - */ -unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ -unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ - /* * This function validates the new parameters of a -deadline task. 
* We ask for the deadline not being zero, and greater or equal diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 029bfe06c68d..2c8c75e11a37 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,20 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ - { - .procname = "sched_deadline_period_max_us", - .data = &sysctl_sched_dl_period_max, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sched_deadline_period_min_us", - .data = &sysctl_sched_dl_period_min, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "sched_rr_timeslice_ms", .data = &sysctl_sched_rr_timeslice, -- cgit From dafd7a9dad22fadcb290b24dff54e2eae3b89776 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:01 +0800 Subject: sched: Move rr_timeslice sysctls to rt.c move rr_timeslice sysctls to rt.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 5 ----- kernel/sched/rt.c | 13 +++++++++++-- kernel/sched/sched.h | 1 + kernel/sysctl.c | 7 ------- 4 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 7da9b94c5e1c..3d307e512d1f 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -41,11 +41,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -extern int sysctl_sched_rr_timeslice; -extern int sched_rr_timeslice; - -int sched_rr_handler(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5663bb5ff890..71791be36065 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -5,7 +5,7 @@ */ int sched_rr_timeslice = RR_TIMESLICE; -int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; +static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; /* More than 4 hours if BW_SHIFT equals 20. 
*/ static const u64 max_rt_runtime = MAX_BW; @@ -27,6 +27,8 @@ int sysctl_sched_rt_runtime = 950000; static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #ifdef CONFIG_SYSCTL static struct ctl_table sched_rt_sysctls[] = { { @@ -43,6 +45,13 @@ static struct ctl_table sched_rt_sysctls[] = { .mode = 0644, .proc_handler = sched_rt_handler, }, + { + .procname = "sched_rr_timeslice_ms", + .data = &sysctl_sched_rr_timeslice, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rr_handler, + }, {} }; @@ -3005,7 +3014,7 @@ undo: return ret; } -int sched_rr_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3b406c78a8e9..ae0f6e5a76f9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -117,6 +117,7 @@ extern void call_trace_sched_update_nr_running(struct rq *rq, int count); extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; +extern int sched_rr_timeslice; /* * Helpers for converting nanosecond timing to jiffy resolution diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2c8c75e11a37..b074f70a3e11 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,13 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ - { - .procname = "sched_rr_timeslice_ms", - .data = &sysctl_sched_rr_timeslice, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sched_rr_handler, - }, #ifdef CONFIG_UCLAMP_TASK { .procname = "sched_util_clamp_min", -- cgit From 3267e0156c3341ac25b37a0f60551cdae1634b60 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:02 +0800 Subject: sched: Move uclamp_util sysctls to core.c move uclamp_util sysctls to core.c and use the new register_sysctl_init() to register the sysctl interface. 
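Range-checked knobs keep their clamping behaviour across the move: proc_dointvec_minmax() rejects writes outside [.extra1, .extra2], so only the table's location changes, not its validation. A hedged sketch of a percentage-style entry (names invented; SYSCTL_ZERO and SYSCTL_ONE_HUNDRED are the shared constants used throughout these diffs):

static int example_ratio = 20;

static struct ctl_table example_sysctls[] = {
	{
		.procname	= "example_ratio",
		.data		= &example_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,		/* writes below 0 fail with -EINVAL */
		.extra2		= SYSCTL_ONE_HUNDRED,	/* writes above 100 fail with -EINVAL */
	},
	{}
};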
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 8 -------- kernel/sched/core.c | 48 ++++++++++++++++++++++++++++++++++---------- kernel/sysctl.c | 23 --------------------- 3 files changed, 37 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 3d307e512d1f..0934b21a57a4 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,18 +31,10 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -#ifdef CONFIG_UCLAMP_TASK -extern unsigned int sysctl_sched_uclamp_util_min; -extern unsigned int sysctl_sched_uclamp_util_max; -extern unsigned int sysctl_sched_uclamp_util_min_rt_default; -#endif - #ifdef CONFIG_CFS_BANDWIDTH extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 774f3229db37..ef31751c5799 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1306,10 +1306,10 @@ static void set_load_weight(struct task_struct *p, bool update_load) static DEFINE_MUTEX(uclamp_mutex); /* Max allowed minimum utilization */ -unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; +static unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; /* Max allowed maximum utilization */ -unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; +static unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; /* * By default RT tasks run at the maximum performance point/capacity of the @@ -1326,7 +1326,7 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; * This knob will not override the system default sched_util_clamp_min defined * above. 
*/ -unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; +static unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; /* All clamps are required to be less or equal than these values */ static struct uclamp_se uclamp_default[UCLAMP_CNT]; @@ -1779,7 +1779,7 @@ static void uclamp_update_root_tg(void) static void uclamp_update_root_tg(void) { } #endif -int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, +static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { bool update_root_tg = false; @@ -4436,8 +4436,12 @@ static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, set_schedstats(state); return err; } +#endif /* CONFIG_PROC_SYSCTL */ +#endif /* CONFIG_SCHEDSTATS */ -static struct ctl_table sched_schedstats_sysctls[] = { +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_core_sysctls[] = { +#ifdef CONFIG_SCHEDSTATS { .procname = "sched_schedstats", .data = NULL, @@ -4447,17 +4451,39 @@ static struct ctl_table sched_schedstats_sysctls[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif /* CONFIG_SCHEDSTATS */ +#ifdef CONFIG_UCLAMP_TASK + { + .procname = "sched_util_clamp_min", + .data = &sysctl_sched_uclamp_util_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, + { + .procname = "sched_util_clamp_max", + .data = &sysctl_sched_uclamp_util_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, + { + .procname = "sched_util_clamp_min_rt_default", + .data = &sysctl_sched_uclamp_util_min_rt_default, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_uclamp_handler, + }, +#endif /* CONFIG_UCLAMP_TASK */ {} }; - -static int __init sched_schedstats_sysctl_init(void) +static int __init sched_core_sysctl_init(void) { - register_sysctl_init("kernel", sched_schedstats_sysctls); + register_sysctl_init("kernel", sched_core_sysctls); return 0; } -late_initcall(sched_schedstats_sysctl_init); -#endif /* CONFIG_PROC_SYSCTL */ -#endif /* CONFIG_SCHEDSTATS */ +late_initcall(sched_core_sysctl_init); +#endif /* CONFIG_SYSCTL */ /* * fork()/clone()-time setup: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b074f70a3e11..a48c090d57f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,29 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_UCLAMP_TASK - { - .procname = "sched_util_clamp_min", - .data = &sysctl_sched_uclamp_util_min, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_sched_uclamp_handler, - }, - { - .procname = "sched_util_clamp_max", - .data = &sysctl_sched_uclamp_util_max, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_sched_uclamp_handler, - }, - { - .procname = "sched_util_clamp_min_rt_default", - .data = &sysctl_sched_uclamp_util_min_rt_default, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_sched_uclamp_handler, - }, -#endif #ifdef CONFIG_CFS_BANDWIDTH { .procname = "sched_cfs_bandwidth_slice_us", -- cgit From d4ae80ffa64f87b9c355692b680b603add084e96 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:03 +0800 Subject: sched: Move cfs_bandwidth_slice sysctls to fair.c move cfs_bandwidth_slice sysctls to fair.c and use the new register_sysctl_init() to register the sysctl interface. 
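Knobs whose writes have side effects need a custom .proc_handler rather than a stock one; sched_rt_handler above and the dirty_*_handler functions later in this log all wrap a stock helper and react only after a successful, value-changing write. A sketch of that shape, under invented names:

static unsigned int example_slice_us = 5000;

static int example_slice_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old = example_slice_us;
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/* react only once the new value has been parsed, validated and stored */
	if (!ret && write && example_slice_us != old) {
		/* propagate the change, e.g. recompute cached state */
	}
	return ret;
}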
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 4 ---- kernel/sched/fair.c | 51 +++++++++++++++++++++++++++----------------- kernel/sysctl.c | 10 --------- 3 files changed, 31 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0934b21a57a4..198f77c8a873 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -31,10 +31,6 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif -#ifdef CONFIG_CFS_BANDWIDTH -extern unsigned int sysctl_sched_cfs_bandwidth_slice; -#endif - int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 788b1d6a3248..265bf7a75a37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -109,25 +109,6 @@ static unsigned int sched_nr_latency = 8; * parent will (try to) run first. */ unsigned int sysctl_sched_child_runs_first __read_mostly; -#ifdef CONFIG_SYSCTL -static struct ctl_table sched_child_runs_first_sysctls[] = { - { - .procname = "sched_child_runs_first", - .data = &sysctl_sched_child_runs_first, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; - -static int __init sched_child_runs_first_sysctl_init(void) -{ - register_sysctl_init("kernel", sched_child_runs_first_sysctls); - return 0; -} -late_initcall(sched_child_runs_first_sysctl_init); -#endif /* * SCHED_OTHER wake-up granularity. @@ -192,7 +173,37 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ -unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; +#endif + +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_fair_sysctls[] = { + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_CFS_BANDWIDTH + { + .procname = "sched_cfs_bandwidth_slice_us", + .data = &sysctl_sched_cfs_bandwidth_slice, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, +#endif + {} +}; + +static int __init sched_fair_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_fair_sysctls); + return 0; +} +late_initcall(sched_fair_sysctl_init); #endif static inline void update_load_add(struct load_weight *lw, unsigned long inc) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a48c090d57f9..10ab81c7c457 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,16 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_CFS_BANDWIDTH - { - .procname = "sched_cfs_bandwidth_slice_us", - .data = &sysctl_sched_cfs_bandwidth_slice, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - }, -#endif #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) { .procname = "sched_energy_aware", -- cgit From 8a0441415b3f9b9a920a6a5086580ea3daa7b884 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 15 Feb 2022 19:46:04 +0800 Subject: sched: Move energy_aware sysctls to topology.c move energy_aware sysctls to topology.c and use the new register_sysctl_init() to register the sysctl interface. 
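When one file ends up owning knobs with different config dependencies, the series folds them into a single table and keeps the per-feature #ifdefs inside the initializer, as the core.c and fair.c hunks above do. A sketch with invented names and an invented CONFIG_EXAMPLE_FEATURE:

static int example_always;
#ifdef CONFIG_EXAMPLE_FEATURE
static unsigned int example_feature_knob;
#endif

#ifdef CONFIG_SYSCTL
static struct ctl_table example_sysctls[] = {
	{
		.procname	= "example_always",
		.data		= &example_always,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_EXAMPLE_FEATURE
	{
		.procname	= "example_feature_knob",
		.data		= &example_feature_knob,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{}	/* the sentinel stays outside every #ifdef */
};
#endif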
Signed-off-by: Zhen Ni Signed-off-by: Luis Chamberlain --- include/linux/sched/sysctl.h | 6 ------ kernel/sched/topology.c | 25 +++++++++++++++++++++++-- kernel/sysctl.c | 11 ----------- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 198f77c8a873..e650946816d0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -34,10 +34,4 @@ extern int sysctl_numa_balancing_mode; int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -extern unsigned int sysctl_sched_energy_aware; -int sched_energy_aware_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -#endif - #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 810750e62118..05b6c2ad90b9 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -206,7 +206,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) DEFINE_STATIC_KEY_FALSE(sched_energy_present); -unsigned int sysctl_sched_energy_aware = 1; +static unsigned int sysctl_sched_energy_aware = 1; DEFINE_MUTEX(sched_energy_mutex); bool sched_energy_update; @@ -220,7 +220,7 @@ void rebuild_sched_domains_energy(void) } #ifdef CONFIG_PROC_SYSCTL -int sched_energy_aware_handler(struct ctl_table *table, int write, +static int sched_energy_aware_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, state; @@ -237,6 +237,27 @@ int sched_energy_aware_handler(struct ctl_table *table, int write, return ret; } + +static struct ctl_table sched_energy_aware_sysctls[] = { + { + .procname = "sched_energy_aware", + .data = &sysctl_sched_energy_aware, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_energy_aware_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init sched_energy_aware_sysctl_init(void) +{ + register_sysctl_init("kernel", sched_energy_aware_sysctls); + return 0; +} + +late_initcall(sched_energy_aware_sysctl_init); #endif static void free_pd(struct perf_domain *pd) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 10ab81c7c457..8241c5401ee8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1681,17 +1681,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) - { - .procname = "sched_energy_aware", - .data = &sysctl_sched_energy_aware, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_energy_aware_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", -- cgit From 06d177662fb86b80c7fc2290667b9a14cb0bd925 Mon Sep 17 00:00:00 2001 From: tangmeng Date: Thu, 17 Feb 2022 12:23:21 +0800 Subject: kernel/reboot: move reboot sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. 
All filesystem sysctls now get reviewed by fs folks. This commit follows the commit of fs, move the poweroff_cmd and ctrl-alt-del sysctls to its own file, kernel/reboot.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/reboot.h | 4 ---- kernel/reboot.c | 34 ++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 14 -------------- 3 files changed, 32 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index af907a3d68d1..a2429648d831 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -71,12 +71,8 @@ extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); -extern int C_A_D; /* for sysctl */ void ctrl_alt_del(void); -#define POWEROFF_CMD_PATH_LEN 256 -extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; - extern void orderly_poweroff(bool force); extern void orderly_reboot(void); void hw_protection_shutdown(const char *reason, int ms_until_forced); diff --git a/kernel/reboot.c b/kernel/reboot.c index 6bcc5d6a6572..ed4e6dfb7d44 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -23,7 +23,7 @@ * this indicates whether you can reboot with ctrl-alt-del: the default is yes */ -int C_A_D = 1; +static int C_A_D = 1; struct pid *cad_pid; EXPORT_SYMBOL(cad_pid); @@ -417,9 +417,37 @@ void ctrl_alt_del(void) kill_cad_pid(SIGINT, 1); } -char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; +#define POWEROFF_CMD_PATH_LEN 256 +static char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; static const char reboot_cmd[] = "/sbin/reboot"; +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_reboot_table[] = { + { + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = proc_dostring, + }, + { + .procname = "ctrl-alt-del", + .data = &C_A_D, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static void __init kernel_reboot_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_reboot_table); +} +#else +#define kernel_reboot_sysctls_init() do { } while (0) +#endif /* CONFIG_SYSCTL */ + static int run_cmd(const char *cmd) { char **argv; @@ -886,6 +914,8 @@ static int __init reboot_ksysfs_init(void) return ret; } + kernel_reboot_sysctls_init(); + return 0; } late_initcall(reboot_ksysfs_init); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8241c5401ee8..5e43569ce2be 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1798,13 +1798,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "ctrl-alt-del", - .data = &C_A_D, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #ifdef CONFIG_FUNCTION_TRACER { .procname = "ftrace_enabled", @@ -2111,13 +2104,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "poweroff_cmd", - .data = &poweroff_cmd, - .maxlen = POWEROFF_CMD_PATH_LEN, - .mode = 0644, - .proc_handler = proc_dostring, - }, #ifdef CONFIG_KEYS { .procname = "keys", -- cgit From 43fe219aa56a2fdd8f0623c9470a32b14b0617a5 Mon Sep 17 00:00:00 2001 From: sujiaxun Date: Thu, 17 Feb 2022 18:51:48 -0800 Subject: mm: move oom_kill sysctls to their own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong.
The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the oom_kill sysctls to their own file, mm/oom_kill.c [sfr@canb.auug.org.au: null-terminate the array] Link: https://lkml.kernel.org/r/20220216193202.28838626@canb.auug.org.au Link: https://lkml.kernel.org/r/20220215093203.31032-1-sujiaxun@uniontech.com Signed-off-by: sujiaxun Signed-off-by: Stephen Rothwell Cc: Kees Cook Cc: Iurii Zaikin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Luis Chamberlain --- include/linux/oom.h | 4 ---- kernel/sysctl.c | 23 ----------------------- mm/oom_kill.c | 38 +++++++++++++++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 2db9a1432511..02d1e7bbd8cd 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -123,8 +123,4 @@ extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); -/* sysctls */ -extern int sysctl_oom_dump_tasks; -extern int sysctl_oom_kill_allocating_task; -extern int sysctl_panic_on_oom; #endif /* _INCLUDE_LINUX_OOM_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5e43569ce2be..a21c0ea396f3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2241,29 +2241,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { - .procname = "panic_on_oom", - .data = &sysctl_panic_on_oom, - .maxlen = sizeof(sysctl_panic_on_oom), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, - }, - { - .procname = "oom_kill_allocating_task", - .data = &sysctl_oom_kill_allocating_task, - .maxlen = sizeof(sysctl_oom_kill_allocating_task), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "oom_dump_tasks", - .data = &sysctl_oom_dump_tasks, - .maxlen = sizeof(sysctl_oom_dump_tasks), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "overcommit_ratio", .data = &sysctl_overcommit_ratio, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 7ec38194f8e1..7cc338a9e9e4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -52,9 +52,38 @@ #define CREATE_TRACE_POINTS #include -int sysctl_panic_on_oom; -int sysctl_oom_kill_allocating_task; -int sysctl_oom_dump_tasks = 1; +static int sysctl_panic_on_oom; +static int sysctl_oom_kill_allocating_task; +static int sysctl_oom_dump_tasks = 1; + +#ifdef CONFIG_SYSCTL +static struct ctl_table vm_oom_kill_table[] = { + { + .procname = "panic_on_oom", + .data = &sysctl_panic_on_oom, + .maxlen = sizeof(sysctl_panic_on_oom), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "oom_kill_allocating_task", + .data = &sysctl_oom_kill_allocating_task, + .maxlen = sizeof(sysctl_oom_kill_allocating_task), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "oom_dump_tasks", + .data = &sysctl_oom_dump_tasks, + .maxlen = sizeof(sysctl_oom_dump_tasks), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; +#endif /* * Serializes oom killer invocations (out_of_memory()) from all contexts to @@ -677,6 +706,9 @@ static void wake_oom_reaper(struct task_struct *tsk) static int __init oom_init(void) { oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", vm_oom_kill_table); +#endif return 0; } subsys_initcall(oom_init) -- cgit From 
aa779e5102195e1d9ade95dcbc0bfbd8f916eb59 Mon Sep 17 00:00:00 2001 From: zhanglianjie Date: Thu, 17 Feb 2022 18:51:51 -0800 Subject: mm: move page-writeback sysctls to their own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. So move the page-writeback sysctls to their own file. [akpm@linux-foundation.org: coding-style cleanups] [akpm@linux-foundation.org: fix CONFIG_SYSCTL=n warnings] Link: https://lkml.kernel.org/r/20220129012955.26594-1-zhanglianjie@uniontech.com Signed-off-by: zhanglianjie Cc: Kees Cook Cc: Iurii Zaikin Cc: Luis Chamberlain Signed-off-by: Andrew Morton Signed-off-by: Luis Chamberlain --- include/linux/writeback.h | 15 ------- kernel/sysctl.c | 69 ------------------------------- mm/page-writeback.c | 103 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 93 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fec248ab1fec..dc2b94e6a94f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -345,28 +345,13 @@ void wb_domain_exit(struct wb_domain *dom); extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ -extern int dirty_background_ratio; -extern unsigned long dirty_background_bytes; -extern int vm_dirty_ratio; -extern unsigned long vm_dirty_bytes; extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; -extern int vm_highmem_is_dirtyable; extern int laptop_mode; -int dirty_background_ratio_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int dirty_background_bytes_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int dirty_ratio_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int dirty_bytes_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int dirtytime_interval_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a21c0ea396f3..36bfe1c92d44 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -100,8 +100,6 @@ static const int six_hundred_forty_kb = 640 * 1024; #endif -/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; static const int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; @@ -2263,55 +2261,6 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, - { - .procname = "dirty_background_ratio", - .data = &dirty_background_ratio, - .maxlen = sizeof(dirty_background_ratio), - .mode = 0644, - .proc_handler = dirty_background_ratio_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "dirty_background_bytes", - .data = &dirty_background_bytes,
- .maxlen = sizeof(dirty_background_bytes), - .mode = 0644, - .proc_handler = dirty_background_bytes_handler, - .extra1 = SYSCTL_LONG_ONE, - }, - { - .procname = "dirty_ratio", - .data = &vm_dirty_ratio, - .maxlen = sizeof(vm_dirty_ratio), - .mode = 0644, - .proc_handler = dirty_ratio_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "dirty_bytes", - .data = &vm_dirty_bytes, - .maxlen = sizeof(vm_dirty_bytes), - .mode = 0644, - .proc_handler = dirty_bytes_handler, - .extra1 = (void *)&dirty_bytes_min, - }, - { - .procname = "dirty_writeback_centisecs", - .data = &dirty_writeback_interval, - .maxlen = sizeof(dirty_writeback_interval), - .mode = 0644, - .proc_handler = dirty_writeback_centisecs_handler, - }, - { - .procname = "dirty_expire_centisecs", - .data = &dirty_expire_interval, - .maxlen = sizeof(dirty_expire_interval), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, { .procname = "dirtytime_expire_seconds", .data = &dirtytime_expire_interval, @@ -2483,13 +2432,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif - { - .procname = "laptop_mode", - .data = &laptop_mode, - .maxlen = sizeof(laptop_mode), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "vfs_cache_pressure", .data = &sysctl_vfs_cache_pressure, @@ -2587,17 +2529,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif -#ifdef CONFIG_HIGHMEM - { - .procname = "highmem_is_dirtyable", - .data = &vm_highmem_is_dirtyable, - .maxlen = sizeof(vm_highmem_is_dirtyable), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_MEMORY_FAILURE { .procname = "memory_failure_early_kill", diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7e2da284e427..438762173a59 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -70,30 +70,33 @@ static long ratelimit_pages = 32; /* * Start background writeback (via writeback threads) at this percentage */ -int dirty_background_ratio = 10; +static int dirty_background_ratio = 10; /* * dirty_background_bytes starts at 0 (disabled) so that it is a function of * dirty_background_ratio * the amount of dirtyable memory */ -unsigned long dirty_background_bytes; +static unsigned long dirty_background_bytes; /* * free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true */ -int vm_highmem_is_dirtyable; +static int vm_highmem_is_dirtyable; /* * The generator of dirty data starts writeback at this percentage */ -int vm_dirty_ratio = 20; +static int vm_dirty_ratio = 20; + +/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ +static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; /* * vm_dirty_bytes starts at 0 (disabled) so that it is a function of * vm_dirty_ratio * the amount of dirtyable memory */ -unsigned long vm_dirty_bytes; +static unsigned long vm_dirty_bytes; /* * The interval between `kupdate'-style writebacks @@ -491,7 +494,8 @@ bool node_dirty_ok(struct pglist_data *pgdat) return nr_pages <= limit; } -int dirty_background_ratio_handler(struct ctl_table *table, int write, +#ifdef CONFIG_SYSCTL +static int dirty_background_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -502,7 +506,7 @@ int dirty_background_ratio_handler(struct ctl_table *table, int write, return ret; } -int dirty_background_bytes_handler(struct 
ctl_table *table, int write, +static int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -513,7 +517,7 @@ int dirty_background_bytes_handler(struct ctl_table *table, int write, return ret; } -int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, +static int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; @@ -527,7 +531,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, return ret; } -int dirty_bytes_handler(struct ctl_table *table, int write, +static int dirty_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; @@ -540,6 +544,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write, } return ret; } +#endif static unsigned long wp_next_time(unsigned long cur_time) { @@ -1981,10 +1986,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) return false; } +#ifdef CONFIG_SYSCTL /* * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ -int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, +static int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { unsigned int old_interval = dirty_writeback_interval; @@ -2005,6 +2011,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, return ret; } +#endif void laptop_mode_timer_fn(struct timer_list *t) { @@ -2069,6 +2076,79 @@ static int page_writeback_cpu_online(unsigned int cpu) return 0; } +#ifdef CONFIG_SYSCTL +static struct ctl_table vm_page_writeback_sysctls[] = { + { + .procname = "dirty_background_ratio", + .data = &dirty_background_ratio, + .maxlen = sizeof(dirty_background_ratio), + .mode = 0644, + .proc_handler = dirty_background_ratio_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "dirty_background_bytes", + .data = &dirty_background_bytes, + .maxlen = sizeof(dirty_background_bytes), + .mode = 0644, + .proc_handler = dirty_background_bytes_handler, + .extra1 = SYSCTL_LONG_ONE, + }, + { + .procname = "dirty_ratio", + .data = &vm_dirty_ratio, + .maxlen = sizeof(vm_dirty_ratio), + .mode = 0644, + .proc_handler = dirty_ratio_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "dirty_bytes", + .data = &vm_dirty_bytes, + .maxlen = sizeof(vm_dirty_bytes), + .mode = 0644, + .proc_handler = dirty_bytes_handler, + .extra1 = (void *)&dirty_bytes_min, + }, + { + .procname = "dirty_writeback_centisecs", + .data = &dirty_writeback_interval, + .maxlen = sizeof(dirty_writeback_interval), + .mode = 0644, + .proc_handler = dirty_writeback_centisecs_handler, + }, + { + .procname = "dirty_expire_centisecs", + .data = &dirty_expire_interval, + .maxlen = sizeof(dirty_expire_interval), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +#ifdef CONFIG_HIGHMEM + { + .procname = "highmem_is_dirtyable", + .data = &vm_highmem_is_dirtyable, + .maxlen = sizeof(vm_highmem_is_dirtyable), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { + .procname = "laptop_mode", + .data = &laptop_mode, + .maxlen = sizeof(laptop_mode), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + {} +}; +#endif + /* * Called early on to tune the page writeback dirty limits. 
* @@ -2093,6 +2173,9 @@ void __init page_writeback_init(void) page_writeback_cpu_online, NULL); cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL, page_writeback_cpu_online); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", vm_page_writeback_sysctls); +#endif } /** -- cgit From f79c9b8ae8bde10126586c1bb55b5fd027276d8e Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:58:57 +0800 Subject: kernel/lockdep: move lockdep sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem sysctls now get reviewed by fs folks. This commit follows the commit of fs, move the prove_locking and lock_stat sysctls to its own file, kernel/lockdep.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/lockdep.h | 4 ---- kernel/locking/lockdep.c | 35 +++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 21 --------------------- 3 files changed, 33 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..37951c17908e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -16,10 +16,6 @@ struct task_struct; -/* for sysctl */ -extern int prove_locking; -extern int lock_stat; - #ifdef CONFIG_LOCKDEP #include diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c06cab6546ed..a4382ae1be59 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -64,19 +64,50 @@ #include #ifdef CONFIG_PROVE_LOCKING -int prove_locking = 1; +static int prove_locking = 1; module_param(prove_locking, int, 0644); #else #define prove_locking 0 #endif #ifdef CONFIG_LOCK_STAT -int lock_stat = 1; +static int lock_stat = 1; module_param(lock_stat, int, 0644); #else #define lock_stat 0 #endif +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_lockdep_table[] = { +#ifdef CONFIG_PROVE_LOCKING + { + .procname = "prove_locking", + .data = &prove_locking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_PROVE_LOCKING */ +#ifdef CONFIG_LOCK_STAT + { + .procname = "lock_stat", + .data = &lock_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_LOCK_STAT */ + { } +}; + +static __init int kernel_lockdep_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_lockdep_table); + return 0; +} +late_initcall(kernel_lockdep_sysctls_init); +#endif /* CONFIG_SYSCTL */ + DEFINE_PER_CPU(unsigned int, lockdep_recursion); EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 36bfe1c92d44..95380d250c8c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -88,9 +88,6 @@ #ifdef CONFIG_RT_MUTEXES #include #endif -#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) -#include -#endif #if defined(CONFIG_SYSCTL) @@ -1679,24 +1676,6 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_FOUR, }, #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_PROVE_LOCKING - { - .procname = "prove_locking", - .data = &prove_locking, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif -#ifdef CONFIG_LOCK_STAT - { - .procname = "lock_stat", - .data = &lock_stat, - .maxlen =
sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif { .procname = "panic", .data = &panic_timeout, -- cgit From 9df918698408fd914493aba0b7858fef50eba63a Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:12 +0800 Subject: kernel/panic: move panic sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem sysctls now get reviewed by fs folks. This commit follows the commit of fs, move the oops_all_cpu_backtrace sysctl to its own file, kernel/panic.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/panic.h | 6 ------ kernel/panic.c | 26 +++++++++++++++++++++++++- kernel/sysctl.c | 11 ----------- 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/panic.h b/include/linux/panic.h index f5844908a089..e71161da69c4 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -15,12 +15,6 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); -#ifdef CONFIG_SMP -extern unsigned int sysctl_oops_all_cpu_backtrace; -#else -#define sysctl_oops_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/kernel/panic.c b/kernel/panic.c index eb4dfb932c85..eb3f2fe4f6d7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -43,7 +43,9 @@ * Should we dump all CPUs backtraces in an oops event? * Defaults to 0, can be changed via sysctl.
 */ -unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 #endif /* CONFIG_SMP */ int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; @@ -73,6 +75,28 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); +#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL) +static struct ctl_table kern_panic_table[] = { + { + .procname = "oops_all_cpu_backtrace", + .data = &sysctl_oops_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int kernel_panic_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_panic_table); + return 0; +} +late_initcall(kernel_panic_sysctls_init); +#endif + static long no_blink(int state) { return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 95380d250c8c..90fc2212b536 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1922,17 +1922,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_SMP - { - .procname = "oops_all_cpu_backtrace", - .data = &sysctl_oops_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ { .procname = "pid_max", .data = &pid_max, -- cgit From 801b501439d1b366d524dee4fc1e6b3473a95b9a Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:23 +0800 Subject: kernel/acct: move acct sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem sysctls now get reviewed by fs folks. This commit follows the commit of fs, move the acct sysctl to its own file, kernel/acct.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/acct.h | 1 - kernel/acct.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 12 ------------ 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acct.h b/include/linux/acct.h index bc70e81895c0..2718c4854815 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -21,7 +21,6 @@ #ifdef CONFIG_BSD_PROCESS_ACCT struct pid_namespace; -extern int acct_parm[]; /* for sysctl */ extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); diff --git a/kernel/acct.c b/kernel/acct.c index 3df53cf1dcd5..13706356ec54 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -70,11 +70,31 @@ * Turned into sysctl-controllable parameters.
AV, 12/11/98 */ -int acct_parm[3] = {4, 2, 30}; +static int acct_parm[3] = {4, 2, 30}; #define RESUME (acct_parm[0]) /* >foo% free space - resume */ #define SUSPEND (acct_parm[1]) /* #endif -#ifdef CONFIG_BSD_PROCESS_ACCT -#include -#endif #ifdef CONFIG_RT_MUTEXES #include #endif @@ -1856,15 +1853,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dostring, }, #endif -#ifdef CONFIG_BSD_PROCESS_ACCT - { - .procname = "acct", - .data = &acct_parm, - .maxlen = 3*sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_MAGIC_SYSRQ { .procname = "sysrq", -- cgit From 1186618a6a35d43a865448472a261184b608d13c Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:36 +0800 Subject: kernel/delayacct: move delayacct sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem sysctls now get reviewed by fs folks. This commit follows the commit of fs, move the delayacct sysctl to its own file, kernel/delayacct.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/delayacct.h | 3 --- kernel/delayacct.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 12 ------------ 3 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 3e03d010bd2e..6b16a6930a19 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -61,9 +61,6 @@ extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); -extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); - extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c5e8cea9e05f..2c1e18f7c5cf 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -44,7 +44,7 @@ void delayacct_init(void) } #ifdef CONFIG_PROC_SYSCTL -int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, +static int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int state = delayacct_on; @@ -63,6 +63,26 @@ int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, set_delayacct(state); return err; } + +static struct ctl_table kern_delayacct_table[] = { + { + .procname = "task_delayacct", + .data = NULL, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_delayacct, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int kernel_delayacct_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_delayacct_table); + return 0; +} +late_initcall(kernel_delayacct_sysctls_init); #endif void __delayacct_tsk_init(struct task_struct *tsk) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5421e28dbb25..9b74ba12a711 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -67,7 +67,6 @@ #include #include #include -#include #include "../lib/kstrtox.h" @@ -1651,17 +1650,6 @@ int proc_do_static_key(struct ctl_table *table, int write, } static struct ctl_table kern_table[] = { -#ifdef
CONFIG_TASK_DELAY_ACCT - { - .procname = "task_delayacct", - .data = NULL, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_delayacct, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_TASK_DELAY_ACCT */ #ifdef CONFIG_NUMA_BALANCING { .procname = "numa_balancing", -- cgit From d772cc2c321900f3f463a124eebeb7218e66dda6 Mon Sep 17 00:00:00 2001 From: tangmeng Date: Fri, 18 Feb 2022 18:59:49 +0800 Subject: kernel/do_mount_initrd: move real_root_dev sysctls to its own file kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem sysctls now get reviewed by fs folks. This commit follows the commit of fs, move the real_root_dev sysctl to its own file, kernel/do_mount_initrd.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain --- include/linux/initrd.h | 2 -- init/do_mounts_initrd.c | 22 +++++++++++++++++++++- kernel/sysctl.c | 9 --------- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 1bbe9af48dc3..f1a1f4c92ded 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -29,8 +29,6 @@ static inline void wait_for_initramfs(void) {} extern phys_addr_t phys_initrd_start; extern unsigned long phys_initrd_size; -extern unsigned int real_root_dev; - extern char __initramfs_start[]; extern unsigned long __initramfs_size; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 533d81ed74d4..327962ea354c 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -14,12 +14,32 @@ unsigned long initrd_start, initrd_end; int initrd_below_start_ok; -unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ +static unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ static int __initdata mount_initrd = 1; phys_addr_t phys_initrd_start __initdata; unsigned long phys_initrd_size __initdata; +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_do_mounts_initrd_table[] = { + { + .procname = "real-root-dev", + .data = &real_root_dev, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static __init int kernel_do_mounts_initrd_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_do_mounts_initrd_table); + return 0; +} +late_initcall(kernel_do_mounts_initrd_sysctls_init); +#endif /* CONFIG_SYSCTL */ + static int __init no_initrd(char *str) { mount_initrd = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9b74ba12a711..10a551f8fcab 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1693,15 +1693,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sysctl_latencytop, }, -#endif -#ifdef CONFIG_BLK_DEV_INITRD - { - .procname = "real-root-dev", - .data = &real_root_dev, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif { .procname = "print-fatal-signals", -- cgit From 8e4e83b2278bdfb55cb2b13de07cf0a721ce8af7 Mon Sep 17 00:00:00 2001 From: Wei Xiao Date: Wed, 23 Feb 2022 19:11:53 +0800 Subject: ftrace: move sysctl_ftrace_enabled to ftrace.c This moves ftrace_enabled to trace/ftrace.c.
We move sysctls to places where features actually belong to improve the readability of the code and reduce the risk of code merge conflicts. At the same time, the proc-sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. Signed-off-by: Wei Xiao Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- include/linux/ftrace.h | 3 --- kernel/sysctl.c | 9 --------- kernel/trace/ftrace.c | 22 +++++++++++++++++++++- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4816b7e11047..088b915853dd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -101,9 +101,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; -extern int -ftrace_enable_sysctl(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 10a551f8fcab..21172d3dad6e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1751,15 +1751,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_FUNCTION_TRACER - { - .procname = "ftrace_enabled", - .data = &ftrace_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = ftrace_enable_sysctl, - }, -#endif #ifdef CONFIG_STACK_TRACER { .procname = "stack_tracer_enabled", diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f1d2f5e7263..a5efbbc289b4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7921,7 +7921,8 @@ static bool is_permanent_ops_registered(void) return false; } -int +#ifdef CONFIG_SYSCTL +static int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -7964,3 +7965,22 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_unlock(&ftrace_lock); return ret; } + +static struct ctl_table ftrace_sysctls[] = { + { + .procname = "ftrace_enabled", + .data = &ftrace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ftrace_enable_sysctl, + }, + {} +}; + +static int __init ftrace_sysctl_init(void) +{ + register_sysctl_init("kernel", ftrace_sysctls); + return 0; +} +late_initcall(ftrace_sysctl_init); +#endif -- cgit From 7bc80a5462c37eab58a9ea386064307c0f447fd1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 9 Nov 2021 11:08:18 +0100 Subject: dma-buf: add enum dma_resv_usage v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds the dma_resv_usage enum and allows us to specify why a dma_resv object is queried for its containing fences. Additionally, a dma_resv_usage_rw() helper function is added to aid retrieving the fences for a read or write userspace submission. This is then deployed to the different query functions of the dma_resv object and all of their users. Where the write parameter was previously true we now use DMA_RESV_USAGE_WRITE, and DMA_RESV_USAGE_READ otherwise. v2: add KERNEL/OTHER in separate patch v3: some kerneldoc suggestions by Daniel v4: some more kerneldoc suggestions by Daniel, fix missing cases lost in the rebase pointed out by Bas.
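The read/write mapping is easy to get backwards, so it is worth spelling out: a new writer has to wait for all existing fences, while a new reader only has to wait for the existing writes. A sketch of the call pattern this patch converts users to (example_sync_for_cpu is an invented name; the dma-buf.c hunk below does the same thing):

/* Wait for the implicit fences that gate a CPU access. For a write
 * access dma_resv_usage_rw() yields DMA_RESV_USAGE_READ, i.e. sync
 * against readers and writers; for a read access it yields
 * DMA_RESV_USAGE_WRITE, i.e. sync against writers only. */
static long example_sync_for_cpu(struct dma_resv *resv, bool write)
{
	return dma_resv_wait_timeout(resv, dma_resv_usage_rw(write),
				     true, MAX_SCHEDULE_TIMEOUT);
}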
Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-2-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 6 +- drivers/dma-buf/dma-resv.c | 35 +++++------ drivers/dma-buf/st-dma-resv.c | 48 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +- drivers/gpu/drm/drm_gem.c | 3 +- drivers/gpu/drm/drm_gem_atomic_helper.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 +- drivers/gpu/drm/i915/display/intel_atomic_plane.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 6 +- .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 3 +- drivers/gpu/drm/i915/i915_request.c | 3 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 3 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 3 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 8 +-- drivers/gpu/drm/nouveau/nouveau_fence.c | 8 ++- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 +- drivers/gpu/drm/panfrost/panfrost_drv.c | 3 +- drivers/gpu/drm/qxl/qxl_debugfs.c | 3 +- drivers/gpu/drm/radeon/radeon_display.c | 3 +- drivers/gpu/drm/radeon/radeon_gem.c | 9 ++- drivers/gpu/drm/radeon/radeon_mn.c | 4 +- drivers/gpu/drm/radeon/radeon_sync.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 4 +- drivers/gpu/drm/scheduler/sched_main.c | 3 +- drivers/gpu/drm/ttm/ttm_bo.c | 18 +++--- drivers/gpu/drm/vgem/vgem_fence.c | 4 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 3 +- drivers/infiniband/core/umem_dmabuf.c | 3 +- include/linux/dma-buf.h | 8 ++- include/linux/dma-resv.h | 73 ++++++++++++++++++---- 46 files changed, 215 insertions(+), 126 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 775d3afb4169..1cddb65eafda 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -216,7 +216,8 @@ static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write, struct dma_fence *fence; int r; - dma_resv_for_each_fence(&cursor, resv, write, fence) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write), + fence) { dma_fence_get(fence); r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); if (!r) @@ -1124,7 +1125,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, long ret; /* Wait on any implicit rendering fences */ - ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT); + ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write), + true, MAX_SCHEDULE_TIMEOUT); if (ret < 0) return ret; diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 8c650b96357a..17237e6ee30c 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -384,7 +384,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) cursor->seq = read_seqcount_begin(&cursor->obj->seq); cursor->index = -1; 
cursor->shared_count = 0; - if (cursor->all_fences) { + if (cursor->usage >= DMA_RESV_USAGE_READ) { cursor->fences = dma_resv_shared_list(cursor->obj); if (cursor->fences) cursor->shared_count = cursor->fences->shared_count; @@ -496,7 +496,7 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor) dma_resv_assert_held(cursor->obj); cursor->index = 0; - if (cursor->all_fences) + if (cursor->usage >= DMA_RESV_USAGE_READ) cursor->fences = dma_resv_shared_list(cursor->obj); else cursor->fences = NULL; @@ -551,7 +551,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) list = NULL; excl = NULL; - dma_resv_iter_begin(&cursor, src, true); + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, f) { if (dma_resv_iter_is_restarted(&cursor)) { @@ -597,7 +597,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @num_fences: the number of fences returned * @fences: the array of fence ptrs returned (array is krealloc'd to the * required size, and must be freed by caller) @@ -605,7 +605,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * Retrieve all fences from the reservation object. * Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences) { struct dma_resv_iter cursor; @@ -614,7 +614,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, *num_fences = 0; *fences = NULL; - dma_resv_iter_begin(&cursor, obj, write); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) { @@ -646,7 +646,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); /** * dma_resv_get_singleton - Get a single fence for all the fences * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @fence: the resulting fence * * Get a single fence representing all the fences inside the resv object. @@ -658,7 +658,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); * * Returns 0 on success and negative error values on failure. */ -int dma_resv_get_singleton(struct dma_resv *obj, bool write, +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence) { struct dma_fence_array *array; @@ -666,7 +666,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, bool write, unsigned count; int r; - r = dma_resv_get_fences(obj, write, &count, &fences); + r = dma_resv_get_fences(obj, usage, &count, &fences); if (r) return r; @@ -700,7 +700,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton); * dma_resv_wait_timeout - Wait on reservation's objects * shared and/or exclusive fences. * @obj: the reservation object - * @wait_all: if true, wait on all fences, else wait on just exclusive fence + * @usage: controls which fences to include, see enum dma_resv_usage. * @intr: if true, do interruptible wait * @timeout: timeout value in jiffies or zero to return immediately * @@ -710,14 +710,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton); * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than zer on success. 
*/ -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout) +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj, wait_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { ret = dma_fence_wait_timeout(fence, intr, ret); @@ -737,8 +737,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. * @obj: the reservation object - * @test_all: if true, test all fences, otherwise only test the exclusive - * fence + * @usage: controls which fences to include, see enum dma_resv_usage. * * Callers are not required to hold specific locks, but maybe hold * dma_resv_lock() already. @@ -747,12 +746,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * * True if all fences signaled, else false. */ -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj, test_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { dma_resv_iter_end(&cursor); return false; @@ -775,7 +774,7 @@ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, obj, true, fence) { + dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:", dma_resv_iter_is_exclusive(&cursor) ? "Exclusive" : "Shared"); diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d2e61f6ae989..d097981061b1 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,7 +58,7 @@ static int sanitycheck(void *arg) return r; } -static int test_signaling(void *arg, bool shared) +static int test_signaling(void *arg, enum dma_resv_usage usage) { struct dma_resv resv; struct dma_fence *f; @@ -81,18 +81,18 @@ static int test_signaling(void *arg, bool shared) goto err_unlock; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); - if (dma_resv_test_signaled(&resv, shared)) { + if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL; goto err_unlock; } dma_fence_signal(f); - if (!dma_resv_test_signaled(&resv, shared)) { + if (!dma_resv_test_signaled(&resv, usage)) { pr_err("Resv not reporting signaled\n"); r = -EINVAL; goto err_unlock; @@ -107,15 +107,15 @@ err_free: static int test_excl_signaling(void *arg) { - return test_signaling(arg, false); + return test_signaling(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_signaling(void *arg) { - return test_signaling(arg, true); + return test_signaling(arg, DMA_RESV_USAGE_READ); } -static int test_for_each(void *arg, bool shared) +static int test_for_each(void *arg, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *f, *fence; @@ -139,13 +139,13 @@ static int test_for_each(void *arg, bool shared) goto err_unlock; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); r = -ENOENT; - dma_resv_for_each_fence(&cursor, &resv, shared, fence) { + 
dma_resv_for_each_fence(&cursor, &resv, usage, fence) { if (!r) { pr_err("More than one fence found\n"); r = -EINVAL; @@ -156,7 +156,8 @@ static int test_for_each(void *arg, bool shared) r = -EINVAL; goto err_unlock; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_is_exclusive(&cursor) != + (usage >= DMA_RESV_USAGE_READ)) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock; @@ -178,15 +179,15 @@ err_free: static int test_excl_for_each(void *arg) { - return test_for_each(arg, false); + return test_for_each(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_for_each(void *arg) { - return test_for_each(arg, true); + return test_for_each(arg, DMA_RESV_USAGE_READ); } -static int test_for_each_unlocked(void *arg, bool shared) +static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *f, *fence; @@ -211,14 +212,14 @@ static int test_for_each_unlocked(void *arg, bool shared) goto err_free; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); dma_resv_unlock(&resv); r = -ENOENT; - dma_resv_iter_begin(&cursor, &resv, shared); + dma_resv_iter_begin(&cursor, &resv, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!r) { pr_err("More than one fence found\n"); @@ -234,7 +235,8 @@ static int test_for_each_unlocked(void *arg, bool shared) r = -EINVAL; goto err_iter_end; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_is_exclusive(&cursor) != + (usage >= DMA_RESV_USAGE_READ)) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end; @@ -262,15 +264,15 @@ err_free: static int test_excl_for_each_unlocked(void *arg) { - return test_for_each_unlocked(arg, false); + return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_for_each_unlocked(void *arg) { - return test_for_each_unlocked(arg, true); + return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ); } -static int test_get_fences(void *arg, bool shared) +static int test_get_fences(void *arg, enum dma_resv_usage usage) { struct dma_fence *f, **fences = NULL; struct dma_resv resv; @@ -294,13 +296,13 @@ static int test_get_fences(void *arg, bool shared) goto err_resv; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); dma_resv_unlock(&resv); - r = dma_resv_get_fences(&resv, shared, &i, &fences); + r = dma_resv_get_fences(&resv, usage, &i, &fences); if (r) { pr_err("get_fences failed\n"); goto err_free; @@ -324,12 +326,12 @@ err_resv: static int test_excl_get_fences(void *arg) { - return test_get_fences(arg, false); + return test_get_fences(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_get_fences(void *arg) { - return test_get_fences(arg, true); + return test_get_fences(arg, DMA_RESV_USAGE_READ); } int dma_resv(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e85e347eb670..413f32c3fd63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1288,7 +1288,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, * * TODO: Remove together with dma_resv rework. 
*/ - dma_resv_for_each_fence(&cursor, resv, false, fence) { + dma_resv_for_each_fence(&cursor, resv, + DMA_RESV_USAGE_WRITE, + fence) { break; } dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index fae5c1debfad..7a6908d71820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - /* TODO: Unify this with other drivers */ - r = dma_resv_get_fences(new_abo->tbo.base.resv, true, + r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, &work->shared_count, &work->shared); if (unlikely(r != 0)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 57b74d35052f..84a53758e18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 81207737c716..65998cbcd7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, struct dma_fence *fence; int r; - r = dma_resv_get_singleton(resv, true, &fence); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence); if (r) goto fallback; @@ -139,7 +139,8 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. 
*/ - dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4b153daf283d..86f5248676b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6f57a2fd5fe3..a7f39f8ab7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -768,8 +768,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 40e06745fae9..744e144e5fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - dma_resv_for_each_fence(&cursor, resv, true, f) { + /* TODO: Use DMA_RESV_USAGE_READ here */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f7f149588432..5db5066e74b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1344,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + DMA_RESV_USAGE_READ, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 39c74d9fa7cc..3654326219e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1163,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(bo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b13451255e8b..a0376fd36a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2059,7 +2059,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence; - 
dma_resv_for_each_fence(&cursor, resv, true, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2665,7 +2665,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ)) return false; /* Try to block ongoing updates */ @@ -2845,7 +2845,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + DMA_RESV_USAGE_READ, true, timeout); if (timeout <= 0) return timeout; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b30656959fd8..9e24b1e616af 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9236,7 +9236,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * deadlock during GPU reset when this fence will not signal * but we hold reservation lock for the BO. */ - r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(abo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(5000)); if (unlikely(r <= 0)) DRM_ERROR("Waiting for fences timed out!"); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 133dfae06fab..eb0c2d041f13 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -771,7 +771,8 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, return -EINVAL; } - ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all), + true, timeout); if (ret == 0) ret = -ETIME; else if (ret > 0) diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index 9338ddb7edff..a6d89aed0bda 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -151,7 +151,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0; obj = drm_gem_fb_get_obj(state->fb, 0); - ret = dma_resv_get_singleton(obj->resv, false, &fence); + ret = dma_resv_get_singleton(obj->resv, DMA_RESV_USAGE_WRITE, &fence); if (ret) return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index d5314aa28ff7..507172e2780b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -380,12 +380,14 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op, } if (op & ETNA_PREP_NOSYNC) { - if (!dma_resv_test_signaled(obj->resv, write)) + if (!dma_resv_test_signaled(obj->resv, + dma_resv_usage_rw(write))) return -EBUSY; } else { unsigned long remain = etnaviv_timeout_to_jiffies(timeout); - ret = dma_resv_wait_timeout(obj->resv, write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write), + true, remain); if (ret <= 0) return ret == 0 ? 
-ETIMEDOUT : ret; } diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 5712688232fb..03e86e836a17 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -997,7 +997,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb; - dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_iter_begin(&cursor, obj->base.resv, + DMA_RESV_USAGE_WRITE); dma_resv_for_each_fence_unlocked(&cursor, fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 470fdfd61a0f..14a1c0ad8c3c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -138,12 +138,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !dma_resv_test_signaled(obj->resv, true); + * !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ); * to report the overall busyness. This is what the wait-ioctl does. * */ args->busy = 0; - dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) args->busy = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 444f8268b9c5..a200d3e66573 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = READ_ONCE(obj->mm.region); #ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) && + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) && i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 6d1a71d6404c..644fe237601c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -86,7 +86,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true; /* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout(obj->base.resv, true, false, + r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index dab3d30c09a0..319936f91ac5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -40,7 +40,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; - dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { ret = i915_gem_object_wait_fence(fence, flags, timeout); if (ret <= 0) @@ -117,7 +118,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL); + 
dma_resv_iter_begin(&cursor, obj->base.resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) i915_gem_fence_wait_priority(fence, attr); dma_resv_iter_end(&cursor); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index b071a58dd6da..b4275b55e5b8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -219,7 +219,8 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_detach; } - timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE, + true, 5 * HZ); if (!timeout) { pr_err("dmabuf wait for exclusive fence timed out.\n"); timeout = -ETIME; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 582770360ad1..73d5195146b0 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1598,7 +1598,8 @@ i915_request_await_object(struct i915_request *to, struct dma_fence *fence; int ret = 0; - dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + dma_resv_for_each_fence(&cursor, obj->base.resv, + dma_resv_usage_rw(write), fence) { ret = i915_request_await_dma_fence(to, fence); if (ret) break; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2a74a9a1cafe..ae984c66c48a 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -585,7 +585,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp)); - dma_resv_iter_begin(&cursor, resv, write); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write)); dma_resv_for_each_fence_unlocked(&cursor, f) { pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 02b9ae65a96a..01bbb5f2d462 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -848,7 +848,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout); long ret; - ret = dma_resv_wait_timeout(obj->resv, write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write), + true, remain); if (ret == 0) return remain == 0 ? 
-EBUSY : -ETIMEDOUT; else if (ret < 0) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index e2faf92e4831..8642b84ea20c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -558,7 +558,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; } - ret = dma_resv_get_singleton(nvbo->bo.base.resv, false, + ret = dma_resv_get_singleton(nvbo->bo.base.resv, + DMA_RESV_USAGE_WRITE, &asyw->state.fence); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 74f8652d2bd3..c6bb4dbcd735 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -962,11 +962,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, struct dma_fence *fence; int ret; - /* TODO: This is actually a memory management dependency */ - ret = dma_resv_get_singleton(bo->base.resv, false, &fence); + ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE, + &fence); if (ret) - dma_resv_wait_timeout(bo->base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); nv10_bo_put_tile_region(dev, *old_tile, fence); *old_tile = new_tile; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 0268259e97eb..d5e81ccee01c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -350,14 +350,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (ret) return ret; - /* Waiting for the exclusive fence first causes performance regressions - * under some circumstances. So manually wait for the shared ones first. + /* Waiting for the writes first causes performance regressions + * under some circumstances. So manually wait for the reads first. */ for (i = 0; i < 2; ++i) { struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, exclusive, fence) { + dma_resv_for_each_fence(&cursor, resv, + dma_resv_usage_rw(exclusive), + fence) { struct nouveau_fence *f; if (i == 0 && dma_resv_iter_is_exclusive(&cursor)) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9416bee92141..fab542a758ff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -962,7 +962,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, return -ENOENT; nvbo = nouveau_gem_object(gem); - lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true, + lret = dma_resv_wait_timeout(nvbo->bo.base.resv, + dma_resv_usage_rw(write), true, no_wait ? 0 : 30 * HZ); if (!lret) ret = -EBUSY; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 94b6f0a19c83..7fcbc2a5b6cd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -316,7 +316,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, if (!gem_obj) return -ENOENT; - ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ, + true, timeout); if (!ret) ret = timeout ? 
-ETIMEDOUT : -EBUSY; diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 6a36b0fd845c..33e5889d6608 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -61,7 +61,8 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) struct dma_fence *fence; int rel = 0; - dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true); + dma_resv_iter_begin(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) rel = 0; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index f60e826cd292..57ff2b723c87 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -533,7 +533,8 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence); + r = dma_resv_get_singleton(new_rbo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + &work->fence); if (r) { radeon_bo_unreserve(new_rbo); DRM_ERROR("failed to get new rbo buffer fences\n"); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index f563284a7fac..6616a828f40b 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -162,7 +162,9 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, } if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ - r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); + r = dma_resv_wait_timeout(robj->tbo.base.resv, + DMA_RESV_USAGE_READ, + true, 30 * HZ); if (!r) r = -EBUSY; @@ -524,7 +526,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); - r = dma_resv_test_signaled(robj->tbo.base.resv, true); + r = dma_resv_test_signaled(robj->tbo.base.resv, DMA_RESV_USAGE_READ); if (r == 0) r = -EBUSY; else @@ -553,7 +555,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, 30 * HZ); if (ret == 0) r = -EBUSY; else if (ret < 0) diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 9fa88549c89e..68ebeb1bdfff 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, return true; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index b991ba1bcd51..49bbb2266c0f 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_fence *f; int r = 0; - dma_resv_for_each_fence(&cursor, resv, shared, f) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c 
index bc0f44299bb9..a50750740ab0 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -478,8 +478,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return -EINVAL; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) { DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); return r ? r : -ETIME; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index c5660b066554..76fd2904c7c6 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -705,7 +705,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, dma_resv_assert_held(obj->resv); - dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write), + fence) { /* Make sure to grab an additional ref on the added fence */ dma_fence_get(fence); ret = drm_sched_job_add_dependency(job, fence); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c49996cf25d0..cff05b62f3f7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,7 +223,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, resv, true); + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); @@ -252,7 +252,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, struct dma_resv *resv = &bo->base._resv; int ret; - if (dma_resv_test_signaled(resv, true)) + if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ)) ret = 0; else ret = -EBUSY; @@ -264,7 +264,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, dma_resv_unlock(bo->base.resv); spin_unlock(&bo->bdev->lru_lock); - lret = dma_resv_wait_timeout(resv, true, interruptible, + lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + interruptible, 30 * HZ); if (lret < 0) @@ -367,7 +368,8 @@ static void ttm_bo_release(struct kref *kref) /* Last resort, if we fail to allocate memory for the * fences block for the BO to become idle */ - dma_resv_wait_timeout(bo->base.resv, true, false, + dma_resv_wait_timeout(bo->base.resv, + DMA_RESV_USAGE_READ, false, 30 * HZ); } @@ -378,7 +380,7 @@ static void ttm_bo_release(struct kref *kref) ttm_mem_io_free(bdev, bo->resource); } - if (!dma_resv_test_signaled(bo->base.resv, true) || + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); @@ -1044,14 +1046,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, long timeout = 15 * HZ; if (no_wait) { - if (dma_resv_test_signaled(bo->base.resv, true)) + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) return 0; else return -EBUSY; } - timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible, - timeout); + timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + interruptible, timeout); if (timeout < 0) return timeout; diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 2ddbebca87d9..91fc4940c65a 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ 
b/drivers/gpu/drm/vgem/vgem_fence.c @@ -130,6 +130,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, struct vgem_file *vfile = file->driver_priv; struct dma_resv *resv; struct drm_gem_object *obj; + enum dma_resv_usage usage; struct dma_fence *fence; int ret; @@ -151,7 +152,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, /* Check for a conflicting fence */ resv = obj->resv; - if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) { + usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE); + if (!dma_resv_test_signaled(resv, usage)) { ret = -EBUSY; goto err_fence; } diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 77743fd2c61a..f8d83358d2a0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -518,9 +518,10 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data, return -ENOENT; if (args->flags & VIRTGPU_WAIT_NOWAIT) { - ret = dma_resv_test_signaled(obj->resv, true); + ret = dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ); } else { - ret = dma_resv_wait_timeout(obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, + true, timeout); } if (ret == 0) ret = -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index fe13aa8b4a64..b96884f7d03d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -528,8 +528,8 @@ static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo, if (flags & drm_vmw_synccpu_allow_cs) { long lret; - lret = dma_resv_wait_timeout(bo->base.resv, true, true, - nonblock ? 0 : + lret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + true, nonblock ? 0 : MAX_SCHEDULE_TIMEOUT); if (!lret) return -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 626067104751..a84d1d5628d0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1164,7 +1164,8 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, if (bo->moving) dma_fence_put(bo->moving); - return dma_resv_get_singleton(bo->base.resv, false, + return dma_resv_get_singleton(bo->base.resv, + DMA_RESV_USAGE_WRITE, &bo->moving); } diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index d32cd7538835..f9901d273b8e 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -67,7 +67,8 @@ wait_fence: * may be not up-to-date. Wait for the exporter to finish * the migration. */ - return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false, + return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, + DMA_RESV_USAGE_WRITE, false, MAX_SCHEDULE_TIMEOUT); } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 6fb91956ab8d..a297397743a2 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -408,6 +408,9 @@ struct dma_buf { * pipelining across drivers. These do not set any fences for their * access. An example here is v4l. * + * - Driver should use dma_resv_usage_rw() when retrieving fences as + * dependency for implicit synchronization. 
+ * + * DYNAMIC IMPORTER RULES: + * + * Dynamic importers, see dma_buf_attachment_is_dynamic(), have @@ -423,8 +426,9 @@ struct dma_buf { * * IMPORTANT: * - * All drivers must obey the struct dma_resv rules, specifically the - * rules for updating and obeying fences. + * All drivers and memory management related functions must obey the + * struct dma_resv rules, specifically the rules for updating and + * obeying fences. See enum dma_resv_usage for further descriptions. */ struct dma_resv *resv;
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 5fa04d0fccad..92cd8023980f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -49,6 +49,53 @@ extern struct ww_class reservation_ww_class; struct dma_resv_list; +/** + * enum dma_resv_usage - how the fences from a dma_resv obj are used + * + * This enum describes the different use cases for a dma_resv object and + * controls which fences are returned when queried. + * + * An important fact is that there is the order WRITE<READ and when the + * dma_resv object is asked for the fences of one use case the fences of the + * lower use cases are returned as well. + */ +enum dma_resv_usage { + DMA_RESV_USAGE_WRITE, + DMA_RESV_USAGE_READ +}; + +/** + * dma_resv_usage_rw - helper for implicit sync + * @write: true if we create a new implicit sync write + * + * This returns the implicit synchronization usage for write or read accesses. + */ +static inline enum dma_resv_usage dma_resv_usage_rw(bool write) +{ + return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE; +} static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor, struct dma_resv *obj, - bool all_fences) + enum dma_resv_usage usage) { cursor->obj = obj; - cursor->all_fences = all_fences; + cursor->usage = usage; cursor->fence = NULL; } @@ -241,7 +288,7 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * dma_resv_for_each_fence - fence iterator * @cursor: a struct dma_resv_iter pointer * @obj: a dma_resv object pointer - * @all_fences: true if all fences should be returned + * @usage: controls which fences to return * @fence: the current fence * * Iterate over the fences in a struct dma_resv object while holding the @@ -250,8 +297,8 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * valid as long as the lock is held and so no extra reference to the fence is * taken. */ -#define dma_resv_for_each_fence(cursor, obj, all_fences, fence) \ - for (dma_resv_iter_begin(cursor, obj, all_fences), \ +#define dma_resv_for_each_fence(cursor, obj, usage, fence) \ + for (dma_resv_iter_begin(cursor, obj, usage), \ fence = dma_resv_iter_first(cursor); fence; \ fence = dma_resv_iter_next(cursor)) @@ -418,14 +465,14 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); -int dma_resv_get_singleton(struct dma_resv *obj, bool write, +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout); -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all); +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout); +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage); void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq); #endif /* _LINUX_RESERVATION_H */
-- cgit
From 73511edf8b196e6f1ccda0fdf294ff57aa2dc9db Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 9 Nov 2021 11:08:18 +0100 Subject: dma-buf: specify usage while adding fences to dma_resv obj v7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
Instead of distinguishing between shared and exclusive fences, specify
the fence usage while adding fences. Rework all drivers to use this interface instead and deprecate the old one. v2: some kerneldoc comments suggested by Daniel v3: fix a missing case in radeon v4: rebase on nouveau changes, fix lockdep and temporarily disable warning v5: more documentation updates v6: separate internal dma_resv changes from this patch, avoids temporarily disabling the warning, rebase on upstream changes v7: fix missed case in lima driver, minimize changes to i915_gem_busy_ioctl Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-3-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 48 +++++++--- drivers/dma-buf/st-dma-resv.c | 101 ++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 6 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 5 +- .../gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 4 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 8 +- .../gpu/drm/i915/selftests/intel_memory_region.c | 3 +- drivers/gpu/drm/lima/lima_gem.c | 7 +- drivers/gpu/drm/msm/msm_gem_submit.c | 6 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 4 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/qxl/qxl_release.c | 3 +- drivers/gpu/drm/radeon/radeon_object.c | 6 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 5 +- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 6 +- drivers/gpu/drm/v3d/v3d_gem.c | 4 +- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 9 +- drivers/gpu/drm/virtio/virtgpu_gem.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 3 +- include/linux/dma-buf.h | 16 ++-- include/linux/dma-resv.h | 25 +++-- 30 files changed, 149 insertions(+), 166 deletions(-) (limited to 'include/linux')
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 17237e6ee30c..543dae6566d2 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -234,14 +234,14 @@ EXPORT_SYMBOL(dma_resv_reserve_fences); #ifdef CONFIG_DEBUG_MUTEXES /** - * dma_resv_reset_shared_max - reset shared fences for debugging + * dma_resv_reset_max_fences - reset shared fences for debugging * @obj: the dma_resv object to reset * * Reset the number of pre-reserved shared slots to test that drivers do * correct slot allocation using dma_resv_reserve_fences(). See also * &dma_resv_list.shared_max. */ -void dma_resv_reset_shared_max(struct dma_resv *obj) +void dma_resv_reset_max_fences(struct dma_resv *obj) { struct dma_resv_list *fences = dma_resv_shared_list(obj); @@ -251,7 +251,7 @@ void dma_resv_reset_shared_max(struct dma_resv *obj) if (fences) fences->shared_max = fences->shared_count; } -EXPORT_SYMBOL(dma_resv_reset_shared_max); +EXPORT_SYMBOL(dma_resv_reset_max_fences); #endif /** @@ -264,7 +264,8 @@ EXPORT_SYMBOL(dma_resv_reset_shared_max); * * See also &dma_resv.fence for a discussion of the semantics.
*/ -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) +static void dma_resv_add_shared_fence(struct dma_resv *obj, + struct dma_fence *fence) { struct dma_resv_list *fobj; struct dma_fence *old; @@ -305,13 +306,13 @@ replace: write_seqcount_end(&obj->seq); dma_fence_put(old); } -EXPORT_SYMBOL(dma_resv_add_shared_fence); /** * dma_resv_replace_fences - replace fences in the dma_resv obj * @obj: the reservation object * @context: the context of the fences to replace * @replacement: the new fence to use instead + * @usage: how the new fence is used, see enum dma_resv_usage * * Replace fences with a specified context with a new fence. Only valid if the * operation represented by the original fence has no longer access to the @@ -321,12 +322,16 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence); * update fence which makes the resource inaccessible. */ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, - struct dma_fence *replacement) + struct dma_fence *replacement, + enum dma_resv_usage usage) { struct dma_resv_list *list; struct dma_fence *old; unsigned int i; + /* Only readers supported for now */ + WARN_ON(usage != DMA_RESV_USAGE_READ); + dma_resv_assert_held(obj); write_seqcount_begin(&obj->seq); @@ -360,7 +365,8 @@ EXPORT_SYMBOL(dma_resv_replace_fences); * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock(). * See also &dma_resv.fence_excl for a discussion of the semantics. */ -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) +static void dma_resv_add_excl_fence(struct dma_resv *obj, + struct dma_fence *fence) { struct dma_fence *old_fence = dma_resv_excl_fence(obj); @@ -375,7 +381,27 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_fence_put(old_fence); } -EXPORT_SYMBOL(dma_resv_add_excl_fence); + +/** + * dma_resv_add_fence - Add a fence to the dma_resv obj + * @obj: the reservation object + * @fence: the fence to add + * @usage: how the fence is used, see enum dma_resv_usage + * + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and + * dma_resv_reserve_fences() has been called. + * + * See also &dma_resv.fence for a discussion of the semantics. + */ +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage) +{ + if (usage == DMA_RESV_USAGE_WRITE) + dma_resv_add_excl_fence(obj, fence); + else + dma_resv_add_shared_fence(obj, fence); +} +EXPORT_SYMBOL(dma_resv_add_fence); /* Restart the iterator by initializing all the necessary fields, but not the * relation to the dma_resv object. */ @@ -574,7 +600,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) } dma_fence_get(f); - if (dma_resv_iter_is_exclusive(&cursor)) + if (dma_resv_iter_usage(&cursor) == DMA_RESV_USAGE_WRITE) excl = f; else RCU_INIT_POINTER(list->shared[list->shared_count++], f); @@ -771,13 +797,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { + static const char *usage[] = { "write", "read" }; struct dma_resv_iter cursor; struct dma_fence *fence; dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:", - dma_resv_iter_is_exclusive(&cursor) ? 
- "Exclusive" : "Shared"); + usage[dma_resv_iter_usage(&cursor)]); dma_fence_describe(fence, seq); } } diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d097981061b1..d0f7c2bfd4f0 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,8 +58,9 @@ static int sanitycheck(void *arg) return r; } -static int test_signaling(void *arg, enum dma_resv_usage usage) +static int test_signaling(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv resv; struct dma_fence *f; int r; @@ -81,11 +82,7 @@ static int test_signaling(void *arg, enum dma_resv_usage usage) goto err_unlock; } - if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); - + dma_resv_add_fence(&resv, f, usage); if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL; @@ -105,18 +102,9 @@ err_free: return r; } -static int test_excl_signaling(void *arg) -{ - return test_signaling(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_signaling(void *arg) -{ - return test_signaling(arg, DMA_RESV_USAGE_READ); -} - -static int test_for_each(void *arg, enum dma_resv_usage usage) +static int test_for_each(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv; @@ -139,10 +127,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) goto err_unlock; } - if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); + dma_resv_add_fence(&resv, f, usage); r = -ENOENT; dma_resv_for_each_fence(&cursor, &resv, usage, fence) { @@ -156,8 +141,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) r = -EINVAL; goto err_unlock; } - if (dma_resv_iter_is_exclusive(&cursor) != - (usage >= DMA_RESV_USAGE_READ)) { + if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock; @@ -177,18 +161,9 @@ err_free: return r; } -static int test_excl_for_each(void *arg) -{ - return test_for_each(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_for_each(void *arg) -{ - return test_for_each(arg, DMA_RESV_USAGE_READ); -} - -static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) +static int test_for_each_unlocked(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv; @@ -212,10 +187,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) goto err_free; } - if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); + dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv); r = -ENOENT; @@ -235,8 +207,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) r = -EINVAL; goto err_iter_end; } - if (dma_resv_iter_is_exclusive(&cursor) != - (usage >= DMA_RESV_USAGE_READ)) { + if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end; @@ -262,18 +233,9 @@ err_free: return r; } -static int test_excl_for_each_unlocked(void *arg) -{ - return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_for_each_unlocked(void *arg) -{ - return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ); -} - -static int test_get_fences(void *arg, enum dma_resv_usage usage) +static int test_get_fences(void *arg) { + 
enum dma_resv_usage usage = (unsigned long)arg; struct dma_fence *f, **fences = NULL; struct dma_resv resv; int r, i; @@ -296,10 +258,7 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage) goto err_resv; } - if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); + dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv); r = dma_resv_get_fences(&resv, usage, &i, &fences); @@ -324,30 +283,24 @@ err_resv: return r; } -static int test_excl_get_fences(void *arg) -{ - return test_get_fences(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_get_fences(void *arg) -{ - return test_get_fences(arg, DMA_RESV_USAGE_READ); -} - int dma_resv(void) { static const struct subtest tests[] = { SUBTEST(sanitycheck), - SUBTEST(test_excl_signaling), - SUBTEST(test_shared_signaling), - SUBTEST(test_excl_for_each), - SUBTEST(test_shared_for_each), - SUBTEST(test_excl_for_each_unlocked), - SUBTEST(test_shared_for_each_unlocked), - SUBTEST(test_excl_get_fences), - SUBTEST(test_shared_get_fences), + SUBTEST(test_signaling), + SUBTEST(test_for_each), + SUBTEST(test_for_each_unlocked), + SUBTEST(test_get_fences), }; + enum dma_resv_usage usage; + int r; spin_lock_init(&fence_lock); - return subtests(tests, NULL); + for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ; + ++usage) { + r = subtests(tests, (void *)(unsigned long)usage); + if (r) + return r; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 98b1736bb221..5031e26e6716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement); + replacement, DMA_RESV_USAGE_READ); dma_fence_put(replacement); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 413f32c3fd63..76fd916424d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -55,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and one for the CS job */ - p->uf_entry.tv.num_shared = 2; + /* One for TTM and two for the CS job */ + p->uf_entry.tv.num_shared = 3; drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a7f39f8ab7be..a3cdf8a24377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1397,10 +1397,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, return; } - if (shared) - dma_resv_add_shared_fence(resv, fence); - else - dma_resv_add_excl_fence(resv, fence); + dma_resv_add_fence(resv, fence, shared ? 
DMA_RESV_USAGE_READ : + DMA_RESV_USAGE_WRITE); } /** diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 53f7c78628a4..98bb5c9239de 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -202,14 +202,10 @@ static void submit_attach_object_fences(struct etnaviv_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = &submit->bos[i].obj->base; + bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE; - if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(obj->resv, - submit->out_fence); - else - dma_resv_add_shared_fence(obj->resv, - submit->out_fence); - + dma_resv_add_fence(obj->resv, submit->out_fence, write ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); submit_unlock_object(submit, i); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 14a1c0ad8c3c..ddda468241ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -148,11 +148,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, if (dma_resv_iter_is_restarted(&cursor)) args->busy = 0; - if (dma_resv_iter_is_exclusive(&cursor)) - /* Translate the exclusive fence to the READ *and* WRITE engine */ + if (dma_resv_iter_usage(&cursor) <= DMA_RESV_USAGE_WRITE) + /* Translate the write fences to the READ *and* WRITE engine */ args->busy |= busy_check_writer(fence); else - /* Translate shared fences to READ set of engines */ + /* Translate read fences to READ set of engines */ args->busy |= busy_check_reader(fence); } dma_resv_iter_end(&cursor); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 1fd0cc9ca213..f5f2b8b115ea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -116,7 +116,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, obj->base.resv, NULL, true, i915_fence_timeout(i915), I915_FENCE_GFP); - dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); + dma_resv_add_fence(obj->base.resv, &clflush->base.dma, + DMA_RESV_USAGE_WRITE); dma_fence_work_commit(&clflush->base); /* * We must have successfully populated the pages(since we are diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 432ac74ff225..438b8a95b3d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -637,9 +637,8 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, if (IS_ERR_OR_NULL(copy_fence)) return PTR_ERR_OR_ZERO(copy_fence); - dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence); - dma_resv_add_shared_fence(src_bo->base.resv, copy_fence); - + dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ); dma_fence_put(copy_fence); return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 0e52eb87cd55..4997ed18b6e4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -218,8 +218,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, if (rq) { err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) - dma_resv_add_excl_fence(obj->base.resv, - &rq->fence); + 
dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_WRITE); i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index a132e241c3ee..3a6e3f6d239f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1220,7 +1220,8 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); if (rq) { - dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_WRITE); i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bae3423f58e8..524477d8939e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1826,7 +1826,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (fence) { - dma_resv_add_excl_fence(vma->obj->base.resv, fence); + dma_resv_add_fence(vma->obj->base.resv, fence, + DMA_RESV_USAGE_WRITE); obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0; } @@ -1838,7 +1839,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (fence) { - dma_resv_add_shared_fence(vma->obj->base.resv, fence); + dma_resv_add_fence(vma->obj->base.resv, fence, + DMA_RESV_USAGE_READ); obj->write_domain = 0; } } @@ -2078,7 +2080,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) goto out_rpm; } - dma_resv_add_shared_fence(obj->base.resv, fence); + dma_resv_add_fence(obj->base.resv, fence, DMA_RESV_USAGE_READ); dma_fence_put(fence); out_rpm: diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 6114e013092b..73eb53edb8de 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -1056,7 +1056,8 @@ static int igt_lmem_write_cpu(void *arg) obj->mm.pages->sgl, I915_CACHE_NONE, true, 0xdeadbeaf, &rq); if (rq) { - dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_WRITE); i915_request_put(rq); } diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index e0a11ee0e86d..0f1ca0b0db49 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -364,10 +364,9 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit) fence = lima_sched_context_queue_task(submit->task); for (i = 0; i < submit->nr_bos; i++) { - if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence); - else - dma_resv_add_shared_fence(lima_bo_resv(bos[i]), fence); + dma_resv_add_fence(lima_bo_resv(bos[i]), fence, + submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE ? 
+ DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); } drm_gem_unlock_reservations((struct drm_gem_object **)bos, diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3164db8be893..8d1eef914ba8 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -395,9 +395,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) struct drm_gem_object *obj = &submit->bos[i].obj->base; if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(obj->resv, submit->user_fence); + dma_resv_add_fence(obj->resv, submit->user_fence, + DMA_RESV_USAGE_WRITE); else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) - dma_resv_add_shared_fence(obj->resv, submit->user_fence); + dma_resv_add_fence(obj->resv, submit->user_fence, + DMA_RESV_USAGE_READ); } } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index c6bb4dbcd735..05076e530e7d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1308,10 +1308,11 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool excl { struct dma_resv *resv = nvbo->bo.base.resv; - if (exclusive) - dma_resv_add_excl_fence(resv, &fence->base); - else if (fence) - dma_resv_add_shared_fence(resv, &fence->base); + if (!fence) + return; + + dma_resv_add_fence(resv, &fence->base, exclusive ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); } static void diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index d5e81ccee01c..7f01dcf81fab 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -360,9 +360,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(exclusive), fence) { + enum dma_resv_usage usage; struct nouveau_fence *f; - if (i == 0 && dma_resv_iter_is_exclusive(&cursor)) + usage = dma_resv_iter_usage(&cursor); + if (i == 0 && usage == DMA_RESV_USAGE_WRITE) continue; f = nouveau_local_fence(fence, chan->drm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index c34114560e49..fda5871aebe3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -268,7 +268,7 @@ static void panfrost_attach_object_fences(struct drm_gem_object **bos, int i; for (i = 0; i < bo_count; i++) - dma_resv_add_excl_fence(bos[i]->resv, fence); + dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); } int panfrost_job_push(struct panfrost_job *job) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index cde1e8ddaeaa..368d26da0d6a 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -429,7 +429,8 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) list_for_each_entry(entry, &release->bos, head) { bo = entry->bo; - dma_resv_add_shared_fence(bo->base.resv, &release->base); + dma_resv_add_fence(bo->base.resv, &release->base, + DMA_RESV_USAGE_READ); ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 7ffd2e90f325..cb5c4aa45cef 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -791,8 +791,6 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, return; } - if (shared) - 
dma_resv_add_shared_fence(resv, &fence->base); - else - dma_resv_add_excl_fence(resv, &fence->base); + dma_resv_add_fence(resv, &fence->base, shared ? + DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index cff05b62f3f7..d74f9eea855e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; } - dma_resv_add_shared_fence(bo->base.resv, fence); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1b96b91bf81b..7a96a1db13a7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -507,7 +507,8 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, if (ret) return ret; - dma_resv_add_excl_fence(&ghost_obj->base._resv, fence); + dma_resv_add_fence(&ghost_obj->base._resv, fence, + DMA_RESV_USAGE_WRITE); /** * If we're not moving to fixed memory, the TTM object @@ -561,7 +562,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); int ret = 0; - dma_resv_add_excl_fence(bo->base.resv, fence); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); if (!evict) ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); else if (!from->use_tt && pipeline) diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 789c645f004e..0eb995d25df1 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -154,10 +154,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - if (entry->num_shared) - dma_resv_add_shared_fence(bo->base.resv, fence); - else - dma_resv_add_excl_fence(bo->base.resv, fence); + dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ? + DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE); ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 961812d33827..2352e9640922 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -550,8 +550,8 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, for (i = 0; i < job->bo_count; i++) { /* XXX: Use shared fences for read-only objects. 
*/ - dma_resv_add_excl_fence(job->bo[i]->resv, - job->done_fence); + dma_resv_add_fence(job->bo[i]->resv, job->done_fence, + DMA_RESV_USAGE_WRITE); } drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 594bd6bb00d2..38550317e025 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -546,7 +546,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno; - dma_resv_add_shared_fence(bo->base.base.resv, exec->fence); + dma_resv_add_fence(bo->base.base.resv, exec->fence, + DMA_RESV_USAGE_READ); } list_for_each_entry(bo, &exec->unref_list, unref_head) { diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 91fc4940c65a..c2a879734d40 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -161,12 +161,9 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, /* Expose the fence via the dma-buf */ dma_resv_lock(resv, NULL); ret = dma_resv_reserve_fences(resv, 1); - if (!ret) { - if (arg->flags & VGEM_FENCE_WRITE) - dma_resv_add_excl_fence(resv, fence); - else - dma_resv_add_shared_fence(resv, fence); - } + if (!ret) + dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); dma_resv_unlock(resv); /* Record the fence in our idr for later signaling */ diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 1820ca6cf673..580a78809836 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -250,7 +250,8 @@ void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs, int i; for (i = 0; i < objs->nents; i++) - dma_resv_add_excl_fence(objs->objs[i]->resv, fence); + dma_resv_add_fence(objs->objs[i]->resv, fence, + DMA_RESV_USAGE_WRITE); } void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index b96884f7d03d..bec50223efe5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -758,7 +758,8 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, ret = dma_resv_reserve_fences(bo->base.resv, 1); if (!ret) - dma_resv_add_excl_fence(bo->base.resv, &fence->base); + dma_resv_add_fence(bo->base.resv, &fence->base, + DMA_RESV_USAGE_WRITE); else /* Last resort fallback when we are OOM */ dma_fence_wait(&fence->base, false); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index a297397743a2..71731796c8c3 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -393,15 +393,15 @@ struct dma_buf { * e.g. exposed in `Implicit Fence Poll Support`_ must follow the * below rules. * - * - Drivers must add a shared fence through dma_resv_add_shared_fence() - * for anything the userspace API considers a read access. This highly - * depends upon the API and window system. + * - Drivers must add a read fence through dma_resv_add_fence() with the + * DMA_RESV_USAGE_READ flag for anything the userspace API considers a + * read access. This highly depends upon the API and window system. * - * - Similarly drivers must set the exclusive fence through - * dma_resv_add_excl_fence() for anything the userspace API considers - * write access.
+ * - Similarly drivers must add a write fence through + * dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for + * anything the userspace API considers write access. * - * - Drivers may just always set the exclusive fence, since that only + * - Drivers may just always add a write fence, since that only * causes unnecessary synchronization, but no correctness issues. * - * - Some drivers only expose a synchronous userspace API with no @@ -416,7 +416,7 @@ struct dma_buf { * Dynamic importers, see dma_buf_attachment_is_dynamic(), have * additional constraints on how they set up fences: * - * - Dynamic importers must obey the exclusive fence and wait for it to + * - Dynamic importers must obey the write fences and wait for them to * signal before allowing access to the buffer's underlying storage * through the device. * diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 92cd8023980f..98dc5234b487 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -195,6 +195,9 @@ struct dma_resv_iter { /** @fence: the currently handled fence */ struct dma_fence *fence; + /** @fence_usage: the usage of the current fence */ + enum dma_resv_usage fence_usage; + /** @seq: sequence number to check for modifications */ unsigned int seq; @@ -244,14 +247,15 @@ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) } /** - * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * dma_resv_iter_usage - Return the usage of the current fence * @cursor: the cursor of the current position * - * Returns true if the currently returned fence is the exclusive one. + * Returns the usage of the currently processed fence. */ -static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +static inline enum dma_resv_usage +dma_resv_iter_usage(struct dma_resv_iter *cursor) { - return cursor->index == 0; + return cursor->fence_usage; } /** @@ -306,9 +310,9 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) #ifdef CONFIG_DEBUG_MUTEXES -void dma_resv_reset_shared_max(struct dma_resv *obj); +void dma_resv_reset_max_fences(struct dma_resv *obj); #else -static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {} +static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {} #endif /** @@ -454,17 +458,18 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj) */ static inline void dma_resv_unlock(struct dma_resv *obj) { - dma_resv_reset_shared_max(obj); + dma_resv_reset_max_fences(obj); ww_mutex_unlock(&obj->lock); } void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences); -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, - struct dma_fence *fence); -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); + struct dma_fence *fence, + enum dma_resv_usage usage); int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, -- cgit From 047a1b877ed48098bed71fcfb1d4891e1b54441d Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 23 Nov
2021 09:33:07 +0100 Subject: dma-buf & drm/amdgpu: remove dma_resv workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework the internals of the dma_resv object to allow adding more than one write fence and to remember for each fence what purpose it had. This allows removing the workaround from amdgpu which used a container for this instead. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Cc: amd-gfx@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-4-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 353 +++++++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 53 +---- include/linux/dma-resv.h | 47 +--- 4 files changed, 157 insertions(+), 297 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 543dae6566d2..378d47e1cfea 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -44,12 +44,12 @@ /** * DOC: Reservation Object Overview * - * The reservation object provides a mechanism to manage shared and - * exclusive fences associated with a buffer. A reservation object - * can have attached one exclusive fence (normally associated with - * write operations) or N shared fences (read operations). The RCU - * mechanism is used to protect read access to fences from locked - * write-side updates. + * The reservation object provides a mechanism to manage a container of + * dma_fence objects associated with a resource. A reservation object + * can have any number of fences attached to it. Each fence carries a usage + * parameter determining how the operation represented by the fence is using the + * resource. The RCU mechanism is used to protect read access to fences from + * locked write-side updates. * * See struct dma_resv for more details. */ @@ -57,39 +57,59 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); +/* Mask for the lower fence pointer bits */ +#define DMA_RESV_LIST_MASK 0x3 + struct dma_resv_list { struct rcu_head rcu; - u32 shared_count, shared_max; - struct dma_fence __rcu *shared[]; + u32 num_fences, max_fences; + struct dma_fence __rcu *table[]; }; -/** - * dma_resv_list_alloc - allocate fence list - * @shared_max: number of fences we need space for - * +/* Extract the fence and usage flags from an RCU protected entry in the list. */ +static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index, + struct dma_resv *resv, struct dma_fence **fence, + enum dma_resv_usage *usage) +{ + long tmp; + + tmp = (long)rcu_dereference_check(list->table[index], + resv ? dma_resv_held(resv) : true); + *fence = (struct dma_fence *)(tmp & ~DMA_RESV_LIST_MASK); + if (usage) + *usage = tmp & DMA_RESV_LIST_MASK; +} + +/* Set the fence and usage flags at the specific index in the list. */ +static void dma_resv_list_set(struct dma_resv_list *list, + unsigned int index, + struct dma_fence *fence, + enum dma_resv_usage usage) +{ + long tmp = ((long)fence) | usage; + + RCU_INIT_POINTER(list->table[index], (struct dma_fence *)tmp); +} + +/* * Allocate a new dma_resv_list and make sure to correctly initialize - * shared_max. + * max_fences.
*/ -static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max) +static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences) { struct dma_resv_list *list; - list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL); + list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL); if (!list) return NULL; - list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) / - sizeof(*list->shared); + list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) / + sizeof(*list->table); return list; } -/** - * dma_resv_list_free - free fence list - * @list: list to free - * - * Free a dma_resv_list and make sure to drop all references. - */ +/* Free a dma_resv_list and make sure to drop all references. */ static void dma_resv_list_free(struct dma_resv_list *list) { unsigned int i; @@ -97,9 +117,12 @@ static void dma_resv_list_free(struct dma_resv_list *list) if (!list) return; - for (i = 0; i < list->shared_count; ++i) - dma_fence_put(rcu_dereference_protected(list->shared[i], true)); + for (i = 0; i < list->num_fences; ++i) { + struct dma_fence *fence; + dma_resv_list_entry(list, i, NULL, &fence, NULL); + dma_fence_put(fence); + } kfree_rcu(list, rcu); } @@ -112,8 +135,7 @@ void dma_resv_init(struct dma_resv *obj) ww_mutex_init(&obj->lock, &reservation_ww_class); seqcount_ww_mutex_init(&obj->seq, &obj->lock); - RCU_INIT_POINTER(obj->fence, NULL); - RCU_INIT_POINTER(obj->fence_excl, NULL); + RCU_INIT_POINTER(obj->fences, NULL); } EXPORT_SYMBOL(dma_resv_init); @@ -123,46 +145,32 @@ EXPORT_SYMBOL(dma_resv_init); */ void dma_resv_fini(struct dma_resv *obj) { - struct dma_resv_list *fobj; - struct dma_fence *excl; - /* * This object should be dead and all references must have * been released to it, so no need to be protected with rcu. */ - excl = rcu_dereference_protected(obj->fence_excl, 1); - if (excl) - dma_fence_put(excl); - - fobj = rcu_dereference_protected(obj->fence, 1); - dma_resv_list_free(fobj); + dma_resv_list_free(rcu_dereference_protected(obj->fences, true)); ww_mutex_destroy(&obj->lock); } EXPORT_SYMBOL(dma_resv_fini); -static inline struct dma_fence * -dma_resv_excl_fence(struct dma_resv *obj) -{ - return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); -} - -static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) +/* Dereference the fences while ensuring RCU rules */ +static inline struct dma_resv_list *dma_resv_fences_list(struct dma_resv *obj) { - return rcu_dereference_check(obj->fence, dma_resv_held(obj)); + return rcu_dereference_check(obj->fences, dma_resv_held(obj)); } /** - * dma_resv_reserve_fences - Reserve space to add shared fences to - * a dma_resv. + * dma_resv_reserve_fences - Reserve space to add fences to a dma_resv object. * @obj: reservation object * @num_fences: number of fences we want to add * - * Should be called before dma_resv_add_shared_fence(). Must - * be called with @obj locked through dma_resv_lock(). + * Should be called before dma_resv_add_fence(). Must be called with @obj + * locked through dma_resv_lock(). * * Note that the preallocated slots need to be re-reserved if @obj is unlocked - * at any time before calling dma_resv_add_shared_fence(). This is validated - * when CONFIG_DEBUG_MUTEXES is enabled. + * at any time before calling dma_resv_add_fence(). This is validated when + * CONFIG_DEBUG_MUTEXES is enabled. 
* * RETURNS * Zero for success, or -errno @@ -174,11 +182,11 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) dma_resv_assert_held(obj); - old = dma_resv_shared_list(obj); - if (old && old->shared_max) { - if ((old->shared_count + num_fences) <= old->shared_max) + old = dma_resv_fences_list(obj); + if (old && old->max_fences) { + if ((old->num_fences + num_fences) <= old->max_fences) return 0; - max = max(old->shared_count + num_fences, old->shared_max * 2); + max = max(old->num_fences + num_fences, old->max_fences * 2); } else { max = max(4ul, roundup_pow_of_two(num_fences)); } @@ -193,27 +201,27 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) * references from the old struct are carried over to * the new. */ - for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) { + for (i = 0, j = 0, k = max; i < (old ? old->num_fences : 0); ++i) { + enum dma_resv_usage usage; struct dma_fence *fence; - fence = rcu_dereference_protected(old->shared[i], - dma_resv_held(obj)); + dma_resv_list_entry(old, i, obj, &fence, &usage); if (dma_fence_is_signaled(fence)) - RCU_INIT_POINTER(new->shared[--k], fence); + RCU_INIT_POINTER(new->table[--k], fence); else - RCU_INIT_POINTER(new->shared[j++], fence); + dma_resv_list_set(new, j++, fence, usage); } - new->shared_count = j; + new->num_fences = j; /* * We are not changing the effective set of fences here so can * merely update the pointer to the new array; both existing * readers and new readers will see exactly the same set of - * active (unsignaled) shared fences. Individual fences and the + * active (unsignaled) fences. Individual fences and the * old array are protected by RCU and so will not vanish under * the gaze of the rcu_read_lock() readers. */ - rcu_assign_pointer(obj->fence, new); + rcu_assign_pointer(obj->fences, new); if (!old) return 0; @@ -222,7 +230,7 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) for (i = k; i < max; ++i) { struct dma_fence *fence; - fence = rcu_dereference_protected(new->shared[i], + fence = rcu_dereference_protected(new->table[i], dma_resv_held(obj)); dma_fence_put(fence); } @@ -234,38 +242,39 @@ EXPORT_SYMBOL(dma_resv_reserve_fences); #ifdef CONFIG_DEBUG_MUTEXES /** - * dma_resv_reset_max_fences - reset shared fences for debugging + * dma_resv_reset_max_fences - reset fences for debugging * @obj: the dma_resv object to reset * - * Reset the number of pre-reserved shared slots to test that drivers do + * Reset the number of pre-reserved fence slots to test that drivers do * correct slot allocation using dma_resv_reserve_fences(). See also - * &dma_resv_list.shared_max. + * &dma_resv_list.max_fences. 
*/ void dma_resv_reset_max_fences(struct dma_resv *obj) { - struct dma_resv_list *fences = dma_resv_shared_list(obj); + struct dma_resv_list *fences = dma_resv_fences_list(obj); dma_resv_assert_held(obj); - /* Test shared fence slot reservation */ + /* Test fence slot reservation */ if (fences) - fences->shared_max = fences->shared_count; + fences->max_fences = fences->num_fences; } EXPORT_SYMBOL(dma_resv_reset_max_fences); #endif /** - * dma_resv_add_shared_fence - Add a fence to a shared slot + * dma_resv_add_fence - Add a fence to the dma_resv obj * @obj: the reservation object - * @fence: the shared fence to add + * @fence: the fence to add + * @usage: how the fence is used, see enum dma_resv_usage * - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and * dma_resv_reserve_fences() has been called. * * See also &dma_resv.fence for a discussion of the semantics. */ -static void dma_resv_add_shared_fence(struct dma_resv *obj, - struct dma_fence *fence) +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage) { struct dma_resv_list *fobj; struct dma_fence *old; @@ -280,32 +289,33 @@ static void dma_resv_add_shared_fence(struct dma_resv *obj, */ WARN_ON(dma_fence_is_container(fence)); - fobj = dma_resv_shared_list(obj); - count = fobj->shared_count; + fobj = dma_resv_fences_list(obj); + count = fobj->num_fences; write_seqcount_begin(&obj->seq); for (i = 0; i < count; ++i) { + enum dma_resv_usage old_usage; - old = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(obj)); - if (old->context == fence->context || + dma_resv_list_entry(fobj, i, obj, &old, &old_usage); + if ((old->context == fence->context && old_usage >= usage) || dma_fence_is_signaled(old)) goto replace; } - BUG_ON(fobj->shared_count >= fobj->shared_max); + BUG_ON(fobj->num_fences >= fobj->max_fences); old = NULL; count++; replace: - RCU_INIT_POINTER(fobj->shared[i], fence); - /* pointer update must be visible before we extend the shared_count */ - smp_store_mb(fobj->shared_count, count); + dma_resv_list_set(fobj, i, fence, usage); + /* pointer update must be visible before we extend the num_fences */ + smp_store_mb(fobj->num_fences, count); write_seqcount_end(&obj->seq); dma_fence_put(old); } +EXPORT_SYMBOL(dma_resv_add_fence); /** * dma_resv_replace_fences - replace fences in the dma_resv obj @@ -326,128 +336,63 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, enum dma_resv_usage usage) { struct dma_resv_list *list; - struct dma_fence *old; unsigned int i; - /* Only readers supported for now */ - WARN_ON(usage != DMA_RESV_USAGE_READ); - dma_resv_assert_held(obj); + list = dma_resv_fences_list(obj); write_seqcount_begin(&obj->seq); + for (i = 0; list && i < list->num_fences; ++i) { + struct dma_fence *old; - old = dma_resv_excl_fence(obj); - if (old->context == context) { - RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement)); - dma_fence_put(old); - } - - list = dma_resv_shared_list(obj); - for (i = 0; list && i < list->shared_count; ++i) { - old = rcu_dereference_protected(list->shared[i], - dma_resv_held(obj)); + dma_resv_list_entry(list, i, obj, &old, NULL); if (old->context != context) continue; - rcu_assign_pointer(list->shared[i], dma_fence_get(replacement)); + dma_resv_list_set(list, i, replacement, usage); dma_fence_put(old); } - write_seqcount_end(&obj->seq); } EXPORT_SYMBOL(dma_resv_replace_fences); -/** - * dma_resv_add_excl_fence - 
Add an exclusive fence. - * @obj: the reservation object - * @fence: the exclusive fence to add - * - * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock(). - * See also &dma_resv.fence_excl for a discussion of the semantics. - */ -static void dma_resv_add_excl_fence(struct dma_resv *obj, - struct dma_fence *fence) -{ - struct dma_fence *old_fence = dma_resv_excl_fence(obj); - - dma_resv_assert_held(obj); - - dma_fence_get(fence); - - write_seqcount_begin(&obj->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(obj->fence_excl, fence); - write_seqcount_end(&obj->seq); - - dma_fence_put(old_fence); -} - -/** - * dma_resv_add_fence - Add a fence to the dma_resv obj - * @obj: the reservation object - * @fence: the fence to add - * @usage: how the fence is used, see enum dma_resv_usage - * - * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and - * dma_resv_reserve_fences() has been called. - * - * See also &dma_resv.fence for a discussion of the semantics. - */ -void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, - enum dma_resv_usage usage) -{ - if (usage == DMA_RESV_USAGE_WRITE) - dma_resv_add_excl_fence(obj, fence); - else - dma_resv_add_shared_fence(obj, fence); -} -EXPORT_SYMBOL(dma_resv_add_fence); - -/* Restart the iterator by initializing all the necessary fields, but not the - * relation to the dma_resv object. */ +/* Restart the unlocked iteration by initializing the cursor object. */ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) { cursor->seq = read_seqcount_begin(&cursor->obj->seq); - cursor->index = -1; - cursor->shared_count = 0; - if (cursor->usage >= DMA_RESV_USAGE_READ) { - cursor->fences = dma_resv_shared_list(cursor->obj); - if (cursor->fences) - cursor->shared_count = cursor->fences->shared_count; - } else { - cursor->fences = NULL; - } + cursor->index = 0; + cursor->num_fences = 0; + cursor->fences = dma_resv_fences_list(cursor->obj); + if (cursor->fences) + cursor->num_fences = cursor->fences->num_fences; cursor->is_restarted = true; } /* Walk to the next not signaled fence and grab a reference to it */ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) { - struct dma_resv *obj = cursor->obj; + if (!cursor->fences) + return; do { /* Drop the reference from the previous round */ dma_fence_put(cursor->fence); - if (cursor->index == -1) { - cursor->fence = dma_resv_excl_fence(obj); - cursor->index++; - if (!cursor->fence) - continue; - - } else if (!cursor->fences || - cursor->index >= cursor->shared_count) { + if (cursor->index >= cursor->num_fences) { cursor->fence = NULL; break; - } else { - struct dma_resv_list *fences = cursor->fences; - unsigned int idx = cursor->index++; - - cursor->fence = rcu_dereference(fences->shared[idx]); } + + dma_resv_list_entry(cursor->fences, cursor->index++, + cursor->obj, &cursor->fence, + &cursor->fence_usage); cursor->fence = dma_fence_get_rcu(cursor->fence); - if (!cursor->fence || !dma_fence_is_signaled(cursor->fence)) + if (!cursor->fence) + break; + + if (!dma_fence_is_signaled(cursor->fence) && + cursor->usage >= cursor->fence_usage) break; } while (true); } @@ -522,15 +467,9 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor) dma_resv_assert_held(cursor->obj); cursor->index = 0; - if (cursor->usage >= DMA_RESV_USAGE_READ) - cursor->fences = dma_resv_shared_list(cursor->obj); - else - cursor->fences = NULL; - - fence = dma_resv_excl_fence(cursor->obj); - if (!fence) - 
fence = dma_resv_iter_next(cursor); + cursor->fences = dma_resv_fences_list(cursor->obj); + fence = dma_resv_iter_next(cursor); cursor->is_restarted = true; return fence; } @@ -545,17 +484,22 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first); */ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor) { - unsigned int idx; + struct dma_fence *fence; dma_resv_assert_held(cursor->obj); cursor->is_restarted = false; - if (!cursor->fences || cursor->index >= cursor->fences->shared_count) - return NULL; - idx = cursor->index++; - return rcu_dereference_protected(cursor->fences->shared[idx], - dma_resv_held(cursor->obj)); + do { + if (!cursor->fences || + cursor->index >= cursor->fences->num_fences) + return NULL; + + dma_resv_list_entry(cursor->fences, cursor->index++, + cursor->obj, &fence, &cursor->fence_usage); + } while (cursor->fence_usage > cursor->usage); + + return fence; } EXPORT_SYMBOL_GPL(dma_resv_iter_next); @@ -570,57 +514,43 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_iter cursor; struct dma_resv_list *list; - struct dma_fence *f, *excl; + struct dma_fence *f; dma_resv_assert_held(dst); list = NULL; - excl = NULL; dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, f) { if (dma_resv_iter_is_restarted(&cursor)) { dma_resv_list_free(list); - dma_fence_put(excl); - - if (cursor.shared_count) { - list = dma_resv_list_alloc(cursor.shared_count); - if (!list) { - dma_resv_iter_end(&cursor); - return -ENOMEM; - } - list->shared_count = 0; - - } else { - list = NULL; + list = dma_resv_list_alloc(cursor.num_fences); + if (!list) { + dma_resv_iter_end(&cursor); + return -ENOMEM; } - excl = NULL; + list->num_fences = 0; } dma_fence_get(f); - if (dma_resv_iter_usage(&cursor) == DMA_RESV_USAGE_WRITE) - excl = f; - else - RCU_INIT_POINTER(list->shared[list->shared_count++], f); + dma_resv_list_set(list, list->num_fences++, f, + dma_resv_iter_usage(&cursor)); } dma_resv_iter_end(&cursor); write_seqcount_begin(&dst->seq); - excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); - list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); + list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst)); write_seqcount_end(&dst->seq); dma_resv_list_free(list); - dma_fence_put(excl); - return 0; } EXPORT_SYMBOL(dma_resv_copy_fences); /** - * dma_resv_get_fences - Get an object's shared and exclusive + * dma_resv_get_fences - Get an object's fences * without the update side lock held * @obj: the reservation object * @usage: controls which fences to include, see enum dma_resv_usage. @@ -649,7 +579,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, while (*num_fences) dma_fence_put((*fences)[--(*num_fences)]); - count = cursor.shared_count + 1; + count = cursor.num_fences + 1; /* Eventually re-allocate the array */ *fences = krealloc_array(*fences, count, @@ -723,8 +653,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, EXPORT_SYMBOL_GPL(dma_resv_get_singleton); /** - * dma_resv_wait_timeout - Wait on reservation's objects - * shared and/or exclusive fences. + * dma_resv_wait_timeout - Wait on a reservation object's fences * @obj: the reservation object * @usage: controls which fences to include, see enum dma_resv_usage.
* @intr: if true, do interruptible wait diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 044b41f0bfd9..529d52a204cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,7 +34,6 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; - struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 76fd916424d6..8de283997769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -574,14 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); e->bo_va = amdgpu_vm_bo_find(vm, bo); - - if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { - e->chain = dma_fence_chain_alloc(); - if (!e->chain) { - r = -ENOMEM; - goto error_validate; - } - } } /* Move fence waiting after getting reservation lock of @@ -642,13 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) { - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } out: return r; } @@ -688,17 +675,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) { - struct amdgpu_bo_list_entry *e; - - amdgpu_bo_list_for_each_entry(e, parser->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } - + if (error && backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -1272,31 +1251,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_resv *resv = e->tv.bo->base.resv; - struct dma_fence_chain *chain = e->chain; - struct dma_resv_iter cursor; - struct dma_fence *fence; - - if (!chain) - continue; - - /* - * Temporary workaround dma_resv shortcommings by wrapping up - * the submission in a dma_fence_chain and add it as exclusive - * fence. - * - * TODO: Remove together with dma_resv rework. - */ - dma_resv_for_each_fence(&cursor, resv, - DMA_RESV_USAGE_WRITE, - fence) { - break; - } - dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1); - rcu_assign_pointer(resv->fence_excl, &chain->base); - e->chain = NULL; - } + /* Make sure all BOs are remembered as writers */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 0; ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 98dc5234b487..7bb7e7edbb6f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -99,8 +99,8 @@ static inline enum dma_resv_usage dma_resv_usage_rw(bool write) /** * struct dma_resv - a reservation object manages fences for a buffer * - * There are multiple uses for this, with sometimes slightly different rules in - * how the fence slots are used. + * This is a container for dma_fence objects which needs to handle multiple use + * cases. 
* * One use is to synchronize cross-driver access to a struct dma_buf, either for * dynamic buffer management or just to handle implicit synchronization between * multiple accesses. * @@ -130,47 +130,22 @@ struct dma_resv { * @seq: * * Sequence count for managing RCU read-side synchronization, allows - * read-only access to @fence_excl and @fence while ensuring we take a - * consistent snapshot. + * read-only access to @fences while ensuring we take a consistent + * snapshot. */ seqcount_ww_mutex_t seq; /** - * @fence_excl: + * @fences: * - * The exclusive fence, if there is one currently. + * Array of fences which were added to the dma_resv object * - * To guarantee that no fences are lost, this new fence must signal - * only after the previous exclusive fence has signalled. If - * semantically only a new access is added without actually treating the - * previous one as a dependency the exclusive fences can be strung - * together using struct dma_fence_chain. - * - * Note that actual semantics of what an exclusive or shared fence mean - * is defined by the user, for reservation objects shared across drivers - * see &dma_buf.resv. - */ - struct dma_fence __rcu *fence_excl; - - /** - * @fence: - * - * List of current shared fences. - * - * There are no ordering constraints of shared fences against the - * exclusive fence slot. If a waiter needs to wait for all access, it - * has to wait for both sets of fences to signal. - * - * A new fence is added by calling dma_resv_add_shared_fence(). Since - * this often needs to be done past the point of no return in command + * A new fence is added by calling dma_resv_add_fence(). Since this + * often needs to be done past the point of no return in command * submission it cannot fail, and therefore sufficient slots need to be * reserved by calling dma_resv_reserve_fences(). - * - * Note that actual semantics of what an exclusive or shared fence mean - * is defined by the user, for reservation objects shared across drivers - * see &dma_buf.resv. */ - struct dma_resv_list __rcu *fence; + struct dma_resv_list __rcu *fences; }; /** @@ -207,8 +182,8 @@ struct dma_resv_iter { /** @fences: the shared fences; private, *MUST* not dereference */ struct dma_resv_list *fences; - /** @shared_count: number of shared fences */ - unsigned int shared_count; + /** @num_fences: number of fences */ + unsigned int num_fences; /** @is_restarted: true if this is the first returned fence */ bool is_restarted; -- cgit From b29895e18304feb7e8afc6388db7ece60327b23c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 26 Nov 2021 14:12:42 +0100 Subject: dma-buf: add DMA_RESV_USAGE_KERNEL v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a usage for kernel submissions. Waiting for those is mandatory for dynamic DMA-bufs. As a precaution this patch also changes all occurrences where fences are added as part of memory management in TTM, VMWGFX and i915 to use the new value because it now becomes possible for drivers to ignore fences with the WRITE usage. v2: use "must" in documentation, fix whitespaces v3: separate out some driver changes and better document why some changes should still be part of this patch.
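To make the intended semantics concrete, here is a minimal sketch (not part of the patch; "bo", "clear_fence" and "ret" are hypothetical driver-side names) of a driver publishing a kernel memory-management fence, e.g. for a buffer clear:

	dma_resv_lock(bo->base.resv, NULL);
	ret = dma_resv_reserve_fences(bo->base.resv, 1);
	if (!ret)
		dma_resv_add_fence(bo->base.resv, clear_fence,
				   DMA_RESV_USAGE_KERNEL);
	dma_resv_unlock(bo->base.resv);

Every other access, including dynamic importers, then has to wait for at least the KERNEL fences before touching the buffer:

	dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL,
			      false, MAX_SCHEDULE_TIMEOUT);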
Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-5-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 2 +- drivers/dma-buf/st-dma-resv.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 2 +- include/linux/dma-resv.h | 24 +++++++++++++++++++++--- 7 files changed, 28 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 378d47e1cfea..f4860e5f2d8b 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -726,7 +726,7 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { - static const char *usage[] = { "write", "read" }; + static const char *usage[] = { "kernel", "write", "read" }; struct dma_resv_iter cursor; struct dma_fence *fence; diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d0f7c2bfd4f0..062b57d63fa6 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -296,7 +296,7 @@ int dma_resv(void) int r; spin_lock_init(&fence_lock); - for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ; + for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ; ++usage) { r = subtests(tests, (void *)(unsigned long)usage); if (r) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index f5f2b8b115ea..0512afdd20d8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -117,7 +117,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, i915_fence_timeout(i915), I915_FENCE_GFP); dma_resv_add_fence(obj->base.resv, &clflush->base.dma, - DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_KERNEL); dma_fence_work_commit(&clflush->base); /* * We must have successfully populated the pages(since we are diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d74f9eea855e..6bf3fb1c8045 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; } - dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 7a96a1db13a7..99deb45894f4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -508,7 +508,7 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, return ret; dma_resv_add_fence(&ghost_obj->base._resv, fence, - DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_KERNEL); /** * If we're not moving to fixed memory, the TTM object @@ -562,7 +562,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); int ret = 0; - dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); if (!evict) ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); else if (!from->use_tt && pipeline) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index bec50223efe5..408ede1f967f 100644 --- 
a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -759,7 +759,7 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, ret = dma_resv_reserve_fences(bo->base.resv, 1); if (!ret) dma_resv_add_fence(bo->base.resv, &fence->base, - DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_KERNEL); else /* Last resort fallback when we are OOM */ dma_fence_wait(&fence->base, false); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 7bb7e7edbb6f..a749f229ae91 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -55,11 +55,29 @@ struct dma_resv_list; * This enum describes the different use cases for a dma_resv object and * controls which fences are returned when queried. * - * An important fact is that there is the order WRITE < READ and when the - * dma_resv object is asked for fences for one use case the fences for the - * lower use case are returned as well. + * An important fact is that there is the order KERNEL < WRITE < READ and + * when the dma_resv object is asked for fences for one use case the fences + * for the lower use case are returned as well. -- cgit From: Christian König Date: Tue, 9 Nov 2021 11:08:18 +0100 Subject: dma-buf: add DMA_RESV_USAGE_BOOKKEEP v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a usage for submissions that are independent of implicit sync but still interesting for memory management. v2: cleanup the kerneldoc a bit v3: separate amdgpu changes from this Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-10-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 4 ++-- drivers/dma-buf/st-dma-resv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/qxl/qxl_debugfs.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_mn.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 14 +++++++------- include/linux/dma-resv.h | 13 ++++++++++++- 14 files changed, 35 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f4860e5f2d8b..5b64aa554c36 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -520,7 +520,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) list = NULL; - dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, f) { if (dma_resv_iter_is_restarted(&cursor)) { @@ -726,7 +726,7 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { - static const char *usage[] = { "kernel", "write", "read" }; + static const char *usage[] = { "kernel", "write", "read", "bookkeep" }; struct dma_resv_iter cursor; struct dma_fence *fence; diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index 062b57d63fa6..8ace9e84c845 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -296,7 +296,7 @@ int dma_resv(void) int r; spin_lock_init(&fence_lock); - for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ; + for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_BOOKKEEP; ++usage) { r = subtests(tests, (void *)(unsigned long)usage); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 65998cbcd7f7..4ba4b54092f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -111,7 +111,7 @@ void
amdgpu_pasid_free_delayed(struct dma_resv *resv, struct dma_fence *fence; int r; - r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence); if (r) goto fallback; @@ -139,7 +139,7 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 86f5248676b0..b86c0b8252a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,7 +75,7 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 744e144e5fc2..11c46b3e4c60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -260,7 +260,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return -EINVAL; /* TODO: Use DMA_RESV_USAGE_READ here */ - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db5066e74b4..49ffad312d5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1345,7 +1345,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * be resident to run successfully */ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, - DMA_RESV_USAGE_READ, f) { + DMA_RESV_USAGE_BOOKKEEP, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a0376fd36a82..5277c10d901d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2059,7 +2059,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2665,7 +2665,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) return false; /* Try to block ongoing updates */ @@ -2846,7 +2846,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_READ, + DMA_RESV_USAGE_BOOKKEEP, true, timeout); if (timeout <= 0) return timeout; diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index a200d3e66573..4115a222a853 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = READ_ONCE(obj->mm.region); #ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) && + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) && i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 644fe237601c..094f06b4ce33 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -86,7 +86,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true; /* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false, + r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 33e5889d6608..2d9ed3b94574 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -62,7 +62,7 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) int rel = 0; dma_resv_iter_begin(&cursor, bo->tbo.base.resv, - DMA_RESV_USAGE_READ); + DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) rel = 0; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 6616a828f40b..8c01a7f0e027 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -163,7 +163,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ r = dma_resv_wait_timeout(robj->tbo.base.resv, - DMA_RESV_USAGE_READ, + DMA_RESV_USAGE_BOOKKEEP, true, 30 * HZ); if (!r) r = -EBUSY; diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 68ebeb1bdfff..29fe8423bd90 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -66,7 +66,7 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, return true; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6bf3fb1c8045..360f980c7e10 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,7 +223,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ); + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); @@ -252,7 +252,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, struct dma_resv *resv = &bo->base._resv; int ret; - if 
(dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ)) + if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) ret = 0; else ret = -EBUSY; @@ -264,7 +264,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, dma_resv_unlock(bo->base.resv); spin_unlock(&bo->bdev->lru_lock); - lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, interruptible, 30 * HZ); @@ -369,7 +369,7 @@ static void ttm_bo_release(struct kref *kref) * fences block for the BO to become idle */ dma_resv_wait_timeout(bo->base.resv, - DMA_RESV_USAGE_READ, false, + DMA_RESV_USAGE_BOOKKEEP, false, 30 * HZ); } @@ -380,7 +380,7 @@ static void ttm_bo_release(struct kref *kref) ttm_mem_io_free(bdev, bo->resource); } - if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) || + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); @@ -1046,13 +1046,13 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, long timeout = 15 * HZ; if (no_wait) { - if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP)) return 0; else return -EBUSY; } - timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, interruptible, timeout); if (timeout < 0) return timeout; diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index a749f229ae91..1db759eacc98 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -55,7 +55,7 @@ struct dma_resv_list; * This enum describes the different use cases for a dma_resv object and * controls which fences are returned when queried. * - * An important fact is that there is the order KERNEL < WRITE < READ and + * An important fact is that there is the order KERNEL < WRITE < READ < BOOKKEEP -- cgit From: Christian König Date: Mon, 4 Apr 2022 14:58:37 +0200 Subject: dma-buf: drop seq count based update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should be possible now since we don't have the distinction between exclusive and shared fences any more. The only possible pitfall is that a dma_fence would be reused during the RCU grace period, but even that could be handled with a single extra check.
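In short, the unlocked iteration now detects concurrent updates by comparing the RCU protected fence array pointer instead of a seqcount. A condensed sketch of the dma_resv_iter_first_unlocked() logic after this change (illustrative only, not the literal kernel code):

	rcu_read_lock();
	do {
		/* snapshot the current fence array */
		cursor->fences = dma_resv_fences_list(cursor->obj);
		/* ... walk the snapshotted entries ... */
	/* retry if a writer installed a new array in the meantime */
	} while (dma_resv_fences_list(cursor->obj) != cursor->fences);
	rcu_read_unlock();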
Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-15-christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 33 ++++++++++++--------------------- drivers/dma-buf/st-dma-resv.c | 2 +- include/linux/dma-resv.h | 12 ------------ 3 files changed, 13 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5b64aa554c36..0cce6e4ec946 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -133,7 +133,6 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); - seqcount_ww_mutex_init(&obj->seq, &obj->lock); RCU_INIT_POINTER(obj->fences, NULL); } @@ -292,28 +291,24 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, fobj = dma_resv_fences_list(obj); count = fobj->num_fences; - write_seqcount_begin(&obj->seq); - for (i = 0; i < count; ++i) { enum dma_resv_usage old_usage; dma_resv_list_entry(fobj, i, obj, &old, &old_usage); if ((old->context == fence->context && old_usage >= usage) || - dma_fence_is_signaled(old)) - goto replace; + dma_fence_is_signaled(old)) { + dma_resv_list_set(fobj, i, fence, usage); + dma_fence_put(old); + return; + } } BUG_ON(fobj->num_fences >= fobj->max_fences); - old = NULL; count++; -replace: dma_resv_list_set(fobj, i, fence, usage); /* pointer update must be visible before we extend the num_fences */ smp_store_mb(fobj->num_fences, count); - - write_seqcount_end(&obj->seq); - dma_fence_put(old); } EXPORT_SYMBOL(dma_resv_add_fence); @@ -341,7 +336,6 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, dma_resv_assert_held(obj); list = dma_resv_fences_list(obj); - write_seqcount_begin(&obj->seq); for (i = 0; list && i < list->num_fences; ++i) { struct dma_fence *old; @@ -352,14 +346,12 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, dma_resv_list_set(list, i, replacement, usage); dma_fence_put(old); } - write_seqcount_end(&obj->seq); } EXPORT_SYMBOL(dma_resv_replace_fences); /* Restart the unlocked iteration by initializing the cursor object. 
*/ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) { - cursor->seq = read_seqcount_begin(&cursor->obj->seq); cursor->index = 0; cursor->num_fences = 0; cursor->fences = dma_resv_fences_list(cursor->obj); @@ -388,8 +380,10 @@ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) cursor->obj, &cursor->fence, &cursor->fence_usage); cursor->fence = dma_fence_get_rcu(cursor->fence); - if (!cursor->fence) - break; + if (!cursor->fence) { + dma_resv_iter_restart_unlocked(cursor); + continue; + } if (!dma_fence_is_signaled(cursor->fence) && cursor->usage >= cursor->fence_usage) @@ -415,7 +409,7 @@ struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor) do { dma_resv_iter_restart_unlocked(cursor); dma_resv_iter_walk_unlocked(cursor); - } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + } while (dma_resv_fences_list(cursor->obj) != cursor->fences); rcu_read_unlock(); return cursor->fence; @@ -438,13 +432,13 @@ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor) rcu_read_lock(); cursor->is_restarted = false; - restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq); + restart = dma_resv_fences_list(cursor->obj) != cursor->fences; do { if (restart) dma_resv_iter_restart_unlocked(cursor); dma_resv_iter_walk_unlocked(cursor); restart = true; - } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + } while (dma_resv_fences_list(cursor->obj) != cursor->fences); rcu_read_unlock(); return cursor->fence; @@ -540,10 +534,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) } dma_resv_iter_end(&cursor); - write_seqcount_begin(&dst->seq); list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst)); - write_seqcount_end(&dst->seq); - dma_resv_list_free(list); return 0; } diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index 8ace9e84c845..813779e3c9be 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -217,7 +217,7 @@ static int test_for_each_unlocked(void *arg) if (r == -ENOENT) { r = -EINVAL; /* That should trigger an restart */ - cursor.seq--; + cursor.fences = (void*)~0; } else if (r == -EINVAL) { r = 0; } diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 1db759eacc98..c8ccbc94d5d2 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -155,15 +155,6 @@ struct dma_resv { */ struct ww_mutex lock; - /** - * @seq: - * - * Sequence count for managing RCU read-side synchronization, allows - * read-only access to @fences while ensuring we take a consistent - * snapshot. - */ - seqcount_ww_mutex_t seq; - /** * @fences: * @@ -202,9 +193,6 @@ struct dma_resv_iter { /** @fence_usage: the usage of the current fence */ enum dma_resv_usage fence_usage; - /** @seq: sequence number to check for modifications */ - unsigned int seq; - /** @index: index into the shared fences */ unsigned int index; -- cgit From e84815cbbc767617221e6891e77f2486c9199dfa Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 7 Apr 2022 10:20:55 +0200 Subject: seqlock: drop seqcount_ww_mutex_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daniel pointed out that this series removes the last user of seqcount_ww_mutex_t, so let's drop this. 
Signed-off-by: Christian König Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: linux-kernel@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-16-christian.koenig@amd.com --- include/linux/seqlock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 37ded6b8fee6..3926e9027947 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -164,7 +163,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * static initializer or init function. This enables lockdep to validate * that the write side critical section is properly serialized. * - * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex. + * LOCKNAME: raw_spinlock, spinlock, rwlock or mutex */ /* @@ -184,7 +183,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) -#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex) /* * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers @@ -277,7 +275,6 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_s SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) -SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t @@ -304,8 +301,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ - __seqprop_case((s), mutex, prop), \ - __seqprop_case((s), ww_mutex, prop)) + __seqprop_case((s), mutex, prop)) #define seqprop_ptr(s) __seqprop(s, ptr) #define seqprop_sequence(s) __seqprop(s, sequence) -- cgit From 6b2060cf9138a2cd5f3468a949d3869abed049ef Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 5 Apr 2022 23:03:26 +0200 Subject: fb: Delete fb_info->queue It was only used by fbcon, and that now switched to its own, private work. 
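The replacement pattern, a work item owned by fbcon itself rather than a field borrowed from the shared fb_info, looks roughly like the following sketch; the names here are hypothetical, not the actual fbcon code:

#include <linux/workqueue.h>

static void deferred_con_fn(struct work_struct *work)
{
	/* deferred console processing would run here */
}

/* private to this file, not embedded in a shared structure */
static DECLARE_WORK(deferred_con_work, deferred_con_fn);

static void kick_deferred_con(void)
{
	schedule_work(&deferred_con_work); /* system workqueue */
}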
Acked-by: Sam Ravnborg Acked-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220405210335.3434130-9-daniel.vetter@ffwll.ch --- include/linux/fb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 9a77ab615c36..f95da1af9ff6 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -450,7 +450,6 @@ struct fb_info { struct fb_var_screeninfo var; /* Current var */ struct fb_fix_screeninfo fix; /* Current fix */ struct fb_monspecs monspecs; /* Current Monitor specs */ - struct work_struct queue; /* Framebuffer event queue */ struct fb_pixmap pixmap; /* Image hardware mapper */ struct fb_pixmap sprite; /* Cursor hardware mapper */ struct fb_cmap cmap; /* Current cmap */ -- cgit From 6264f58ca0e54e41d63c2d00334a48bac28fbf30 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Apr 2022 14:37:54 -0700 Subject: net: extract a few internals from netdevice.h There are a number of functions and static variables used under net/core/ but not from the outside. We currently dump most of them into netdevice.h. That's bad for many reasons: - netdevice.h is very cluttered, hard to figure out what the APIs are; - netdevice.h is very long; - we have to touch netdevice.h more, which causes expensive incremental builds. Create a header under net/core/ and move some declarations. The new header is also a bit of a catch-all, but that's fine; if we create more specific headers people will likely over-think where their declarations fit best, and end up putting them in netdevice.h again. More work should be done on splitting netdevice.h into more targeted headers, but that'd be more time-consuming, so small steps.
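For illustration, the private-header pattern this commit applies boils down to the following sketch; all names are invented (the real header is net/core/dev.h):

/* net/example/internal.h */
#ifndef _NET_EXAMPLE_INTERNAL_H
#define _NET_EXAMPLE_INTERNAL_H

struct net_device; /* a forward declaration is enough in a header */

/* shared between net/example/*.c files only, never installed under
 * include/linux/, so no other subsystem can grow a dependency on it
 */
int example_validate(struct net_device *dev);
extern int example_debug_level;

#endif

/* net/example/core.c */
#include "internal.h" /* relative include keeps the API private */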
Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 72 +----------------------------------- net/core/dev.c | 1 + net/core/dev.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev_addr_lists.c | 2 + net/core/dev_ioctl.c | 2 + net/core/link_watch.c | 1 + net/core/net-procfs.c | 2 + net/core/net-sysfs.c | 1 + net/core/rtnetlink.c | 2 + net/core/sock.c | 2 + net/core/sysctl_net_core.c | 2 + 11 files changed, 108 insertions(+), 70 deletions(-) create mode 100644 net/core/dev.h (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7b2a0b739684..7e7b2a72e473 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -59,7 +59,8 @@ struct dsa_port; struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; - +struct netdev_name_node; +struct sd_flow_limit; struct sfp_bus; /* 802.11 specific */ struct wireless_dev; @@ -1020,16 +1021,6 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; -struct netdev_name_node { - struct hlist_node hlist; - struct list_head list; - struct net_device *dev; - const char *name; -}; - -int netdev_name_node_alt_create(struct net_device *dev, const char *name); -int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); - struct netdev_net_notifier { struct list_head list; struct notifier_block *nb; @@ -2975,7 +2966,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); @@ -3034,19 +3024,6 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } -#ifdef CONFIG_NET_FLOW_LIMIT -#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ -struct sd_flow_limit { - u64 count; - unsigned int num_buckets; - unsigned int history_head; - u16 history[FLOW_LIMIT_HISTORY]; - u8 buckets[]; -}; - -extern int netdev_flow_limit_table_len; -#endif /* CONFIG_NET_FLOW_LIMIT */ - /* * Incoming packets are placed on per-CPU queues */ @@ -3770,7 +3747,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); -int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, @@ -3782,13 +3758,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, return __dev_change_net_namespace(dev, net, pat, 0); } int __dev_set_mtu(struct net_device *, int); -int dev_validate_mtu(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); -int dev_change_tx_queue_len(struct net_device *, unsigned long); -void dev_set_group(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, @@ -3796,24 +3766,13 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, int 
dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); -int dev_change_carrier(struct net_device *, bool new_carrier); -int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_item_id *ppid); -int dev_get_phys_port_name(struct net_device *dev, - char *name, size_t len); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); @@ -3898,13 +3857,6 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); -extern int netdev_budget; -extern unsigned int netdev_budget_usecs; - -/* Used by rtnetlink.c:__rtnl_unlock()/rtnl_unlock() */ -extern struct list_head net_todo_list; -void netdev_run_todo(void); - static inline void __dev_put(struct net_device *dev) { if (dev) { @@ -4021,10 +3973,7 @@ static inline void dev_replace_track(struct net_device *odev, * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media. 
*/ - -void linkwatch_init_dev(struct net_device *dev); void linkwatch_fire_event(struct net_device *dev); -void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4470,9 +4419,6 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); -void dev_addr_flush(struct net_device *dev); -int dev_addr_init(struct net_device *dev); -void dev_addr_check(struct net_device *dev); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); @@ -4562,7 +4508,6 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); -void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); @@ -4580,11 +4525,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; -extern int netdev_tstamp_prequeue; -extern int netdev_unregister_timeout_secs; -extern int weight_p; -extern int dev_weight_rx_bias; -extern int dev_weight_tx_bias; extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; @@ -4772,12 +4712,6 @@ static inline void netdev_rx_csum_fault(struct net_device *dev, void net_enable_timestamp(void); void net_disable_timestamp(void); -#ifdef CONFIG_PROC_FS -int __init dev_proc_init(void); -#else -#define dev_proc_init() 0 -#endif - static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, bool more) @@ -4813,8 +4747,6 @@ extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -void linkwatch_run_queue(void); - static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { diff --git a/net/core/dev.c b/net/core/dev.c index 8755ad71be6c..f00d29856b43 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -151,6 +151,7 @@ #include #include +#include "dev.h" #include "net-sysfs.h" diff --git a/net/core/dev.h b/net/core/dev.h new file mode 100644 index 000000000000..27923df00637 --- /dev/null +++ b/net/core/dev.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_CORE_DEV_H +#define _NET_CORE_DEV_H + +#include + +struct net; +struct net_device; +struct netdev_bpf; +struct netdev_phys_item_id; +struct netlink_ext_ack; + +/* Random bits of netdevice that don't need to be exposed */ +#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ +struct sd_flow_limit { + u64 count; + unsigned int num_buckets; + unsigned int history_head; + u16 history[FLOW_LIMIT_HISTORY]; + u8 buckets[]; +}; + +extern int netdev_flow_limit_table_len; + +#ifdef CONFIG_PROC_FS +int __init dev_proc_init(void); +#else +#define dev_proc_init() 0 +#endif + +void linkwatch_init_dev(struct net_device *dev); +void linkwatch_forget_dev(struct net_device *dev); +void linkwatch_run_queue(void); + +void dev_addr_flush(struct net_device *dev); +int dev_addr_init(struct net_device *dev); +void dev_addr_check(struct net_device *dev); + +/* sysctls not referred to from outside net/core/ */ +extern int 
netdev_budget; +extern unsigned int netdev_budget_usecs; + +extern int netdev_tstamp_prequeue; +extern int netdev_unregister_timeout_secs; +extern int weight_p; +extern int dev_weight_rx_bias; +extern int dev_weight_tx_bias; + +/* rtnl helpers */ +extern struct list_head net_todo_list; +void netdev_run_todo(void); + +/* netdev management, shared between various uAPI entry points */ +struct netdev_name_node { + struct hlist_node hlist; + struct list_head list; + struct net_device *dev; + const char *name; +}; + +int netdev_get_name(struct net *net, char *name, int ifindex); +int dev_change_name(struct net_device *dev, const char *newname); + +int netdev_name_node_alt_create(struct net_device *dev, const char *name); +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); + +int dev_validate_mtu(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); +int dev_set_mtu_ext(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); + +int dev_get_phys_port_id(struct net_device *dev, + struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); + +int dev_change_proto_down(struct net_device *dev, bool proto_down); +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value); + +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, int expected_fd, u32 flags); + +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); +void dev_set_group(struct net_device *dev, int new_group); +int dev_change_carrier(struct net_device *dev, bool new_carrier); + +void __dev_set_rx_mode(struct net_device *dev); + +#endif diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bead38ca50bd..baa63dee2829 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -12,6 +12,8 @@ #include #include +#include "dev.h" + /* * General list handling functions */ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1b807d119da5..4f6be442ae7e 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -10,6 +10,8 @@ #include #include +#include "dev.h" + /* * Map an interface index to its name (SIOCGIFNAME) */ diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 95098d1a49bd..a244d3bade7d 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -18,6 +18,7 @@ #include #include +#include "dev.h" enum lw_bits { LW_URGENT = 0, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 88cc0ad7d386..1ec23bf8b05c 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -4,6 +4,8 @@ #include #include +#include "dev.h" + #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9cbc1c8289bc..4980c3a50475 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include #include +#include "dev.h" #include "net-sysfs.h" #ifdef CONFIG_SYSFS diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0e4502d641eb..4041b3e2e8ec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -54,6 +54,8 @@ #include #include +#include "dev.h" + #define RTNL_MAX_TYPE 50 #define RTNL_SLAVE_MAX_TYPE 40 diff --git a/net/core/sock.c b/net/core/sock.c index 1180a0cb0110..7000403eaeb2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -141,6 +141,8 @@ #include +#include "dev.h" + static 
DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7123fe7feeac..8295e5877eb3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,8 @@ #include #include +#include "dev.h" + static int two = 2; static int three = 3; static int int_3600 = 3600; -- cgit From 794c24e9921f32ded4422833a990ccf11dc3c00e Mon Sep 17 00:00:00 2001 From: Jeffrey Ji Date: Wed, 6 Apr 2022 17:26:00 +0000 Subject: net-core: rx_otherhost_dropped to core_stats Increment the rx_otherhost_dropped counter when a packet is dropped due to a mismatched destination MAC address. An example of when this drop can occur is when manually crafting raw packets that will be consumed by a user space application via a tap device. For testing purposes, local traffic was generated using trafgen for the client and netcat to start a server. Tested: Created 2 netns, sent 1 packet using trafgen from 1 to the other with "{eth(daddr=$INCORRECT_MAC...}", verified that iproute2 showed the counter was incremented. (Also had to modify iproute2 to show the stat, additional patch for that coming next.) Signed-off-by: Jeffrey Ji Reviewed-by: Brian Vazquez Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220406172600.1141083-1-jeffreyjilinux@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ include/uapi/linux/if_link.h | 5 +++++ net/core/dev.c | 1 + net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 1 + 5 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e7b2a72e473..28ea4f8269d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -203,6 +203,7 @@ struct net_device_core_stats { local_t rx_dropped; local_t tx_dropped; local_t rx_nohandler; + local_t rx_otherhost_dropped; } __aligned(4 * sizeof(local_t)); #include @@ -3837,6 +3838,7 @@ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) +DEV_CORE_STATS_INC(rx_otherhost_dropped) static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc284c048e69..d1e600816b82 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -211,6 +211,9 @@ struct rtnl_link_stats { * @rx_nohandler: Number of packets received on the interface * but dropped by the networking stack because the device is * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. */ struct rtnl_link_stats64 { __u64 rx_packets; @@ -243,6 +246,8 @@ struct rtnl_link_stats64 { __u64 rx_compressed; __u64 tx_compressed; __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; }; /* Subset of link stats useful for in-HW collection. 
Meaning of the fields is as diff --git a/net/core/dev.c b/net/core/dev.c index f00d29856b43..e027410e861b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10358,6 +10358,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, storage->rx_dropped += local_read(&core_stats->rx_dropped); storage->tx_dropped += local_read(&core_stats->tx_dropped); storage->rx_nohandler += local_read(&core_stats->rx_nohandler); + storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped); } } return storage; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 95f7bb052784..b1165f717cd1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) * that it receives, do not try to analyse it. */ if (skb->pkt_type == PACKET_OTHERHOST) { + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5b5ea35635f9..b4880c7c84eb 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -150,6 +150,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, struct inet6_dev *idev; if (skb->pkt_type == PACKET_OTHERHOST) { + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); kfree_skb(skb); return NULL; } -- cgit From 85ebb1a6bd62147ebcfa70500d513331a8daf9e0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Apr 2022 13:35:52 +0300 Subject: gpiolib: Introduce for_each_gpiochip_node() loop helper Introduce for_each_gpiochip_node() loop helper which iterates over the GPIO controller child nodes of a given device. Signed-off-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 98c93510640e..bfc91f122d5f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,13 +3,14 @@ #define __LINUX_GPIO_DRIVER_H #include -#include #include #include #include #include #include #include +#include +#include struct gpio_desc; struct of_phandle_args; @@ -750,4 +751,8 @@ static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, } #endif /* CONFIG_GPIOLIB */ +#define for_each_gpiochip_node(dev, child) \ + device_for_each_child_node(dev, child) \ + if (!fwnode_property_present(child, "gpio-controller")) {} else + #endif /* __LINUX_GPIO_DRIVER_H */ -- cgit From 0b19dde90ad004592792a928c75e80612be3e2e8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Apr 2022 13:35:53 +0300 Subject: gpiolib: Introduce gpiochip_node_count() helper The gpiochip_node_count() helper iterates over the device child nodes that have the "gpio-controller" property set. It returns the number of such nodes under a given device. 
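A brief usage sketch for the two helpers introduced by these commits; the probe function and the device it receives are hypothetical:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/gpio/driver.h>

static int example_probe(struct device *dev)
{
	struct fwnode_handle *child;
	unsigned int n = gpiochip_node_count(dev);

	if (!n)
		return -ENODEV; /* no "gpio-controller" child nodes */

	for_each_gpiochip_node(dev, child) {
		/* allocate and register one gpio_chip per child node */
	}

	return 0;
}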
Signed-off-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index bfc91f122d5f..12de0b22b4ef 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -755,4 +755,15 @@ static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, device_for_each_child_node(dev, child) \ if (!fwnode_property_present(child, "gpio-controller")) {} else +static inline unsigned int gpiochip_node_count(struct device *dev) +{ + struct fwnode_handle *child; + unsigned int count = 0; + + for_each_gpiochip_node(dev, child) + count++; + + return count; +} + #endif /* __LINUX_GPIO_DRIVER_H */ -- cgit From 2fa33b3518a8da0a5345b7ae0064223b5e4e156f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 Apr 2022 11:25:36 +0300 Subject: net/mlx5_fpga: Drop INNOVA IPsec support Mellanox INNOVA IPsec cards went EOL in Nov 2019 [1]. As such, the code is unmaintained, untested, and not in use by any upstream/distro-oriented customers. In order to reduce code complexity, drop the kernel code. [1] https://network.nvidia.com/related-docs/eol/LCR-000535.pdf Link: https://lore.kernel.org/r/2afe88ec5020a491079eacf6fe3c89b64d65195c.1649232994.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 14 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 - .../net/ethernet/mellanox/mlx5/core/accel/ipsec.c | 6 +- .../net/ethernet/mellanox/mlx5/core/en/params.c | 7 - .../mellanox/mlx5/core/en_accel/ipsec_stats.c | 17 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 - drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 54 +- .../net/ethernet/mellanox/mlx5/core/fpga/core.h | 2 - .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 1582 -------------------- .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.h | 62 - drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 - include/linux/mlx5/mlx5_ifc_fpga.h | 148 -- 13 files changed, 5 insertions(+), 1906 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 0c82b376416b..e34e64a9ff4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -22,7 +22,6 @@ config MLX5_ACCEL config MLX5_FPGA bool "Mellanox Technologies Innova support" depends on MLX5_CORE - select MLX5_ACCEL help Build support for the Innova family of network cards by Mellanox Technologies. Innova network cards are comprised of a ConnectX chip @@ -143,17 +142,6 @@ config MLX5_CORE_IPOIB help MLX5 IPoIB offloads & acceleration support. -config MLX5_FPGA_IPSEC - bool "Mellanox Technologies IPsec Innova support" - depends on MLX5_CORE - depends on MLX5_FPGA - help - Build IPsec support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers.
- config MLX5_IPSEC bool "Mellanox Technologies IPsec Connect-X support" depends on MLX5_CORE_EN @@ -171,7 +159,7 @@ config MLX5_EN_IPSEC depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD - depends on MLX5_FPGA_IPSEC || MLX5_IPSEC + depends on MLX5_IPSEC help Build support for IPsec cryptography-offload acceleration in the NIC. Note: Support for hardware with this capability needs to be selected diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 44ff1623707a..e50361656305 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -89,7 +89,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # Accelerations & FPGA # mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o -mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/ipsec.o mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 09f5ce97af46..45296ec2d055 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -35,7 +35,6 @@ #include "accel/ipsec.h" #include "mlx5_core.h" -#include "fpga/ipsec.h" #include "accel/ipsec_offload.h" void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) @@ -43,10 +42,7 @@ void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) const struct mlx5_accel_ipsec_ops *ipsec_ops; int err = 0; - ipsec_ops = (mlx5_ipsec_offload_ops(mdev)) ? - mlx5_ipsec_offload_ops(mdev) : - mlx5_fpga_ipsec_ops(mdev); - + ipsec_ops = mlx5_ipsec_offload_ops(mdev); if (!ipsec_ops || !ipsec_ops->init) { mlx5_core_dbg(mdev, "IPsec ops is not supported\n"); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 7f76c4f9389b..ebb12817b795 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,7 +6,6 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "accel/ipsec.h" -#include "fpga/ipsec.h" static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -327,9 +326,6 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) return false; - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; - if (params->xdp_prog) { /* XSK params are not considered here. 
If striding RQ is in use, * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will @@ -423,9 +419,6 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, int max_mtu; int i; - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 5cb936541b9e..1607c305d3ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -38,7 +38,6 @@ #include "accel/ipsec.h" #include "fpga/sdk.h" #include "en_accel/ipsec.h" -#include "fpga/ipsec.h" static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) }, @@ -105,7 +104,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw) { - return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; + return 0; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) @@ -121,25 +120,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw) { - unsigned int i; - - if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) - for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_ipsec_hw_stats_desc[i].format); - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw) { - int i; - - if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) - for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats, - mlx5e_ipsec_hw_stats_desc, - i); return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 89a85030b0eb..0a303879d0f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -67,7 +67,6 @@ #include "en/ptp.h" #include "qos.h" #include "en/trap.h" -#include "fpga/ipsec.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -4467,12 +4466,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - if (mlx5_fpga_is_ipsec_device(priv->mdev)) { - netdev_warn(netdev, - "XDP is not available on Innova cards with IPsec support\n"); - return -EINVAL; - } - new_params = priv->channels.params; new_params.xdp_prog = prog; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 84cebf4c5ada..a180c80e9f68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -49,7 +49,6 @@ #include "en/rep/tc.h" #include "ipoib/ipoib.h" #include "accel/ipsec.h" -#include "fpga/ipsec.h" #include "en_accel/ipsec_rxtx.h" #include "en_accel/ktls_txrx.h" #include "en/xdp.h" @@ -2384,46 +2383,6 @@ const struct mlx5e_rx_handlers mlx5i_rx_handlers = { }; #endif /* CONFIG_MLX5_CORE_IPOIB */ -#ifdef CONFIG_MLX5_EN_IPSEC - -static void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) -{ - struct mlx5_wq_cyc *wq = &rq->wqe.wq; - struct mlx5e_wqe_frag_info *wi; - struct sk_buff *skb; - u32 cqe_bcnt; - u16 ci; - - ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); - wi = get_frag(rq, ci); - cqe_bcnt = 
be32_to_cpu(cqe->byte_cnt); - - if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - rq->stats->wqe_err++; - goto wq_free_wqe; - } - - skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, - mlx5e_skb_from_cqe_linear, - mlx5e_skb_from_cqe_nonlinear, - rq, cqe, wi, cqe_bcnt); - if (unlikely(!skb)) /* a DROP, save the page-reuse checks */ - goto wq_free_wqe; - - skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt); - if (unlikely(!skb)) - goto wq_free_wqe; - - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - napi_gro_receive(rq->cq.napi, skb); - -wq_free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); - mlx5_wq_cyc_pop(wq); -} - -#endif /* CONFIG_MLX5_EN_IPSEC */ - int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk) { struct net_device *netdev = rq->netdev; @@ -2440,10 +2399,6 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - if (mlx5_fpga_is_ipsec_device(mdev)) { - netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); - return -EINVAL; - } if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo; if (!rq->handle_rx_cqe) { @@ -2467,14 +2422,7 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool mlx5e_skb_from_cqe_nonlinear; rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - -#ifdef CONFIG_MLX5_EN_IPSEC - if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) && - priv->ipsec) - rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe; - else -#endif - rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; if (!rq->handle_rx_cqe) { netdev_err(netdev, "RX handler of RQ is not set\n"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index e9e72d260681..750c32050165 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -57,8 +57,6 @@ struct mlx5_fpga_device { u32 mkey; struct mlx5_uars_page *uar; } conn_res; - - struct mlx5_fpga_ipsec *ipsec; }; #define mlx5_fpga_dbg(__adev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c deleted file mode 100644 index 8ec148010d62..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ /dev/null @@ -1,1582 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include -#include -#include -#include - -#include "mlx5_core.h" -#include "fs_cmd.h" -#include "fpga/ipsec.h" -#include "fpga/sdk.h" -#include "fpga/core.h" - -enum mlx5_fpga_ipsec_cmd_status { - MLX5_FPGA_IPSEC_CMD_PENDING, - MLX5_FPGA_IPSEC_CMD_SEND_FAIL, - MLX5_FPGA_IPSEC_CMD_COMPLETE, -}; - -struct mlx5_fpga_ipsec_cmd_context { - struct mlx5_fpga_dma_buf buf; - enum mlx5_fpga_ipsec_cmd_status status; - struct mlx5_ifc_fpga_ipsec_cmd_resp resp; - int status_code; - struct completion complete; - struct mlx5_fpga_device *dev; - struct list_head list; /* Item in pending_cmds */ - u8 command[]; -}; - -struct mlx5_fpga_esp_xfrm; - -struct mlx5_fpga_ipsec_sa_ctx { - struct rhash_head hash; - struct mlx5_ifc_fpga_ipsec_sa hw_sa; - u32 sa_handle; - struct mlx5_core_dev *dev; - struct mlx5_fpga_esp_xfrm *fpga_xfrm; -}; - -struct mlx5_fpga_esp_xfrm { - unsigned int num_rules; - struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; - struct mutex lock; /* xfrm lock */ - struct mlx5_accel_esp_xfrm accel_xfrm; -}; - -struct mlx5_fpga_ipsec_rule { - struct rb_node node; - struct fs_fte *fte; - struct mlx5_fpga_ipsec_sa_ctx *ctx; -}; - -static const struct rhashtable_params rhash_sa = { - /* Keep out "cmd" field from the key as it's - * value is not constant during the lifetime - * of the key object. 
- */ - .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - - sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), - .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) + - sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), - .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), - .automatic_shrinking = true, - .min_size = 1, -}; - -struct mlx5_fpga_ipsec { - struct mlx5_fpga_device *fdev; - struct list_head pending_cmds; - spinlock_t pending_cmds_lock; /* Protects pending_cmds */ - u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; - struct mlx5_fpga_conn *conn; - - struct notifier_block fs_notifier_ingress_bypass; - struct notifier_block fs_notifier_egress; - - /* Map hardware SA --> SA context - * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx) - * We will use this hash to avoid SAs duplication in fpga which - * aren't allowed - */ - struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */ - struct mutex sa_hash_lock; - - /* Tree holding all rules for this fpga device - * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id) - */ - struct rb_root rules_rb; - struct mutex rules_rb_lock; /* rules lock */ - - struct ida halloc; -}; - -bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) - return false; - - if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC) - return false; - - return true; -} - -static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, - u8 status) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - - if (status) { - context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context, - buf); - mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", - status); - context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL; - complete(&context->complete); - } -} - -static inline -int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome) -{ - switch (syndrome) { - case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS: - return 0; - case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE: - return -EEXIST; - case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST: - return -EINVAL; - case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: - return -EIO; - } - return -EIO; -} - -static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) -{ - struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data; - struct mlx5_fpga_ipsec_cmd_context *context; - enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome; - struct mlx5_fpga_device *fdev = cb_arg; - unsigned long flags; - - if (buf->sg[0].size < sizeof(*resp)) { - mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n", - buf->sg[0].size, sizeof(*resp)); - return; - } - - mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n", - ntohl(resp->syndrome)); - - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, - struct mlx5_fpga_ipsec_cmd_context, - list); - if (context) - list_del(&context->list); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - - if (!context) { - mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n"); - return; - } - mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); - - syndrome = ntohl(resp->syndrome); - context->status_code = syndrome_to_errno(syndrome); - context->status 
= MLX5_FPGA_IPSEC_CMD_COMPLETE; - memcpy(&context->resp, resp, sizeof(*resp)); - - if (context->status_code) - mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n", - syndrome); - - complete(&context->complete); -} - -static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, - const void *cmd, int cmd_size) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - struct mlx5_fpga_device *fdev = mdev->fpga; - unsigned long flags; - int res; - - if (!fdev || !fdev->ipsec) - return ERR_PTR(-EOPNOTSUPP); - - if (cmd_size & 3) - return ERR_PTR(-EINVAL); - - context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC); - if (!context) - return ERR_PTR(-ENOMEM); - - context->status = MLX5_FPGA_IPSEC_CMD_PENDING; - context->dev = fdev; - context->buf.complete = mlx5_fpga_ipsec_send_complete; - init_completion(&context->complete); - memcpy(&context->command, cmd, cmd_size); - context->buf.sg[0].size = cmd_size; - context->buf.sg[0].data = &context->command; - - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); - if (!res) - list_add_tail(&context->list, &fdev->ipsec->pending_cmds); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - - if (res) { - mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res); - kfree(context); - return ERR_PTR(res); - } - - /* Context should be freed by the caller after completion. */ - return context; -} - -static int mlx5_fpga_ipsec_cmd_wait(void *ctx) -{ - struct mlx5_fpga_ipsec_cmd_context *context = ctx; - unsigned long timeout = - msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC); - int res; - - res = wait_for_completion_timeout(&context->complete, timeout); - if (!res) { - mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); - return -ETIMEDOUT; - } - - if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE) - res = context->status_code; - else - res = -EIO; - - return res; -} - -static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec) -{ - if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command)) - return true; - return false; -} - -static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa, - int opcode) -{ - struct mlx5_core_dev *dev = fdev->mdev; - struct mlx5_ifc_fpga_ipsec_sa *sa; - struct mlx5_fpga_ipsec_cmd_context *cmd_context; - size_t sa_cmd_size; - int err; - - hw_sa->ipsec_sa_v1.cmd = htonl(opcode); - if (is_v2_sadb_supported(fdev->ipsec)) - sa_cmd_size = sizeof(*hw_sa); - else - sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1); - - cmd_context = (struct mlx5_fpga_ipsec_cmd_context *) - mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size); - if (IS_ERR(cmd_context)) - return PTR_ERR(cmd_context); - - err = mlx5_fpga_ipsec_cmd_wait(cmd_context); - if (err) - goto out; - - sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command; - if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) { - mlx5_fpga_err(fdev, "mismatch SA handle. 
cmd 0x%08x vs resp 0x%08x\n", - ntohl(sa->ipsec_sa_v1.sw_sa_handle), - ntohl(cmd_context->resp.sw_sa_handle)); - err = -EIO; - } - -out: - kfree(cmd_context); - return err; -} - -u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - u32 ret = 0; - - if (mlx5_fpga_is_ipsec_device(mdev)) { - ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE; - ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA; - } else { - return ret; - } - - if (!fdev->ipsec) - return ret; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) - ret |= MLX5_ACCEL_IPSEC_CAP_ESP; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) - ret |= MLX5_ACCEL_IPSEC_CAP_IPV6; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) - ret |= MLX5_ACCEL_IPSEC_CAP_LSO; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer)) - ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) { - ret |= MLX5_ACCEL_IPSEC_CAP_ESN; - ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; - } - - return ret; -} - -static unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!fdev || !fdev->ipsec) - return 0; - - return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - number_of_ipsec_counters); -} - -static int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int counters_count) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - unsigned int i; - __be32 *data; - u32 count; - u64 addr; - int ret; - - if (!fdev || !fdev->ipsec) - return 0; - - addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - ipsec_counters_addr_low) + - ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - ipsec_counters_addr_high) << 32); - - count = mlx5_fpga_ipsec_counters_count(mdev); - - data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto out; - } - - ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data, - MLX5_FPGA_ACCESS_TYPE_DONTCARE); - if (ret < 0) { - mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n", - ret); - goto out; - } - ret = 0; - - if (count > counters_count) - count = counters_count; - - /* Each counter is low word, then high. But each word is big-endian */ - for (i = 0; i < count; i++) - counters[i] = (u64)ntohl(data[i * 2]) | - ((u64)ntohl(data[i * 2 + 1]) << 32); - -out: - kfree(data); - return ret; -} - -static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0}; - int err; - - cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); - cmd.flags = htonl(flags); - context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); - if (IS_ERR(context)) - return PTR_ERR(context); - - err = mlx5_fpga_ipsec_cmd_wait(context); - if (err) - goto out; - - if ((context->resp.flags & cmd.flags) != cmd.flags) { - mlx5_fpga_err(context->dev, "Failed to set capabilities. 
cmd 0x%08x vs resp 0x%08x\n", - cmd.flags, - context->resp.flags); - err = -EIO; - } - -out: - kfree(context); - return err; -} - -static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev) -{ - u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev); - u32 flags = 0; - - if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER) - flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER; - - return mlx5_fpga_ipsec_set_caps(mdev, flags); -} - -static void -mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa) -{ - const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm; - - /* key */ - memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key, - aes_gcm->key_len / 8); - /* Duplicate 128 bit key twice according to HW layout */ - if (aes_gcm->key_len == 128) - memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], - aes_gcm->aes_key, aes_gcm->key_len / 8); - - /* salt and seq_iv */ - memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv, - sizeof(aes_gcm->seq_iv)); - memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt, - sizeof(aes_gcm->salt)); - - /* esn */ - if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN; - hw_sa->ipsec_sa_v1.flags |= - (xfrm_attrs->flags & - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; - hw_sa->esn = htonl(xfrm_attrs->esn); - } else { - hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN; - hw_sa->ipsec_sa_v1.flags &= - ~(xfrm_attrs->flags & - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; - hw_sa->esn = 0; - } - - /* rx handle */ - hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle); - - /* enc mode */ - switch (aes_gcm->key_len) { - case 128: - hw_sa->ipsec_sa_v1.enc_mode = - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128; - break; - case 256: - hw_sa->ipsec_sa_v1.enc_mode = - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128; - break; - } - - /* flags */ - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID | - MLX5_FPGA_IPSEC_SA_SPI_EN | - MLX5_FPGA_IPSEC_SA_IP_ESP; - - if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT) - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX; - else - hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX; -} - -static void -mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa) -{ - mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa); - - /* IPs */ - memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip)); - memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip)); - - /* SPI */ - hw_sa->ipsec_sa_v1.spi = spi; - - /* flags */ - if (is_ipv6) - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6; -} - -static bool is_full_mask(const void *p, size_t len) -{ - WARN_ON(len % 4); - - return !memchr_inv(p, 0xff, len); -} - -static bool validate_fpga_full_mask(struct mlx5_core_dev *dev, - const u32 *match_c, - const u32 *match_v) -{ - const void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - match_c, - misc_parameters); - const void *headers_c = MLX5_ADDR_OF(fte_match_param, - match_c, - outer_headers); - const void *headers_v = MLX5_ADDR_OF(fte_match_param, - match_v, - outer_headers); - - if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) { - const void *s_ipv4_c = 
MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4); - const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, - ipv4)) || - !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, - ipv4))) - return false; - } else { - const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6); - const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6); - - if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)) || - !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6))) - return false; - } - - if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - outer_esp_spi), - MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi))) - return false; - - return true; -} - -static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev, - u8 match_criteria_enable, - const u32 *match_c, - const u32 *match_v) -{ - u32 ipsec_dev_caps = mlx5_fpga_ipsec_device_caps(dev); - bool ipv6_flow; - - ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v); - - if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) || - mlx5_fs_is_outer_udp_flow(match_c, match_v) || - mlx5_fs_is_outer_tcp_flow(match_c, match_v) || - mlx5_fs_is_vxlan_flow(match_c) || - !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) || - ipv6_flow)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) && - mlx5_fs_is_outer_ipsec_flow(match_c)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) && - ipv6_flow) - return false; - - if (!validate_fpga_full_mask(dev, match_c, match_v)) - return false; - - return true; -} - -static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, - u8 match_criteria_enable, - const u32 *match_c, - const u32 *match_v, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_context *flow_context) -{ - const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) || - MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0); - bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) || - MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0); - int ret; - - ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c, - match_v); - if (!ret) - return ret; - - if (is_dmac || is_smac || - (match_criteria_enable & - ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || - (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) - return false; - - return true; -} - -static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, u32 *sa_handle) -{ - struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - container_of(accel_xfrm, typeof(*fpga_xfrm), - accel_xfrm); - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - int opcode, err; - void *context; - - /* alloc SA */ - sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL); - if (!sa_ctx) - return ERR_PTR(-ENOMEM); - - sa_ctx->dev = mdev; - - /* build candidate SA */ - mlx5_fpga_ipsec_build_hw_sa(mdev, 
&accel_xfrm->attrs, - saddr, daddr, spi, is_ipv6, - &sa_ctx->hw_sa); - - mutex_lock(&fpga_xfrm->lock); - - if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */ - /* all rules must be with same IPs and SPI */ - if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa, - sizeof(sa_ctx->hw_sa))) { - context = ERR_PTR(-EINVAL); - goto exists; - } - - ++fpga_xfrm->num_rules; - context = fpga_xfrm->sa_ctx; - goto exists; - } - - if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) { - err = ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL); - if (err < 0) { - context = ERR_PTR(err); - goto exists; - } - - sa_ctx->sa_handle = err; - if (sa_handle) - *sa_handle = sa_ctx->sa_handle; - } - /* This is unbounded fpga_xfrm, try to add to hash */ - mutex_lock(&fipsec->sa_hash_lock); - - err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa); - if (err) { - /* Can't bound different accel_xfrm to already existing sa_ctx. - * This is because we can't support multiple ketmats for - * same IPs and SPI - */ - context = ERR_PTR(-EEXIST); - goto unlock_hash; - } - - /* Bound accel_xfrm to sa_ctx */ - opcode = is_v2_sadb_supported(fdev->ipsec) ? - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 : - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA; - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); - sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) { - context = ERR_PTR(err); - goto delete_hash; - } - - mutex_unlock(&fipsec->sa_hash_lock); - - ++fpga_xfrm->num_rules; - fpga_xfrm->sa_ctx = sa_ctx; - sa_ctx->fpga_xfrm = fpga_xfrm; - - mutex_unlock(&fpga_xfrm->lock); - - return sa_ctx; - -delete_hash: - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa)); -unlock_hash: - mutex_unlock(&fipsec->sa_hash_lock); - if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_free(&fipsec->halloc, sa_ctx->sa_handle); -exists: - mutex_unlock(&fpga_xfrm->lock); - kfree(sa_ctx); - return context; -} - -static void * -mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, - struct fs_fte *fte, - bool is_egress) -{ - struct mlx5_accel_esp_xfrm *accel_xfrm; - __be32 saddr[4], daddr[4], spi; - struct mlx5_flow_group *fg; - bool is_ipv6 = false; - - fs_get_obj(fg, fte->node.parent); - /* validate */ - if (is_egress && - !mlx5_is_fpga_egress_ipsec_rule(mdev, - fg->mask.match_criteria_enable, - fg->mask.match_criteria, - fte->val, - &fte->action, - &fte->flow_context)) - return ERR_PTR(-EINVAL); - else if (!mlx5_is_fpga_ipsec_rule(mdev, - fg->mask.match_criteria_enable, - fg->mask.match_criteria, - fte->val)) - return ERR_PTR(-EINVAL); - - /* get xfrm context */ - accel_xfrm = - (struct mlx5_accel_esp_xfrm *)fte->action.esp_id; - - /* IPs */ - if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria, - fte->val)) { - memcpy(&saddr[3], - MLX5_ADDR_OF(fte_match_set_lyr_2_4, - fte->val, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - sizeof(saddr[3])); - memcpy(&daddr[3], - MLX5_ADDR_OF(fte_match_set_lyr_2_4, - fte->val, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - sizeof(daddr[3])); - } else { - memcpy(saddr, - MLX5_ADDR_OF(fte_match_param, - fte->val, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), - sizeof(saddr)); - memcpy(daddr, - MLX5_ADDR_OF(fte_match_param, - fte->val, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - sizeof(daddr)); - is_ipv6 = true; - } - - /* SPI */ - spi = MLX5_GET_BE(typeof(spi), - fte_match_param, fte->val, - misc_parameters.outer_esp_spi); - - /* create */ - return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm, - saddr, daddr, - spi, 
is_ipv6, NULL); -} - -static void -mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) -{ - struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - int opcode = is_v2_sadb_supported(fdev->ipsec) ? - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 : - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA; - int err; - - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); - sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) { - WARN_ON(err); - return; - } - - if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action == - MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_free(&fipsec->halloc, sa_ctx->sa_handle); - - mutex_lock(&fipsec->sa_hash_lock); - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa)); - mutex_unlock(&fipsec->sa_hash_lock); -} - -static void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm; - - mutex_lock(&fpga_xfrm->lock); - if (!--fpga_xfrm->num_rules) { - mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); - kfree(fpga_xfrm->sa_ctx); - fpga_xfrm->sa_ctx = NULL; - } - mutex_unlock(&fpga_xfrm->lock); -} - -static inline struct mlx5_fpga_ipsec_rule * -_rule_search(struct rb_root *root, struct fs_fte *fte) -{ - struct rb_node *node = root->rb_node; - - while (node) { - struct mlx5_fpga_ipsec_rule *rule = - container_of(node, struct mlx5_fpga_ipsec_rule, - node); - - if (rule->fte < fte) - node = node->rb_left; - else if (rule->fte > fte) - node = node->rb_right; - else - return rule; - } - return NULL; -} - -static struct mlx5_fpga_ipsec_rule * -rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte) -{ - struct mlx5_fpga_ipsec_rule *rule; - - mutex_lock(&ipsec_dev->rules_rb_lock); - rule = _rule_search(&ipsec_dev->rules_rb, fte); - mutex_unlock(&ipsec_dev->rules_rb_lock); - - return rule; -} - -static inline int _rule_insert(struct rb_root *root, - struct mlx5_fpga_ipsec_rule *rule) -{ - struct rb_node **new = &root->rb_node, *parent = NULL; - - /* Figure out where to put new node */ - while (*new) { - struct mlx5_fpga_ipsec_rule *this = - container_of(*new, struct mlx5_fpga_ipsec_rule, - node); - - parent = *new; - if (rule->fte < this->fte) - new = &((*new)->rb_left); - else if (rule->fte > this->fte) - new = &((*new)->rb_right); - else - return -EEXIST; - } - - /* Add new node and rebalance tree. 
*/ - rb_link_node(&rule->node, parent, new); - rb_insert_color(&rule->node, root); - - return 0; -} - -static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - int ret; - - mutex_lock(&ipsec_dev->rules_rb_lock); - ret = _rule_insert(&ipsec_dev->rules_rb, rule); - mutex_unlock(&ipsec_dev->rules_rb_lock); - - return ret; -} - -static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - struct rb_root *root = &ipsec_dev->rules_rb; - - mutex_lock(&ipsec_dev->rules_rb_lock); - rb_erase(&rule->node, root); - mutex_unlock(&ipsec_dev->rules_rb_lock); -} - -static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - _rule_delete(ipsec_dev, rule); - kfree(rule); -} - -struct mailbox_mod { - uintptr_t saved_esp_id; - u32 saved_action; - u32 saved_outer_esp_spi_value; -}; - -static void restore_spec_mailbox(struct fs_fte *fte, - struct mailbox_mod *mbox_mod) -{ - char *misc_params_v = MLX5_ADDR_OF(fte_match_param, - fte->val, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - mbox_mod->saved_outer_esp_spi_value); - fte->action.action |= mbox_mod->saved_action; - fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id; -} - -static void modify_spec_mailbox(struct mlx5_core_dev *mdev, - struct fs_fte *fte, - struct mailbox_mod *mbox_mod) -{ - char *misc_params_v = MLX5_ADDR_OF(fte_match_param, - fte->val, - misc_parameters); - - mbox_mod->saved_esp_id = fte->action.esp_id; - mbox_mod->saved_action = fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT); - mbox_mod->saved_outer_esp_spi_value = - MLX5_GET(fte_match_set_misc, misc_params_v, - outer_esp_spi); - - fte->action.esp_id = 0; - fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT); - if (!MLX5_CAP_FLOWTABLE(mdev, - flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0); -} - -static enum fs_flow_table_type egress_to_fs_ft(bool egress) -{ - return egress ? 
FS_FT_NIC_TX : FS_FT_NIC_RX; -} - -static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg, - bool is_egress) -{ - int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, u32 *in, - struct mlx5_flow_group *fg) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group; - char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria.misc_parameters); - struct mlx5_core_dev *dev = ns->dev; - u32 saved_outer_esp_spi_mask; - u8 match_criteria_enable; - int ret; - - if (MLX5_CAP_FLOWTABLE(dev, - flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - return create_flow_group(ns, ft, in, fg); - - match_criteria_enable = - MLX5_GET(create_flow_group_in, in, match_criteria_enable); - saved_outer_esp_spi_mask = - MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); - if (!match_criteria_enable || !saved_outer_esp_spi_mask) - return create_flow_group(ns, ft, in, fg); - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0); - - if (!(*misc_params_c) && - !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) - MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS); - - ret = create_flow_group(ns, ft, in, fg); - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask); - MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable); - - return ret; -} - -static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte, - bool is_egress) -{ - int (*create_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte; - struct mlx5_core_dev *dev = ns->dev; - struct mlx5_fpga_device *fdev = dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_ipsec_rule *rule; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return create_fte(ns, ft, fg, fte); - - rule = kzalloc(sizeof(*rule), GFP_KERNEL); - if (!rule) - return -ENOMEM; - - rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); - if (IS_ERR(rule->ctx)) { - int err = PTR_ERR(rule->ctx); - - kfree(rule); - return err; - } - - rule->fte = fte; - WARN_ON(rule_insert(fipsec, rule)); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = create_fte(ns, ft, fg, fte); - restore_spec_mailbox(fte, &mbox_mod); - if (ret) { - _rule_delete(fipsec, rule); - mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); - kfree(rule); - } - - return ret; -} - -static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte, - bool is_egress) -{ - int (*update_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte; - struct mlx5_core_dev *dev = ns->dev; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | 
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return update_fte(ns, ft, fg, modify_mask, fte); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = update_fte(ns, ft, fg, modify_mask, fte); - restore_spec_mailbox(fte, &mbox_mod); - - return ret; -} - -static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte, - bool is_egress) -{ - int (*delete_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte; - struct mlx5_core_dev *dev = ns->dev; - struct mlx5_fpga_device *fdev = dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_ipsec_rule *rule; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return delete_fte(ns, ft, fte); - - rule = rule_search(fipsec, fte); - if (!rule) - return -ENOENT; - - mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); - rule_delete(fipsec, rule); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = delete_fte(ns, ft, fte); - restore_spec_mailbox(fte, &mbox_mod); - - return ret; -} - -static int -mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg) -{ - return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true); -} - -static int -mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true); -} - -static int -mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, - true); -} - -static int -mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_delete_fte(ns, ft, fte, true); -} - -static int -mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg) -{ - return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false); -} - -static int -mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false); -} - -static int -mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, - false); -} - -static int -mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_delete_fte(ns, ft, fte, false); -} - -static struct mlx5_flow_cmds fpga_ipsec_ingress; -static struct mlx5_flow_cmds fpga_ipsec_egress; - -const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - switch (type) { - case FS_FT_NIC_RX: - return &fpga_ipsec_ingress; - case FS_FT_NIC_TX: - return &fpga_ipsec_egress; - default: - WARN_ON(true); - return NULL; - } -} - -static int 
mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_conn_attr init_attr = {0}; - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_conn *conn; - int err; - - if (!mlx5_fpga_is_ipsec_device(mdev)) - return 0; - - fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL); - if (!fdev->ipsec) - return -ENOMEM; - - fdev->ipsec->fdev = fdev; - - err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), - fdev->ipsec->caps); - if (err) { - mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n", - err); - goto error; - } - - INIT_LIST_HEAD(&fdev->ipsec->pending_cmds); - spin_lock_init(&fdev->ipsec->pending_cmds_lock); - - init_attr.rx_size = SBU_QP_QUEUE_SIZE; - init_attr.tx_size = SBU_QP_QUEUE_SIZE; - init_attr.recv_cb = mlx5_fpga_ipsec_recv; - init_attr.cb_arg = fdev; - conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); - if (IS_ERR(conn)) { - err = PTR_ERR(conn); - mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n", - err); - goto error; - } - fdev->ipsec->conn = conn; - - err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa); - if (err) - goto err_destroy_conn; - mutex_init(&fdev->ipsec->sa_hash_lock); - - fdev->ipsec->rules_rb = RB_ROOT; - mutex_init(&fdev->ipsec->rules_rb_lock); - - err = mlx5_fpga_ipsec_enable_supported_caps(mdev); - if (err) { - mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n", - err); - goto err_destroy_hash; - } - - ida_init(&fdev->ipsec->halloc); - - return 0; - -err_destroy_hash: - rhashtable_destroy(&fdev->ipsec->sa_hash); - -err_destroy_conn: - mlx5_fpga_sbu_conn_destroy(conn); - -error: - kfree(fdev->ipsec); - fdev->ipsec = NULL; - return err; -} - -static void destroy_rules_rb(struct rb_root *root) -{ - struct mlx5_fpga_ipsec_rule *r, *tmp; - - rbtree_postorder_for_each_entry_safe(r, tmp, root, node) { - rb_erase(&r->node, root); - mlx5_fpga_ipsec_delete_sa_ctx(r->ctx); - kfree(r); - } -} - -static void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!mlx5_fpga_is_ipsec_device(mdev)) - return; - - ida_destroy(&fdev->ipsec->halloc); - destroy_rules_rb(&fdev->ipsec->rules_rb); - rhashtable_destroy(&fdev->ipsec->sa_hash); - - mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); - kfree(fdev->ipsec); - fdev->ipsec = NULL; -} - -void mlx5_fpga_ipsec_build_fs_cmds(void) -{ - /* ingress */ - fpga_ipsec_ingress.create_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table; - fpga_ipsec_ingress.destroy_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table; - fpga_ipsec_ingress.modify_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table; - fpga_ipsec_ingress.create_flow_group = - mlx5_fpga_ipsec_fs_create_flow_group_ingress; - fpga_ipsec_ingress.destroy_flow_group = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group; - fpga_ipsec_ingress.create_fte = - mlx5_fpga_ipsec_fs_create_fte_ingress; - fpga_ipsec_ingress.update_fte = - mlx5_fpga_ipsec_fs_update_fte_ingress; - fpga_ipsec_ingress.delete_fte = - mlx5_fpga_ipsec_fs_delete_fte_ingress; - fpga_ipsec_ingress.update_root_ft = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft; - - /* egress */ - fpga_ipsec_egress.create_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table; - fpga_ipsec_egress.destroy_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table; - fpga_ipsec_egress.modify_flow_table = - 
mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table; - fpga_ipsec_egress.create_flow_group = - mlx5_fpga_ipsec_fs_create_flow_group_egress; - fpga_ipsec_egress.destroy_flow_group = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group; - fpga_ipsec_egress.create_fte = - mlx5_fpga_ipsec_fs_create_fte_egress; - fpga_ipsec_egress.update_fte = - mlx5_fpga_ipsec_fs_update_fte_egress; - fpga_ipsec_egress.delete_fte = - mlx5_fpga_ipsec_fs_delete_fte_egress; - fpga_ipsec_egress.update_root_ft = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft; -} - -static int -mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - if (attrs->tfc_pad) { - mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n"); - return -EOPNOTSUPP; - } - - if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) { - mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) { - mlx5_core_err(mdev, "Only aes gcm keymat is supported\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.iv_algo != - MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) { - mlx5_core_err(mdev, "Only iv sequence algo is supported\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.icv_len != 128) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.key_len != 128 && - attrs->keymat.aes_gcm.key_len != 256) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); - return -EOPNOTSUPP; - } - - if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) && - (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps, - v2_command))) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); - return -EOPNOTSUPP; - } - - return 0; -} - -static struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm; - - if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) { - mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n"); - return ERR_PTR(-EINVAL); - } - - if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { - mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); - return ERR_PTR(-EOPNOTSUPP); - } - - fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL); - if (!fpga_xfrm) - return ERR_PTR(-ENOMEM); - - mutex_init(&fpga_xfrm->lock); - memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs, - sizeof(fpga_xfrm->accel_xfrm.attrs)); - - return &fpga_xfrm->accel_xfrm; -} - -static void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - container_of(xfrm, struct mlx5_fpga_esp_xfrm, - accel_xfrm); - /* assuming no sa_ctx are connected to this xfrm_ctx */ - kfree(fpga_xfrm); -} - -static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - struct mlx5_core_dev *mdev = xfrm->mdev; - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_esp_xfrm *fpga_xfrm; - struct mlx5_ifc_fpga_ipsec_sa org_hw_sa; - - int err = 0; - - if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) - return 0; - - if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { - 
mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); - return -EOPNOTSUPP; - } - - if (is_v2_sadb_supported(fipsec)) { - mlx5_core_warn(mdev, "Modify esp is not supported\n"); - return -EOPNOTSUPP; - } - - fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm); - - mutex_lock(&fpga_xfrm->lock); - - if (!fpga_xfrm->sa_ctx) - /* Unbounded xfrm, change only sw attrs */ - goto change_sw_xfrm_attrs; - - /* copy original hw sa */ - memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa)); - mutex_lock(&fipsec->sa_hash_lock); - /* remove original hw sa from hash */ - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, rhash_sa)); - /* update hw_sa with new xfrm attrs*/ - mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs, - &fpga_xfrm->sa_ctx->hw_sa); - /* try to insert new hw_sa to hash */ - err = rhashtable_insert_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, rhash_sa); - if (err) - goto rollback_sa; - - /* modify device with new hw_sa */ - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2); - fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, - rhash_sa)); -rollback_sa: - if (err) { - /* return original hw_sa to hash */ - memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa, - sizeof(org_hw_sa)); - WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, - rhash_sa)); - } - mutex_unlock(&fipsec->sa_hash_lock); - -change_sw_xfrm_attrs: - if (!err) - memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs)); - mutex_unlock(&fpga_xfrm->lock); - return err; -} - -static const struct mlx5_accel_ipsec_ops fpga_ipsec_ops = { - .device_caps = mlx5_fpga_ipsec_device_caps, - .counters_count = mlx5_fpga_ipsec_counters_count, - .counters_read = mlx5_fpga_ipsec_counters_read, - .create_hw_context = mlx5_fpga_ipsec_create_sa_ctx, - .free_hw_context = mlx5_fpga_ipsec_delete_sa_ctx, - .init = mlx5_fpga_ipsec_init, - .cleanup = mlx5_fpga_ipsec_cleanup, - .esp_create_xfrm = mlx5_fpga_esp_create_xfrm, - .esp_modify_xfrm = mlx5_fpga_esp_modify_xfrm, - .esp_destroy_xfrm = mlx5_fpga_esp_destroy_xfrm, -}; - -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) -{ - if (!mlx5_fpga_is_ipsec_device(mdev)) - return NULL; - - return &fpga_ipsec_ops; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h deleted file mode 100644 index 8931b5584477..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_FPGA_IPSEC_H__ -#define __MLX5_FPGA_IPSEC_H__ - -#include "accel/ipsec.h" -#include "fs_cmd.h" - -#ifdef CONFIG_MLX5_FPGA_IPSEC -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev); -u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); -const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -void mlx5_fpga_ipsec_build_fs_cmds(void); -bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev); -#else -static inline -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) -{ return NULL; } -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}; -static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; } - -#endif /* CONFIG_MLX5_FPGA_IPSEC */ -#endif /* __MLX5_FPGA_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 816d991f7621..fe7bdccea301 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,8 +40,6 @@ #include "fs_cmd.h" #include "fs_ft_pool.h" #include "diag/fs_tracepoint.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) 
(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -2519,10 +2517,6 @@ static struct mlx5_flow_root_namespace struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_namespace *ns; - if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && - (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX)) - cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); - /* Create the root namespace */ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) @@ -3172,8 +3166,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE || - MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7f287e300fb4..387602bbfecc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -62,7 +62,6 @@ #include "lib/mlx5.h" #include "lib/tout.h" #include "fpga/core.h" -#include "fpga/ipsec.h" #include "accel/ipsec.h" #include "lib/clock.h" #include "lib/vxlan.h" @@ -1937,7 +1936,6 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index e3d824f6a309..45c7c0d67635 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -386,68 +386,6 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_ipsec_extended_cap_bits { - u8 encapsulation[0x20]; - - u8 reserved_0[0x12]; - u8 v2_command[0x1]; - u8 udp_encap[0x1]; - u8 rx_no_trailer[0x1]; - u8 ipv4_fragment[0x1]; - u8 ipv6[0x1]; - u8 esn[0x1]; - u8 lso[0x1]; - u8 transport_and_tunnel_mode[0x1]; - u8 tunnel_mode[0x1]; - u8 transport_mode[0x1]; - u8 ah_esp[0x1]; - u8 esp[0x1]; - u8 ah[0x1]; - u8 ipv4_options[0x1]; - - u8 auth_alg[0x20]; - - u8 enc_alg[0x20]; - - u8 sa_cap[0x20]; - - u8 reserved_1[0x10]; - u8 number_of_ipsec_counters[0x10]; - - u8 ipsec_counters_addr_low[0x20]; - u8 ipsec_counters_addr_high[0x20]; -}; - -struct mlx5_ifc_ipsec_counters_bits { - u8 dec_in_packets[0x40]; - - u8 dec_out_packets[0x40]; - - u8 dec_bypass_packets[0x40]; - - u8 enc_in_packets[0x40]; - - u8 enc_out_packets[0x40]; - - u8 enc_bypass_packets[0x40]; - - u8 drop_dec_packets[0x40]; - - u8 failed_auth_dec_packets[0x40]; - - u8 drop_enc_packets[0x40]; - - u8 success_add_sa[0x40]; - - u8 fail_add_sa[0x40]; - - u8 success_delete_sa[0x40]; - - u8 fail_delete_sa[0x40]; - - u8 dropped_cmd[0x40]; -}; - enum { MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1, MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2, @@ -464,90 +402,4 @@ struct mlx5_ifc_fpga_qp_error_event_bits { u8 reserved_at_c0[0x8]; u8 fpga_qpn[0x18]; }; -enum mlx5_ifc_fpga_ipsec_response_syndrome { - MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0, - MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, - MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2, - MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, -}; - -struct mlx5_ifc_fpga_ipsec_cmd_resp { - __be32 syndrome; - union { - __be32 sw_sa_handle; - __be32 flags; - }; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_cmd_opcode { - 
MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1, - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4, - MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5, -}; - -enum mlx5_ifc_fpga_ipsec_cap { - MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0), -}; - -struct mlx5_ifc_fpga_ipsec_cmd_cap { - __be32 cmd; - __be32 flags; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_sa_flags { - MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0), - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1), - MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2), - MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3), - MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4), - MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5), - MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6), - MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7), -}; - -enum mlx5_ifc_fpga_ipsec_sa_enc_mode { - MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3, -}; - -struct mlx5_ifc_fpga_ipsec_sa_v1 { - __be32 cmd; - u8 key_enc[32]; - u8 key_auth[32]; - __be32 sip[4]; - __be32 dip[4]; - union { - struct { - __be32 reserved; - u8 salt_iv[8]; - __be32 salt; - } __packed gcm; - struct { - u8 salt[16]; - } __packed cbc; - }; - __be32 spi; - __be32 sw_sa_handle; - __be16 tfclen; - u8 enc_mode; - u8 reserved1[2]; - u8 flags; - u8 reserved2[2]; -}; - -struct mlx5_ifc_fpga_ipsec_sa { - struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1; - __be16 udp_sp; - __be16 udp_dp; - u8 reserved1[4]; - __be32 esn; - __be16 vid; /* only 12 bits, rest is reserved */ - __be16 reserved2; -} __packed; #endif /* MLX5_IFC_FPGA_H */ -- cgit From de8bdb476908e64805df4bfbad20618cbb1f9ffa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 Apr 2022 11:25:42 +0300 Subject: RDMA/mlx5: Drop crypto flow steering API The mlx5 flow steering crypto API was intended to be used in FPGA devices, which have not been supported for years already. The removal of the mlx5 crypto FPGA code, together with the inability to configure encryption keys, makes the flow steering API completely unusable. So delete the code; any ESP flow steering request will now fail with a not-supported error, as already happens anyway since no device supports this type of API. Link: https://lore.kernel.org/r/634a5face7734381463d809bfb89850f6998deac.1649232994.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/fs.c | 223 +-------------------- drivers/infiniband/hw/mlx5/main.c | 27 --- .../net/ethernet/mellanox/mlx5/core/accel/ipsec.c | 5 +- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 4 +- include/linux/mlx5/accel.h | 13 +- 5 files changed, 10 insertions(+), 262 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 661ed2b44508..9c2886bc72cb 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include "mlx5_ib.h" @@ -148,16 +147,6 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, { switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; case IB_FLOW_ACTION_UNSPECIFIED: if (maction->flow_action_raw.sub_type == MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { @@ -368,14 +357,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, ib_spec->type & IB_FLOW_SPEC_INNER); break; case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; + return -EOPNOTSUPP; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) @@ -587,47 +569,6 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) return false; } -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. - */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? - VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - static bool is_valid_ethertype(struct mlx5_core_dev *mdev, const struct ib_flow_attr *flow_attr, bool check_inner) @@ -1154,8 +1095,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + if (is_egress) { err = -EINVAL; goto free; } @@ -1740,149 +1680,6 @@ unlock: return ERR_PTR(err); } -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ - MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. 
Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. - */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return -EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. 
- */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { switch (maction->flow_action_raw.sub_type) { @@ -1906,13 +1703,6 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) struct mlx5_ib_flow_action *maction = to_mflow_act(action); switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. - */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; case IB_FLOW_ACTION_UNSPECIFIED: destroy_flow_action_raw(maction); break; @@ -2709,11 +2499,6 @@ static const struct ib_device_ops flow_ops = { .destroy_flow_action = mlx5_ib_destroy_flow_action, }; -static const struct ib_device_ops flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) { dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); @@ -2724,9 +2509,5 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) mutex_init(&dev->flow_db->lock); ib_set_device_ops(&dev->ib_dev, &flow_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); - return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d2e67ecc5479..61aa196d6484 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -41,7 +41,6 @@ #include "wr.h" #include "restrack.h" #include "counters.h" -#include #include #include #include @@ -906,10 +905,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP | MLX5_RX_HASH_INNER; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - resp.rss_caps.rx_hash_fields_mask |= - MLX5_RX_HASH_IPSEC_SPI; resp.response_length += sizeof(resp.rss_caps); } } else { @@ -1791,19 +1786,6 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_EGRESS)) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - } - resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; resp->num_ports = dev->num_ports; @@ -3600,13 +3582,6 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR, &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC), &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY)); -ADD_UVERBS_ATTRIBUTES_SIMPLE( - mlx5_ib_flow_action, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - enum mlx5_ib_uapi_flow_action_flags)); - ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_query_context, UVERBS_OBJECT_DEVICE, @@ -3624,8 +3599,6 @@ static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN(mlx5_ib_dm_defs), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 45296ec2d055..387be13b2f1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -133,8 +133,7 @@ void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) + const struct mlx5_accel_esp_xfrm_attrs *attrs) { const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; struct mlx5_accel_esp_xfrm *xfrm; @@ -142,7 +141,7 @@ mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, if (!ipsec_ops || !ipsec_ops->esp_create_xfrm) return ERR_PTR(-EOPNOTSUPP); - xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, flags); + xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, 0); if (IS_ERR(xfrm)) return xfrm; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 13f6fed74950..f6e3b549424f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -333,9 +333,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) /* create xfrm */ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); - sa_entry->xfrm = - mlx5_accel_esp_create_xfrm(priv->mdev, &attrs, - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA); + sa_entry->xfrm = mlx5_accel_esp_create_xfrm(priv->mdev, &attrs); if (IS_ERR(sa_entry->xfrm)) { err = PTR_ERR(sa_entry->xfrm); goto err_sa_entry; diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index dacf69516002..af67d51308cf 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -111,10 +111,6 @@ struct 
mlx5_accel_esp_xfrm { struct mlx5_accel_esp_xfrm_attrs attrs; }; -enum { - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0, -}; - enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, @@ -132,8 +128,7 @@ u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags); + const struct mlx5_accel_esp_xfrm_attrs *attrs); void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_accel_esp_xfrm_attrs *attrs); @@ -144,8 +139,10 @@ static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { ret static inline struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) { return ERR_PTR(-EOPNOTSUPP); } + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {} static inline int -- cgit From 2451da081a343e079d9f5a7b063fcdf0bc439aa8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 Apr 2022 11:25:46 +0300 Subject: net/mlx5: Unify device IPsec capabilities check Merge two different functions into one in order to provide a coherent picture of whether the device is IPsec capable or not. Link: https://lore.kernel.org/r/8f10ea06ad19c6f651e9fb33921009658f01e1d5.1649232994.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/accel/ipsec_offload.c | 38 +++++++++++----------- .../mellanox/mlx5/core/accel/ipsec_offload.h | 26 --------------- .../net/ethernet/mellanox/mlx5/core/en/params.c | 4 +-- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 12 +++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- include/linux/mlx5/accel.h | 7 ++-- 6 files changed, 32 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c index 3a85157f9f07..9dbebef19ff0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c @@ -6,9 +6,6 @@ #include "lib/mlx5.h" #include "en_accel/ipsec_fs.h" -#define MLX5_IPSEC_DEV_BASIC_CAPS (MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | \ - MLX5_ACCEL_IPSEC_CAP_LSO) - struct mlx5_ipsec_sa_ctx { struct rhash_head hash; u32 enc_key_id; @@ -25,17 +22,31 @@ struct mlx5_ipsec_esp_xfrm { struct mlx5_accel_esp_xfrm accel_xfrm; }; -static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev) +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) { - u32 caps = MLX5_IPSEC_DEV_BASIC_CAPS; + u32 caps; + + if (!MLX5_CAP_GEN(mdev, ipsec_offload)) + return 0; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return 0; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return 0; - if (!mlx5_is_ipsec_device(mdev)) + if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) || + !MLX5_CAP_ETH(mdev, insert_trailer)) return 0; if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) || !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt)) return 0; + caps = MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | + MLX5_ACCEL_IPSEC_CAP_LSO; + if (MLX5_CAP_IPSEC(mdev,
ipsec_crypto_esp_aes_gcm_128_encrypt) && MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) caps |= MLX5_ACCEL_IPSEC_CAP_ESP; @@ -52,6 +63,7 @@ static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev) WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24); return caps; } +EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); static int mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, @@ -367,7 +379,6 @@ change_sw_xfrm_attrs: } static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = { - .device_caps = mlx5_ipsec_offload_device_caps, .create_hw_context = mlx5_ipsec_offload_create_sa_ctx, .free_hw_context = mlx5_ipsec_offload_delete_sa_ctx, .init = mlx5_ipsec_offload_init, @@ -379,7 +390,7 @@ static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = { static const struct mlx5_accel_ipsec_ops * mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) { - if (!mlx5_ipsec_offload_device_caps(mdev)) + if (!mlx5_ipsec_device_caps(mdev)) return NULL; return &ipsec_offload_ops; @@ -416,17 +427,6 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) ipsec_ops->cleanup(mdev); } -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->device_caps) - return 0; - - return ipsec_ops->device_caps(mdev); -} -EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps); - unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) { const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h index 4a7d49ed5604..3d13e2c136b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h @@ -9,9 +9,6 @@ #ifdef CONFIG_MLX5_IPSEC -#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ - MLX5_ACCEL_IPSEC_CAP_DEVICE) - unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count); @@ -25,7 +22,6 @@ void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); struct mlx5_accel_ipsec_ops { - u32 (*device_caps)(struct mlx5_core_dev *mdev); unsigned int (*counters_count)(struct mlx5_core_dev *mdev); int (*counters_read)(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count); @@ -45,25 +41,8 @@ struct mlx5_accel_ipsec_ops { void (*esp_destroy_xfrm)(struct mlx5_accel_esp_xfrm *xfrm); }; -static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - if (!MLX5_CAP_GEN(mdev, ipsec_offload)) - return false; - - if (!MLX5_CAP_GEN(mdev, log_max_dek)) - return false; - - if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) - return false; - - return MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) && - MLX5_CAP_ETH(mdev, insert_trailer); -} #else -#define MLX5_IPSEC_DEV(mdev) false - static inline void * mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, struct mlx5_accel_esp_xfrm *xfrm, @@ -80,10 +59,5 @@ static inline void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {} static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {} -static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - return false; -} 
- #endif /* CONFIG_MLX5_IPSEC */ #endif /* __MLX5_IPSEC_OFFLOAD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index d2ec0961fe9e..9f4ae8bc09b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -689,8 +689,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = mlx5_geneve_tx_allowed(mdev) || - !!MLX5_IPSEC_DEV(mdev); + allow_swp = + mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 1391a0c84f16..c280a18ff002 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -226,8 +226,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) return -EINVAL; } if (x->props.flags & XFRM_STATE_ESN && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_ESN)) { + !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_CAP_ESN)) { netdev_info(netdev, "Cannot offload ESN xfrm states\n"); return -EINVAL; } @@ -275,8 +274,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) return -EINVAL; } if (x->props.family == AF_INET6 && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_IPV6)) { + !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_CAP_IPV6)) { netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); return -EINVAL; } @@ -406,7 +404,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv) { struct mlx5e_ipsec *ipsec = NULL; - if (!MLX5_IPSEC_DEV(priv->mdev)) { + if (!mlx5_ipsec_device_caps(priv->mdev)) { netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); return 0; } @@ -519,7 +517,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; struct net_device *netdev = priv->netdev; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || !MLX5_CAP_ETH(mdev, swp)) { mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); return; @@ -538,7 +536,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) netdev->features |= NETIF_F_HW_ESP_TX_CSUM; netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || !MLX5_CAP_ETH(mdev, swp_lso)) { mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 346f7034fec8..6a3a08fd8910 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1329,7 +1329,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); - if (MLX5_IPSEC_DEV(c->priv->mdev)) + if (mlx5_ipsec_device_caps(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, 
&sq->state); diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index af67d51308cf..9145e2d37c0e 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -124,7 +124,7 @@ enum mlx5_accel_ipsec_cap { #ifdef CONFIG_MLX5_ACCEL -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev); struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, @@ -135,7 +135,10 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, #else -static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } +static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} static inline struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, -- cgit From 54deb0e77561973f4ca4515e18ab972c281eea1d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 Apr 2022 11:25:48 +0300 Subject: net/mlx5: Remove not-needed IPsec config In current code, the CONFIG_MLX5_IPSEC and CONFIG_MLX5_EN_IPSEC are the same. So remove useless indirection. Link: https://lore.kernel.org/r/fd14492cbc01a0d51a5bfedde02bcd2154123fde.1649232994.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 16 +--------------- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 ++-- .../ethernet/mellanox/mlx5/core/accel/ipsec_offload.h | 18 ++---------------- include/linux/mlx5/accel.h | 4 ++-- include/linux/mlx5/driver.h | 2 +- 5 files changed, 8 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index e34e64a9ff4a..176883cf2827 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -142,28 +142,14 @@ config MLX5_CORE_IPOIB help MLX5 IPoIB offloads & acceleration support. -config MLX5_IPSEC +config MLX5_EN_IPSEC bool "Mellanox Technologies IPsec Connect-X support" depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD select MLX5_ACCEL - help - Build IPsec support for the Connect-X family of network cards by Mellanox - Technologies. - Note: If you select this option, the mlx5_core driver will include - IPsec support for the Connect-X family. - -config MLX5_EN_IPSEC - bool "IPSec XFRM cryptography-offload acceleration" - depends on MLX5_CORE_EN - depends on XFRM_OFFLOAD - depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD - depends on MLX5_IPSEC help Build support for IPsec cryptography-offload acceleration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. 
config MLX5_EN_TLS bool "Mellanox Technologies TLS Connect-X support" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index ad852703a3cb..9e715a1056f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -88,13 +88,13 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ - en_accel/ipsec_stats.o en_accel/ipsec_fs.o + en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ + accel/ipsec_offload.o mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h index 3d13e2c136b1..36e700b596d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h @@ -7,7 +7,7 @@ #include #include -#ifdef CONFIG_MLX5_IPSEC +#ifdef CONFIG_MLX5_EN_IPSEC unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, @@ -42,22 +42,8 @@ struct mlx5_accel_ipsec_ops { }; #else - -static inline void * -mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle) -{ - return NULL; -} - -static inline void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, - void *context) -{ -} - static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {} static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {} -#endif /* CONFIG_MLX5_IPSEC */ +#endif /* CONFIG_MLX5_EN_IPSEC */ #endif /* __MLX5_IPSEC_OFFLOAD_H__ */ diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index 9145e2d37c0e..73e4d50a9f02 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -122,7 +122,7 @@ enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, }; -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_EN_IPSEC u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev); @@ -152,5 +152,5 @@ static inline int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; } -#endif /* CONFIG_MLX5_ACCEL */ +#endif /* CONFIG_MLX5_EN_IPSEC */ #endif /* __MLX5_ACCEL_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9424503eb8d3..5af53c035949 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -778,7 +778,7 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_EN_IPSEC const struct mlx5_accel_ipsec_ops *ipsec_ops; #endif struct mlx5_clock clock; -- cgit From f2b41b32cde8453a0a26875261f0e26809c2805a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 Apr 2022 11:25:51 +0300 Subject: net/mlx5: Remove ipsec_ops function table There is only one IPsec implementation and ipsec_ops is not needed at all in this situation. Together with removal of ipsec_ops, we can drop the entry checks as these functions are called for IPsec devices only. 
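For illustration, a caller that used to dispatch through the ops table now resolves to a direct call; a sketch based on the esp_modify_xfrm conversion in this patch:

    /* Before: indirection through mdev->ipsec_ops, with entry checks */
    const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops;

    if (!ipsec_ops || !ipsec_ops->esp_modify_xfrm)
            return -EOPNOTSUPP;
    return ipsec_ops->esp_modify_xfrm(xfrm, attrs);

    /* After: one direct call, no NULL checks needed */
    return mlx5_ipsec_offload_esp_modify_xfrm(xfrm, attrs);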
Link: https://lore.kernel.org/r/bc8dd1c8a77b65dbf5e2cf92c813ffaca2505c5f.1649232994.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec_fs.h | 5 - .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 118 ++------------------- .../mellanox/mlx5/core/en_accel/ipsec_offload.h | 35 ------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 - include/linux/mlx5/driver.h | 3 - 5 files changed, 8 insertions(+), 157 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h index b3e23aa5beeb..b70953979709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h @@ -9,7 +9,6 @@ #include "ipsec_offload.h" #include "en/fs.h" -#ifdef CONFIG_MLX5_EN_IPSEC void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, @@ -19,8 +18,4 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5e_ipsec_rule *ipsec_rule); -#else -static inline void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { return 0; } -#endif #endif /* __MLX5_IPSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 7ae2d308139e..f0f44bd95cc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -106,8 +106,7 @@ mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, static struct mlx5_accel_esp_xfrm * mlx5_ipsec_offload_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) + const struct mlx5_accel_esp_xfrm_attrs *attrs) { struct mlx5_ipsec_esp_xfrm *mxfrm; int err = 0; @@ -286,11 +285,6 @@ static void mlx5_ipsec_offload_delete_sa_ctx(void *context) mutex_unlock(&mxfrm->lock); } -static int mlx5_ipsec_offload_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - static int mlx5_modify_ipsec_obj(struct mlx5_core_dev *mdev, struct mlx5_ipsec_obj_attrs *attrs, u32 ipsec_id) @@ -378,86 +372,12 @@ change_sw_xfrm_attrs: return err; } -static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = { - .create_hw_context = mlx5_ipsec_offload_create_sa_ctx, - .free_hw_context = mlx5_ipsec_offload_delete_sa_ctx, - .init = mlx5_ipsec_offload_init, - .esp_create_xfrm = mlx5_ipsec_offload_esp_create_xfrm, - .esp_destroy_xfrm = mlx5_ipsec_offload_esp_destroy_xfrm, - .esp_modify_xfrm = mlx5_ipsec_offload_esp_modify_xfrm, -}; - -static const struct mlx5_accel_ipsec_ops * -mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) -{ - if (!mlx5_ipsec_device_caps(mdev)) - return NULL; - - return &ipsec_offload_ops; -} - -void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops; - int err = 0; - - ipsec_ops = mlx5_ipsec_offload_ops(mdev); - if (!ipsec_ops || !ipsec_ops->init) { - mlx5_core_dbg(mdev, "IPsec ops is not supported\n"); - return; - } - - err = ipsec_ops->init(mdev); - if (err) { - mlx5_core_warn_once( - mdev, "Failed to start IPsec device, err = %d\n", err); - return; 
- } - - mdev->ipsec_ops = ipsec_ops; -} - -void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->cleanup) - return; - - ipsec_ops->cleanup(mdev); -} - -unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->counters_count) - return -EOPNOTSUPP; - - return ipsec_ops->counters_count(mdev); -} - -int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->counters_read) - return -EOPNOTSUPP; - - return ipsec_ops->counters_read(mdev, counters, count); -} - void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, struct mlx5_accel_esp_xfrm *xfrm, u32 *sa_handle) { - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; __be32 saddr[4] = {}, daddr[4] = {}; - if (!ipsec_ops || !ipsec_ops->create_hw_context) - return ERR_PTR(-EOPNOTSUPP); - if (!xfrm->attrs.is_ipv6) { saddr[3] = xfrm->attrs.saddr.a4; daddr[3] = xfrm->attrs.daddr.a4; @@ -466,59 +386,37 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr)); } - return ipsec_ops->create_hw_context(mdev, xfrm, saddr, daddr, - xfrm->attrs.spi, - xfrm->attrs.is_ipv6, sa_handle); + return mlx5_ipsec_offload_create_sa_ctx(mdev, xfrm, saddr, daddr, + xfrm->attrs.spi, + xfrm->attrs.is_ipv6, sa_handle); } void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) { - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->free_hw_context) - return; - - ipsec_ops->free_hw_context(context); + mlx5_ipsec_offload_delete_sa_ctx(context); } struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, const struct mlx5_accel_esp_xfrm_attrs *attrs) { - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; struct mlx5_accel_esp_xfrm *xfrm; - if (!ipsec_ops || !ipsec_ops->esp_create_xfrm) - return ERR_PTR(-EOPNOTSUPP); - - xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, 0); + xfrm = mlx5_ipsec_offload_esp_create_xfrm(mdev, attrs); if (IS_ERR(xfrm)) return xfrm; xfrm->mdev = mdev; return xfrm; } -EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm); void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) { - const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->esp_destroy_xfrm) - return; - - ipsec_ops->esp_destroy_xfrm(xfrm); + mlx5_ipsec_offload_esp_destroy_xfrm(xfrm); } -EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm); int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_accel_esp_xfrm_attrs *attrs) { - const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->esp_modify_xfrm) - return -EOPNOTSUPP; - - return ipsec_ops->esp_modify_xfrm(xfrm, attrs); + return mlx5_ipsec_offload_esp_modify_xfrm(xfrm, attrs); } -EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h index 36e700b596d8..7dac104e6ef1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h @@ -7,43 +7,8 @@ 
#include #include -#ifdef CONFIG_MLX5_EN_IPSEC - -unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); -int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count); - void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, struct mlx5_accel_esp_xfrm *xfrm, u32 *sa_handle); void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context); - -void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); -void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); - -struct mlx5_accel_ipsec_ops { - unsigned int (*counters_count)(struct mlx5_core_dev *mdev); - int (*counters_read)(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count); - void *(*create_hw_context)(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, - u32 *sa_handle); - void (*free_hw_context)(void *context); - int (*init)(struct mlx5_core_dev *mdev); - void (*cleanup)(struct mlx5_core_dev *mdev); - struct mlx5_accel_esp_xfrm *(*esp_create_xfrm)( - struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, u32 flags); - int (*esp_modify_xfrm)(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs); - void (*esp_destroy_xfrm)(struct mlx5_accel_esp_xfrm *xfrm); -}; - -#else -static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {} - -static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {} -#endif /* CONFIG_MLX5_EN_IPSEC */ #endif /* __MLX5_IPSEC_OFFLOAD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 032de078723c..d504c8cb8f96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1181,8 +1181,6 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fpga_start; } - mlx5_accel_ipsec_init(dev); - err = mlx5_init_fs(dev); if (err) { mlx5_core_err(dev, "Failed to init flow steering\n"); @@ -1230,7 +1228,6 @@ err_vhca: err_set_hca: mlx5_cleanup_fs(dev); err_fs: - mlx5_accel_ipsec_cleanup(dev); mlx5_fpga_device_stop(dev); err_fpga_start: mlx5_rsc_dump_cleanup(dev); @@ -1256,7 +1253,6 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); - mlx5_accel_ipsec_cleanup(dev); mlx5_fpga_device_stop(dev); mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5af53c035949..ff47d49d8be4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -777,9 +777,6 @@ struct mlx5_core_dev { } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; -#endif -#ifdef CONFIG_MLX5_EN_IPSEC - const struct mlx5_accel_ipsec_ops *ipsec_ops; #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; -- cgit From 2984287c4c19949d7eb451dcad0bd5c54a2a376f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 6 Apr 2022 11:25:52 +0300 Subject: net/mlx5: Remove not-implemented IPsec capabilities Clean a capabilities enum to remove not-implemented bits. 
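The remaining capability bits keep their semantics, only their positions change; a typical capability test still reads as in this sketch (mirroring the validation code in en_accel/ipsec.c):

    if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESN))
            return -EINVAL; /* device cannot offload ESN xfrm states */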
Link: https://lore.kernel.org/r/1044bb7b779107ff38e48e3f6553421104f3f819.1649232994.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 4 +--- include/linux/mlx5/accel.h | 11 ++++------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index f0f44bd95cc9..37c9880719cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -51,10 +51,8 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) caps |= MLX5_ACCEL_IPSEC_CAP_ESP; - if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) { + if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) caps |= MLX5_ACCEL_IPSEC_CAP_ESN; - caps |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; - } /* We can accommodate up to 2^24 different IPsec objects * because we use up to 24 bit in flow table metadata diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index 73e4d50a9f02..0f2596297f6a 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -113,13 +113,10 @@ struct mlx5_accel_esp_xfrm { enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, - MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, - MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, - MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, - MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, - MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, - MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6, - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, + MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 1, + MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 2, + MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 3, + MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 4, }; #ifdef CONFIG_MLX5_EN_IPSEC -- cgit From a8b6d6708bb682108d8c899bc0cb7873240daf8a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 7 Feb 2022 15:38:28 +0100 Subject: iio: core: Enhance the kernel doc of modes and currentmodes iio_dev entries Let's provide more details about these two variables because their understanding may not be straightforward for someone not used to the IIO subsystem internal logic. The different modes will soon also be documented in more detail for the same reason. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220207143840.707510-2-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index faf00f2c0be6..f191b80466cd 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -488,8 +488,15 @@ struct iio_buffer_setup_ops { /** * struct iio_dev - industrial I/O device - * @modes: [DRIVER] operating modes supported by device - * @currentmode: [INTERN] current operating mode + * @modes: [DRIVER] bitmask listing all the operating modes + * supported by the IIO device. This list should be + * initialized before registering the IIO device. It can + * also be filed up by the IIO core, as a result of + * enabling particular features in the driver + * (see iio_triggered_event_setup()).
+ * @currentmode: [INTERN] operating mode currently in use, may be + * eventually checked by device drivers but should be + * considered read-only as this is a core internal bit * @dev: [DRIVER] device structure, should be assigned a parent * and owner * @buffer: [DRIVER] any buffer present -- cgit From 474010127e2505fc463236470908e1ff5ddb3578 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 7 Feb 2022 15:38:33 +0100 Subject: iio: st_sensors: Add a local lock for protecting odr Right now the (framework) mlock lock is (ab)used for multiple purposes: 1- protecting concurrent accesses to the odr local cache 2- avoiding changes to the sampling frequency whilst the buffer is running Let's start by handling situation #1 with a local lock. Suggested-by: Jonathan Cameron Cc: Denis Ciocca Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220207143840.707510-7-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/common/st_sensors/st_sensors_core.c | 24 ++++++++++++++++++------ include/linux/iio/common/st_sensors.h | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 829ffe54d8a1..0cc66c82d58a 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -71,16 +71,18 @@ st_sensors_match_odr_error: int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) { - int err; + int err = 0; struct st_sensor_odr_avl odr_out = {0, 0}; struct st_sensor_data *sdata = iio_priv(indio_dev); + mutex_lock(&sdata->odr_lock); + if (!sdata->sensor_settings->odr.mask) - return 0; + goto unlock_mutex; err = st_sensors_match_odr(sdata->sensor_settings, odr, &odr_out); if (err < 0) - goto st_sensors_match_odr_error; + goto unlock_mutex; if ((sdata->sensor_settings->odr.addr == sdata->sensor_settings->pw.addr) && @@ -103,7 +105,9 @@ int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) if (err >= 0) sdata->odr = odr_out.hz; -st_sensors_match_odr_error: +unlock_mutex: + mutex_unlock(&sdata->odr_lock); + return err; } EXPORT_SYMBOL_NS(st_sensors_set_odr, IIO_ST_SENSORS); @@ -361,6 +365,8 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, struct st_sensors_platform_data *of_pdata; int err = 0; + mutex_init(&sdata->odr_lock); + /* If OF/DT pdata exists, it will take precedence of anything else */ of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata); if (IS_ERR(of_pdata)) @@ -554,18 +560,24 @@ int st_sensors_read_info_raw(struct iio_dev *indio_dev, err = -EBUSY; goto out; } else { + mutex_lock(&sdata->odr_lock); err = st_sensors_set_enable(indio_dev, true); - if (err < 0) + if (err < 0) { + mutex_unlock(&sdata->odr_lock); goto out; + } msleep((sdata->sensor_settings->bootime * 1000) / sdata->odr); err = st_sensors_read_axis_data(indio_dev, ch, val); - if (err < 0) + if (err < 0) { + mutex_unlock(&sdata->odr_lock); goto out; + } *val = *val >> ch->scan_type.shift; err = st_sensors_set_enable(indio_dev, false); + mutex_unlock(&sdata->odr_lock); } out: mutex_unlock(&indio_dev->mlock); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 22f67845cdd3..db4a1b260348 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -237,6 +237,7 @@ struct st_sensor_settings { * @hw_irq_trigger: if we're using the hardware interrupt on the sensor.
* @hw_timestamp: Latest timestamp from the interrupt handler, when in use. * @buffer_data: Data used by buffer part. + * @odr_lock: Local lock for preventing concurrent ODR accesses/changes */ struct st_sensor_data { struct iio_trigger *trig; @@ -261,6 +262,8 @@ struct st_sensor_data { s64 hw_timestamp; char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; + + struct mutex odr_lock; }; #ifdef CONFIG_IIO_BUFFER -- cgit From 2f53b4adfede66f1bc1c8bb7efd7ced2bad1191a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 7 Feb 2022 15:38:36 +0100 Subject: iio: Un-inline iio_buffer_enabled() As we are going to hide the currentmode inside the opaque structure, this helper would soon need to call a non-inline function, which would simply drop the benefit of having the helper defined inline in a header. One alternative is to move this helper into the core, as there is no longer any benefit in defining it inline in a header. We will pay the minor cost either way. Let's follow the iio_device_id() helper, which also refers to the opaque structure and is defined in the core. Suggested-by: Jonathan Cameron Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220207143840.707510-10-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 12 ++++++++++++ include/linux/iio/iio.h | 11 +---------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 2f48e9a97274..c91930244915 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -184,6 +184,18 @@ int iio_device_id(struct iio_dev *indio_dev) } EXPORT_SYMBOL_GPL(iio_device_id); +/** + * iio_buffer_enabled() - helper function to test if the buffer is enabled + * @indio_dev: IIO device structure for device + */ +bool iio_buffer_enabled(struct iio_dev *indio_dev) +{ + return indio_dev->currentmode + & (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | + INDIO_BUFFER_SOFTWARE); +} +EXPORT_SYMBOL_GPL(iio_buffer_enabled); + /** * iio_sysfs_match_string_with_gaps - matches given string in an array with gaps * @array: array of strings diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f191b80466cd..faabb852128a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -550,6 +550,7 @@ struct iio_dev { }; int iio_device_id(struct iio_dev *indio_dev); +bool iio_buffer_enabled(struct iio_dev *indio_dev); const struct iio_chan_spec *iio_find_channel_from_si(struct iio_dev *indio_dev, int si); @@ -679,16 +680,6 @@ struct iio_dev *devm_iio_device_alloc(struct device *parent, int sizeof_priv); __printf(2, 3) struct iio_trigger *devm_iio_trigger_alloc(struct device *parent, const char *fmt, ...); -/** - * iio_buffer_enabled() - helper function to test if the buffer is enabled - * @indio_dev: IIO device structure for device - **/ -static inline bool iio_buffer_enabled(struct iio_dev *indio_dev) -{ - return indio_dev->currentmode - & (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | - INDIO_BUFFER_SOFTWARE); -} /** * iio_get_debugfs_dentry() - helper function to get the debugfs_dentry -- cgit From 8c576f87ad7eb639b8bd4472a9bb830e0696dda5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 7 Feb 2022 15:38:37 +0100 Subject: iio: core: Hide read accesses to iio_dev->currentmode In order to later move this variable within the opaque structure, let's create a helper for accessing it in read-only mode.
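A converted driver then reads the mode through the helper instead of dereferencing the field; a minimal sketch of the intended pattern (see the bmc150 and at91 hunks below):

    if (iio_device_get_current_mode(indio_dev) == INDIO_BUFFER_TRIGGERED)
            return 0; /* e.g. nothing more to do in triggered mode */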
This helper will be exposed to device drivers and kept accessible for the few that could need it. The write access to this variable however should be fully reserved to the core so in a second step we will hide this variable into the opaque structure. Cc: Eugen Hristev Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220207143840.707510-11-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmc150-accel-core.c | 4 ++-- drivers/iio/adc/at91-sama5d2_adc.c | 4 ++-- drivers/iio/industrialio-core.c | 11 +++++++++++ include/linux/iio/iio.h | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 7516d7dde1af..57e8a8350cd1 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -1525,7 +1525,7 @@ static int bmc150_accel_buffer_postenable(struct iio_dev *indio_dev) struct bmc150_accel_data *data = iio_priv(indio_dev); int ret = 0; - if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) + if (iio_device_get_current_mode(indio_dev) == INDIO_BUFFER_TRIGGERED) return 0; mutex_lock(&data->mutex); @@ -1557,7 +1557,7 @@ static int bmc150_accel_buffer_predisable(struct iio_dev *indio_dev) { struct bmc150_accel_data *data = iio_priv(indio_dev); - if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) + if (iio_device_get_current_mode(indio_dev) == INDIO_BUFFER_TRIGGERED) return 0; mutex_lock(&data->mutex); diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 854b1f81d807..b764823ce57e 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1117,7 +1117,7 @@ static int at91_adc_buffer_prepare(struct iio_dev *indio_dev) return at91_adc_configure_touch(st, true); /* if we are not in triggered mode, we cannot enable the buffer. 
*/ - if (!(indio_dev->currentmode & INDIO_ALL_TRIGGERED_MODES)) + if (!(iio_device_get_current_mode(indio_dev) & INDIO_ALL_TRIGGERED_MODES)) return -EINVAL; /* we continue with the triggered buffer */ @@ -1159,7 +1159,7 @@ static int at91_adc_buffer_postdisable(struct iio_dev *indio_dev) return at91_adc_configure_touch(st, false); /* if we are not in triggered mode, nothing to do here */ - if (!(indio_dev->currentmode & INDIO_ALL_TRIGGERED_MODES)) + if (!(iio_device_get_current_mode(indio_dev) & INDIO_ALL_TRIGGERED_MODES)) return -EINVAL; /* diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index c91930244915..fa1e00bee787 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -2070,6 +2070,17 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev) } EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); +/** + * iio_device_get_current_mode() - helper function providing read-only access to + * the @currentmode variable + * @indio_dev: IIO device structure for device + */ +int iio_device_get_current_mode(struct iio_dev *indio_dev) +{ + return indio_dev->currentmode; +} +EXPORT_SYMBOL_GPL(iio_device_get_current_mode); + subsys_initcall(iio_init); module_exit(iio_exit); diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index faabb852128a..31098ffa7dc9 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -550,6 +550,7 @@ struct iio_dev { }; int iio_device_id(struct iio_dev *indio_dev); +int iio_device_get_current_mode(struct iio_dev *indio_dev); bool iio_buffer_enabled(struct iio_dev *indio_dev); const struct iio_chan_spec -- cgit From 51570c9d4b3a678f77a50ac139f67290e946ec86 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 7 Feb 2022 15:38:38 +0100 Subject: iio: core: Move the currentmode entry to the opaque structure This entry should, under no situation, be modified by device drivers. Now that we have limited its read access to device drivers really needing it and did so through a dedicated helper, we can easily move this variable to the opaque structure in order to prevent any further modification from non-authorized code (out of the core, basically). 
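Inside the core, writes now go through the opaque container; a minimal sketch matching the buffer enable/disable paths below:

    struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);

    iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; /* core-internal write */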
Signed-off-by: Miquel Raynal Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20220207143840.707510-12-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 12 ++++++------ drivers/iio/industrialio-core.c | 10 +++++++--- drivers/iio/industrialio-trigger.c | 2 +- include/linux/iio/iio-opaque.h | 4 ++++ include/linux/iio/iio.h | 4 ---- 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 4706d0e3c954..615662d75e68 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -1065,7 +1065,7 @@ static int iio_enable_buffers(struct iio_dev *indio_dev, indio_dev->active_scan_mask = config->scan_mask; indio_dev->scan_timestamp = config->scan_timestamp; indio_dev->scan_bytes = config->scan_bytes; - indio_dev->currentmode = config->mode; + iio_dev_opaque->currentmode = config->mode; iio_update_demux(indio_dev); @@ -1103,7 +1103,7 @@ static int iio_enable_buffers(struct iio_dev *indio_dev, } } - if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) { + if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { ret = iio_trigger_attach_poll_func(indio_dev->trig, indio_dev->pollfunc); if (ret) @@ -1122,7 +1122,7 @@ static int iio_enable_buffers(struct iio_dev *indio_dev, return 0; err_detach_pollfunc: - if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) { + if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { iio_trigger_detach_poll_func(indio_dev->trig, indio_dev->pollfunc); } @@ -1135,7 +1135,7 @@ err_run_postdisable: if (indio_dev->setup_ops->postdisable) indio_dev->setup_ops->postdisable(indio_dev); err_undo_config: - indio_dev->currentmode = INDIO_DIRECT_MODE; + iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; indio_dev->active_scan_mask = NULL; return ret; @@ -1165,7 +1165,7 @@ static int iio_disable_buffers(struct iio_dev *indio_dev) ret = ret2; } - if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) { + if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { iio_trigger_detach_poll_func(indio_dev->trig, indio_dev->pollfunc); } @@ -1184,7 +1184,7 @@ static int iio_disable_buffers(struct iio_dev *indio_dev) iio_free_scan_mask(indio_dev, indio_dev->active_scan_mask); indio_dev->active_scan_mask = NULL; - indio_dev->currentmode = INDIO_DIRECT_MODE; + iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; return ret; } diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index fa1e00bee787..a8c4e85c2bb5 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -190,7 +190,9 @@ EXPORT_SYMBOL_GPL(iio_device_id); */ bool iio_buffer_enabled(struct iio_dev *indio_dev) { - return indio_dev->currentmode + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); + + return iio_dev_opaque->currentmode & (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | INDIO_BUFFER_SOFTWARE); } @@ -2072,12 +2074,14 @@ EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); /** * iio_device_get_current_mode() - helper function providing read-only access to - * the @currentmode variable + * the opaque @currentmode variable * @indio_dev: IIO device structure for device */ int iio_device_get_current_mode(struct iio_dev *indio_dev) { - return indio_dev->currentmode; + struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); + + return iio_dev_opaque->currentmode; } EXPORT_SYMBOL_GPL(iio_device_get_current_mode); diff --git 
a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index f504ed351b3e..585b6cef8fcc 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -444,7 +444,7 @@ static ssize_t iio_trigger_write_current(struct device *dev, int ret; mutex_lock(&indio_dev->mlock); - if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) { + if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { mutex_unlock(&indio_dev->mlock); return -EBUSY; } diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 2be12b7b5dc5..6b3586b3f952 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -7,6 +7,9 @@ * struct iio_dev_opaque - industrial I/O device opaque information * @indio_dev: public industrial I/O device information * @id: used to identify device internally + * @currentmode: operating mode currently in use, may be eventually + * checked by device drivers but should be considered + * read-only as this is a core internal bit + * @driver_module: used to make it harder to undercut users * @info_exist_lock: lock to prevent use during removal * @trig_readonly: mark the current trigger immutable @@ -36,6 +39,7 @@ */ struct iio_dev_opaque { struct iio_dev indio_dev; + int currentmode; int id; struct module *driver_module; struct mutex info_exist_lock; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 31098ffa7dc9..85cb924debd9 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -494,9 +494,6 @@ struct iio_buffer_setup_ops { * also be filed up by the IIO core, as a result of * enabling particular features in the driver * (see iio_triggered_event_setup()). - * @currentmode: [INTERN] operating mode currently in use, may be - * eventually checked by device drivers but should be - * considered read-only as this is a core internal bit * @dev: [DRIVER] device structure, should be assigned a parent * and owner * @buffer: [DRIVER] any buffer present @@ -523,7 +520,6 @@ struct iio_buffer_setup_ops { */ struct iio_dev { int modes; - int currentmode; struct device dev; struct iio_buffer *buffer; -- cgit From f67c6c73cb07a4778425a2064640333ef7bfa42b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 7 Feb 2022 15:38:39 +0100 Subject: iio: core: Simplify the registration of kfifo buffers Among all the users of kfifo buffers, none uses the INDIO_BUFFER_HARDWARE mode. So let's take this as a general rule and simplify the internals a little - above all the documentation - by eliminating unused special cases. Use the INDIO_BUFFER_SOFTWARE mode by default with kfifo buffers, which will basically mimic what all the "non direct" modes do.
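Callers simply lose the mode argument; a before/after sketch of the mechanical conversion applied below:

    /* Before */
    ret = devm_iio_kfifo_buffer_setup(dev, indio_dev,
                                      INDIO_BUFFER_SOFTWARE,
                                      &setup_ops);

    /* After: INDIO_BUFFER_SOFTWARE is now implied */
    ret = devm_iio_kfifo_buffer_setup(dev, indio_dev, &setup_ops);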
Cc: Benson Leung Cc: Guenter Roeck Cc: Jyoti Bhayana Cc: Jean-Baptiste Maneyrol Cc: Lorenzo Bianconi Cc: Michael Hennerich Cc: Greg Kroah-Hartman Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20220207143840.707510-13-miquel.raynal@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 1 - drivers/iio/accel/fxls8962af-core.c | 1 - drivers/iio/accel/sca3000.c | 1 - drivers/iio/accel/ssp_accel_sensor.c | 1 - drivers/iio/adc/ina2xx-adc.c | 1 - drivers/iio/adc/ti_am335x_adc.c | 4 +--- drivers/iio/buffer/kfifo_buf.c | 10 +--------- drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 3 +-- drivers/iio/common/scmi_sensors/scmi_iio.c | 1 - drivers/iio/gyro/ssp_gyro_sensor.c | 1 - drivers/iio/health/max30100.c | 1 - drivers/iio/health/max30102.c | 1 - drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c | 1 - drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c | 1 - drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 1 - drivers/iio/light/apds9960.c | 1 - drivers/staging/iio/impedance-analyzer/ad5933.c | 1 - include/linux/iio/kfifo_buf.h | 5 ++--- 18 files changed, 5 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 62960134ea19..0289ed8cf2c6 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -1567,7 +1567,6 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops, return ret; ret = devm_iio_kfifo_buffer_setup_ext(st->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &adxl367_buffer_ops, adxl367_fifo_attributes); if (ret) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index a9d2f10d5d45..8874d6d61725 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -1217,7 +1217,6 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq) return ret; ret = devm_iio_kfifo_buffer_setup(dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &fxls8962af_buffer_ops); if (ret) return ret; diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c index 83c81072511e..29a68a7d34cd 100644 --- a/drivers/iio/accel/sca3000.c +++ b/drivers/iio/accel/sca3000.c @@ -1474,7 +1474,6 @@ static int sca3000_probe(struct spi_device *spi) indio_dev->modes = INDIO_DIRECT_MODE; ret = devm_iio_kfifo_buffer_setup(&spi->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &sca3000_ring_setup_ops); if (ret) return ret; diff --git a/drivers/iio/accel/ssp_accel_sensor.c b/drivers/iio/accel/ssp_accel_sensor.c index a1164b439f41..7ca9d0d543e0 100644 --- a/drivers/iio/accel/ssp_accel_sensor.c +++ b/drivers/iio/accel/ssp_accel_sensor.c @@ -113,7 +113,6 @@ static int ssp_accel_probe(struct platform_device *pdev) indio_dev->available_scan_masks = ssp_accel_scan_mask; ret = devm_iio_kfifo_buffer_setup(&pdev->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &ssp_accel_buffer_ops); if (ret) return ret; diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 8d902a32a0fd..08a2c547f0b3 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -1027,7 +1027,6 @@ static int ina2xx_probe(struct i2c_client *client, indio_dev->name = id->name; ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &ina2xx_setup_ops); if (ret) return ret; diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index dbdc1ef48566..567d43a30955 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -376,9 +376,7 @@ 
static int tiadc_iio_buffered_hardware_setup(struct device *dev, { int ret; - ret = devm_iio_kfifo_buffer_setup(dev, indio_dev, - INDIO_BUFFER_SOFTWARE, - setup_ops); + ret = devm_iio_kfifo_buffer_setup(dev, indio_dev, setup_ops); if (ret) return ret; diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c index 416d35a61ae2..35d8b4077376 100644 --- a/drivers/iio/buffer/kfifo_buf.c +++ b/drivers/iio/buffer/kfifo_buf.c @@ -259,8 +259,6 @@ static struct iio_buffer *devm_iio_kfifo_allocate(struct device *dev) * devm_iio_kfifo_buffer_setup_ext - Allocate a kfifo buffer & attach it to an IIO device * @dev: Device object to which to attach the life-time of this kfifo buffer * @indio_dev: The device the buffer should be attached to - * @mode_flags: The mode flags for this buffer (INDIO_BUFFER_SOFTWARE and/or - * INDIO_BUFFER_TRIGGERED). * @setup_ops: The setup_ops required to configure the HW part of the buffer (optional) * @buffer_attrs: Extra sysfs buffer attributes for this IIO buffer * @@ -271,22 +269,16 @@ static struct iio_buffer *devm_iio_kfifo_allocate(struct device *dev) */ int devm_iio_kfifo_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, - int mode_flags, const struct iio_buffer_setup_ops *setup_ops, const struct attribute **buffer_attrs) { struct iio_buffer *buffer; - if (!mode_flags) - return -EINVAL; - buffer = devm_iio_kfifo_allocate(dev); if (!buffer) return -ENOMEM; - mode_flags &= kfifo_access_funcs.modes; - - indio_dev->modes |= mode_flags; + indio_dev->modes |= INDIO_BUFFER_SOFTWARE; indio_dev->setup_ops = setup_ops; buffer->attrs = buffer_attrs; diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index b2725c6adc7f..a4cf1d9a8a49 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -333,8 +333,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev, * We can not use trigger here, as events are generated * as soon as sample_frequency is set. 
*/ - ret = devm_iio_kfifo_buffer_setup_ext(dev, indio_dev, - INDIO_BUFFER_SOFTWARE, NULL, + ret = devm_iio_kfifo_buffer_setup_ext(dev, indio_dev, NULL, cros_ec_sensor_fifo_attributes); if (ret) return ret; diff --git a/drivers/iio/common/scmi_sensors/scmi_iio.c b/drivers/iio/common/scmi_sensors/scmi_iio.c index d538bf3ab1ef..793d628db55f 100644 --- a/drivers/iio/common/scmi_sensors/scmi_iio.c +++ b/drivers/iio/common/scmi_sensors/scmi_iio.c @@ -686,7 +686,6 @@ static int scmi_iio_dev_probe(struct scmi_device *sdev) err = devm_iio_kfifo_buffer_setup(&scmi_iio_dev->dev, scmi_iio_dev, - INDIO_BUFFER_SOFTWARE, &scmi_iio_buffer_ops); if (err < 0) { dev_err(dev, diff --git a/drivers/iio/gyro/ssp_gyro_sensor.c b/drivers/iio/gyro/ssp_gyro_sensor.c index 5fd1bf9902ea..d332474bc484 100644 --- a/drivers/iio/gyro/ssp_gyro_sensor.c +++ b/drivers/iio/gyro/ssp_gyro_sensor.c @@ -113,7 +113,6 @@ static int ssp_gyro_probe(struct platform_device *pdev) indio_dev->available_scan_masks = ssp_gyro_scan_mask; ret = devm_iio_kfifo_buffer_setup(&pdev->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &ssp_gyro_buffer_ops); if (ret) return ret; diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c index 36ba7611d9ce..ad5717965223 100644 --- a/drivers/iio/health/max30100.c +++ b/drivers/iio/health/max30100.c @@ -433,7 +433,6 @@ static int max30100_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &max30100_buffer_setup_ops); if (ret) return ret; diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c index 2292876c55e2..abbcef563807 100644 --- a/drivers/iio/health/max30102.c +++ b/drivers/iio/health/max30102.c @@ -542,7 +542,6 @@ static int max30102_probe(struct i2c_client *client, } ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &max30102_buffer_setup_ops); if (ret) return ret; diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c index 383cc3250342..c3f433ad3af6 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c @@ -731,7 +731,6 @@ struct iio_dev *inv_icm42600_accel_init(struct inv_icm42600_state *st) indio_dev->available_scan_masks = inv_icm42600_accel_scan_masks; ret = devm_iio_kfifo_buffer_setup(dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &inv_icm42600_buffer_ops); if (ret) return ERR_PTR(ret); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c index cec1dd0e0464..9d94a8518e3c 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c @@ -743,7 +743,6 @@ struct iio_dev *inv_icm42600_gyro_init(struct inv_icm42600_state *st) indio_dev->setup_ops = &inv_icm42600_buffer_ops; ret = devm_iio_kfifo_buffer_setup(dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &inv_icm42600_buffer_ops); if (ret) return ERR_PTR(ret); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 16730a780964..f80c62849d30 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -746,7 +746,6 @@ int st_lsm6dsx_fifo_setup(struct st_lsm6dsx_hw *hw) continue; ret = devm_iio_kfifo_buffer_setup(hw->dev, hw->iio_devs[i], - INDIO_BUFFER_SOFTWARE, &st_lsm6dsx_buffer_ops); if (ret) return ret; diff --git a/drivers/iio/light/apds9960.c 
b/drivers/iio/light/apds9960.c index 4141c0fa7bc4..09b831f9f40b 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -1003,7 +1003,6 @@ static int apds9960_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &apds9960_buffer_setup_ops); if (ret) return ret; diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 793918e1c45f..f177b20f0f2d 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -749,7 +749,6 @@ static int ad5933_probe(struct i2c_client *client, indio_dev->num_channels = ARRAY_SIZE(ad5933_channels); ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev, - INDIO_BUFFER_SOFTWARE, &ad5933_ring_setup_ops); if (ret) return ret; diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h index ccd2ceae7b25..8a83fb58232d 100644 --- a/include/linux/iio/kfifo_buf.h +++ b/include/linux/iio/kfifo_buf.h @@ -12,11 +12,10 @@ void iio_kfifo_free(struct iio_buffer *r); int devm_iio_kfifo_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, - int mode_flags, const struct iio_buffer_setup_ops *setup_ops, const struct attribute **buffer_attrs); -#define devm_iio_kfifo_buffer_setup(dev, indio_dev, mode_flags, setup_ops) \ - devm_iio_kfifo_buffer_setup_ext((dev), (indio_dev), (mode_flags), (setup_ops), NULL) +#define devm_iio_kfifo_buffer_setup(dev, indio_dev, setup_ops) \ + devm_iio_kfifo_buffer_setup_ext((dev), (indio_dev), (setup_ops), NULL) #endif -- cgit From 9f8ed577c28813410614b418bad42285840c1a00 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Apr 2022 14:20:50 +0800 Subject: net: skb: rename SKB_DROP_REASON_PTYPE_ABSENT As David Ahern suggested, the reasons for skb drops should be more general and not be code based. Therefore, rename SKB_DROP_REASON_PTYPE_ABSENT to SKB_DROP_REASON_UNHANDLED_PROTO, which is used for the cases of no L3 protocol handler, no L4 protocol handler, version extensions, etc. From previous discussion, now we have the aim to make these reasons more abstract and users based, avoiding code based. Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 +++----- include/trace/events/skb.h | 2 +- net/core/dev.c | 8 +++----- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2394441fa3dd..173bc35a10a3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -408,11 +408,9 @@ enum skb_drop_reason { */ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_PTYPE_ABSENT, /* not packet_type found to handle - * the skb. 
For an etner packet, - * this means that L3 protocol is - * not supported - */ + SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented + * or not supported + */ SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation * error */ diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index e1670e1e4934..85abd7cbd221 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -50,7 +50,7 @@ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ EM(SKB_DROP_REASON_XDP, XDP) \ EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ + EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ diff --git a/net/core/dev.c b/net/core/dev.c index e027410e861b..ba853e878007 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5375,13 +5375,11 @@ check_vlan_id: *ppt_prev = pt_prev; } else { drop: - if (!deliver_exact) { + if (!deliver_exact) dev_core_stats_rx_dropped_inc(skb->dev); - kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT); - } else { + else dev_core_stats_rx_nohandler_inc(skb->dev); - kfree_skb(skb); - } + kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) */ -- cgit From b384c95a861eebf47e88695cf6a29f34e0b10b0f Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 7 Apr 2022 14:20:52 +0800 Subject: net: icmp: add skb drop reasons to icmp protocol Replace kfree_skb() used in icmp_rcv() and icmpv6_rcv() with kfree_skb_reason(). In order to get the reasons of the skb drops after icmp message handle, we change the return type of 'handler()' in 'struct icmp_control' from 'bool' to 'enum skb_drop_reason'. This may change its original intention, as 'false' means failure, but 'SKB_NOT_DROPPED_YET' means success now. Therefore, all 'handler' and the call of them need to be handled. Following 'handler' functions are involved: icmp_unreach() icmp_redirect() icmp_echo() icmp_timestamp() icmp_discard() And following new drop reasons are added: SKB_DROP_REASON_ICMP_CSUM SKB_DROP_REASON_INVALID_PROTO The reason 'INVALID_PROTO' is introduced for the case that the packet doesn't follow rfc 1122 and is dropped. This is not a common case, and I believe we can locate the problem from the data in the packet. For now, this 'INVALID_PROTO' is used for the icmp broadcasts with wrong types. Maybe there should be a document file for these reasons. For example, list all the case that causes the 'UNHANDLED_PROTO' and 'INVALID_PROTO' drop reason. Therefore, users can locate their problems according to the document. Reviewed-by: Hao Peng Reviewed-by: Jiang Biao Signed-off-by: Menglong Dong Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 5 ++++ include/net/ping.h | 2 +- include/trace/events/skb.h | 2 ++ net/ipv4/icmp.c | 75 +++++++++++++++++++++++++++------------------- net/ipv4/ping.c | 14 +++++---- net/ipv6/icmp.c | 24 +++++++++------ 6 files changed, 75 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 173bc35a10a3..9b81ba497665 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -442,6 +442,11 @@ enum skb_drop_reason { SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented * at tun/tap, e.g., check_filter() */ + SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ + SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC + * 2211, such as a broadcasts + * ICMP_TIMESTAMP + */ SKB_DROP_REASON_MAX, }; diff --git a/include/net/ping.h b/include/net/ping.h index 2fe78874318c..b68fbfdb606f 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -76,7 +76,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -bool ping_rcv(struct sk_buff *skb); +enum skb_drop_reason ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 85abd7cbd221..42647114fffe 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -61,6 +61,8 @@ EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ + EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ + EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72a375c7f417..236debd9fded 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert); */ struct icmp_control { - bool (*handler)(struct sk_buff *skb); + enum skb_drop_reason (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; @@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto) * ICMP_PARAMETERPROB. */ -static bool icmp_unreach(struct sk_buff *skb) +static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; const struct iphdr *iph; struct icmphdr *icmph; struct net *net; @@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb) icmph = icmp_hdr(skb); iph = (const struct iphdr *)skb->data; - if (iph->ihl < 5) /* Mangled header, drop. */ + if (iph->ihl < 5) { /* Mangled header, drop. */ + reason = SKB_DROP_REASON_IP_INHDR; goto out_err; + } switch (icmph->type) { case ICMP_DEST_UNREACH: @@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb) icmp_socket_deliver(skb, info); out: - return true; + return reason; out_err: __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return false; + return reason ?: SKB_DROP_REASON_NOT_SPECIFIED; } @@ -952,20 +955,20 @@ out_err: * Handle ICMP_REDIRECT. 
*/ -static bool icmp_redirect(struct sk_buff *skb) +static enum skb_drop_reason icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); - return false; + return SKB_DROP_REASON_PKT_TOO_SMALL; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) { /* there aught to be a stat */ - return false; + return SKB_DROP_REASON_NOMEM; } icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway)); - return true; + return SKB_NOT_DROPPED_YET; } /* @@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb) * See also WRT handling of options once they are done and working. */ -static bool icmp_echo(struct sk_buff *skb) +static enum skb_drop_reason icmp_echo(struct sk_buff *skb) { struct icmp_bxm icmp_param; struct net *net; @@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ if (net->ipv4.sysctl_icmp_echo_ignore_all) - return true; + return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); icmp_param.skb = skb; @@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb) if (icmp_param.data.icmph.type == ICMP_ECHO) icmp_param.data.icmph.type = ICMP_ECHOREPLY; else if (!icmp_build_probe(skb, &icmp_param.data.icmph)) - return true; + return SKB_NOT_DROPPED_YET; icmp_reply(&icmp_param, skb); - return true; + return SKB_NOT_DROPPED_YET; } /* Helper for icmp_echo and icmpv6_echo_reply. @@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe); * MUST be accurate to a few minutes. * MUST be updated at least at 15Hz. */ -static bool icmp_timestamp(struct sk_buff *skb) +static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) { struct icmp_bxm icmp_param; /* @@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb) icmp_param.data_len = 0; icmp_param.head_len = sizeof(struct icmphdr) + 12; icmp_reply(&icmp_param, skb); - return true; + return SKB_NOT_DROPPED_YET; out_err: __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); - return false; + return SKB_DROP_REASON_PKT_TOO_SMALL; } -static bool icmp_discard(struct sk_buff *skb) +static enum skb_drop_reason icmp_discard(struct sk_buff *skb) { /* pretend it was a success */ - return true; + return SKB_NOT_DROPPED_YET; } /* @@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb) */ int icmp_rcv(struct sk_buff *skb) { - struct icmphdr *icmph; + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); - bool success; + struct icmphdr *icmph; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & - XFRM_STATE_ICMP)) + XFRM_STATE_ICMP)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr))) goto drop; @@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb) nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*icmph)); - if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, + skb)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } skb_set_network_header(skb, nh); } @@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb) /* We can't use icmp_pointers[].handler() because it is an array of * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42. 
*/ - success = icmp_echo(skb); - goto success_check; + reason = icmp_echo(skb); + goto reason_check; } if (icmph->type == ICMP_EXT_ECHOREPLY) { - success = ping_rcv(skb); - goto success_check; + reason = ping_rcv(skb); + goto reason_check; } /* @@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb) * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */ - if (icmph->type > NR_ICMP_TYPES) + if (icmph->type > NR_ICMP_TYPES) { + reason = SKB_DROP_REASON_UNHANDLED_PROTO; goto error; + } /* * Parse the ICMP message @@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb) if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP && icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) { + reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } } - success = icmp_pointers[icmph->type].handler(skb); -success_check: - if (success) { + reason = icmp_pointers[icmph->type].handler(skb); +reason_check: + if (!reason) { consume_skb(skb); return NET_RX_SUCCESS; } drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; csum_error: + reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9878c7696acf..2897fcf71211 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -961,12 +961,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); * All we need to do is get the socket. */ -bool ping_rcv(struct sk_buff *skb) +enum skb_drop_reason ping_rcv(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET; struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); - bool rc = false; /* We assume the packet has already been checked by icmp_rcv */ @@ -981,15 +981,17 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); - if (skb2 && !ping_queue_rcv_skb(sk, skb2)) - rc = true; + if (skb2) + reason = __ping_queue_rcv_skb(sk, skb2); + else + reason = SKB_DROP_REASON_NOMEM; sock_put(sk); } - if (!rc) + if (reason) pr_debug("no socket, dropping\n"); - return rc; + return reason; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e6b978ea0e87..01c8003c9fc9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -864,21 +864,23 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net *net = dev_net(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; - bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & - XFRM_STATE_ICMP)) + XFRM_STATE_ICMP)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; + } if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; @@ -886,8 +888,11 @@ static int icmpv6_rcv(struct sk_buff *skb) nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*hdr)); - if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, + skb)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; + } 
skb_set_network_header(skb, nh); } @@ -924,11 +929,11 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - success = ping_rcv(skb); + reason = ping_rcv(skb); break; case ICMPV6_EXT_ECHO_REPLY: - success = ping_rcv(skb); + reason = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -994,19 +999,20 @@ static int icmpv6_rcv(struct sk_buff *skb) /* until the v6 path can be better sorted assume failure and * preserve the status quo behaviour for the rest of the paths to here */ - if (success) - consume_skb(skb); + if (reason) + kfree_skb_reason(skb, reason); else - kfree_skb(skb); + consume_skb(skb); return 0; csum_error: + reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } -- cgit From 52126d4c03798cc55aa927fea4c776ab26b5a5f0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 6 Feb 2022 10:47:45 +0100 Subject: dmaengine: Remove a useless mutex According to lib/idr.c, The IDA handles its own locking. It is safe to call any of the IDA functions without synchronisation in your code. so the 'chan_mutex' mutex can just be removed. It is here only to protect some ida_alloc()/ida_free() calls. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/7180452c1d77b039e27b6f9418e0e7d9dd33c431.1644140845.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 7 ------- include/linux/dmaengine.h | 1 - 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 2cfa8458b51b..e80feeea0e01 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1053,9 +1053,7 @@ static int __dma_async_device_channel_register(struct dma_device *device, * When the chan_id is a negative value, we are dynamically adding * the channel. Otherwise we are static enumerating. */ - mutex_lock(&device->chan_mutex); chan->chan_id = ida_alloc(&device->chan_ida, GFP_KERNEL); - mutex_unlock(&device->chan_mutex); if (chan->chan_id < 0) { pr_err("%s: unable to alloc ida for chan: %d\n", __func__, chan->chan_id); @@ -1078,9 +1076,7 @@ static int __dma_async_device_channel_register(struct dma_device *device, return 0; err_out_ida: - mutex_lock(&device->chan_mutex); ida_free(&device->chan_ida, chan->chan_id); - mutex_unlock(&device->chan_mutex); err_free_dev: kfree(chan->dev); err_free_local: @@ -1113,9 +1109,7 @@ static void __dma_async_device_channel_unregister(struct dma_device *device, device->chancnt--; chan->dev->chan = NULL; mutex_unlock(&dma_list_mutex); - mutex_lock(&device->chan_mutex); ida_free(&device->chan_ida, chan->chan_id); - mutex_unlock(&device->chan_mutex); device_unregister(&chan->dev->device); free_percpu(chan->local); } @@ -1250,7 +1244,6 @@ int dma_async_device_register(struct dma_device *device) if (rc != 0) return rc; - mutex_init(&device->chan_mutex); ida_init(&device->chan_ida); /* represent channels in sysfs. 
Probably want devs too */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 842d4f7ca752..6db9e03afd0b 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -870,7 +870,6 @@ struct dma_device { struct device *dev; struct module *owner; struct ida chan_ida; - struct mutex chan_mutex; /* to protect chan_ida */ u32 src_addr_widths; u32 dst_addr_widths; -- cgit From 1a95e04e29a116c3424988c70c441ca8ec2779ff Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 12 Apr 2022 11:24:00 +0100 Subject: net: phylink: remove phylink_helper_basex_speed() As there are now no users of phylink_helper_basex_speed(), we can remove this obsolete functionality. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 28 ---------------------------- include/linux/phylink.h | 6 ------ 2 files changed, 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 06943889d747..33c285252584 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2778,34 +2778,6 @@ static const struct sfp_upstream_ops sfp_phylink_ops = { /* Helpers for MAC drivers */ -/** - * phylink_helper_basex_speed() - 1000BaseX/2500BaseX helper - * @state: a pointer to a &struct phylink_link_state - * - * Inspect the interface mode, advertising mask or forced speed and - * decide whether to run at 2.5Gbit or 1Gbit appropriately, switching - * the interface mode to suit. @state->interface is appropriately - * updated, and the advertising mask has the "other" baseX_Full flag - * cleared. - */ -void phylink_helper_basex_speed(struct phylink_link_state *state) -{ - if (phy_interface_mode_is_8023z(state->interface)) { - bool want_2500 = state->an_enabled ? - phylink_test(state->advertising, 2500baseX_Full) : - state->speed == SPEED_2500; - - if (want_2500) { - phylink_clear(state->advertising, 1000baseX_Full); - state->interface = PHY_INTERFACE_MODE_2500BASEX; - } else { - phylink_clear(state->advertising, 2500baseX_Full); - state->interface = PHY_INTERFACE_MODE_1000BASEX; - } - } -} -EXPORT_SYMBOL_GPL(phylink_helper_basex_speed); - static void phylink_decode_c37_word(struct phylink_link_state *state, uint16_t config_reg, int speed) { diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 223781622b33..6d06896fc20d 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -160,11 +160,6 @@ struct phylink_mac_ops { * clearing unsupported speeds and duplex settings. The port modes * should not be cleared; phylink_set_port_modes() will help with this. * - * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX - * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode - * based on @state->advertising and/or @state->speed and update - * @state->interface accordingly. See phylink_helper_basex_speed(). - * * When @config->supported_interfaces has been set, phylink will iterate * over the supported interfaces to determine the full capability of the * MAC. 
The validation function must not print errors if @state->interface @@ -579,7 +574,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); -- cgit From 1306d5362a591493a2d07f685ed2cc480dcda320 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 13 Apr 2022 13:51:56 +0300 Subject: net: add ndo_fdb_del_bulk Add a new netdev op called ndo_fdb_del_bulk; it will later be used for driver-specific bulk delete implementations dispatched from rtnetlink. The first user will be the bridge; we need it so that the driver can signal to rtnetlink that it supports the bulk delete operation (NLM_F_BULK). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 28ea4f8269d4..a602f29365b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1260,6 +1260,10 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. + * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * u16 vid, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) @@ -1510,6 +1514,11 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid); + int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + u16 vid, + struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, -- cgit From d6d3146ce532268ad0ffd8d92d2b7492898decf1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 13 Apr 2022 16:15:52 +0800 Subject: skb: add some helpers for skb drop reasons In order to simplify the definition and assignment of 'enum skb_drop_reason', introduce some helpers. SKB_DR() is used to define a variable of type 'enum skb_drop_reason' with the 'SKB_DROP_REASON_NOT_SPECIFIED' initial value. SKB_DR_SET() is used to set the value of the variable. That may seem a little pointless, but it makes the code shorter. SKB_DR_OR() is used to set the value of the variable only if it is not set yet, that is, while its value is still SKB_DROP_REASON_NOT_SPECIFIED.
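For illustration, the intended usage pattern looks roughly like this; example_rcv() is a made-up receive handler, not part of this patch:

#include <linux/icmp.h>
#include <linux/skbuff.h>

/* Hypothetical receive path, only to illustrate the three macros. */
static int example_rcv(struct sk_buff *skb)
{
	SKB_DR(reason);	/* enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; */

	if (skb->len < sizeof(struct icmphdr)) {
		SKB_DR_SET(reason, PKT_TOO_SMALL);	/* a specific reason */
		goto drop;
	}

	if (skb_checksum_simple_validate(skb))
		goto drop;	/* no specific reason set yet */

	consume_skb(skb);
	return 0;

drop:
	/* assigns ICMP_CSUM only while reason is still NOT_SPECIFIED,
	 * so the more specific PKT_TOO_SMALL set above is preserved
	 */
	SKB_DR_OR(reason, ICMP_CSUM);
	kfree_skb_reason(skb, reason);
	return -1;
}
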
Signed-off-by: Menglong Dong Reviewed-by: Jiang Biao Reviewed-by: Hao Peng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9b81ba497665..0cbd6ada957c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -450,6 +450,18 @@ enum skb_drop_reason { SKB_DROP_REASON_MAX, }; +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED) \ + SKB_DR_SET(name, reason); \ + } while (0) + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. -- cgit From c4eb664191b4a5ff6856478f903924176697719e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 13 Apr 2022 16:15:53 +0800 Subject: net: ipv4: add skb drop reasons to ip_error() ip_error() turns out to be the handler function for failed input route lookups. The drop reasons used in ip_error() correspond almost directly to IPSTATS_MIB_*, and the following new reasons are introduced: SKB_DROP_REASON_IP_INADDRERRORS SKB_DROP_REASON_IP_INNOROUTES Wouldn't the names SKB_DROP_REASON_IP_HOSTUNREACH and SKB_DROP_REASON_IP_NETUNREACH be more accurate? Perhaps, but to keep the correspondence with IPSTATS_MIB_*, the existing names are retained. Signed-off-by: Menglong Dong Reviewed-by: Jiang Biao Reviewed-by: Hao Peng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ include/trace/events/skb.h | 2 ++ net/ipv4/route.c | 6 +++++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0cbd6ada957c..886e83ac4b70 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -447,6 +447,12 @@ enum skb_drop_reason { * 2211, such as a broadcasts * ICMP_TIMESTAMP */ + SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding * to IPSTATS_MIB_INADDRERRORS */ + SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding * to IPSTATS_MIB_INADDRERRORS */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 42647114fffe..0acac7e5a019 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -63,6 +63,8 @@ EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ + EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ + EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 80f96170876c..e839d424b861 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -945,6 +945,7 @@ static int ip_error(struct sk_buff *skb) struct inet_peer *peer; unsigned long now; struct net *net; + SKB_DR(reason); bool send; int code; @@ -964,10 +965,12 @@ static int ip_error(struct sk_buff *skb) if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: + SKB_DR_SET(reason, IP_INADDRERRORS); __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: + SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } @@ -983,6 +986,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; + SKB_DR_SET(reason,
IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1009,7 +1013,7 @@ static int ip_error(struct sk_buff *skb) if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); -out: kfree_skb(skb); +out: kfree_skb_reason(skb, reason); return 0; } -- cgit From 2edc1a383fda8d2f580216292dfd9daeae691e47 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 13 Apr 2022 16:15:55 +0800 Subject: net: ip: add skb drop reasons to ip forwarding Replace kfree_skb() which is used in ip6_forward() and ip_forward() with kfree_skb_reason(). The new drop reason 'SKB_DROP_REASON_PKT_TOO_BIG' is introduced for the case that the length of the packet exceeds MTU and can't fragment. Signed-off-by: Menglong Dong Reviewed-by: Jiang Biao Reviewed-by: Hao Peng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ include/trace/events/skb.h | 1 + net/ipv4/ip_forward.c | 13 ++++++++++--- net/ipv6/ip6_output.c | 9 ++++++--- 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 886e83ac4b70..0ef11df1bc67 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -453,6 +453,9 @@ enum skb_drop_reason { SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding * to IPSTATS_MIB_INADDRERRORS */ + SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed + * the MTU) + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 0acac7e5a019..2da72a9a5764 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -65,6 +65,7 @@ EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ + EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 92ba3350274b..e3aa436a1bdf 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); struct net *net; + SKB_DR(reason); /* that should never happen */ if (skb->pkt_type != PACKET_HOST) @@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) + if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto drop; + } if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; @@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb) if (ip_hdr(skb)->ttl <= 1) goto too_many_hops; - if (!xfrm4_route_forward(skb)) + if (!xfrm4_route_forward(skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto drop; + } rt = skb_rtable(skb); @@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb) IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + SKB_DR_SET(reason, PKT_TOO_BIG); goto drop; } @@ -169,7 +175,8 @@ too_many_hops: /* Tell the sender its packet died... 
*/ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); + SKB_DR_SET(reason, IP_INHDR); drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e23f058166af..3e729cee6486 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -469,6 +469,7 @@ int ip6_forward(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; + SKB_DR(reason); u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); @@ -518,7 +519,7 @@ int ip6_forward(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return -ETIMEDOUT; } @@ -537,6 +538,7 @@ int ip6_forward(struct sk_buff *skb) if (!xfrm6_route_forward(skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); + SKB_DR_SET(reason, XFRM_POLICY); goto drop; } dst = skb_dst(skb); @@ -596,7 +598,7 @@ int ip6_forward(struct sk_buff *skb) __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); return -EMSGSIZE; } @@ -618,8 +620,9 @@ int ip6_forward(struct sk_buff *skb) error: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); + SKB_DR_SET(reason, IP_INADDRERRORS); drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return -EINVAL; } -- cgit From 1ad6d548e2a452f21bcee4606ee4ec7afcde5f37 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 13 Apr 2022 16:15:56 +0800 Subject: net: icmp: introduce function icmpv6_param_prob_reason() In order to add the skb drop reasons support to icmpv6_param_prob(), introduce the function icmpv6_param_prob_reason() and make icmpv6_param_prob() an inline call to it. This new function will be used in the following patches. Signed-off-by: Menglong Dong Reviewed-by: Jiang Biao Reviewed-by: Hao Peng Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 11 +++++++++-- net/ipv6/icmp.c | 7 ++++--- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9055cb380ee2..db0f4fcfdaf4 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -79,8 +79,9 @@ extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); -extern void icmpv6_param_prob(struct sk_buff *skb, - u8 code, int pos); +extern void icmpv6_param_prob_reason(struct sk_buff *skb, + u8 code, int pos, + enum skb_drop_reason reason); struct flowi6; struct in6_addr; @@ -91,6 +92,12 @@ extern void icmpv6_flow_init(struct sock *sk, const struct in6_addr *daddr, int oif); +static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ + icmpv6_param_prob_reason(skb, code, pos, + SKB_DROP_REASON_NOT_SPECIFIED); +} + static inline bool icmpv6_is_err(int type) { switch (type) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 01c8003c9fc9..61770220774e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -629,12 +629,13 @@ out_bh_enable: } EXPORT_SYMBOL(icmp6_send); -/* Slightly more convenient version of icmp6_send. +/* Slightly more convenient version of icmp6_send with drop reasons. 
*/ -void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos, + enum skb_drop_reason reason) { icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); - kfree_skb(skb); + kfree_skb_reason(skb, reason); } /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH -- cgit From 64b97df995f0c943be469a019d6117c89e2131bc Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Wed, 13 Apr 2022 03:44:14 +0200 Subject: cdc_ether: export usbnet_cdc_zte_rx_fixup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bfe9b9d2df66 ("cdc_ether: Improve ZTE MF823/831/910 handling") introduces a workaround for certain ZTE modems reporting invalid MAC addresses over CDC-ECM. The same issue was present on their RNDIS interface, which was fixed in commit a5a18bdf7453 ("rndis_host: Set valid random MAC on buggy devices"). However, the internal modem of the ZTE MF286R router also exhibits, on its RNDIS interface, a second issue already fixed in CDC-ECM: the device does not respect the configured random MAC address. In order to share the fixup for this with the rndis_host driver, export the workaround function, which will be re-used in the following commit in rndis_host. Cc: Kristian Evensen Cc: Bjørn Mork Cc: Oliver Neukum Signed-off-by: Lech Perczak Signed-off-by: Paolo Abeni --- drivers/net/usb/cdc_ether.c | 3 ++- include/linux/usb/usbnet.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 9b4dfa3001d6..2de09ad5bac0 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -479,7 +479,7 @@ static int usbnet_cdc_zte_bind(struct usbnet *dev, struct usb_interface *intf) * device MAC address has been updated). Always set MAC address to that of the * device. */ -static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { if (skb->len < ETH_HLEN || !(skb->data[0] & 0x02)) return 1; @@ -489,6 +489,7 @@ static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } +EXPORT_SYMBOL_GPL(usbnet_cdc_zte_rx_fixup); /* Ensure correct link state * diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 8336e86ce606..1b4d72d5e891 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -214,6 +214,7 @@ extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); +extern int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb); /* CDC and RNDIS support the same host-chosen packet filters for IN transfers */ #define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \ -- cgit From 36e747972d8b4c09e6e3275e31a3acba46e2c4d2 Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Wed, 13 Apr 2022 03:44:15 +0200 Subject: rndis_host: enable the bogus MAC fixup for ZTE devices from cdc_ether MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain ZTE modems, namely MF823, MF831, MF910, and the built-in modem of the MF286R, expose both CDC-ECM and RNDIS network interfaces.
They have a trait of ignoring the locally-administered MAC address configured on the interface both in CDC-ECM and RNDIS part, and this leads to dropping of incoming traffic by the host. However, the workaround was only present in CDC-ECM, and MF286R explicitly requires it in RNDIS mode. Re-use the workaround in rndis_host as well, to fix operation of MF286R module, some versions of which expose only the RNDIS interface. Do so by introducing new flag, RNDIS_DRIVER_DATA_DST_MAC_FIXUP, and testing for it in rndis_rx_fixup. This is required, as RNDIS uses frame batching, and all of the packets inside the batch need the fixup. This might introduce a performance penalty, because test is done for every returned Ethernet frame. Apply the workaround to both "flavors" of RNDIS interfaces, as older ZTE modems, like MF823 found in the wild, report the USB_CLASS_COMM class interfaces, while MF286R reports USB_CLASS_WIRELESS_CONTROLLER. Suggested-by: Bjørn Mork Cc: Kristian Evensen Cc: Oliver Neukum Signed-off-by: Lech Perczak Signed-off-by: Paolo Abeni --- drivers/net/usb/rndis_host.c | 32 ++++++++++++++++++++++++++++++++ include/linux/usb/rndis_host.h | 1 + 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 247f58cb0f84..7a9ece2de2c5 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -485,10 +485,14 @@ EXPORT_SYMBOL_GPL(rndis_unbind); */ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { + bool dst_mac_fixup; + /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) return 0; + dst_mac_fixup = !!(dev->driver_info->data & RNDIS_DRIVER_DATA_DST_MAC_FIXUP); + /* peripheral may have batched packets to us... 
*/ while (likely(skb->len)) { struct rndis_data_hdr *hdr = (void *)skb->data; @@ -523,10 +527,17 @@ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; skb_pull(skb, msg_len - sizeof *hdr); skb_trim(skb2, data_len); + + if (unlikely(dst_mac_fixup)) + usbnet_cdc_zte_rx_fixup(dev, skb2); + usbnet_skb_return(dev, skb2); } /* caller will usbnet_skb_return the remaining packet */ + if (unlikely(dst_mac_fixup)) + usbnet_cdc_zte_rx_fixup(dev, skb); + return 1; } EXPORT_SYMBOL_GPL(rndis_rx_fixup); @@ -600,6 +611,17 @@ static const struct driver_info rndis_poll_status_info = { .tx_fixup = rndis_tx_fixup, }; +static const struct driver_info zte_rndis_info = { + .description = "ZTE RNDIS device", + .flags = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .data = RNDIS_DRIVER_DATA_DST_MAC_FIXUP, + .bind = rndis_bind, + .unbind = rndis_unbind, + .status = rndis_status, + .rx_fixup = rndis_rx_fixup, + .tx_fixup = rndis_tx_fixup, +}; + /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -613,6 +635,16 @@ static const struct usb_device_id products [] = { USB_VENDOR_AND_INTERFACE_INFO(0x238b, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long)&rndis_info, +}, { + /* ZTE WWAN modules */ + USB_VENDOR_AND_INTERFACE_INFO(0x19d2, + USB_CLASS_WIRELESS_CONTROLLER, 1, 3), + .driver_info = (unsigned long)&zte_rndis_info, +}, { + /* ZTE WWAN modules, ACM flavour */ + USB_VENDOR_AND_INTERFACE_INFO(0x19d2, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long)&zte_rndis_info, }, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 809bccd08455..cc42db51bbba 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -197,6 +197,7 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */ /* Flags for driver_info::data */ #define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */ +#define RNDIS_DRIVER_DATA_DST_MAC_FIXUP 2 /* device ignores configured MAC address */ extern void rndis_status(struct usbnet *dev, struct urb *urb); extern int -- cgit From af47d8033fc731f19600efd27ba4a7d0fdfcc77c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 14 Apr 2022 21:42:48 +0300 Subject: gpiolib: Introduce a helper to get first GPIO controller node Introduce a helper to get first GPIO controller node which drivers may want to use. Signed-off-by: Andy Shevchenko Tested-by: Marek Szyprowski --- include/linux/gpio/driver.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 12de0b22b4ef..83e2d72e51bb 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -766,4 +766,14 @@ static inline unsigned int gpiochip_node_count(struct device *dev) return count; } +static inline struct fwnode_handle *gpiochip_node_get_first(struct device *dev) +{ + struct fwnode_handle *fwnode; + + for_each_gpiochip_node(dev, fwnode) + return fwnode; + + return NULL; +} + #endif /* __LINUX_GPIO_DRIVER_H */ -- cgit From f1724d397c60d296c0805c95a46ae7fc7163b70c Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:18 +0800 Subject: crypto: hisilicon/qm - add register checking for ACC Add register detection function to accelerator. 
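As a rough sketch of how an engine driver is expected to consume the interface added below (the zip_* names and the register offsets here are invented for illustration, not taken from the patch):

#include <linux/hisi_acc_qm.h>

/* Invented engine register region, for illustration only. */
static struct dfx_diff_registers zip_diff_regs[] = {
	{
		.reg_offset = 0x1000,	/* made-up base offset */
		.reg_len = 0x20,	/* made-up region length */
	},
};

static int zip_diff_regs_init(struct hisi_qm *qm)
{
	/* snapshots both the common QM regions and the engine regions */
	return hisi_qm_diff_regs_init(qm, zip_diff_regs,
				      ARRAY_SIZE(zip_diff_regs));
}

static void zip_diff_regs_uninit(struct hisi_qm *qm)
{
	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(zip_diff_regs));
}
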
Provide a tool with which users can check differential registers through debugfs, e.g. cd /sys/kernel/debug/hisi_zip//zip_dfx cat diff_regs Signed-off-by: Longfang Liu Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 182 +++++++++++++++++++++++++++++++++++++++++- include/linux/hisi_acc_qm.h | 14 ++++ 2 files changed, 195 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c5c507f2d779..5e0695a8900c 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -253,7 +253,15 @@ #define QM_QOS_MAX_CIR_U 6 #define QM_QOS_MAX_CIR_S 11 #define QM_QOS_VAL_MAX_LEN 32 - +#define QM_DFX_BASE 0x0100000 +#define QM_DFX_STATE1 0x0104000 +#define QM_DFX_STATE2 0x01040C8 +#define QM_DFX_COMMON 0x0000 +#define QM_DFX_BASE_LEN 0x5A +#define QM_DFX_STATE1_LEN 0x2E +#define QM_DFX_STATE2_LEN 0x11 +#define QM_DFX_COMMON_LEN 0xC3 +#define QM_DFX_REGS_LEN 4UL #define QM_AUTOSUSPEND_DELAY 3000 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ @@ -467,6 +475,23 @@ static const struct hisi_qm_hw_error qm_hw_error[] = { { /* sentinel */ } }; +/* define the QM's dfx regs region and region length */ +static struct dfx_diff_registers qm_diff_regs[] = { + { + .reg_offset = QM_DFX_BASE, + .reg_len = QM_DFX_BASE_LEN, + }, { + .reg_offset = QM_DFX_STATE1, + .reg_len = QM_DFX_STATE1_LEN, + }, { + .reg_offset = QM_DFX_STATE2, + .reg_len = QM_DFX_STATE2_LEN, + }, { + .reg_offset = QM_DFX_COMMON, + .reg_len = QM_DFX_COMMON_LEN, + }, +}; + static const char * const qm_db_timeout[] = { "sq", "cq", "eq", "aeq", }; @@ -1625,6 +1650,156 @@ static int qm_regs_show(struct seq_file *s, void *unused) DEFINE_SHOW_ATTRIBUTE(qm_regs); +static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, + const struct dfx_diff_registers *cregs, int reg_len) +{ + struct dfx_diff_registers *diff_regs; + u32 j, base_offset; + int i; + + diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); + if (!diff_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < reg_len; i++) { + if (!cregs[i].reg_len) + continue; + + diff_regs[i].reg_offset = cregs[i].reg_offset; + diff_regs[i].reg_len = cregs[i].reg_len; + diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, + GFP_KERNEL); + if (!diff_regs[i].regs) + goto alloc_error; + + for (j = 0; j < diff_regs[i].reg_len; j++) { + base_offset = diff_regs[i].reg_offset + + j * QM_DFX_REGS_LEN; + diff_regs[i].regs[j] = readl(qm->io_base + base_offset); + } + } + + return diff_regs; + +alloc_error: + while (i > 0) { + i--; + kfree(diff_regs[i].regs); + } + kfree(diff_regs); + return ERR_PTR(-ENOMEM); +} + +static void dfx_regs_uninit(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + int i; + + /* Setting the pointer is NULL to prevent double free */ + for (i = 0; i < reg_len; i++) { + kfree(dregs[i].regs); + dregs[i].regs = NULL; + } + kfree(dregs); + dregs = NULL; +} + +/** + * hisi_qm_diff_regs_init() - Allocate memory for registers. + * @qm: device qm handle. + * @dregs: diff registers handle. + * @reg_len: diff registers region length.
+ */ +int hisi_qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + if (!qm || !dregs || reg_len <= 0) + return -EINVAL; + + if (qm->fun_type != QM_HW_PF) + return 0; + + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + if (IS_ERR(qm->debug.qm_diff_regs)) + return PTR_ERR(qm->debug.qm_diff_regs); + + qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); + if (IS_ERR(qm->debug.acc_diff_regs)) { + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + return PTR_ERR(qm->debug.acc_diff_regs); + } + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init); + +/** + * hisi_qm_diff_regs_uninit() - Free memory for registers. + * @qm: device qm handle. + * @reg_len: diff registers region length. + */ +void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len) +{ + if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF) + return; + + dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); +} +EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit); + +/** + * hisi_qm_acc_diff_regs_dump() - Dump registers's value. + * @qm: device qm handle. + * @s: Debugfs file handle. + * @dregs: diff registers handle. + * @regs_len: diff registers region length. + */ +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, int regs_len) +{ + u32 j, val, base_offset; + int i, ret; + + if (!qm || !s || !dregs || regs_len <= 0) + return; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + down_read(&qm->qps_lock); + for (i = 0; i < regs_len; i++) { + if (!dregs[i].reg_len) + continue; + + for (j = 0; j < dregs[i].reg_len; j++) { + base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; + val = readl(qm->io_base + base_offset); + if (val != dregs[i].regs[j]) + seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", + base_offset, dregs[i].regs[j], val); + } + } + up_read(&qm->qps_lock); + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); + +static int qm_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); + static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, size_t count, loff_t *pos) { @@ -4484,6 +4659,7 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm) */ void hisi_qm_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; struct qm_dfx *dfx = &qm->debug.dfx; struct dentry *qm_d; void *data; @@ -4499,6 +4675,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm) qm_create_debugfs_file(qm, qm->debug.qm_d, i); } + if (qm_regs) + debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, + qm, &qm_diff_regs_fops); + debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 177f7b7cd414..39acc0316a60 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -168,6 +168,12 @@ enum qm_vf_state { QM_NOT_READY, }; +struct dfx_diff_registers { + u32 *regs; + u32 reg_offset; + u32 reg_len; +}; + struct qm_dfx { atomic64_t err_irq_cnt; atomic64_t aeq_irq_cnt; @@ -190,6 +196,8 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file 
files[DEBUG_FILE_NUM]; + struct dfx_diff_registers *qm_diff_regs; + struct dfx_diff_registers *acc_diff_regs; }; struct qm_shaper_factor { @@ -448,6 +456,12 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); void hisi_qm_dev_err_init(struct hisi_qm *qm); void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +int hisi_qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len); +void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len); +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, int regs_len); + pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, pci_channel_state_t state); pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev); -- cgit From a888ccd6c66683a49977ba6a2b91fe52fbec9367 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:25 +0800 Subject: crypto: hisilicon/qm - add last word dumping for ACC Add a last-word dumping function used during the acc engines' controller reset. The last words are included in the information printed during the reset; the dmesg output covers both the QM debugging registers and the engine debugging registers. This helps improve debugging capability. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 57 +++++++++++++++++++++++++++++++++++++++++++ include/linux/hisi_acc_qm.h | 4 +++ 2 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 5e0695a8900c..7f5c3186a895 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3673,6 +3673,17 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state) writel(state, qm->io_base + QM_VF_STATE); } +static void qm_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + kfree(debug->qm_last_words); + debug->qm_last_words = NULL; +} + /** * hisi_qm_uninit() - Uninitialize qm. * @qm: The qm needed uninit.
@@ -3684,6 +3695,8 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + qm_last_regs_uninit(qm); + qm_cmd_uninit(qm); kfree(qm->factor); down_write(&qm->qps_lock); @@ -5361,6 +5374,24 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return 0; } +static void qm_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); + if (debug->qm_last_words[i] != val) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + qm_dfx_regs[i].name, debug->qm_last_words[i], val); + } +} + static int qm_controller_reset(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -5376,6 +5407,10 @@ static int qm_controller_reset(struct hisi_qm *qm) return ret; } + qm_show_last_dfx_regs(qm); + if (qm->err_ini->show_last_dfx_regs) + qm->err_ini->show_last_dfx_regs(qm); + ret = qm_soft_reset(qm); if (ret) { pci_err(pdev, "Controller reset failed (%d)\n", ret); @@ -6086,6 +6121,26 @@ err_alloc_qdma: return ret; } +static void qm_last_regs_init(struct hisi_qm *qm) +{ + int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); + struct qm_debug *debug = &qm->debug; + int i; + + if (qm->fun_type == QM_HW_VF) + return; + + debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), + GFP_KERNEL); + if (!debug->qm_last_words) + return; + + for (i = 0; i < dfx_regs_num; i++) { + debug->qm_last_words[i] = readl_relaxed(qm->io_base + + qm_dfx_regs[i].offset); + } +} + /** * hisi_qm_init() - Initialize configures about qm. * @qm: The qm needing init. @@ -6138,6 +6193,8 @@ int hisi_qm_init(struct hisi_qm *qm) qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); + qm_last_regs_init(qm); + return 0; err_alloc_uacce: diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 39acc0316a60..e5522eaf88fd 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -196,6 +196,9 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + unsigned int *qm_last_words; + /* ACC engines recoreding last regs */ + unsigned int *last_words; struct dfx_diff_registers *qm_diff_regs; struct dfx_diff_registers *acc_diff_regs; }; @@ -251,6 +254,7 @@ struct hisi_qm_err_ini { void (*open_sva_prefetch)(struct hisi_qm *qm); void (*close_sva_prefetch)(struct hisi_qm *qm); void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); }; -- cgit From f6f586102add59d57bcc6eea06fdeaae11bb17a1 Mon Sep 17 00:00:00 2001 From: Eric Tremblay Date: Wed, 30 Mar 2022 12:46:40 +0200 Subject: serial: 8250: Handle UART without interrupt on TEMT using em485 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the UART_CAP_NOTEMT capability. The capability indicates that the UART doesn't have an interrupt available on TEMT. In the case where the device does not support it, we calculate the maximum time it could take for the transmitter to empty the shift register. When the THRE interrupt fires, we check whether the TEMT bit is set. If it is not, we start a timer and call __stop_tx() again after the delay. The transmit sequence is a bit modified when the capability is set.
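As a rough worked example of that delay (assuming tty_get_frame_size() reports 10 bits for an 8N1 frame: one start bit, eight data bits, one stop bit), the fallback timer is armed for about one character time:

	/* no_temt_delay = DIV_ROUND_UP(bits * NSEC_PER_SEC, baud) */
	at 9600 baud:   DIV_ROUND_UP(10 * NSEC_PER_SEC, 9600)   = 1041667 ns (~1.04 ms)
	at 115200 baud: DIV_ROUND_UP(10 * NSEC_PER_SEC, 115200) =   86806 ns (~87 us)
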
The new timer is used between the last interrupt(THRE) and a potential stop_tx timer. Signed-off-by: Giulio Benetti [moved to use added UART_CAP_TEMT] Signed-off-by: Heiko Stuebner [moved to use added UART_CAP_NOTEMT, improve timeout] Signed-off-by: Eric Tremblay [rebased to v5.17, making use of tty_get_frame_size] Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20220330104642.229507-2-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 1 + drivers/tty/serial/8250/8250_port.c | 76 ++++++++++++++++++++++++++++++++++++- include/linux/serial_8250.h | 2 + 3 files changed, 77 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index db784ace25d8..39ffeb37786f 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -83,6 +83,7 @@ struct serial8250_config { #define UART_CAP_MINI BIT(17) /* Mini UART on BCM283X family lacks: * STOP PARITY EPAR SPAR WLEN5 WLEN6 */ +#define UART_CAP_NOTEMT BIT(18) /* UART without interrupt on TEMT available */ #define UART_BUG_QUOT BIT(0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 318af6f13605..31545cd8276e 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -571,8 +571,21 @@ static void serial8250_clear_fifos(struct uart_8250_port *p) } } +static inline void serial8250_em485_update_temt_delay(struct uart_8250_port *p, + unsigned int cflag, unsigned int baud) +{ + unsigned int bits; + + if (!p->em485) + return; + + bits = tty_get_frame_size(cflag); + p->em485->no_temt_delay = DIV_ROUND_UP(bits * NSEC_PER_SEC, baud); +} + static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t); +static enum hrtimer_restart serial8250_em485_handle_no_temt(struct hrtimer *t); void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) { @@ -631,6 +644,16 @@ static int serial8250_em485_init(struct uart_8250_port *p) HRTIMER_MODE_REL); hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + if (p->capabilities & UART_CAP_NOTEMT) { + struct tty_struct *tty = p->port.state->port.tty; + + serial8250_em485_update_temt_delay(p, tty->termios.c_cflag, + tty_get_baud_rate(tty)); + hrtimer_init(&p->em485->no_temt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + p->em485->no_temt_timer.function = &serial8250_em485_handle_no_temt; + } + p->em485->stop_tx_timer.function = &serial8250_em485_handle_stop_tx; p->em485->start_tx_timer.function = &serial8250_em485_handle_start_tx; p->em485->port = p; @@ -662,6 +685,7 @@ void serial8250_em485_destroy(struct uart_8250_port *p) hrtimer_cancel(&p->em485->start_tx_timer); hrtimer_cancel(&p->em485->stop_tx_timer); + hrtimer_cancel(&p->em485->no_temt_timer); kfree(p->em485); p->em485 = NULL; @@ -1504,6 +1528,11 @@ static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec) hrtimer_start(hrt, ms_to_ktime(msec), HRTIMER_MODE_REL); } +static void start_hrtimer_ns(struct hrtimer *hrt, unsigned long nsec) +{ + hrtimer_start(hrt, ns_to_ktime(nsec), HRTIMER_MODE_REL); +} + static void __stop_tx_rs485(struct uart_8250_port *p) { struct uart_8250_em485 *em485 = p->em485; @@ -1535,14 +1564,33 @@ static inline void __stop_tx(struct uart_8250_port *p) if (em485) { unsigned char 
lsr = serial_in(p, UART_LSR); + + p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + /* - * To provide required timeing and allow FIFO transfer, + * To provide required timing and allow FIFO transfer, * __stop_tx_rs485() must be called only when both FIFO and * shift register are empty. It is for device driver to enable * interrupt on TEMT. */ - if ((lsr & BOTH_EMPTY) != BOTH_EMPTY) + if ((lsr & BOTH_EMPTY) != BOTH_EMPTY) { + if (!(p->capabilities & UART_CAP_NOTEMT)) + /* __stop_tx will be called again once TEMT triggers */ + return; + + if (!(lsr & UART_LSR_THRE)) + /* __stop_tx will be called again once THRE triggers */ + return; + + /* + * On devices with no TEMT interrupt available, start + * a timer for a byte time. The timer will recall + * __stop_tx(). + */ + em485->active_timer = &em485->no_temt_timer; + start_hrtimer_ns(&em485->no_temt_timer, em485->no_temt_delay); return; + } __stop_tx_rs485(p); } @@ -1653,6 +1701,27 @@ static inline void start_tx_rs485(struct uart_port *port) __start_tx(port); } +static enum hrtimer_restart serial8250_em485_handle_no_temt(struct hrtimer *t) +{ + struct uart_8250_em485 *em485; + struct uart_8250_port *p; + unsigned long flags; + + em485 = container_of(t, struct uart_8250_em485, no_temt_timer); + p = em485->port; + + serial8250_rpm_get(p); + spin_lock_irqsave(&p->port.lock, flags); + if (em485->active_timer == &em485->no_temt_timer) { + em485->active_timer = NULL; + __stop_tx(p); + } + + spin_unlock_irqrestore(&p->port.lock, flags); + serial8250_rpm_put(p); + return HRTIMER_NORESTART; +} + static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t) { struct uart_8250_em485 *em485 = container_of(t, struct uart_8250_em485, @@ -2858,6 +2927,9 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, serial8250_set_divisor(port, baud, quot, frac); + if (up->capabilities & UART_CAP_NOTEMT) + serial8250_em485_update_temt_delay(up, termios->c_cflag, baud); + /* * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR * is written without DLAB set, this mode will be disabled. diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index ff84a3ed10ea..de135852107c 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -79,7 +79,9 @@ struct uart_8250_ops { struct uart_8250_em485 { struct hrtimer start_tx_timer; /* "rs485 start tx" timer */ struct hrtimer stop_tx_timer; /* "rs485 stop tx" timer */ + struct hrtimer no_temt_timer; /* "rs485 no TEMT interrupt" timer */ struct hrtimer *active_timer; /* pointer to active timer */ + unsigned long no_temt_delay; /* Delay for no_temt_timer */ struct uart_8250_port *port; /* for hrtimer callbacks */ unsigned int tx_stopped:1; /* tx is currently stopped */ }; -- cgit From 4dc84c06a343fcb95fd5a0acb537aefa4ebdd1b0 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 12 Apr 2022 10:01:19 +0800 Subject: net: ethtool: extend ringparam set/get APIs for tx_push Currently tx push is a standard driver feature which controls use of a fast path descriptor push. So this patch extends the ringparam APIs and data structures to support set/get tx push by ethtool -G/g. 
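For illustration, a driver opting in would look roughly like this; the foo_* driver and its tx_push_enabled field are hypothetical, only the ethtool_ops plumbing is taken from the patch:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

struct foo_priv {
	bool tx_push_enabled;	/* hypothetical driver state */
};

static void foo_get_ringparam(struct net_device *ndev,
			      struct ethtool_ringparam *ring,
			      struct kernel_ethtool_ringparam *kernel_ring,
			      struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = netdev_priv(ndev);

	kernel_ring->tx_push = priv->tx_push_enabled;
}

static int foo_set_ringparam(struct net_device *ndev,
			     struct ethtool_ringparam *ring,
			     struct kernel_ethtool_ringparam *kernel_ring,
			     struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = netdev_priv(ndev);

	priv->tx_push_enabled = !!kernel_ring->tx_push;
	/* a real driver would also reprogram its TX doorbell path here */
	return 0;
}

static const struct ethtool_ops foo_ethtool_ops = {
	.supported_ring_params	= ETHTOOL_RING_USE_TX_PUSH,
	.get_ringparam		= foo_get_ringparam,
	.set_ringparam		= foo_set_ringparam,
};

With that in place, a sufficiently new ethtool userspace can toggle the mode with "ethtool -G <dev> tx-push on|off" and read it back with "ethtool -g <dev>".
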
Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 8 ++++++++ include/linux/ethtool.h | 4 ++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.h | 2 +- net/ethtool/rings.c | 18 ++++++++++++++++-- 5 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 24d9be69065d..dbca3e9ec782 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -862,6 +862,7 @@ Kernel response contents: ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE + ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ==================================== ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with @@ -871,6 +872,12 @@ separate buffers. The device configuration must make it possible to receive full memory pages of data, for example because MTU is high enough or through HW-GRO. +``ETHTOOL_A_RINGS_TX_PUSH`` flag is used to enable descriptor fast +path to send packets. In ordinary path, driver fills descriptors in DRAM and +notifies NIC hardware. In fast path, driver pushes descriptors to the device +through MMIO writes, thus reducing the latency. However, enabling this feature +may increase the CPU cost. Drivers may enforce additional per-packet +eligibility checks (e.g. on packet size). RINGS_SET ========= @@ -887,6 +894,7 @@ Request contents: ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE + ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ==================================== ====== =========================== Kernel checks that requested ring sizes do not exceed limits reported by diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4af58459a1e7..99dc7bfbcd3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -71,11 +71,13 @@ enum { * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. 
* @tcp_data_split: Scatter packet headers and data to separate buffers + * @tx_push: The flag of tx push mode * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; + u8 tx_push; u32 cqe_size; }; @@ -83,10 +85,12 @@ struct kernel_ethtool_ringparam { * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size + * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), ETHTOOL_RING_USE_CQE_SIZE = BIT(1), + ETHTOOL_RING_USE_TX_PUSH = BIT(2), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 979850221b8d..d2fb4f7be61b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -338,6 +338,7 @@ enum { ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 29d01662a48b..7919ddb2371c 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -363,7 +363,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 9f33c9689b56..9ed60c507d97 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -55,7 +55,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_TX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ - nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */ + nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ + nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */ } static int rings_fill_reply(struct sk_buff *skb, @@ -94,7 +95,8 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, kr->tcp_data_split))) || (kr->cqe_size && - (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size)))) + (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || + nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push)) return -EMSGSIZE; return 0; @@ -123,6 +125,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), + [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), }; int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) @@ -149,6 +152,15 @@ int 
ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) if (!ops->get_ringparam || !ops->set_ringparam) goto out_dev; + if (tb[ETHTOOL_A_RINGS_TX_PUSH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TX_PUSH], + "setting tx push not supported"); + goto out_dev; + } + rtnl_lock(); ret = ethnl_ops_begin(dev); if (ret < 0) @@ -165,6 +177,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ethnl_update_u32(&kernel_ringparam.cqe_size, tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); + ethnl_update_u8(&kernel_ringparam.tx_push, + tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ret = 0; if (!mod) goto out_ops; -- cgit From f9a2fb73318eb4dbf8cd84866b8b0dd012d8b116 Mon Sep 17 00:00:00 2001 From: Arun Ajith S Date: Fri, 15 Apr 2022 08:34:02 +0000 Subject: net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131 Add a new neighbour cache entry in STALE state for routers on receiving an unsolicited (gratuitous) neighbour advertisement with target link-layer-address option specified. This is similar to the arp_accept configuration for IPv4. A new sysctl endpoint is created to turn on this behaviour: /proc/sys/net/ipv6/conf/interface/accept_unsolicited_na. Signed-off-by: Arun Ajith S Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 27 +++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 10 + net/ipv6/ndisc.c | 20 +- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ndisc_unsolicited_na_test.sh | 255 +++++++++++++++++++++ 7 files changed, 314 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/ndisc_unsolicited_na_test.sh (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b0024aa7b051..433f2e4a5fed 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2467,6 +2467,33 @@ drop_unsolicited_na - BOOLEAN By default this is turned off. +accept_unsolicited_na - BOOLEAN + Add a new neighbour cache entry in STALE state for routers on receiving an + unsolicited neighbour advertisement with target link-layer address option + specified. This is as per router-side behavior documented in RFC9131. + This has lower precedence than drop_unsolicited_na. + + ==== ====== ====== ============================================== + drop accept fwding behaviour + ---- ------ ------ ---------------------------------------------- + 1 X X Drop NA packet and don't pass up the stack + 0 0 X Pass NA packet up the stack, don't update NC + 0 1 0 Pass NA packet up the stack, don't update NC + 0 1 1 Pass NA packet up the stack, and add a STALE + NC entry + ==== ====== ====== ============================================== + + This will optimize the return path for the initial off-link communication + that is initiated by a directly connected host, by ensuring that + the first-hop router which turns on this setting doesn't have to + buffer the initial return packets to do neighbour-solicitation. + The prerequisite is that the host is configured to send + unsolicited neighbour advertisements on interface bringup. + This setting should be used in conjunction with the ndisc_notify setting + on the host to satisfy this prerequisite. + + By default this is turned off. 
+ enhanced_dad - BOOLEAN Include a nonce option in the IPv6 neighbor solicitation messages used for duplicate address detection per RFC7527. A received DAD NS will only signal diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 16870f86c74d..918bfea4ef5f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -61,6 +61,7 @@ struct ipv6_devconf { __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; + __s32 accept_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index d4178dace0bf..549ddeaf788b 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -194,6 +194,7 @@ enum { DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, + DEVCONF_ACCEPT_UNSOLICITED_NA, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1afc4c024981..6473dc84b71d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5587,6 +5587,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; + array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na; } static inline size_t inet6_ifla6_size(void) @@ -7037,6 +7038,15 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)SYSCTL_ONE, }, + { + .procname = "accept_unsolicited_na", + .data = &ipv6_devconf.accept_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fcb288b0ae13..254addad0dd3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -979,6 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; + bool create_neigh; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NA: packet too short\n"); @@ -999,6 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicited packets are attacks * and thus should not be accepted. + * drop_unsolicited_na takes precedence over accept_unsolicited_na */ if (!msg->icmph.icmp6_solicited && idev && idev->cnf.drop_unsolicited_na) @@ -1039,7 +1041,23 @@ static void ndisc_recv_na(struct sk_buff *skb) in6_ifa_put(ifp); return; } - neigh = neigh_lookup(&nd_tbl, &msg->target, dev); + /* RFC 9131 updates the original Neighbour Discovery RFC 4861. + * An unsolicited NA can now create a neighbour cache entry + * on routers if it has Target LL Address option. + * + * drop accept fwding behaviour + * ---- ------ ------ ---------------------------------------------- + * 1 X X Drop NA packet and don't pass up the stack + * 0 0 X Pass NA packet up the stack, don't update NC + * 0 1 0 Pass NA packet up the stack, don't update NC + * 0 1 1 Pass NA packet up the stack, and add a STALE + * NC entry + * Note that we don't do a (daddr == all-routers-mcast) check.
+ */ + create_neigh = !msg->icmph.icmp6_solicited && lladdr && + idev && idev->cnf.forwarding && + idev->cnf.accept_unsolicited_na; + neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh); if (neigh) { u8 old_flags = neigh->flags; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3fe2515aa616..af7f6e6ff182 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -36,6 +36,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh +TEST_PROGS += ndisc_unsolicited_na_test.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh new file mode 100755 index 000000000000..f508657ee126 --- /dev/null +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -0,0 +1,255 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for the accept_unsolicited_na feature to +# enable RFC9131 behaviour. The following is the test-matrix. +# drop accept fwding behaviour +# ---- ------ ------ ---------------------------------------------- +# 1 X X Drop NA packet and don't pass up the stack +# 0 0 X Pass NA packet up the stack, don't update NC +# 0 1 0 Pass NA packet up the stack, don't update NC +# 0 1 1 Pass NA packet up the stack, and add a STALE +# NC entry + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +PAUSE_ON_FAIL=no +PAUSE=no + +HOST_NS="ns-host" +ROUTER_NS="ns-router" + +HOST_INTF="veth-host" +ROUTER_INTF="veth-router" + +ROUTER_ADDR="2000:20::1" +HOST_ADDR="2000:20::2" +SUBNET_WIDTH=64 +ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" +HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" + +IP_HOST="ip -6 -netns ${HOST_NS}" +IP_HOST_EXEC="ip netns exec ${HOST_NS}" +IP_ROUTER="ip -6 -netns ${ROUTER_NS}" +IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + +tcpdump_stdout= +tcpdump_stderr= + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +setup() +{ + set -e + + local drop_unsolicited_na=$1 + local accept_unsolicited_na=$2 + local forwarding=$3 + + # Setup two namespaces and a veth tunnel across them. + # On end of the tunnel is a router and the other end is a host. + ip netns add ${HOST_NS} + ip netns add ${ROUTER_NS} + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ + peer name ${HOST_INTF} netns ${HOST_NS} + + # Enable IPv6 on both router and host, and configure static addresses. + # The router here is the DUT + # Setup router configuration as specified by the arguments. + # forwarding=0 case is to check that a non-router + # doesn't add neighbour entries. 
+ ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + ${IP_ROUTER_EXEC} sysctl -qw \ ${ROUTER_CONF}.forwarding=${forwarding} + ${IP_ROUTER_EXEC} sysctl -qw \ ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw \ ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0 + ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF} + + # Turn on ndisc_notify on host interface so that + # the host sends unsolicited NAs. + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF} + + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ${IP_ROUTER_EXEC} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +cleanup_tcpdump() +{ + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +cleanup() +{ + cleanup_tcpdump + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +link_up() { + set -e + ${IP_ROUTER} link set dev ${ROUTER_INTF} up + ${IP_HOST} link set dev ${HOST_INTF} up + set +e +} + +verify_ndisc() { + local drop_unsolicited_na=$1 + local accept_unsolicited_na=$2 + local forwarding=$3 + + neigh_show_output=$(${IP_ROUTER} neigh show \ + to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale) + if [ ${drop_unsolicited_na} -eq 0 ] && \ + [ ${accept_unsolicited_na} -eq 1 ] && \ + [ ${forwarding} -eq 1 ]; then + # Neighbour entry expected to be present for 011 case + [[ ${neigh_show_output} ]] + else + # Neighbour entry expected to be absent for all other cases + [[ -z ${neigh_show_output} ]] + fi +} + +test_unsolicited_na_common() +{ + # Set up the test bed, but keep links down + setup $1 $2 $3 + + # Bring the link up, wait for the NA, + # and add a delay to ensure neighbour processing is done. + link_up + start_tcpdump + + # Verify the neighbour table + verify_ndisc $1 $2 $3 + +} + +test_unsolicited_na_combination() { + test_unsolicited_na_common $1 $2 $3; rc=$? + test_msg=("test_unsolicited_na: " + "drop_unsolicited_na=$1 " + "accept_unsolicited_na=$2 " + "forwarding=$3") + log_test ${rc}
0 "${test_msg[*]}" + cleanup +} + +test_unsolicited_na_combinations() { + # Args: drop_unsolicited_na accept_unsolicited_na forwarding + + # Expect entry + test_unsolicited_na_combination 0 1 1 + + # Expect no entry + test_unsolicited_na_combination 0 0 0 + test_unsolicited_na_combination 0 0 1 + test_unsolicited_na_combination 0 1 0 + test_unsolicited_na_combination 1 0 0 + test_unsolicited_na_combination 1 0 1 + test_unsolicited_na_combination 1 1 0 + test_unsolicited_na_combination 1 1 1 +} + +############################################################################### +# usage + +usage() +{ + cat < /dev/null + +test_unsolicited_na_combinations + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret -- cgit From da40b613f89c43c58986e6f30560ad6573a4d569 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Apr 2022 17:10:41 -0700 Subject: tcp: add drop reason support to tcp_validate_incoming() Creates four new drop reasons for the following cases: 1) packet being rejected by RFC 7323 PAWS check 2) packet being rejected by SEQUENCE check 3) Invalid RST packet 4) Invalid SYN packet Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ include/trace/events/skb.h | 5 +++++ net/ipv4/tcp_input.c | 7 ++++++- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0ef11df1bc67..a903da1fa0ed 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -381,6 +381,12 @@ enum skb_drop_reason { * the ofo queue, corresponding to * LINUX_MIB_TCPOFOMERGE */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ + SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 2da72a9a5764..820dacd14bad 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -37,6 +37,11 @@ EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ + EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ + EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ + TCP_INVALID_SEQUENCE) \ + EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ + EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ BPF_CGROUP_EGRESS) \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b2d5fbef6ce3..9a1cb3f48c3f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5667,6 +5667,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { struct tcp_sock *tp = tcp_sk(sk); + SKB_DR(reason); /* RFC1323: H1. Apply PAWS check first. */ if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) && @@ -5678,6 +5679,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, LINUX_MIB_TCPACKSKIPPEDPAWS, &tp->last_oow_ack_time)) tcp_send_dupack(sk, skb); + SKB_DR_SET(reason, TCP_RFC7323_PAWS); goto discard; } /* Reset is accepted even if it did not pass PAWS. 
*/ @@ -5701,6 +5703,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, } else if (tcp_reset_check(sk, skb)) { goto reset; } + SKB_DR_SET(reason, TCP_INVALID_SEQUENCE); goto discard; } @@ -5743,6 +5746,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, sk->sk_state == TCP_ESTABLISHED) tcp_fastopen_active_disable(sk); tcp_send_challenge_ack(sk); + SKB_DR_SET(reason, TCP_RESET); goto discard; } @@ -5757,6 +5761,7 @@ syn_challenge: TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); tcp_send_challenge_ack(sk); + SKB_DR_SET(reason, TCP_INVALID_SYN); goto discard; } @@ -5765,7 +5770,7 @@ syn_challenge: return true; discard: - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, reason); return false; reset: -- cgit From 669da7a71890b2b2a31a7e9571c0fdf1123e26ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Apr 2022 17:10:43 -0700 Subject: tcp: add drop reasons to tcp_rcv_state_process() Add basic support for drop reasons in tcp_rcv_state_process() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ include/trace/events/skb.h | 3 +++ net/ipv4/tcp_input.c | 24 +++++++++++++++++------- 3 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a903da1fa0ed..6f1410b5ff13 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -387,6 +387,9 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ + SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 820dacd14bad..fbe21ad038bc 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -42,6 +42,9 @@ TCP_INVALID_SEQUENCE) \ EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ + EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ + EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ + EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ BPF_CGROUP_EGRESS) \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f95a8368981d..85fae79c894d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6413,21 +6413,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) struct request_sock *req; int queued = 0; bool acceptable; + SKB_DR(reason); switch (sk->sk_state) { case TCP_CLOSE: + SKB_DR_SET(reason, TCP_CLOSE); goto discard; case TCP_LISTEN: if (th->ack) return 1; - if (th->rst) + if (th->rst) { + SKB_DR_SET(reason, TCP_RESET); goto discard; - + } if (th->syn) { - if (th->fin) + if (th->fin) { + SKB_DR_SET(reason, TCP_FLAGS); goto discard; + } /* It is possible that we process SYN packets from backlog, * so we need to make sure to disable BH and RCU right there. 
*/ @@ -6442,6 +6447,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) consume_skb(skb); return 0; } + SKB_DR_SET(reason, TCP_FLAGS); goto discard; case TCP_SYN_SENT: @@ -6468,13 +6474,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (!tcp_check_req(sk, skb, req, true, &req_stolen)) + if (!tcp_check_req(sk, skb, req, true, &req_stolen)) { + SKB_DR_SET(reason, TCP_FASTOPEN); goto discard; + } } - if (!th->ack && !th->rst && !th->syn) + if (!th->ack && !th->rst && !th->syn) { + SKB_DR_SET(reason, TCP_FLAGS); goto discard; - + } if (!tcp_validate_incoming(sk, skb, th, 0)) return 0; @@ -6487,6 +6496,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_SYN_RECV) return 1; /* send one RST */ tcp_send_challenge_ack(sk); + SKB_DR_SET(reason, TCP_OLD_ACK); goto discard; } switch (sk->sk_state) { @@ -6647,7 +6657,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!queued) { discard: - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, reason); } return 0; -- cgit From 4b506af9c5b8de0da34097d50d9448dfb33d70c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Apr 2022 17:10:44 -0700 Subject: tcp: add two drop reasons for tcp_ack() Add TCP_TOO_OLD_ACK and TCP_ACK_UNSENT_DATA drop reasons so that tcp_rcv_established() can report them. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ include/trace/events/skb.h | 3 +++ net/ipv4/tcp_input.c | 7 ++++--- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f1410b5ff13..9ff5557b1909 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -390,6 +390,8 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index fbe21ad038bc..eab0b09223f3 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -45,6 +45,9 @@ EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ + EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ + EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ + TCP_ACK_UNSENT_DATA) \ EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ BPF_CGROUP_EGRESS) \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85fae79c894d..8a68785b0405 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una - tp->max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk); - return -1; + return -SKB_DROP_REASON_TCP_TOO_OLD_ACK; } goto old_ack; } @@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * this segment (RFC793 Section 3.9). 
*/ if (after(ack, tp->snd_nxt)) - return -1; + return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA; if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; @@ -5962,7 +5962,8 @@ slow_path: return; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) + reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT); + if (reason < 0) goto discard; tcp_rcv_rtt_measure_ts(sk, skb); -- cgit From e7c89ae4078eab24af71ba26b91642e819a4bd7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Apr 2022 17:10:45 -0700 Subject: tcp: add drop reason support to tcp_prune_ofo_queue() Add one reason for packets dropped from OFO queue because of memory pressure. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 2 ++ net/ipv4/tcp_input.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9ff5557b1909..ad15ad208b56 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -392,6 +392,7 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index eab0b09223f3..73d7a6e594cb 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -48,6 +48,8 @@ EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ TCP_ACK_UNSENT_DATA) \ + EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \ + TCP_OFO_QUEUE_PRUNE) \ EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ BPF_CGROUP_EGRESS) \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a68785b0405..a1077adeb1b6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5334,7 +5334,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk) prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); goal -= rb_to_skb(node)->truesize; - tcp_drop(sk, rb_to_skb(node)); + tcp_drop_reason(sk, rb_to_skb(node), + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); if (!prev || goal <= 0) { sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && -- cgit From 8fbf195798b56e1e87f62d01be636a6425c304c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Apr 2022 17:10:48 -0700 Subject: tcp: add drop reason support to tcp_ofo_queue() packets in OFO queue might be redundant, and dropped. tcp_drop() is no longer needed. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + net/ipv4/tcp_input.c | 9 ++------- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ad15ad208b56..84d78df60453 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -393,6 +393,7 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 73d7a6e594cb..a477bf907498 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -37,6 +37,7 @@ EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ + EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \ EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ TCP_INVALID_SEQUENCE) \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 339cc3d40745..cf2dc19bb8c7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4674,7 +4674,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk, { bool res = tcp_try_coalesce(sk, to, from, fragstolen); - /* In case tcp_drop() is called later, update to->gso_segs */ + /* In case tcp_drop_reason() is called later, update to->gso_segs */ if (res) { u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) + max_t(u16, 1, skb_shinfo(from)->gso_segs); @@ -4691,11 +4691,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb, kfree_skb_reason(skb, reason); } -static void tcp_drop(struct sock *sk, struct sk_buff *skb) -{ - tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED); -} - /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ @@ -4723,7 +4718,7 @@ static void tcp_ofo_queue(struct sock *sk) rb_erase(&skb->rbnode, &tp->out_of_order_queue); if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { - tcp_drop(sk, skb); + tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP); continue; } -- cgit From a2daa27c0c6137481226aee5b3136e453c642929 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Feb 2022 11:44:42 +0100 Subject: swiotlb: simplify swiotlb_max_segment Remove the bogus Xen override that was usually larger than the actual size and just calculate the value on demand. Note that swiotlb_max_segment still doesn't make sense as an interface and should eventually be removed. 
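For illustration, a minimal user-space sketch of the on-demand calculation this patch switches to. The IO_TLB_SHIFT, PAGE_SIZE and nslabs values below are assumptions chosen for the example; the kernel derives them from its own headers and the configured bounce pool:

/* Hedged sketch: recompute the max segment on demand instead of
 * caching it at init time.  rounddown() mirrors include/linux/math.h. */
#include <stdio.h>

#define IO_TLB_SHIFT	11		/* assumed: one slab = 2 KiB */
#define PAGE_SIZE	4096UL		/* assumed page size */
#define rounddown(x, y)	((x) - ((x) % (y)))

static unsigned long nslabs = 32768;	/* assumed: the 64 MiB default pool */

static unsigned int swiotlb_max_segment_demo(void)
{
	if (!nslabs)			/* no bounce buffer configured */
		return 0;
	return rounddown(nslabs << IO_TLB_SHIFT, PAGE_SIZE);
}

int main(void)
{
	printf("max segment: %u bytes\n", swiotlb_max_segment_demo());
	return 0;
}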
Signed-off-by: Christoph Hellwig Reviewed-by: Anshuman Khandual Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- drivers/xen/swiotlb-xen.c | 2 -- include/linux/swiotlb.h | 1 - kernel/dma/swiotlb.c | 20 +++----------------- 3 files changed, 3 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 47aebd98f52f..485cd06ed39e 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -202,7 +202,6 @@ retry: rc = swiotlb_late_init_with_tbl(start, nslabs); if (rc) return rc; - swiotlb_set_max_segment(PAGE_SIZE); return 0; error: if (nslabs > 1024 && repeat--) { @@ -254,7 +253,6 @@ retry: if (swiotlb_init_with_tbl(start, nslabs, true)) panic("Cannot allocate SWIOTLB buffer"); - swiotlb_set_max_segment(PAGE_SIZE); } #endif /* CONFIG_X86 */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index f6c3638255d5..9fb3a568f0c5 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -164,7 +164,6 @@ static inline void swiotlb_adjust_size(unsigned long size) #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); -extern void swiotlb_set_max_segment(unsigned int); #ifdef CONFIG_DMA_RESTRICTED_POOL struct page *swiotlb_alloc(struct device *dev, size_t size); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 98bb0eb44a7b..e0127e397335 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -68,12 +68,6 @@ struct io_tlb_mem io_tlb_default_mem; phys_addr_t swiotlb_unencrypted_base; -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). - */ -static unsigned int max_segment; - static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; static int __init @@ -97,18 +91,12 @@ early_param("swiotlb", setup_io_tlb_npages); unsigned int swiotlb_max_segment(void) { - return io_tlb_default_mem.nslabs ? max_segment : 0; + if (!io_tlb_default_mem.nslabs) + return 0; + return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE); } EXPORT_SYMBOL_GPL(swiotlb_max_segment); -void swiotlb_set_max_segment(unsigned int val) -{ - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - unsigned long swiotlb_size_or_default(void) { return default_nslabs << IO_TLB_SHIFT; @@ -258,7 +246,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) if (verbose) swiotlb_print_info(); - swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); return 0; } @@ -359,7 +346,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true); swiotlb_print_info(); - swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT); return 0; } -- cgit From 0d5ffd9a256d8995764f9d4a35a8c3917839d169 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Feb 2022 11:07:28 +0100 Subject: swiotlb: rename swiotlb_late_init_with_default_size swiotlb_late_init_with_default_size is an overly verbose name that doesn't even catch what the function is doing, given that the size is not just a default but the actual requested size. Rename it to swiotlb_init_late. 
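As context for the size argument, a small sketch of how a requested byte count maps onto a slab count on this path. The IO_TLB_SHIFT and IO_TLB_SEGSIZE values and the 4 MiB request are assumptions for the example, not taken from the patch:

/* Hedged sketch: swiotlb_init_late()'s size argument rounded up to
 * whole segments of slabs, as in the nslabs computation. */
#include <stdio.h>

#define IO_TLB_SHIFT	11	/* assumed: one slab = 2 KiB */
#define IO_TLB_SEGSIZE	128	/* assumed: slabs handed out per segment */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long size = 4UL << 20;	/* assumed 4 MiB request */
	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);

	printf("%lu bytes -> %lu slabs (%lu KiB of bounce space)\n",
	       size, nslabs, (nslabs << IO_TLB_SHIFT) >> 10);
	return 0;
}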
Signed-off-by: Christoph Hellwig Reviewed-by: Anshuman Khandual Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- arch/x86/pci/sta2x11-fixup.c | 2 +- include/linux/swiotlb.h | 2 +- kernel/dma/swiotlb.c | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 101081ad64b6..e0c039a75b2d 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev) int size = STA2X11_SWIOTLB_SIZE; /* First instance: register your own swiotlb area */ dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size); - if (swiotlb_late_init_with_default_size(size)) + if (swiotlb_init_late(size)) dev_emerg(&pdev->dev, "init swiotlb failed\n"); } list_add(&instance->list, &sta2x11_instance_list); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 9fb3a568f0c5..b48b26bfa0ed 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -40,7 +40,7 @@ extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); -extern int swiotlb_late_init_with_default_size(size_t default_size); +int swiotlb_init_late(size_t size); extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index e0127e397335..9a4fe6e48a07 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -281,11 +281,9 @@ fail: * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching. */ -int -swiotlb_late_init_with_default_size(size_t default_size) +int swiotlb_init_late(size_t size) { - unsigned long nslabs = - ALIGN(default_size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); + unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned long bytes; unsigned char *vstart = NULL; unsigned int order; -- cgit From 78013eaadf696d2105982abb4018fbae394ca08f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Feb 2022 14:11:44 +0100 Subject: x86: remove the IOMMU table infrastructure The IOMMU table tries to separate the different IOMMUs into different backends, but actually requires various cross calls. Rewrite the code to do the generic swiotlb/swiotlb-xen setup directly in pci-dma.c and then just call into the IOMMU drivers. 
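For orientation, a hedged sketch of the control flow that replaces the sorted .iommu_table walk: one fixed, explicit call order. The function names follow the patch, but the stub bodies and the two globals are illustrative placeholders, not kernel code:

#include <stdbool.h>
#include <stdio.h>

static bool xen_pv_domain;		/* assumed inputs for the demo */
static bool x86_swiotlb_enable;

static void pci_xen_swiotlb_init(void) { puts("xen-swiotlb init"); }
static void pci_swiotlb_detect(void)   { x86_swiotlb_enable = true; }
static void gart_iommu_hole_init(void) { puts("GART aperture check"); }
static void amd_iommu_detect(void)     { puts("AMD-Vi detect"); }
static void detect_intel_iommu(void)   { puts("VT-d detect"); }
static void swiotlb_init_stub(void)    { puts("native swiotlb init"); }

static void pci_iommu_alloc(void)
{
	if (xen_pv_domain) {		/* Xen PV short-circuits the rest */
		pci_xen_swiotlb_init();
		return;
	}
	pci_swiotlb_detect();		/* was pci_swiotlb_detect_4gb */
	gart_iommu_hole_init();		/* previously run via IOMMU_INIT_* */
	amd_iommu_detect();
	detect_intel_iommu();
	if (x86_swiotlb_enable)
		swiotlb_init_stub();
}

int main(void) { pci_iommu_alloc(); return 0; }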
Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- arch/ia64/include/asm/iommu_table.h | 7 --- arch/x86/include/asm/dma-mapping.h | 1 - arch/x86/include/asm/gart.h | 5 +- arch/x86/include/asm/iommu.h | 6 ++ arch/x86/include/asm/iommu_table.h | 102 ------------------------------- arch/x86/include/asm/swiotlb.h | 30 --------- arch/x86/include/asm/xen/swiotlb-xen.h | 2 - arch/x86/kernel/Makefile | 2 - arch/x86/kernel/amd_gart_64.c | 5 +- arch/x86/kernel/aperture_64.c | 14 ++--- arch/x86/kernel/pci-dma.c | 107 ++++++++++++++++++++++++++------- arch/x86/kernel/pci-iommu_table.c | 77 ------------------------ arch/x86/kernel/pci-swiotlb.c | 77 ------------------------ arch/x86/kernel/tboot.c | 1 - arch/x86/kernel/vmlinux.lds.S | 12 ---- arch/x86/xen/Makefile | 2 - arch/x86/xen/pci-swiotlb-xen.c | 96 ----------------------------- drivers/iommu/amd/init.c | 6 -- drivers/iommu/amd/iommu.c | 5 +- drivers/iommu/intel/dmar.c | 6 +- include/linux/dmar.h | 6 +- 21 files changed, 110 insertions(+), 459 deletions(-) delete mode 100644 arch/ia64/include/asm/iommu_table.h delete mode 100644 arch/x86/include/asm/iommu_table.h delete mode 100644 arch/x86/include/asm/swiotlb.h delete mode 100644 arch/x86/kernel/pci-iommu_table.c delete mode 100644 arch/x86/kernel/pci-swiotlb.c delete mode 100644 arch/x86/xen/pci-swiotlb-xen.c (limited to 'include/linux') diff --git a/arch/ia64/include/asm/iommu_table.h b/arch/ia64/include/asm/iommu_table.h deleted file mode 100644 index cc96116ac276..000000000000 --- a/arch/ia64/include/asm/iommu_table.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_IOMMU_TABLE_H -#define _ASM_IA64_IOMMU_TABLE_H - -#define IOMMU_INIT_POST(_detect) - -#endif /* _ASM_IA64_IOMMU_TABLE_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index bb1654fe0ce7..256fd8115223 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -9,7 +9,6 @@ #include #include -#include extern int iommu_merge; extern int panic_on_overflow; diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 318556574345..5af8088a10df 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); -extern int gart_iommu_hole_init(void); +void gart_iommu_hole_init(void); #else #define gart_iommu_aperture 0 @@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void) static inline void gart_parse_options(char *options) { } -static inline int gart_iommu_hole_init(void) +static inline void gart_iommu_hole_init(void) { - return -ENODEV; } #endif diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index bf1ed2ddc74b..dba89ed40d38 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -9,6 +9,12 @@ extern int force_iommu, no_iommu; extern int iommu_detected; +#ifdef CONFIG_SWIOTLB +extern bool x86_swiotlb_enable; +#else +#define x86_swiotlb_enable false +#endif + /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h deleted file mode 100644 index 1fb3fd1a83c2..000000000000 --- a/arch/x86/include/asm/iommu_table.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef 
_ASM_X86_IOMMU_TABLE_H -#define _ASM_X86_IOMMU_TABLE_H - -#include - -/* - * History lesson: - * The execution chain of IOMMUs in 2.6.36 looks as so: - * - * [xen-swiotlb] - * | - * +----[swiotlb *]--+ - * / | \ - * / | \ - * [GART] [Calgary] [Intel VT-d] - * / - * / - * [AMD-Vi] - * - * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip - * over the rest of IOMMUs and unconditionally initialize the SWIOTLB. - * Also it would surreptitiously initialize set the swiotlb=1 if there were - * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb - * flag would be turned off by all IOMMUs except the Calgary one. - * - * The IOMMU_INIT* macros allow a similar tree (or more complex if desired) - * to be built by defining who we depend on. - * - * And all that needs to be done is to use one of the macros in the IOMMU - * and the pci-dma.c will take care of the rest. - */ - -struct iommu_table_entry { - initcall_t detect; - initcall_t depend; - void (*early_init)(void); /* No memory allocate available. */ - void (*late_init)(void); /* Yes, can allocate memory. */ -#define IOMMU_FINISH_IF_DETECTED (1<<0) -#define IOMMU_DETECTED (1<<1) - int flags; -}; -/* - * Macro fills out an entry in the .iommu_table that is equivalent - * to the fields that 'struct iommu_table_entry' has. The entries - * that are put in the .iommu_table section are not put in any order - * hence during boot-time we will have to resort them based on - * dependency. */ - - -#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry \ - __iommu_entry_##_detect __used \ - __attribute__ ((unused, __section__(".iommu_table"), \ - aligned((sizeof(void *))))) \ - = {_detect, _depend, _early_init, _late_init, \ - _finish ? IOMMU_FINISH_IF_DETECTED : 0} -/* - * The simplest IOMMU definition. Provide the detection routine - * and it will be run after the SWIOTLB and the other IOMMUs - * that utilize this macro. If the IOMMU is detected (ie, the - * detect routine returns a positive value), the other IOMMUs - * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer - * to stop detecting the other IOMMUs after yours has been detected. - */ -#define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) - -#define IOMMU_INIT_POST_FINISH(detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) - -/* - * A more sophisticated version of IOMMU_INIT. This variant requires: - * a). A detection routine function. - * b). The name of the detection routine we depend on to get called - * before us. - * c). The init routine which gets called if the detection routine - * returns a positive value from the pci_iommu_alloc. This means - * no presence of a memory allocator. - * d). Similar to the 'init', except that this gets called from pci_iommu_init - * where we do have a memory allocator. - * - * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant - * in that the former will continue detecting other IOMMUs in the call - * list after the detection routine returns a positive number, while the - * latter will stop the execution chain upon first successful detection. - * Both variants will still call the 'init' and 'late_init' functions if - * they are set. 
- */ -#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ - __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) - -#define IOMMU_INIT(_detect, _depend, _init, _late_init) \ - __IOMMU_INIT(_detect, _depend, _init, _late_init, 0) - -void sort_iommu_table(struct iommu_table_entry *start, - struct iommu_table_entry *finish); - -void check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish); - -#endif /* _ASM_X86_IOMMU_TABLE_H */ diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h deleted file mode 100644 index ff6c92eff035..000000000000 --- a/arch/x86/include/asm/swiotlb.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_SWIOTLB_H -#define _ASM_X86_SWIOTLB_H - -#include - -#ifdef CONFIG_SWIOTLB -extern int swiotlb; -extern int __init pci_swiotlb_detect_override(void); -extern int __init pci_swiotlb_detect_4gb(void); -extern void __init pci_swiotlb_init(void); -extern void __init pci_swiotlb_late_init(void); -#else -#define swiotlb 0 -static inline int pci_swiotlb_detect_override(void) -{ - return 0; -} -static inline int pci_swiotlb_detect_4gb(void) -{ - return 0; -} -static inline void pci_swiotlb_init(void) -{ -} -static inline void pci_swiotlb_late_init(void) -{ -} -#endif -#endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 66b4ddde7743..e5a90b42e4dd 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -3,10 +3,8 @@ #define _ASM_X86_SWIOTLB_XEN_H #ifdef CONFIG_SWIOTLB_XEN -extern int __init pci_xen_swiotlb_detect(void); extern int pci_xen_swiotlb_init_late(void); #else -#define pci_xen_swiotlb_detect NULL static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c41ef42adbe8..e17b7e92a3fa 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -68,7 +68,6 @@ obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o -obj-y += pci-iommu_table.o obj-y += resource.o obj-y += irqflags.o obj-y += static_call.o @@ -134,7 +133,6 @@ obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o -obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index ed837383de5c..194d54eed537 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -38,11 +38,9 @@ #include #include #include -#include #include #include #include -#include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -808,7 +806,7 @@ int __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; x86_platform.iommu_shutdown = gart_iommu_shutdown; - swiotlb = 0; + x86_swiotlb_enable = false; return 0; } @@ -842,4 +840,3 @@ void __init gart_parse_options(char *p) } } } -IOMMU_INIT_POST(gart_iommu_hole_init); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index af3ba08b684b..7a5630d904b2 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -392,7 +392,7 @@ void __init early_gart_iommu_check(void) static int __initdata printed_gart_size_msg; -int __init 
gart_iommu_hole_init(void) +void __init gart_iommu_hole_init(void) { u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; @@ -401,11 +401,11 @@ int __init gart_iommu_hole_init(void) int i, node; if (!amd_gart_present()) - return -ENODEV; + return; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) - return -ENODEV; + return; pr_info("Checking aperture...\n"); @@ -491,10 +491,8 @@ out: * and fixed up the northbridge */ exclude_from_core(last_aper_base, last_aper_order); - - return 1; } - return 0; + return; } if (!fallback_aper_force) { @@ -527,7 +525,7 @@ out: panic("Not enough memory for aperture"); } } else { - return 0; + return; } /* @@ -561,6 +559,4 @@ out: } set_up_gart_resume(aper_order, aper_alloc); - - return 1; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index de234e7a8962..df96926421be 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -7,13 +7,16 @@ #include #include #include +#include #include #include #include #include #include -#include + +#include +#include static bool disable_dac_quirk __read_mostly; @@ -34,24 +37,83 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; -extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; +#ifdef CONFIG_SWIOTLB +bool x86_swiotlb_enable; + +static void __init pci_swiotlb_detect(void) +{ + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ + if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) + x86_swiotlb_enable = true; + + /* + * Set swiotlb to 1 so that bounce buffers are allocated and used for + * devices that can't support DMA to encrypted memory. + */ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + x86_swiotlb_enable = true; + + if (swiotlb_force == SWIOTLB_FORCE) + x86_swiotlb_enable = true; +} +#else +static inline void __init pci_swiotlb_detect(void) +{ +} +#endif /* CONFIG_SWIOTLB */ + +#ifdef CONFIG_SWIOTLB_XEN +static bool xen_swiotlb; + +static void __init pci_xen_swiotlb_init(void) +{ + if (!xen_initial_domain() && !x86_swiotlb_enable && + swiotlb_force != SWIOTLB_FORCE) + return; + x86_swiotlb_enable = true; + xen_swiotlb = true; + xen_swiotlb_init_early(); + dma_ops = &xen_swiotlb_dma_ops; + if (IS_ENABLED(CONFIG_PCI)) + pci_request_acs(); +} + +int pci_xen_swiotlb_init_late(void) +{ + int rc; + + if (xen_swiotlb) + return 0; + + rc = xen_swiotlb_init(); + if (rc) + return rc; + + /* XXX: this switches the dma ops under live devices! 
*/ + dma_ops = &xen_swiotlb_dma_ops; + if (IS_ENABLED(CONFIG_PCI)) + pci_request_acs(); + return 0; +} +EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); +#else +static inline void __init pci_xen_swiotlb_init(void) +{ +} +#endif /* CONFIG_SWIOTLB_XEN */ void __init pci_iommu_alloc(void) { - struct iommu_table_entry *p; - - sort_iommu_table(__iommu_table, __iommu_table_end); - check_iommu_entries(__iommu_table, __iommu_table_end); - - for (p = __iommu_table; p < __iommu_table_end; p++) { - if (p && p->detect && p->detect() > 0) { - p->flags |= IOMMU_DETECTED; - if (p->early_init) - p->early_init(); - if (p->flags & IOMMU_FINISH_IF_DETECTED) - break; - } + if (xen_pv_domain()) { + pci_xen_swiotlb_init(); + return; } + pci_swiotlb_detect(); + gart_iommu_hole_init(); + amd_iommu_detect(); + detect_intel_iommu(); + if (x86_swiotlb_enable) + swiotlb_init(0); } /* @@ -102,7 +164,7 @@ static __init int iommu_setup(char *p) } #ifdef CONFIG_SWIOTLB if (!strncmp(p, "soft", 4)) - swiotlb = 1; + x86_swiotlb_enable = true; #endif if (!strncmp(p, "pt", 2)) iommu_set_default_passthrough(true); @@ -121,14 +183,17 @@ early_param("iommu", iommu_setup); static int __init pci_iommu_init(void) { - struct iommu_table_entry *p; - x86_init.iommu.iommu_init(); - for (p = __iommu_table; p < __iommu_table_end; p++) { - if (p && (p->flags & IOMMU_DETECTED) && p->late_init) - p->late_init(); +#ifdef CONFIG_SWIOTLB + /* An IOMMU turned us off. */ + if (x86_swiotlb_enable) { + pr_info("PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } else { + swiotlb_exit(); } +#endif return 0; } diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c deleted file mode 100644 index 42e92ec62973..000000000000 --- a/arch/x86/kernel/pci-iommu_table.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -static struct iommu_table_entry * __init -find_dependents_of(struct iommu_table_entry *start, - struct iommu_table_entry *finish, - struct iommu_table_entry *q) -{ - struct iommu_table_entry *p; - - if (!q) - return NULL; - - for (p = start; p < finish; p++) - if (p->detect == q->depend) - return p; - - return NULL; -} - - -void __init sort_iommu_table(struct iommu_table_entry *start, - struct iommu_table_entry *finish) { - - struct iommu_table_entry *p, *q, tmp; - - for (p = start; p < finish; p++) { -again: - q = find_dependents_of(start, finish, p); - /* We are bit sneaky here. We use the memory address to figure - * out if the node we depend on is past our point, if so, swap. - */ - if (q > p) { - tmp = *p; - memmove(p, q, sizeof(*p)); - *q = tmp; - goto again; - } - } - -} - -#ifdef DEBUG -void __init check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish) -{ - struct iommu_table_entry *p, *q, *x; - - /* Simple cyclic dependency checker. */ - for (p = start; p < finish; p++) { - q = find_dependents_of(start, finish, p); - x = find_dependents_of(start, finish, q); - if (p == x) { - printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n", - p->detect, q->detect); - /* Heavy handed way..*/ - x->depend = NULL; - } - } - - for (p = start; p < finish; p++) { - q = find_dependents_of(p, finish, p); - if (q && q > p) { - printk(KERN_ERR "EXECUTION ORDER INVALID! 
%pS should be called before %pS!\n", - p->detect, q->detect); - } - } -} -#else -void __init check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish) -{ -} -#endif diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c deleted file mode 100644 index 814ab46a0dad..000000000000 --- a/arch/x86/kernel/pci-swiotlb.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -int swiotlb __read_mostly; - -/* - * pci_swiotlb_detect_override - set swiotlb to 1 if necessary - * - * This returns non-zero if we are forced to use swiotlb (by the boot - * option). - */ -int __init pci_swiotlb_detect_override(void) -{ - if (swiotlb_force == SWIOTLB_FORCE) - swiotlb = 1; - - return swiotlb; -} -IOMMU_INIT_FINISH(pci_swiotlb_detect_override, - pci_xen_swiotlb_detect, - pci_swiotlb_init, - pci_swiotlb_late_init); - -/* - * If 4GB or more detected (and iommu=off not set) or if SME is active - * then set swiotlb to 1 and return 1. - */ -int __init pci_swiotlb_detect_4gb(void) -{ - /* don't initialize swiotlb if iommu=off (no_iommu=1) */ - if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) - swiotlb = 1; - - /* - * Set swiotlb to 1 so that bounce buffers are allocated and used for - * devices that can't support DMA to encrypted memory. - */ - if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - swiotlb = 1; - - return swiotlb; -} -IOMMU_INIT(pci_swiotlb_detect_4gb, - pci_swiotlb_detect_override, - pci_swiotlb_init, - pci_swiotlb_late_init); - -void __init pci_swiotlb_init(void) -{ - if (swiotlb) - swiotlb_init(0); -} - -void __init pci_swiotlb_late_init(void) -{ - /* An IOMMU turned us off. */ - if (!swiotlb) - swiotlb_exit(); - else { - printk(KERN_INFO "PCI-DMA: " - "Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_print_info(); - } -} diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index f9af561c3cd4..0c1154a1c403 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 7fda7f27e762..f5f6dc2e8007 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -315,18 +315,6 @@ SECTIONS *(.altinstr_replacement) } - /* - * struct iommu_table_entry entries are injected in this section. - * It is an array of IOMMUs which during run time gets sorted depending - * on its dependency order. After rootfs_initcall is complete - * this section can be safely removed. - */ - .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) { - __iommu_table = .; - *(.iommu_table) - __iommu_table_end = .; - } - . 
= ALIGN(8); .apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) { __apicdrivers = .; diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 4953260e281c..3c5b52fbe4a7 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -47,6 +47,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_PV_DOM0) += vga.o -obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o - obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c deleted file mode 100644 index 46df59aeaa06..000000000000 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* Glue code to lib/swiotlb-xen.c */ - -#include -#include -#include - -#include -#include -#include - - -#include -#ifdef CONFIG_X86_64 -#include -#include -#endif -#include - -static int xen_swiotlb __read_mostly; - -/* - * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary - * - * This returns non-zero if we are forced to use xen_swiotlb (by the boot - * option). - */ -int __init pci_xen_swiotlb_detect(void) -{ - - if (!xen_pv_domain()) - return 0; - - /* If running as PV guest, either iommu=soft, or swiotlb=force will - * activate this IOMMU. If running as PV privileged, activate it - * irregardless. - */ - if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE) - xen_swiotlb = 1; - - /* If we are running under Xen, we MUST disable the native SWIOTLB. - * Don't worry about swiotlb_force flag activating the native, as - * the 'swiotlb' flag is the only one turning it on. */ - swiotlb = 0; - -#ifdef CONFIG_X86_64 - /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 - * (so no iommu=X command line over-writes). - * Considering that PV guests do not want the *native SWIOTLB* but - * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. - */ - if (max_pfn > MAX_DMA32_PFN) - no_iommu = 1; -#endif - return xen_swiotlb; -} - -static void __init pci_xen_swiotlb_init(void) -{ - if (xen_swiotlb) { - xen_swiotlb_init_early(); - dma_ops = &xen_swiotlb_dma_ops; - -#ifdef CONFIG_PCI - /* Make sure ACS will be enabled */ - pci_request_acs(); -#endif - } -} - -int pci_xen_swiotlb_init_late(void) -{ - int rc; - - if (xen_swiotlb) - return 0; - - rc = xen_swiotlb_init(); - if (rc) - return rc; - - dma_ops = &xen_swiotlb_dma_ops; -#ifdef CONFIG_PCI - /* Make sure ACS will be enabled */ - pci_request_acs(); -#endif - - return 0; -} -EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); - -IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - NULL, - pci_xen_swiotlb_init, - NULL); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index b4a798c7b347..1a3ad58ba846 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -3257,11 +3256,6 @@ __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); __setup("ivrs_acpihid", parse_ivrs_acpihid); -IOMMU_INIT_FINISH(amd_iommu_detect, - gart_iommu_hole_init, - NULL, - NULL); - bool amd_iommu_v2_supported(void) { return amd_iommu_v2_present; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a1ada7bff44e..b47220ac09ea 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1840,7 +1840,10 @@ void amd_iommu_domain_update(struct protection_domain *domain) static void __init amd_iommu_init_dma_ops(void) { - swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 
1 : 0; + if (iommu_default_passthrough() || sme_me_mask) + x86_swiotlb_enable = true; + else + x86_swiotlb_enable = false; } int __init amd_iommu_init_api(void) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 4de960834a1b..592c1e1a5d4b 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include "../irq_remapping.h" @@ -912,7 +911,7 @@ dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg) return 0; } -int __init detect_intel_iommu(void) +void __init detect_intel_iommu(void) { int ret; struct dmar_res_callback validate_drhd_cb = { @@ -945,8 +944,6 @@ int __init detect_intel_iommu(void) dmar_tbl = NULL; } up_write(&dmar_global_lock); - - return ret ? ret : 1; } static void unmap_iommu(struct intel_iommu *iommu) @@ -2164,7 +2161,6 @@ static int __init dmar_free_unused_resources(void) } late_initcall(dmar_free_unused_resources); -IOMMU_INIT_POST(detect_intel_iommu); /* * DMAR Hotplug Support diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 45e903d84733..cbd714a198a0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -121,7 +121,7 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, struct dmar_dev_scope *devices, int count); /* Intel IOMMU detection */ -extern int detect_intel_iommu(void); +void detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); extern int dmar_device_add(acpi_handle handle); extern int dmar_device_remove(acpi_handle handle); @@ -197,6 +197,10 @@ static inline bool dmar_platform_optin(void) return false; } +static inline void detect_intel_iommu(void) +{ +} + #endif /* CONFIG_DMAR_TABLE */ struct irte { -- cgit From c6af2aa9ffc9763826607bc2664ef3ea4475ed18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Mar 2022 17:27:33 +0200 Subject: swiotlb: make the swiotlb_init interface more useful Pass a boolean flag to indicate if swiotlb needs to be enabled based on the addressing needs, and replace the verbose argument with a set of flags, including one to force enable bounce buffering. Note that this patch removes the possibility to force xen-swiotlb use with the swiotlb=force parameter on the command line on x86 (arm and arm64 never supported that), but this interface will be restored shortly. 
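To make the new calling convention concrete, a small hedged sketch of the reworked entry point. The flag values mirror the patch; the function body and the pfn numbers are stand-ins for illustration only:

#include <stdbool.h>
#include <stdio.h>

#define SWIOTLB_VERBOSE	(1 << 0)	/* verbose initialization */
#define SWIOTLB_FORCE	(1 << 1)	/* force bounce buffering */

static void swiotlb_init(bool addressing_limited, unsigned int flags)
{
	if (!addressing_limited && !(flags & SWIOTLB_FORCE))
		return;			/* nothing needs bouncing */
	printf("swiotlb enabled%s\n",
	       (flags & SWIOTLB_VERBOSE) ? " (verbose)" : "");
}

int main(void)
{
	unsigned long max_pfn = 5UL << 20;		/* assumed: ~20 GiB RAM */
	unsigned long dma32_limit_pfn = 1UL << 20;	/* assumed: 4 GiB at 4 KiB pages */

	/* arm64/riscv-style caller: enable only when RAM exceeds the
	 * 32-bit DMA limit; s390's protected-VM path would instead pass
	 * SWIOTLB_FORCE | SWIOTLB_VERBOSE. */
	swiotlb_init(max_pfn > dma32_limit_pfn, SWIOTLB_VERBOSE);
	return 0;
}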
Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- arch/arm/mm/init.c | 6 +----- arch/arm64/mm/init.c | 6 +----- arch/ia64/mm/init.c | 4 +--- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/loongson64/dma.c | 2 +- arch/mips/sibyte/common/dma.c | 2 +- arch/powerpc/mm/mem.c | 3 ++- arch/powerpc/platforms/pseries/setup.c | 3 --- arch/riscv/mm/init.c | 8 +------- arch/s390/mm/init.c | 3 +-- arch/x86/kernel/pci-dma.c | 15 +++++++-------- drivers/xen/swiotlb-xen.c | 4 ++-- include/linux/swiotlb.h | 15 +++++++-------- include/trace/events/swiotlb.h | 29 ++++++++++------------------ kernel/dma/swiotlb.c | 35 ++++++++++++++++++---------------- 15 files changed, 55 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index fe249ea91908..ce64bdb55a16 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -271,11 +271,7 @@ static void __init free_highpages(void) void __init mem_init(void) { #ifdef CONFIG_ARM_LPAE - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > arm_dma_pfn_limit) - swiotlb_init(1); - else - swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); #endif set_max_mapnr(pfn_to_page(max_pfn) - mem_map); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e7b1550e2fc..bd4095b7fb40 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -398,11 +398,7 @@ void __init bootmem_init(void) */ void __init mem_init(void) { - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit)) - swiotlb_init(1); - else if (!xen_swiotlb_detect()) - swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), SWIOTLB_VERBOSE); /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 5d165607bf35..3c3e15b22608 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -437,9 +437,7 @@ mem_init (void) if (iommu_detected) break; #endif -#ifdef CONFIG_SWIOTLB - swiotlb_init(1); -#endif + swiotlb_init(true, SWIOTLB_VERBOSE); } while (0); #ifdef CONFIG_FLATMEM diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index fb7547e21726..9fbba6a8fa4c 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -235,5 +235,5 @@ void __init plat_swiotlb_setup(void) #endif swiotlb_adjust_size(swiotlbsize); - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_VERBOSE); } diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index 364f2f27c872..8220a1bc0db6 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -24,5 +24,5 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) void __init plat_swiotlb_setup(void) { - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_VERBOSE); } diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c index eb47a94f3583..c5c2c782aff6 100644 --- a/arch/mips/sibyte/common/dma.c +++ b/arch/mips/sibyte/common/dma.c @@ -10,5 +10,5 @@ void __init plat_swiotlb_setup(void) { - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_VERBOSE); } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4d221d033804..74ca516c3e7e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -251,7 +252,7 @@ void __init mem_init(void) if (is_secure_guest()) svm_swiotlb_init(); else - 
swiotlb_init(0); + swiotlb_init(ppc_swiotlb_enable, 0); #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 955ff8aa1644..0f74b2284773 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -849,9 +849,6 @@ static void __init pSeries_setup_arch(void) } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; - - if (swiotlb_force == SWIOTLB_FORCE) - ppc_swiotlb_enable = 1; } static void pseries_panic(char *str) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9535bea8688c..181ffd322eaf 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -120,13 +120,7 @@ void __init mem_init(void) BUG_ON(!mem_map); #endif /* CONFIG_FLATMEM */ -#ifdef CONFIG_SWIOTLB - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(dma32_phys_limit)) - swiotlb_init(1); - else - swiotlb_force = SWIOTLB_NO_FORCE; -#endif + swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); memblock_free_all(); print_vm_layout(); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 86ffd0d51fd5..6fb6bf64326f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -185,8 +185,7 @@ static void pv_init(void) return; /* make sure bounce buffers are shared */ - swiotlb_force = SWIOTLB_FORCE; - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE); swiotlb_update_mem_attributes(); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 04140e20ef1a..a705a199bf8a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -39,6 +39,7 @@ int iommu_detected __read_mostly = 0; #ifdef CONFIG_SWIOTLB bool x86_swiotlb_enable; +static unsigned int x86_swiotlb_flags; static void __init pci_swiotlb_detect(void) { @@ -58,16 +59,16 @@ static void __init pci_swiotlb_detect(void) * bounce buffers as the hypervisor can't access arbitrary VM memory * that is not explicitly shared with it. */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; - - if (swiotlb_force == SWIOTLB_FORCE) + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { x86_swiotlb_enable = true; + x86_swiotlb_flags |= SWIOTLB_FORCE; + } } #else static inline void __init pci_swiotlb_detect(void) { } +#define x86_swiotlb_flags 0 #endif /* CONFIG_SWIOTLB */ #ifdef CONFIG_SWIOTLB_XEN @@ -75,8 +76,7 @@ static bool xen_swiotlb; static void __init pci_xen_swiotlb_init(void) { - if (!xen_initial_domain() && !x86_swiotlb_enable && - swiotlb_force != SWIOTLB_FORCE) + if (!xen_initial_domain() && !x86_swiotlb_enable) return; x86_swiotlb_enable = true; xen_swiotlb = true; @@ -120,8 +120,7 @@ void __init pci_iommu_alloc(void) gart_iommu_hole_init(); amd_iommu_detect(); detect_intel_iommu(); - if (x86_swiotlb_enable) - swiotlb_init(0); + swiotlb_init(x86_swiotlb_enable, x86_swiotlb_flags); } /* diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 485cd06ed39e..c2da3eb4826e 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -251,7 +251,7 @@ retry: panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc); } - if (swiotlb_init_with_tbl(start, nslabs, true)) + if (swiotlb_init_with_tbl(start, nslabs, SWIOTLB_VERBOSE)) panic("Cannot allocate SWIOTLB buffer"); } #endif /* CONFIG_X86 */ @@ -376,7 +376,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, /* * Oh well, have to allocate and map a bounce buffer. 
*/ - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); + trace_swiotlb_bounced(dev, dev_addr, size); map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs); if (map == (phys_addr_t)DMA_MAPPING_ERROR) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b48b26bfa0ed..ae0407173e84 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -13,11 +13,8 @@ struct device; struct page; struct scatterlist; -enum swiotlb_force { - SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ - SWIOTLB_FORCE, /* swiotlb=force */ - SWIOTLB_NO_FORCE, /* swiotlb=noforce */ -}; +#define SWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ +#define SWIOTLB_FORCE (1 << 1) /* force bounce buffering */ /* * Maximum allowable number of contiguous slabs to map, @@ -36,8 +33,7 @@ enum swiotlb_force { /* default to 64MB */ #define IO_TLB_DEFAULT_SIZE (64UL<<20) -extern void swiotlb_init(int verbose); -int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); +int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); int swiotlb_init_late(size_t size); @@ -126,13 +122,16 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) return mem && mem->force_bounce; } +void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); #else -#define swiotlb_force SWIOTLB_NO_FORCE +static inline void swiotlb_init(bool addressing_limited, unsigned int flags) +{ +} static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { return false; diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 705be43b71ab..da05c9ebd224 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -8,20 +8,15 @@ #include TRACE_EVENT(swiotlb_bounced, - - TP_PROTO(struct device *dev, - dma_addr_t dev_addr, - size_t size, - enum swiotlb_force swiotlb_force), - - TP_ARGS(dev, dev_addr, size, swiotlb_force), + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), + TP_ARGS(dev, dev_addr, size), TP_STRUCT__entry( - __string( dev_name, dev_name(dev) ) - __field( u64, dma_mask ) - __field( dma_addr_t, dev_addr ) - __field( size_t, size ) - __field( enum swiotlb_force, swiotlb_force ) + __string(dev_name, dev_name(dev)) + __field(u64, dma_mask) + __field(dma_addr_t, dev_addr) + __field(size_t, size) + __field(bool, force) ), TP_fast_assign( @@ -29,19 +24,15 @@ TRACE_EVENT(swiotlb_bounced, __entry->dma_mask = (dev->dma_mask ? *dev->dma_mask : 0); __entry->dev_addr = dev_addr; __entry->size = size; - __entry->swiotlb_force = swiotlb_force; + __entry->force = is_swiotlb_force_bounce(dev); ), - TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx " - "size=%zu %s", + TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s", __get_str(dev_name), __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __print_symbolic(__entry->swiotlb_force, - { SWIOTLB_NORMAL, "NORMAL" }, - { SWIOTLB_FORCE, "FORCE" }, - { SWIOTLB_NO_FORCE, "NO_FORCE" })) + __entry->force ? 
"FORCE" : "NORMAL") ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9a4fe6e48a07..86e877a96b82 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -62,7 +62,8 @@ #define INVALID_PHYS_ADDR (~(phys_addr_t)0) -enum swiotlb_force swiotlb_force; +static bool swiotlb_force_bounce; +static bool swiotlb_force_disable; struct io_tlb_mem io_tlb_default_mem; @@ -81,9 +82,9 @@ setup_io_tlb_npages(char *str) if (*str == ',') ++str; if (!strcmp(str, "force")) - swiotlb_force = SWIOTLB_FORCE; + swiotlb_force_bounce = true; else if (!strcmp(str, "noforce")) - swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_force_disable = true; return 0; } @@ -202,7 +203,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, mem->index = 0; mem->late_alloc = late_alloc; - if (swiotlb_force == SWIOTLB_FORCE) + if (swiotlb_force_bounce) mem->force_bounce = true; spin_lock_init(&mem->lock); @@ -224,12 +225,13 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, return; } -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, + unsigned int flags) { struct io_tlb_mem *mem = &io_tlb_default_mem; size_t alloc_size; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (swiotlb_force_disable) return 0; /* protect against double initialization */ @@ -243,8 +245,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) __func__, alloc_size, PAGE_SIZE); swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); + mem->force_bounce = flags & SWIOTLB_FORCE; - if (verbose) + if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); return 0; } @@ -253,20 +256,21 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. 
*/ -void __init -swiotlb_init(int verbose) +void __init swiotlb_init(bool addressing_limit, unsigned int flags) { size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT); void *tlb; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (!addressing_limit && !swiotlb_force_bounce) + return; + if (swiotlb_force_disable) return; /* Get IO TLB memory from the low pages */ tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) goto fail; - if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose)) + if (swiotlb_init_with_tbl(tlb, default_nslabs, flags)) goto fail_free_mem; return; @@ -289,7 +293,7 @@ int swiotlb_init_late(size_t size) unsigned int order; int rc = 0; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (swiotlb_force_disable) return 0; /* @@ -328,7 +332,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long bytes = nslabs << IO_TLB_SHIFT; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (swiotlb_force_disable) return 0; /* protect against double initialization */ @@ -353,7 +357,7 @@ void __init swiotlb_exit(void) unsigned long tbl_vaddr; size_t tbl_size, slots_size; - if (swiotlb_force == SWIOTLB_FORCE) + if (swiotlb_force_bounce) return; if (!mem->nslabs) @@ -704,8 +708,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, phys_addr_t swiotlb_addr; dma_addr_t dma_addr; - trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, - swiotlb_force); + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, attrs); -- cgit From 8ba2ed1be90fc210126f68186564707478552c95 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Feb 2022 13:36:57 +0200 Subject: swiotlb: add a SWIOTLB_ANY flag to lift the low memory restriction Power SVM wants to allocate a swiotlb buffer that is not restricted to low memory for the trusted hypervisor scheme. Consolidate the support for this into the swiotlb_init interface by adding a new flag. Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- arch/powerpc/include/asm/svm.h | 4 ---- arch/powerpc/include/asm/swiotlb.h | 1 + arch/powerpc/kernel/dma-swiotlb.c | 1 + arch/powerpc/mm/mem.c | 5 +---- arch/powerpc/platforms/pseries/svm.c | 26 +------------------------- include/linux/swiotlb.h | 1 + kernel/dma/swiotlb.c | 11 +++++++++-- 7 files changed, 14 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h index 7546402d796a..85580b30aba4 100644 --- a/arch/powerpc/include/asm/svm.h +++ b/arch/powerpc/include/asm/svm.h @@ -15,8 +15,6 @@ static inline bool is_secure_guest(void) return mfmsr() & MSR_S; } -void __init svm_swiotlb_init(void); - void dtl_cache_ctor(void *addr); #define get_dtl_cache_ctor() (is_secure_guest() ? 
dtl_cache_ctor : NULL) @@ -27,8 +25,6 @@ static inline bool is_secure_guest(void) return false; } -static inline void svm_swiotlb_init(void) {} - #define get_dtl_cache_ctor() NULL #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index 3c1a1cd16128..4203b5e0a88e 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -9,6 +9,7 @@ #include extern unsigned int ppc_swiotlb_enable; +extern unsigned int ppc_swiotlb_flags; #ifdef CONFIG_SWIOTLB void swiotlb_detect_4g(void); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index fc7816126a40..ba256c37bcc0 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -10,6 +10,7 @@ #include unsigned int ppc_swiotlb_enable; +unsigned int ppc_swiotlb_flags; void __init swiotlb_detect_4g(void) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 74ca516c3e7e..46fb78e3bb36 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -249,10 +249,7 @@ void __init mem_init(void) * back to to-down. */ memblock_set_bottom_up(true); - if (is_secure_guest()) - svm_swiotlb_init(); - else - swiotlb_init(ppc_swiotlb_enable, 0); + swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index c5228f4969eb..3b4045d508ec 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -28,7 +28,7 @@ static int __init init_svm(void) * need to use the SWIOTLB buffer for DMA even if dma_capable() says * otherwise. */ - swiotlb_force = SWIOTLB_FORCE; + ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE; /* Share the SWIOTLB buffer with the host. */ swiotlb_update_mem_attributes(); @@ -37,30 +37,6 @@ static int __init init_svm(void) } machine_early_initcall(pseries, init_svm); -/* - * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it - * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have - * any addressing limitation, we don't need to allocate it in low addresses. 
- */ -void __init svm_swiotlb_init(void) -{ - unsigned char *vstart; - unsigned long bytes, io_tlb_nslabs; - - io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) - return; - - - memblock_free(vstart, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - panic("SVM: Cannot allocate SWIOTLB buffer"); -} - int set_memory_encrypted(unsigned long addr, int numpages) { if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ae0407173e84..eabdd8998702 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -15,6 +15,7 @@ struct scatterlist; #define SWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ #define SWIOTLB_FORCE (1 << 1) /* force bounce buffering */ +#define SWIOTLB_ANY (1 << 2) /* allow any memory for the buffer */ /* * Maximum allowable number of contiguous slabs to map, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 86e877a96b82..f6e091424af3 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -266,8 +266,15 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags) if (swiotlb_force_disable) return; - /* Get IO TLB memory from the low pages */ - tlb = memblock_alloc_low(bytes, PAGE_SIZE); + /* + * By default allocate the bounce buffer memory from low memory, but + * allow to pick a location everywhere for hypervisors with guest + * memory encryption. + */ + if (flags & SWIOTLB_ANY) + tlb = memblock_alloc(bytes, PAGE_SIZE); + else + tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) goto fail; if (swiotlb_init_with_tbl(tlb, default_nslabs, flags)) -- cgit From 742519538e6b07250c8085bbff4bd358bc03bf16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Feb 2022 11:12:59 +0100 Subject: swiotlb: pass a gfp_mask argument to swiotlb_init_late Let the caller choose a zone to allocate from. This will be used later on by the xen-swiotlb initialization on arm.
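As a hedged sketch (not from this patch; example_late_init is a made-up name), a caller that must keep the buffer below 4 GiB could now pass GFP_DMA32 instead of being hardwired to GFP_DMA:

#include <linux/gfp.h>
#include <linux/swiotlb.h>

static int example_late_init(void)
{
	/* 64 MiB buffer, allocated from ZONE_DMA32 rather than ZONE_DMA */
	return swiotlb_init_late(64UL << 20, GFP_DMA32);
}

The only caller converted here, the sta2x11 fixup below, keeps its GFP_DMA behaviour.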
Signed-off-by: Christoph Hellwig Reviewed-by: Anshuman Khandual Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- arch/x86/pci/sta2x11-fixup.c | 2 +- include/linux/swiotlb.h | 2 +- kernel/dma/swiotlb.c | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index e0c039a75b2d..c7e6faf59a86 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev) int size = STA2X11_SWIOTLB_SIZE; /* First instance: register your own swiotlb area */ dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size); - if (swiotlb_init_late(size)) + if (swiotlb_init_late(size, GFP_DMA)) dev_emerg(&pdev->dev, "init swiotlb failed\n"); } list_add(&instance->list, &sta2x11_instance_list); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index eabdd8998702..ee655f2e4d28 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -37,7 +37,7 @@ struct scatterlist; int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); -int swiotlb_init_late(size_t size); +int swiotlb_init_late(size_t size, gfp_t gfp_mask); extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f6e091424af3..119187afc65e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -292,7 +292,7 @@ fail: * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching. */ -int swiotlb_init_late(size_t size) +int swiotlb_init_late(size_t size, gfp_t gfp_mask) { unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned long bytes; @@ -303,15 +303,12 @@ int swiotlb_init_late(size_t size) if (swiotlb_force_disable) return 0; - /* - * Get IO TLB memory from the low pages - */ order = get_order(nslabs << IO_TLB_SHIFT); nslabs = SLABS_PER_PAGE << order; bytes = nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, order); if (vstart) break; -- cgit From 7374153d294eb51de5a81ac38ff1c4fef8927bec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Mar 2022 08:02:57 +0100 Subject: swiotlb: provide swiotlb_init variants that remap the buffer To share more code between swiotlb and xen-swiotlb, offer a swiotlb_init_remap interface and add a remap callback to swiotlb_init_late that will allow Xen to remap the buffer without duplicating much of the logic.
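As a hedged sketch of the callback contract (example_remap and example_init are made-up names; this is not the Xen implementation), the remap hook returns 0 on success and a negative error to make the core retry with a smaller buffer:

#include <linux/swiotlb.h>

/* sketch: e.g. make the buffer visible to a hypervisor */
static int example_remap(void *tlb, unsigned long nslabs)
{
	return 0;	/* a negative return would shrink nslabs and retry */
}

void __init example_init(void)
{
	swiotlb_init_remap(true, SWIOTLB_VERBOSE, example_remap);
}

On remap failure the core halves nslabs (aligned to IO_TLB_SEGSIZE) until IO_TLB_MIN_SLABS is reached, as the retry loops in the hunks below show.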
Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- arch/x86/pci/sta2x11-fixup.c | 2 +- include/linux/swiotlb.h | 5 ++++- kernel/dma/swiotlb.c | 36 +++++++++++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index c7e6faf59a86..7368afc03998 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev) int size = STA2X11_SWIOTLB_SIZE; /* First instance: register your own swiotlb area */ dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size); - if (swiotlb_init_late(size, GFP_DMA)) + if (swiotlb_init_late(size, GFP_DMA, NULL)) dev_emerg(&pdev->dev, "init swiotlb failed\n"); } list_add(&instance->list, &sta2x11_instance_list); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ee655f2e4d28..7b50c82f84ce 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -36,8 +36,11 @@ struct scatterlist; int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags); unsigned long swiotlb_size_or_default(void); +void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)); +int swiotlb_init_late(size_t size, gfp_t gfp_mask, + int (*remap)(void *tlb, unsigned long nslabs)); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); -int swiotlb_init_late(size_t size, gfp_t gfp_mask); extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 119187afc65e..f6acfc7a41bf 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -256,9 +256,11 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -void __init swiotlb_init(bool addressing_limit, unsigned int flags) +void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)) { - size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT); + unsigned long nslabs = default_nslabs; + size_t bytes; void *tlb; if (!addressing_limit && !swiotlb_force_bounce) @@ -271,12 +273,23 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags) * allow to pick a location everywhere for hypervisors with guest * memory encryption. */ +retry: + bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT); if (flags & SWIOTLB_ANY) tlb = memblock_alloc(bytes, PAGE_SIZE); else tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) goto fail; + if (remap && remap(tlb, nslabs) < 0) { + memblock_free(tlb, PAGE_ALIGN(bytes)); + + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + if (nslabs < IO_TLB_MIN_SLABS) + panic("%s: Failed to remap %zu bytes\n", + __func__, bytes); + goto retry; + } if (swiotlb_init_with_tbl(tlb, default_nslabs, flags)) goto fail_free_mem; return; @@ -287,12 +300,18 @@ fail: pr_warn("Cannot allocate buffer"); } +void __init swiotlb_init(bool addressing_limit, unsigned int flags) +{ + return swiotlb_init_remap(addressing_limit, flags, NULL); +} + /* * Systems with larger DMA zones (those that don't support ISA) can * initialize the swiotlb later using the slab allocator if needed. 
* This should be just like above, but with some error catching. */ -int swiotlb_init_late(size_t size, gfp_t gfp_mask) +int swiotlb_init_late(size_t size, gfp_t gfp_mask, + int (*remap)(void *tlb, unsigned long nslabs)) { unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned long bytes; @@ -303,6 +322,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask) if (swiotlb_force_disable) return 0; +retry: order = get_order(nslabs << IO_TLB_SHIFT); nslabs = SLABS_PER_PAGE << order; bytes = nslabs << IO_TLB_SHIFT; @@ -323,6 +343,16 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask) (PAGE_SIZE << order) >> 20); nslabs = SLABS_PER_PAGE << order; } + if (remap) + rc = remap(vstart, nslabs); + if (rc) { + free_pages((unsigned long)vstart, order); + + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + if (nslabs < IO_TLB_MIN_SLABS) + return rc; + goto retry; + } rc = swiotlb_late_init_with_tbl(vstart, nslabs); if (rc) free_pages((unsigned long)vstart, order); -- cgit From 6424e31b1c050a25aea033206d5f626f3523448c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Mar 2022 07:41:04 +0100 Subject: swiotlb: remove swiotlb_init_with_tbl and swiotlb_init_late_with_tbl No users left. Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Boris Ostrovsky --- include/linux/swiotlb.h | 2 -- kernel/dma/swiotlb.c | 77 +++++++++++++------------------------------------ 2 files changed, 20 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 7b50c82f84ce..7ed35dd3de6e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -34,13 +34,11 @@ struct scatterlist; /* default to 64MB */ #define IO_TLB_DEFAULT_SIZE (64UL<<20) -int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags); unsigned long swiotlb_size_or_default(void); void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)); int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)); -extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f6acfc7a41bf..e2ef0864eb1e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -225,33 +225,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, return; } -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, - unsigned int flags) -{ - struct io_tlb_mem *mem = &io_tlb_default_mem; - size_t alloc_size; - - if (swiotlb_force_disable) - return 0; - - /* protect against double initialization */ - if (WARN_ON_ONCE(mem->nslabs)) - return -ENOMEM; - - alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); - mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); - if (!mem->slots) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); - - swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); - mem->force_bounce = flags & SWIOTLB_FORCE; - - if (flags & SWIOTLB_VERBOSE) - swiotlb_print_info(); - return 0; -} - /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. 
@@ -259,7 +232,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { + struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long nslabs = default_nslabs; + size_t alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); size_t bytes; void *tlb; @@ -280,7 +255,8 @@ retry: else tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) - goto fail; + panic("%s: failed to allocate tlb structure\n", __func__); + if (remap && remap(tlb, nslabs) < 0) { memblock_free(tlb, PAGE_ALIGN(bytes)); @@ -290,14 +266,17 @@ retry: __func__, bytes); goto retry; } - if (swiotlb_init_with_tbl(tlb, default_nslabs, flags)) - goto fail_free_mem; - return; -fail_free_mem: - memblock_free(tlb, bytes); -fail: - pr_warn("Cannot allocate buffer"); + mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); + if (!mem->slots) + panic("%s: Failed to allocate %zu bytes align=0x%lx\n", + __func__, alloc_size, PAGE_SIZE); + + swiotlb_init_io_tlb_mem(mem, __pa(tlb), default_nslabs, false); + mem->force_bounce = flags & SWIOTLB_FORCE; + + if (flags & SWIOTLB_VERBOSE) + swiotlb_print_info(); } void __init swiotlb_init(bool addressing_limit, unsigned int flags) @@ -313,6 +292,7 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags) int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)) { + struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned long bytes; unsigned char *vstart = NULL; @@ -353,33 +333,16 @@ retry: return rc; goto retry; } - rc = swiotlb_late_init_with_tbl(vstart, nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - - return rc; -} - -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) -{ - struct io_tlb_mem *mem = &io_tlb_default_mem; - unsigned long bytes = nslabs << IO_TLB_SHIFT; - - if (swiotlb_force_disable) - return 0; - - /* protect against double initialization */ - if (WARN_ON_ONCE(mem->nslabs)) - return -ENOMEM; mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(array_size(sizeof(*mem->slots), nslabs))); - if (!mem->slots) + if (!mem->slots) { + free_pages((unsigned long)vstart, order); return -ENOMEM; + } - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true); + set_memory_decrypted((unsigned long)vstart, bytes >> PAGE_SHIFT); + swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true); swiotlb_print_info(); return 0; -- cgit From f47a6113f4e87db7ca066635822e1b3ca3ed9514 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 16 Feb 2022 16:03:03 +0800 Subject: platform/chrome: cros_ec: remove unused variable `was_wake_device` Reviewed-by: Prashant Malani Signed-off-by: Tzung-Bi Shih --- drivers/platform/chrome/cros_ec.c | 1 - include/linux/platform_data/cros_ec_proto.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c index a5cc8f24299e..836794871443 100644 --- a/drivers/platform/chrome/cros_ec.c +++ b/drivers/platform/chrome/cros_ec.c @@ -342,7 +342,6 @@ int cros_ec_suspend(struct cros_ec_device *ec_dev) ec_dev->wake_enabled = !enable_irq_wake(ec_dev->irq); disable_irq(ec_dev->irq); - ec_dev->was_wake_device = ec_dev->wake_enabled; ec_dev->suspended = true; return 0; diff --git 
a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index df3c78c92ca2..c65971ec90ea 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -76,8 +76,6 @@ struct cros_ec_command { * struct cros_ec_device - Information about a ChromeOS EC device. * @phys_name: Name of physical comms layer (e.g. 'i2c-4'). * @dev: Device pointer for physical comms device - * @was_wake_device: True if this device was set to wake the system from - * sleep at the last suspend. * @cros_class: The class structure for this device. * @cmd_readmem: Direct read of the EC memory-mapped region, if supported. * @offset: Is within EC_LPC_ADDR_MEMMAP region. @@ -137,7 +135,6 @@ struct cros_ec_device { /* These are used by other drivers that want to talk to the EC */ const char *phys_name; struct device *dev; - bool was_wake_device; struct class *cros_class; int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset, unsigned int bytes, void *dest); -- cgit From 57b888ca2541785de2fcb90575b378921919b6c0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 18 Mar 2022 09:54:22 -0700 Subject: platform/chrome: Re-introduce cros_ec_cmd_xfer and use it for ioctls Commit 413dda8f2c6f ("platform/chrome: cros_ec_chardev: Use cros_ec_cmd_xfer_status helper") inadvertently changed the userspace ABI. Previously, cros_ec ioctls would only report errors if the EC communication failed, and otherwise return success and the result of the EC communication. An EC command execution failure was reported in the EC response field. The above-mentioned commit changed this behavior, and the ioctl itself would fail. This breaks userspace commands trying to analyze the EC command execution error since the actual EC command response is no longer reported to userspace. Fix the problem by re-introducing the cros_ec_cmd_xfer() helper and using it to handle ioctl messages. Fixes: 413dda8f2c6f ("platform/chrome: cros_ec_chardev: Use cros_ec_cmd_xfer_status helper") Cc: Daisuke Nojiri Cc: Rob Barnes Cc: Rajat Jain Cc: Brian Norris Cc: Parth Malkan Reviewed-by: Daisuke Nojiri Reviewed-by: Brian Norris Signed-off-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih --- drivers/platform/chrome/cros_ec_chardev.c | 2 +- drivers/platform/chrome/cros_ec_proto.c | 50 +++++++++++++++++++++++------ include/linux/platform_data/cros_ec_proto.h | 3 ++ 3 files changed, 45 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c index e0bce869c49a..fd33de546aee 100644 --- a/drivers/platform/chrome/cros_ec_chardev.c +++ b/drivers/platform/chrome/cros_ec_chardev.c @@ -301,7 +301,7 @@ static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) } s_cmd->command += ec->cmd_offset; - ret = cros_ec_cmd_xfer_status(ec->ec_dev, s_cmd); + ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd); /* Only copy data to userland if data was received. */ if (ret < 0) goto exit; diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index c4caf2e2de82..ac1419881ff3 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -560,22 +560,28 @@ exit: EXPORT_SYMBOL(cros_ec_query_all); /** - * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC. + * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC. * @ec_dev: EC device. * @msg: Message to write.
* - * Call this to send a command to the ChromeOS EC. This should be used instead of calling the EC's - * cmd_xfer() callback directly. It returns success status only if both the command was transmitted - * successfully and the EC replied with success status. + * Call this to send a command to the ChromeOS EC. This should be used instead + * of calling the EC's cmd_xfer() callback directly. This function does not + * convert EC command execution error codes to Linux error codes. Most + * in-kernel users will want to use cros_ec_cmd_xfer_status() instead since + * that function implements the conversion. * * Return: - * >=0 - The number of bytes transferred - * <0 - Linux error code + * >0 - EC command was executed successfully. The return value is the number + * of bytes returned by the EC (excluding the header). + * =0 - EC communication was successful. EC command execution results are + * reported in msg->result. The result will be EC_RES_SUCCESS if the + * command was executed successfully or report an EC command execution + * error. + * <0 - EC communication error. Return value is the Linux error code. */ -int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, - struct cros_ec_command *msg) +int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, struct cros_ec_command *msg) { - int ret, mapped; + int ret; mutex_lock(&ec_dev->lock); if (ec_dev->proto_version == EC_PROTO_VERSION_UNKNOWN) { @@ -616,6 +622,32 @@ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, ret = send_command(ec_dev, msg); mutex_unlock(&ec_dev->lock); + return ret; +} +EXPORT_SYMBOL(cros_ec_cmd_xfer); + +/** + * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC. + * @ec_dev: EC device. + * @msg: Message to write. + * + * Call this to send a command to the ChromeOS EC. This should be used instead of calling the EC's + * cmd_xfer() callback directly. It returns success status only if both the command was transmitted + * successfully and the EC replied with success status. + * + * Return: + * >=0 - The number of bytes transferred. + * <0 - Linux error code + */ +int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg) +{ + int ret, mapped; + + ret = cros_ec_cmd_xfer(ec_dev, msg); + if (ret < 0) + return ret; + mapped = cros_ec_map_error(msg->result); if (mapped) { dev_dbg(ec_dev->dev, "Command result (err: %d [%d])\n", diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index c65971ec90ea..138fd912c808 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -213,6 +213,9 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, int cros_ec_check_result(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg); + int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -- cgit From 2f1e85b1aee459b7d0fd981839042c6a38ffaf0c Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Sat, 16 Apr 2022 00:40:45 +0800 Subject: net: sched: use queue_mapping to pick tx queue This patch fixes an issue: * If we install tc filters with act_skbedit in the clsact hook, they don't work, because netdev_core_pick_tx() overwrites queue_mapping. $ tc filter ... action skbedit queue_mapping 1 This patch is also useful: * We can use FQ + EDT to implement efficient policies. Tx queues are picked by xps, the ndo_select_queue of the netdev driver, or the skb hash in netdev_core_pick_tx().
In fact, the netdev driver and skb hash are _not_ under our control. XPS uses the CPU map to select Tx queues, but in most cases we cannot figure out which task_struct of which pod/container is running on a given CPU. We can use clsact filters to classify one pod's/container's traffic to one Tx queue. Why? In a container networking environment, there are two kinds of pod/container/net-namespace. For one kind (e.g. P1, P2), high throughput is key for the applications, but to avoid exhausting network resources, the outbound traffic of these pods is rate-limited, using or sharing dedicated Tx queues with an HTB/TBF/FQ Qdisc assigned. For the other kind of pods (e.g. Pn), low latency of data access is key and the traffic is not limited; these pods use or share other dedicated Tx queues with a FIFO Qdisc assigned. This choice provides two benefits. First, contention on the HTB/FQ Qdisc lock is significantly reduced since fewer CPUs contend for the same queue. More importantly, Qdisc contention can be eliminated completely if each CPU has its own FIFO Qdisc for the second kind of pods. There must be a mechanism in place to support classifying traffic based on pods/containers to different Tx queues. Note that clsact runs outside of the Qdisc, while a Qdisc can run a classifier to select a sub-queue under the lock. In general, recording the decision in the skb seems a little heavy-handed. This patch instead introduces a per-CPU variable, suggested by Eric. The xmit.skip_txqueue flag is first cleared in __dev_queue_xmit(). - A Tx Qdisc may install such skbedit actions; the xmit.skip_txqueue flag is then set in qdisc->enqueue() even though a tx queue has already been selected in netdev_tx_queue_mapping() or netdev_core_pick_tx(). Clearing that flag first in __dev_queue_xmit() is useful to: - Avoid picking the Tx queue with netdev_tx_queue_mapping() in the next netdev in a case such as: eth0 macvlan - eth0.3 vlan - eth0 ixgbe-phy: For example, eth0 (a macvlan in the pod), whose root Qdisc installs skbedit queue_mapping, sends packets to eth0.3 (a vlan in the host). In __dev_queue_xmit() of eth0.3 the flag is cleared, and the tx queue is not selected according to skb->queue_mapping because there are no filters in the clsact or tx Qdisc of this netdev. The same action is taken in eth0, the ixgbe device in the host. - Avoid picking a Tx queue for the next packet. If we set xmit.skip_txqueue in the tx Qdisc (qdisc->enqueue()), the proper way to clear it is to clear it in __dev_queue_xmit() when processing the next packets. For performance reasons, a static key is used. If the user does not enable CONFIG_NET_EGRESS, this code is not compiled in. +----+ +----+ +----+ | P1 | | P2 | | Pn | +----+ +----+ +----+ | | | +-----------+-----------+ | | clsact/skbedit | MQ v +-----------+-----------+ | q0 | q1 | qn v v v HTB/FQ HTB/FQ ... FIFO Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S.
Miller" Cc: Jakub Kicinski Cc: Jonathan Lemon Cc: Eric Dumazet Cc: Alexander Lobakin Cc: Paolo Abeni Cc: Talal Ahmad Cc: Kevin Hao Cc: Ilias Apalodimas Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Antoine Tenart Cc: Wei Wang Cc: Arnd Bergmann Suggested-by: Eric Dumazet Signed-off-by: Tonghao Zhang Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 3 +++ include/linux/rtnetlink.h | 1 + net/core/dev.c | 31 +++++++++++++++++++++++++++++-- net/sched/act_skbedit.c | 6 +++++- 4 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a602f29365b0..7dccbfd1bf56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3061,6 +3061,9 @@ struct softnet_data { struct { u16 recursion; u8 more; +#ifdef CONFIG_NET_EGRESS + u8 skip_txqueue; +#endif } xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7f970b16da3a..ae2c6a3cec5d 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -100,6 +100,7 @@ void net_dec_ingress_queue(void); #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); +void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); diff --git a/net/core/dev.c b/net/core/dev.c index ba853e878007..4a77ebda4fb1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3925,6 +3925,25 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return skb; } + +static struct netdev_queue * +netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb) +{ + int qm = skb_get_queue_mapping(skb); + + return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm)); +} + +static bool netdev_xmit_txqueue_skipped(void) +{ + return __this_cpu_read(softnet_data.xmit.skip_txqueue); +} + +void netdev_xmit_skip_txqueue(bool skip) +{ + __this_cpu_write(softnet_data.xmit.skip_txqueue, skip); +} +EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_XPS @@ -4095,7 +4114,7 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; - struct netdev_queue *txq; + struct netdev_queue *txq = NULL; struct Qdisc *q; int rc = -ENOMEM; bool again = false; @@ -4123,11 +4142,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (!skb) goto out; } + + netdev_xmit_skip_txqueue(false); + nf_skip_egress(skb, true); skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; nf_skip_egress(skb, false); + + if (netdev_xmit_txqueue_skipped()) + txq = netdev_tx_queue_mapping(dev, skb); } #endif /* If device/qdisc don't need skb->dst, release it right now while @@ -4138,7 +4163,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) else skb_dst_force(skb); - txq = netdev_core_pick_tx(dev, skb, sb_dev); + if (!txq) + txq = netdev_core_pick_tx(dev, skb, sb_dev); + q = rcu_dereference_bh(txq->qdisc); trace_net_dev_queue(skb); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 92d0dc754207..1c5fdb6e7c2f 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -58,8 +58,12 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, } } if (params->flags & SKBEDIT_F_QUEUE_MAPPING && - skb->dev->real_num_tx_queues > params->queue_mapping) + 
skb->dev->real_num_tx_queues > params->queue_mapping) { +#ifdef CONFIG_NET_EGRESS + netdev_xmit_skip_txqueue(true); +#endif skb_set_queue_mapping(skb, params->queue_mapping); + } if (params->flags & SKBEDIT_F_MARK) { skb->mark &= ~params->mask; skb->mark |= params->mark & params->mask; -- cgit From c6547c2ed0e1487c91983faccad841611ab6a783 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 14 Apr 2022 18:22:37 +0200 Subject: dmaengine: imx: Move header to include/dma/ The i.MX DMA drivers are device tree only, nothing in include/linux/platform_data/dma-imx.h has platform_data in it, so move the file to include/linux/dma/imx-dma.h. Signed-off-by: Sascha Hauer Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20220414162249.3934543-10-s.hauer@pengutronix.de Signed-off-by: Mark Brown --- drivers/dma/imx-dma.c | 2 +- drivers/dma/imx-sdma.c | 2 +- drivers/mmc/host/mxcmmc.c | 2 +- drivers/spi/spi-fsl-lpspi.c | 2 +- drivers/spi/spi-imx.c | 2 +- drivers/tty/serial/imx.c | 2 +- drivers/video/fbdev/mx3fb.c | 2 +- include/linux/dma/imx-dma.h | 68 +++++++++++++++++++++++++++++++++++ include/linux/platform_data/dma-imx.h | 68 ----------------------------------- sound/soc/fsl/fsl_asrc.c | 2 +- sound/soc/fsl/fsl_asrc_dma.c | 2 +- sound/soc/fsl/fsl_easrc.h | 2 +- sound/soc/fsl/imx-pcm.h | 2 +- sound/soc/fsl/imx-ssi.h | 2 +- 14 files changed, 80 insertions(+), 80 deletions(-) create mode 100644 include/linux/dma/imx-dma.h delete mode 100644 include/linux/platform_data/dma-imx.h (limited to 'include/linux') diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 2ddc31e64db0..3bffe3ecbd1b 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include "dmaengine.h" #define IMXDMA_MAX_CHAN_DESCRIPTORS 16 diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 70c0aa931ddf..80261a905eb5 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 40b6878bea6c..de04b5afef2e 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -39,7 +39,7 @@ #include #include -#include +#include #define DRIVER_NAME "mxc-mmc" #define MXCMCI_TIMEOUT_MS 10000 diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 4c601294f8fa..19b1f3d881b0 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index b2dd0a4d2446..a944c787f53f 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -24,7 +24,7 @@ #include #include -#include +#include #define DRIVER_NAME "spi_imx" diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index fd38e6ed4fda..f8b5400e6267 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include "serial_mctrl_gpio.h" diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c index fabb271337ed..b945b68984b9 100644 --- a/drivers/video/fbdev/mx3fb.c +++ b/drivers/video/fbdev/mx3fb.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/include/linux/dma/imx-dma.h b/include/linux/dma/imx-dma.h new file mode 100644 index 000000000000..b06cba85a6d4 --- /dev/null +++ b/include/linux/dma/imx-dma.h @@ -0,0 +1,68 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved. + */ + +#ifndef __LINUX_DMA_IMX_H +#define __LINUX_DMA_IMX_H + +#include +#include +#include + +/* + * This enumerates peripheral types. Used for SDMA. + */ +enum sdma_peripheral_type { + IMX_DMATYPE_SSI, /* MCU domain SSI */ + IMX_DMATYPE_SSI_SP, /* Shared SSI */ + IMX_DMATYPE_MMC, /* MMC */ + IMX_DMATYPE_SDHC, /* SDHC */ + IMX_DMATYPE_UART, /* MCU domain UART */ + IMX_DMATYPE_UART_SP, /* Shared UART */ + IMX_DMATYPE_FIRI, /* FIRI */ + IMX_DMATYPE_CSPI, /* MCU domain CSPI */ + IMX_DMATYPE_CSPI_SP, /* Shared CSPI */ + IMX_DMATYPE_SIM, /* SIM */ + IMX_DMATYPE_ATA, /* ATA */ + IMX_DMATYPE_CCM, /* CCM */ + IMX_DMATYPE_EXT, /* External peripheral */ + IMX_DMATYPE_MSHC, /* Memory Stick Host Controller */ + IMX_DMATYPE_MSHC_SP, /* Shared Memory Stick Host Controller */ + IMX_DMATYPE_DSP, /* DSP */ + IMX_DMATYPE_MEMORY, /* Memory */ + IMX_DMATYPE_FIFO_MEMORY,/* FIFO type Memory */ + IMX_DMATYPE_SPDIF, /* SPDIF */ + IMX_DMATYPE_IPU_MEMORY, /* IPU Memory */ + IMX_DMATYPE_ASRC, /* ASRC */ + IMX_DMATYPE_ESAI, /* ESAI */ + IMX_DMATYPE_SSI_DUAL, /* SSI Dual FIFO */ + IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ + IMX_DMATYPE_SAI, /* SAI */ +}; + +enum imx_dma_prio { + DMA_PRIO_HIGH = 0, + DMA_PRIO_MEDIUM = 1, + DMA_PRIO_LOW = 2 +}; + +struct imx_dma_data { + int dma_request; /* DMA request line */ + int dma_request2; /* secondary DMA request line */ + enum sdma_peripheral_type peripheral_type; + int priority; +}; + +static inline int imx_dma_is_ipu(struct dma_chan *chan) +{ + return !strcmp(dev_name(chan->device->dev), "ipu-core"); +} + +static inline int imx_dma_is_general_purpose(struct dma_chan *chan) +{ + return !strcmp(chan->device->dev->driver->name, "imx-sdma") || + !strcmp(chan->device->dev->driver->name, "imx-dma"); +} + +#endif /* __LINUX_DMA_IMX_H */ diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h deleted file mode 100644 index 281adbb26e6b..000000000000 --- a/include/linux/platform_data/dma-imx.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved. - */ - -#ifndef __ASM_ARCH_MXC_DMA_H__ -#define __ASM_ARCH_MXC_DMA_H__ - -#include -#include -#include - -/* - * This enumerates peripheral types. Used for SDMA. 
- */ -enum sdma_peripheral_type { - IMX_DMATYPE_SSI, /* MCU domain SSI */ - IMX_DMATYPE_SSI_SP, /* Shared SSI */ - IMX_DMATYPE_MMC, /* MMC */ - IMX_DMATYPE_SDHC, /* SDHC */ - IMX_DMATYPE_UART, /* MCU domain UART */ - IMX_DMATYPE_UART_SP, /* Shared UART */ - IMX_DMATYPE_FIRI, /* FIRI */ - IMX_DMATYPE_CSPI, /* MCU domain CSPI */ - IMX_DMATYPE_CSPI_SP, /* Shared CSPI */ - IMX_DMATYPE_SIM, /* SIM */ - IMX_DMATYPE_ATA, /* ATA */ - IMX_DMATYPE_CCM, /* CCM */ - IMX_DMATYPE_EXT, /* External peripheral */ - IMX_DMATYPE_MSHC, /* Memory Stick Host Controller */ - IMX_DMATYPE_MSHC_SP, /* Shared Memory Stick Host Controller */ - IMX_DMATYPE_DSP, /* DSP */ - IMX_DMATYPE_MEMORY, /* Memory */ - IMX_DMATYPE_FIFO_MEMORY,/* FIFO type Memory */ - IMX_DMATYPE_SPDIF, /* SPDIF */ - IMX_DMATYPE_IPU_MEMORY, /* IPU Memory */ - IMX_DMATYPE_ASRC, /* ASRC */ - IMX_DMATYPE_ESAI, /* ESAI */ - IMX_DMATYPE_SSI_DUAL, /* SSI Dual FIFO */ - IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ - IMX_DMATYPE_SAI, /* SAI */ -}; - -enum imx_dma_prio { - DMA_PRIO_HIGH = 0, - DMA_PRIO_MEDIUM = 1, - DMA_PRIO_LOW = 2 -}; - -struct imx_dma_data { - int dma_request; /* DMA request line */ - int dma_request2; /* secondary DMA request line */ - enum sdma_peripheral_type peripheral_type; - int priority; -}; - -static inline int imx_dma_is_ipu(struct dma_chan *chan) -{ - return !strcmp(dev_name(chan->device->dev), "ipu-core"); -} - -static inline int imx_dma_is_general_purpose(struct dma_chan *chan) -{ - return !strcmp(chan->device->dev->driver->name, "imx-sdma") || - !strcmp(chan->device->dev->driver->name, "imx-dma"); -} - -#endif diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index d7d1536a4f37..ad4e6747b839 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index cd9b36ec0ecb..5038faf035cb 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/sound/soc/fsl/fsl_easrc.h b/sound/soc/fsl/fsl_easrc.h index 30620d56252c..86d5c360d4f5 100644 --- a/sound/soc/fsl/fsl_easrc.h +++ b/sound/soc/fsl/fsl_easrc.h @@ -7,7 +7,7 @@ #define _FSL_EASRC_H #include -#include +#include #include "fsl_asrc_common.h" diff --git a/sound/soc/fsl/imx-pcm.h b/sound/soc/fsl/imx-pcm.h index 5c6cf1ca8c8a..06b25f4b26b6 100644 --- a/sound/soc/fsl/imx-pcm.h +++ b/sound/soc/fsl/imx-pcm.h @@ -9,7 +9,7 @@ #ifndef _IMX_PCM_H #define _IMX_PCM_H -#include +#include /* * Do not change this as the FIQ handler depends on this size diff --git a/sound/soc/fsl/imx-ssi.h b/sound/soc/fsl/imx-ssi.h index 19cd0937e740..2d30d822451a 100644 --- a/sound/soc/fsl/imx-ssi.h +++ b/sound/soc/fsl/imx-ssi.h @@ -182,7 +182,7 @@ #define DRV_NAME "imx-ssi" #include -#include +#include #include #include "imx-pcm.h" -- cgit From 824a0a02cd74776461aaa30d792b1ed9111c9aa5 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 14 Apr 2022 18:22:39 +0200 Subject: dmaengine: imx-sdma: Add multi fifo support The i.MX SDMA engine can read from / write to multiple successive hardware FIFO registers, referred to as "Multi FIFO support". This is needed for the micfil driver and certain configurations of the SAI driver. This patch adds support for this feature. The number of FIFOs to read from / write to must be communicated from the client driver to the SDMA engine. 
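For example (an illustrative sketch, not code from this patch; example_sai_setup and the FIFO counts are made up), an audio client driver might pass the FIFO layout through the dmaengine slave-config path roughly as follows, assuming chan is a DMA channel it has already requested:

#include <linux/dmaengine.h>
#include <linux/dma/imx-dma.h>

static int example_sai_setup(struct dma_chan *chan)
{
	struct sdma_peripheral_config sdmacfg = {
		.n_fifos_src = 4,	/* e.g. four successive capture FIFO registers */
		.n_fifos_dst = 4,	/* e.g. four successive playback FIFO registers */
	};
	struct dma_slave_config cfg = {
		.direction = DMA_DEV_TO_MEM,
		.peripheral_config = &sdmacfg,
		.peripheral_size = sizeof(sdmacfg),
	};

	return dmaengine_slave_config(chan, &cfg);
}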
For this the struct dma_slave_config::peripheral_config field is used. Signed-off-by: Sascha Hauer Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20220414162249.3934543-12-s.hauer@pengutronix.de Signed-off-by: Mark Brown --- drivers/dma/imx-sdma.c | 57 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma/imx-dma.h | 20 ++++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 0e70843567ce..95367a8a81a5 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ #define SDMA_CHNENBL0_IMX35 0x200 #define SDMA_CHNENBL0_IMX31 0x080 #define SDMA_CHNPRI_0 0x100 +#define SDMA_DONE0_CONFIG 0x1000 /* * Buffer descriptor status values. @@ -180,6 +182,12 @@ BIT(DMA_MEM_TO_DEV) | \ BIT(DMA_DEV_TO_DEV)) +#define SDMA_WATERMARK_LEVEL_N_FIFOS GENMASK(15, 12) +#define SDMA_WATERMARK_LEVEL_SW_DONE BIT(23) + +#define SDMA_DONE0_CONFIG_DONE_SEL BIT(7) +#define SDMA_DONE0_CONFIG_DONE_DIS BIT(6) + /** * struct sdma_script_start_addrs - SDMA script start pointers * @@ -441,6 +449,9 @@ struct sdma_channel { struct work_struct terminate_worker; struct list_head terminated; bool is_ram_script; + unsigned int n_fifos_src; + unsigned int n_fifos_dst; + bool sw_done; }; #define IMX_DMA_SG_LOOP BIT(0) @@ -778,6 +789,14 @@ static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event) val = readl_relaxed(sdma->regs + chnenbl); __set_bit(channel, &val); writel_relaxed(val, sdma->regs + chnenbl); + + /* Set SDMA_DONEx_CONFIG is sw_done enabled */ + if (sdmac->sw_done) { + val = readl_relaxed(sdma->regs + SDMA_DONE0_CONFIG); + val |= SDMA_DONE0_CONFIG_DONE_SEL; + val &= ~SDMA_DONE0_CONFIG_DONE_DIS; + writel_relaxed(val, sdma->regs + SDMA_DONE0_CONFIG); + } } static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event) @@ -1038,6 +1057,10 @@ static int sdma_get_pc(struct sdma_channel *sdmac, case IMX_DMATYPE_IPU_MEMORY: emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr; break; + case IMX_DMATYPE_MULTI_SAI: + per_2_emi = sdma->script_addrs->sai_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_sai_addr; + break; default: dev_err(sdma->dev, "Unsupported transfer type %d\n", peripheral_type); @@ -1214,6 +1237,22 @@ static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT; } +static void sdma_set_watermarklevel_for_sais(struct sdma_channel *sdmac) +{ + unsigned int n_fifos; + + if (sdmac->sw_done) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SW_DONE; + + if (sdmac->direction == DMA_DEV_TO_MEM) + n_fifos = sdmac->n_fifos_src; + else + n_fifos = sdmac->n_fifos_dst; + + sdmac->watermark_level |= + FIELD_PREP(SDMA_WATERMARK_LEVEL_N_FIFOS, n_fifos); +} + static int sdma_config_channel(struct dma_chan *chan) { struct sdma_channel *sdmac = to_sdma_chan(chan); @@ -1250,6 +1289,10 @@ static int sdma_config_channel(struct dma_chan *chan) sdmac->peripheral_type == IMX_DMATYPE_ASRC) sdma_set_watermarklevel_for_p2p(sdmac); } else { + if (sdmac->peripheral_type == + IMX_DMATYPE_MULTI_SAI) + sdma_set_watermarklevel_for_sais(sdmac); + __set_bit(sdmac->event_id0, sdmac->event_mask); } @@ -1707,9 +1750,23 @@ static int sdma_config(struct dma_chan *chan, struct dma_slave_config *dmaengine_cfg) { struct sdma_channel *sdmac = to_sdma_chan(chan); + struct sdma_engine *sdma = sdmac->sdma; memcpy(&sdmac->slave_config, dmaengine_cfg, 
sizeof(*dmaengine_cfg)); + if (dmaengine_cfg->peripheral_config) { + struct sdma_peripheral_config *sdmacfg = dmaengine_cfg->peripheral_config; + if (dmaengine_cfg->peripheral_size != sizeof(struct sdma_peripheral_config)) { + dev_err(sdma->dev, "Invalid peripheral size %zu, expected %zu\n", + dmaengine_cfg->peripheral_size, + sizeof(struct sdma_peripheral_config)); + return -EINVAL; + } + sdmac->n_fifos_src = sdmacfg->n_fifos_src; + sdmac->n_fifos_dst = sdmacfg->n_fifos_dst; + sdmac->sw_done = sdmacfg->sw_done; + } + /* Set ENBLn earlier to make sure dma request triggered after that */ if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events) return -EINVAL; diff --git a/include/linux/dma/imx-dma.h b/include/linux/dma/imx-dma.h index b06cba85a6d4..8887762360d4 100644 --- a/include/linux/dma/imx-dma.h +++ b/include/linux/dma/imx-dma.h @@ -39,6 +39,7 @@ enum sdma_peripheral_type { IMX_DMATYPE_SSI_DUAL, /* SSI Dual FIFO */ IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ IMX_DMATYPE_SAI, /* SAI */ + IMX_DMATYPE_MULTI_SAI, /* MULTI FIFOs For Audio */ }; enum imx_dma_prio { @@ -65,4 +66,23 @@ static inline int imx_dma_is_general_purpose(struct dma_chan *chan) !strcmp(chan->device->dev->driver->name, "imx-dma"); } +/** + * struct sdma_peripheral_config - SDMA config for audio + * @n_fifos_src: Number of FIFOs for recording + * @n_fifos_dst: Number of FIFOs for playback + * @sw_done: Use software done. Needed for PDM (micfil) + * + * Some i.MX Audio devices (SAI, micfil) have multiple successive FIFO + * registers. For multichannel recording/playback the SAI/micfil have + * one FIFO register per channel and the SDMA engine has to read/write + * the next channel from/to the next register and wrap around to the + * first register when all channels are handled. The number of active + * channels must be communicated to the SDMA engine using this struct. + */ +struct sdma_peripheral_config { + int n_fifos_src; + int n_fifos_dst; + bool sw_done; +}; + #endif /* __LINUX_DMA_IMX_H */ -- cgit From 08d3df8c81537089fc8f21006b56f2f6fb23c6f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 1 Sep 2019 22:26:10 +0200 Subject: ARM: pxa: split up mach/hardware.h The mach/hardware.h header is included in lots of places, and it provides three different things on pxa: - the cpu_is_pxa* macros - an indirect inclusion of mach/addr-map.h - the __REG() and io_p2v() helper macros Split it up into separate linux/soc/pxa/cpu.h and mach/pxa-regs.h headers, then change all the files that use mach/hardware.h to include the exact set of those three headers that they actually need, allowing for further, more targeted cleanup. linux/soc/pxa/cpu.h can remain permanently exported and is now in a global location along with similar headers. pxa-regs.h and addr-map.h are only used in a very small number of drivers now and can be moved to arch/arm/mach-pxa/ directly once those drivers are changed to pass the necessary data as resources.
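As a hedged before/after sketch (example_probe is a made-up function, not taken from this series), a driver that only needs the CPU checks would end up with:

#include <linux/platform_device.h>
#include <linux/soc/pxa/cpu.h>	/* was: <mach/hardware.h> */

static int example_probe(struct platform_device *pdev)
{
	/* cpu_is_pxa27x() now comes from the new global header */
	if (cpu_is_pxa27x())
		dev_info(&pdev->dev, "running on PXA27x\n");
	return 0;
}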
Cc: Michael Turquette Cc: Stephen Boyd Acked-by: Viresh Kumar Acked-by: Dmitry Torokhov Cc: Jacek Anaszewski Cc: Pavel Machek Acked-by: Ulf Hansson Cc: Dominik Brodowski Acked-by: Alexandre Belloni Cc: Greg Kroah-Hartman Cc: Guenter Roeck Acked-by: Mark Brown Cc: linux-clk@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-input@vger.kernel.org Cc: linux-leds@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-mtd@lists.infradead.org Cc: linux-rtc@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linux-fbdev@vger.kernel.org Cc: linux-watchdog@vger.kernel.org Cc: alsa-devel@alsa-project.org Signed-off-by: Arnd Bergmann --- arch/arm/common/locomo.c | 1 - arch/arm/common/sa1111.c | 5 +- arch/arm/mach-pxa/cm-x300.c | 2 + arch/arm/mach-pxa/colibri-evalboard.c | 1 - arch/arm/mach-pxa/colibri-pxa270-income.c | 1 - arch/arm/mach-pxa/colibri-pxa300.c | 1 + arch/arm/mach-pxa/colibri-pxa3xx.c | 1 - arch/arm/mach-pxa/corgi.c | 1 - arch/arm/mach-pxa/corgi_pm.c | 1 - arch/arm/mach-pxa/csb726.c | 1 + arch/arm/mach-pxa/devices.c | 2 +- arch/arm/mach-pxa/ezx.c | 1 - arch/arm/mach-pxa/generic.c | 3 +- arch/arm/mach-pxa/gumstix.c | 1 - arch/arm/mach-pxa/hx4700.c | 2 +- arch/arm/mach-pxa/idp.c | 1 - arch/arm/mach-pxa/include/mach/hardware.h | 305 --------------------------- arch/arm/mach-pxa/include/mach/pxa-regs.h | 52 +++++ arch/arm/mach-pxa/include/mach/pxa2xx-regs.h | 2 +- arch/arm/mach-pxa/include/mach/pxa3xx-regs.h | 2 +- arch/arm/mach-pxa/include/mach/regs-ac97.h | 2 +- arch/arm/mach-pxa/include/mach/regs-ost.h | 2 +- arch/arm/mach-pxa/include/mach/trizeps4.h | 1 + arch/arm/mach-pxa/irq.c | 3 +- arch/arm/mach-pxa/littleton.c | 1 - arch/arm/mach-pxa/lpd270.c | 2 +- arch/arm/mach-pxa/lubbock.c | 1 - arch/arm/mach-pxa/magician.c | 2 +- arch/arm/mach-pxa/mainstone.c | 2 +- arch/arm/mach-pxa/mfp-pxa2xx.c | 1 + arch/arm/mach-pxa/mfp-pxa3xx.c | 1 - arch/arm/mach-pxa/poodle.c | 1 - arch/arm/mach-pxa/pxa-regs.h | 1 + arch/arm/mach-pxa/pxa25x.c | 3 +- arch/arm/mach-pxa/pxa25x.h | 2 +- arch/arm/mach-pxa/pxa27x-udc.h | 2 + arch/arm/mach-pxa/pxa27x.c | 3 +- arch/arm/mach-pxa/pxa27x.h | 2 +- arch/arm/mach-pxa/pxa2xx.c | 1 - arch/arm/mach-pxa/pxa300.c | 1 + arch/arm/mach-pxa/pxa320.c | 1 + arch/arm/mach-pxa/pxa3xx-ulpi.c | 2 +- arch/arm/mach-pxa/pxa3xx.c | 3 +- arch/arm/mach-pxa/pxa3xx.h | 2 +- arch/arm/mach-pxa/pxa930.c | 1 + arch/arm/mach-pxa/regs-rtc.h | 2 +- arch/arm/mach-pxa/regs-uart.h | 2 + arch/arm/mach-pxa/sleep.S | 1 - arch/arm/mach-pxa/smemc.c | 2 +- arch/arm/mach-pxa/spitz_pm.c | 1 - arch/arm/mach-pxa/standby.S | 1 - arch/arm/mach-pxa/xcep.c | 2 +- arch/arm/mach-pxa/zylonite.c | 1 + arch/arm/mach-pxa/zylonite.h | 2 + arch/arm/mach-pxa/zylonite_pxa300.c | 1 + arch/arm/mach-pxa/zylonite_pxa320.c | 1 + drivers/clk/pxa/clk-pxa3xx.c | 1 + drivers/cpufreq/pxa2xx-cpufreq.c | 1 + drivers/cpufreq/pxa3xx-cpufreq.c | 1 + drivers/input/mouse/pxa930_trkball.c | 1 - drivers/input/touchscreen/zylonite-wm97xx.c | 2 +- drivers/leds/leds-locomo.c | 1 - drivers/mmc/host/pxamci.c | 2 +- drivers/mtd/maps/pxa2xx-flash.c | 2 - drivers/pcmcia/pxa2xx_base.c | 2 +- drivers/pcmcia/pxa2xx_sharpsl.c | 1 - drivers/pcmcia/sa1111_generic.c | 1 - drivers/pcmcia/sa1111_lubbock.c | 1 - drivers/pcmcia/soc_common.c | 2 - drivers/rtc/rtc-pxa.c | 2 - drivers/usb/host/ohci-pxa27x.c | 3 +- drivers/video/fbdev/pxafb.c | 2 +- drivers/watchdog/sa1100_wdt.c | 1 - include/linux/soc/pxa/cpu.h | 252 ++++++++++++++++++++++ sound/arm/pxa2xx-ac97-lib.c | 1 + sound/soc/pxa/pxa2xx-ac97.c | 2 +- 
sound/soc/pxa/pxa2xx-i2s.c | 2 +- sound/soc/pxa/z2.c | 1 - 78 files changed, 364 insertions(+), 365 deletions(-) delete mode 100644 arch/arm/mach-pxa/include/mach/hardware.h create mode 100644 arch/arm/mach-pxa/include/mach/pxa-regs.h create mode 100644 arch/arm/mach-pxa/pxa-regs.h create mode 100644 include/linux/soc/pxa/cpu.h (limited to 'include/linux') diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 24d21ba63030..da30a4d4f35c 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 5367f03beb46..2343e2b6214d 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -26,13 +26,16 @@ #include #include -#include #include #include #include #include +#ifdef CONFIG_ARCH_SA1100 +#include +#endif + /* SA1111 IRQs */ #define IRQ_GPAIN0 (0) #define IRQ_GPAIN1 (1) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 2e35354b61f5..85e2537fdc15 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -40,6 +40,8 @@ #include #include +#include + #include #include #include diff --git a/arch/arm/mach-pxa/colibri-evalboard.c b/arch/arm/mach-pxa/colibri-evalboard.c index b9c173ede891..b62af07b8f96 100644 --- a/arch/arm/mach-pxa/colibri-evalboard.c +++ b/arch/arm/mach-pxa/colibri-evalboard.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-pxa/colibri-pxa270-income.c b/arch/arm/mach-pxa/colibri-pxa270-income.c index e5879e8b0682..f6eaf464ca83 100644 --- a/arch/arm/mach-pxa/colibri-pxa270-income.c +++ b/arch/arm/mach-pxa/colibri-pxa270-income.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include "pxa27x.h" diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c index 82052dfd96b6..4ceeea142bfd 100644 --- a/arch/arm/mach-pxa/colibri-pxa300.c +++ b/arch/arm/mach-pxa/colibri-pxa300.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-pxa/colibri-pxa3xx.c b/arch/arm/mach-pxa/colibri-pxa3xx.c index 3cead80a2b37..701dfef930eb 100644 --- a/arch/arm/mach-pxa/colibri-pxa3xx.c +++ b/arch/arm/mach-pxa/colibri-pxa3xx.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 44659fbc37ba..f897762c8b58 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/corgi_pm.c b/arch/arm/mach-pxa/corgi_pm.c index 092dcb9fced5..ff1ac9bf37cb 100644 --- a/arch/arm/mach-pxa/corgi_pm.c +++ b/arch/arm/mach-pxa/corgi_pm.c @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c index 98fcdc6e2944..d48493445ae5 100644 --- a/arch/arm/mach-pxa/csb726.c +++ b/arch/arm/mach-pxa/csb726.c @@ -17,6 +17,7 @@ #include #include + #include "csb726.h" #include "pxa27x.h" #include diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index 09b8495f3fd9..7ca97ddef6fe 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "udc.h" #include @@ -20,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c index 
eb85950e7c0e..69c2ec02a16c 100644 --- a/arch/arm/mach-pxa/ezx.c +++ b/arch/arm/mach-pxa/ezx.c @@ -29,7 +29,6 @@ #include "pxa27x.h" #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index ab7cdffd7ea8..3c3cd90bb9b4 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -17,11 +17,12 @@ #include #include #include +#include -#include #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index 49dd618b10f7..72b08a9bf0fd 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index e1870fbb19e7..191a6c24fe19 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -36,11 +36,11 @@ #include #include -#include #include #include #include "pxa27x.h" +#include #include #include diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index fb0850af8496..57c0511472bc 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h deleted file mode 100644 index ee7eab16135f..000000000000 --- a/arch/arm/mach-pxa/include/mach/hardware.h +++ /dev/null @@ -1,305 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mach-pxa/include/mach/hardware.h - * - * Author: Nicolas Pitre - * Created: Jun 15, 2001 - * Copyright: MontaVista Software Inc. - */ - -#ifndef __ASM_ARCH_HARDWARE_H -#define __ASM_ARCH_HARDWARE_H - -#include - -/* - * Workarounds for at least 2 errata so far require this. - * The mapping is set in mach-pxa/generic.c. - */ -#define UNCACHED_PHYS_0 0xfe000000 -#define UNCACHED_PHYS_0_SIZE 0x00100000 - -/* - * Intel PXA2xx internal register mapping: - * - * 0x40000000 - 0x41ffffff <--> 0xf2000000 - 0xf3ffffff - * 0x44000000 - 0x45ffffff <--> 0xf4000000 - 0xf5ffffff - * 0x48000000 - 0x49ffffff <--> 0xf6000000 - 0xf7ffffff - * 0x4c000000 - 0x4dffffff <--> 0xf8000000 - 0xf9ffffff - * 0x50000000 - 0x51ffffff <--> 0xfa000000 - 0xfbffffff - * 0x54000000 - 0x55ffffff <--> 0xfc000000 - 0xfdffffff - * 0x58000000 - 0x59ffffff <--> 0xfe000000 - 0xffffffff - * - * Note that not all PXA2xx chips implement all those addresses, and the - * kernel only maps the minimum needed range of this mapping. - */ -#define io_v2p(x) (0x3c000000 + ((x) & 0x01ffffff) + (((x) & 0x0e000000) << 1)) -#define io_p2v(x) IOMEM(0xf2000000 + ((x) & 0x01ffffff) + (((x) & 0x1c000000) >> 1)) - -#ifndef __ASSEMBLY__ -# define __REG(x) (*((volatile u32 __iomem *)io_p2v(x))) - -/* With indexed regs we don't want to feed the index through io_p2v() - especially if it is a variable, otherwise horrible code will result. 
*/ -# define __REG2(x,y) \ - (*(volatile u32 __iomem*)((u32)&__REG(x) + (y))) - -# define __PREG(x) (io_v2p((u32)&(x))) - -#else - -# define __REG(x) io_p2v(x) -# define __PREG(x) io_v2p(x) - -#endif - -#ifndef __ASSEMBLY__ - -#include - -/* - * CPU Stepping CPU_ID JTAG_ID - * - * PXA210 B0 0x69052922 0x2926C013 - * PXA210 B1 0x69052923 0x3926C013 - * PXA210 B2 0x69052924 0x4926C013 - * PXA210 C0 0x69052D25 0x5926C013 - * - * PXA250 A0 0x69052100 0x09264013 - * PXA250 A1 0x69052101 0x19264013 - * PXA250 B0 0x69052902 0x29264013 - * PXA250 B1 0x69052903 0x39264013 - * PXA250 B2 0x69052904 0x49264013 - * PXA250 C0 0x69052D05 0x59264013 - * - * PXA255 A0 0x69052D06 0x69264013 - * - * PXA26x A0 0x69052903 0x39264013 - * PXA26x B0 0x69052D05 0x59264013 - * - * PXA27x A0 0x69054110 0x09265013 - * PXA27x A1 0x69054111 0x19265013 - * PXA27x B0 0x69054112 0x29265013 - * PXA27x B1 0x69054113 0x39265013 - * PXA27x C0 0x69054114 0x49265013 - * PXA27x C5 0x69054117 0x79265013 - * - * PXA30x A0 0x69056880 0x0E648013 - * PXA30x A1 0x69056881 0x1E648013 - * PXA31x A0 0x69056890 0x0E649013 - * PXA31x A1 0x69056891 0x1E649013 - * PXA31x A2 0x69056892 0x2E649013 - * PXA32x B1 0x69056825 0x5E642013 - * PXA32x B2 0x69056826 0x6E642013 - * - * PXA930 B0 0x69056835 0x5E643013 - * PXA930 B1 0x69056837 0x7E643013 - * PXA930 B2 0x69056838 0x8E643013 - * - * PXA935 A0 0x56056931 0x1E653013 - * PXA935 B0 0x56056936 0x6E653013 - * PXA935 B1 0x56056938 0x8E653013 - */ -#ifdef CONFIG_PXA25x -#define __cpu_is_pxa210(id) \ - ({ \ - unsigned int _id = (id) & 0xf3f0; \ - _id == 0x2120; \ - }) - -#define __cpu_is_pxa250(id) \ - ({ \ - unsigned int _id = (id) & 0xf3ff; \ - _id <= 0x2105; \ - }) - -#define __cpu_is_pxa255(id) \ - ({ \ - unsigned int _id = (id) & 0xffff; \ - _id == 0x2d06; \ - }) - -#define __cpu_is_pxa25x(id) \ - ({ \ - unsigned int _id = (id) & 0xf300; \ - _id == 0x2100; \ - }) -#else -#define __cpu_is_pxa210(id) (0) -#define __cpu_is_pxa250(id) (0) -#define __cpu_is_pxa255(id) (0) -#define __cpu_is_pxa25x(id) (0) -#endif - -#ifdef CONFIG_PXA27x -#define __cpu_is_pxa27x(id) \ - ({ \ - unsigned int _id = (id) >> 4 & 0xfff; \ - _id == 0x411; \ - }) -#else -#define __cpu_is_pxa27x(id) (0) -#endif - -#ifdef CONFIG_CPU_PXA300 -#define __cpu_is_pxa300(id) \ - ({ \ - unsigned int _id = (id) >> 4 & 0xfff; \ - _id == 0x688; \ - }) -#else -#define __cpu_is_pxa300(id) (0) -#endif - -#ifdef CONFIG_CPU_PXA310 -#define __cpu_is_pxa310(id) \ - ({ \ - unsigned int _id = (id) >> 4 & 0xfff; \ - _id == 0x689; \ - }) -#else -#define __cpu_is_pxa310(id) (0) -#endif - -#ifdef CONFIG_CPU_PXA320 -#define __cpu_is_pxa320(id) \ - ({ \ - unsigned int _id = (id) >> 4 & 0xfff; \ - _id == 0x603 || _id == 0x682; \ - }) -#else -#define __cpu_is_pxa320(id) (0) -#endif - -#ifdef CONFIG_CPU_PXA930 -#define __cpu_is_pxa930(id) \ - ({ \ - unsigned int _id = (id) >> 4 & 0xfff; \ - _id == 0x683; \ - }) -#else -#define __cpu_is_pxa930(id) (0) -#endif - -#ifdef CONFIG_CPU_PXA935 -#define __cpu_is_pxa935(id) \ - ({ \ - unsigned int _id = (id) >> 4 & 0xfff; \ - _id == 0x693; \ - }) -#else -#define __cpu_is_pxa935(id) (0) -#endif - -#define cpu_is_pxa210() \ - ({ \ - __cpu_is_pxa210(read_cpuid_id()); \ - }) - -#define cpu_is_pxa250() \ - ({ \ - __cpu_is_pxa250(read_cpuid_id()); \ - }) - -#define cpu_is_pxa255() \ - ({ \ - __cpu_is_pxa255(read_cpuid_id()); \ - }) - -#define cpu_is_pxa25x() \ - ({ \ - __cpu_is_pxa25x(read_cpuid_id()); \ - }) - -#define cpu_is_pxa27x() \ - ({ \ - __cpu_is_pxa27x(read_cpuid_id()); \ - }) - -#define cpu_is_pxa300() \ - ({ \ 
- __cpu_is_pxa300(read_cpuid_id()); \ - }) - -#define cpu_is_pxa310() \ - ({ \ - __cpu_is_pxa310(read_cpuid_id()); \ - }) - -#define cpu_is_pxa320() \ - ({ \ - __cpu_is_pxa320(read_cpuid_id()); \ - }) - -#define cpu_is_pxa930() \ - ({ \ - __cpu_is_pxa930(read_cpuid_id()); \ - }) - -#define cpu_is_pxa935() \ - ({ \ - __cpu_is_pxa935(read_cpuid_id()); \ - }) - - - -/* - * CPUID Core Generation Bit - * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x - */ -#if defined(CONFIG_PXA25x) || defined(CONFIG_PXA27x) -#define __cpu_is_pxa2xx(id) \ - ({ \ - unsigned int _id = (id) >> 13 & 0x7; \ - _id <= 0x2; \ - }) -#else -#define __cpu_is_pxa2xx(id) (0) -#endif - -#ifdef CONFIG_PXA3xx -#define __cpu_is_pxa3xx(id) \ - ({ \ - __cpu_is_pxa300(id) \ - || __cpu_is_pxa310(id) \ - || __cpu_is_pxa320(id) \ - || __cpu_is_pxa93x(id); \ - }) -#else -#define __cpu_is_pxa3xx(id) (0) -#endif - -#if defined(CONFIG_CPU_PXA930) || defined(CONFIG_CPU_PXA935) -#define __cpu_is_pxa93x(id) \ - ({ \ - __cpu_is_pxa930(id) \ - || __cpu_is_pxa935(id); \ - }) -#else -#define __cpu_is_pxa93x(id) (0) -#endif - -#define cpu_is_pxa2xx() \ - ({ \ - __cpu_is_pxa2xx(read_cpuid_id()); \ - }) - -#define cpu_is_pxa3xx() \ - ({ \ - __cpu_is_pxa3xx(read_cpuid_id()); \ - }) - -#define cpu_is_pxa93x() \ - ({ \ - __cpu_is_pxa93x(read_cpuid_id()); \ - }) - - -/* - * return current memory and LCD clock frequency in units of 10kHz - */ -extern unsigned int get_memclk_frequency_10khz(void); - -#endif - -#endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-pxa/include/mach/pxa-regs.h b/arch/arm/mach-pxa/include/mach/pxa-regs.h new file mode 100644 index 000000000000..ba5120c06b8a --- /dev/null +++ b/arch/arm/mach-pxa/include/mach/pxa-regs.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Nicolas Pitre + * Created: Jun 15, 2001 + * Copyright: MontaVista Software Inc. + */ +#ifndef __ASM_MACH_PXA_REGS_H +#define __ASM_MACH_PXA_REGS_H + +/* + * Workarounds for at least 2 errata so far require this. + * The mapping is set in mach-pxa/generic.c. + */ +#define UNCACHED_PHYS_0 0xfe000000 +#define UNCACHED_PHYS_0_SIZE 0x00100000 + +/* + * Intel PXA2xx internal register mapping: + * + * 0x40000000 - 0x41ffffff <--> 0xf2000000 - 0xf3ffffff + * 0x44000000 - 0x45ffffff <--> 0xf4000000 - 0xf5ffffff + * 0x48000000 - 0x49ffffff <--> 0xf6000000 - 0xf7ffffff + * 0x4c000000 - 0x4dffffff <--> 0xf8000000 - 0xf9ffffff + * 0x50000000 - 0x51ffffff <--> 0xfa000000 - 0xfbffffff + * 0x54000000 - 0x55ffffff <--> 0xfc000000 - 0xfdffffff + * 0x58000000 - 0x59ffffff <--> 0xfe000000 - 0xffffffff + * + * Note that not all PXA2xx chips implement all those addresses, and the + * kernel only maps the minimum needed range of this mapping. + */ +#define io_v2p(x) (0x3c000000 + ((x) & 0x01ffffff) + (((x) & 0x0e000000) << 1)) +#define io_p2v(x) IOMEM(0xf2000000 + ((x) & 0x01ffffff) + (((x) & 0x1c000000) >> 1)) + +#ifndef __ASSEMBLY__ +# define __REG(x) (*((volatile u32 __iomem *)io_p2v(x))) + +/* With indexed regs we don't want to feed the index through io_p2v() + especially if it is a variable, otherwise horrible code will result. 
*/ +# define __REG2(x,y) \ + (*(volatile u32 __iomem*)((u32)&__REG(x) + (y))) + +# define __PREG(x) (io_v2p((u32)&(x))) + +#else + +# define __REG(x) io_p2v(x) +# define __PREG(x) io_v2p(x) + +#endif + + +#endif diff --git a/arch/arm/mach-pxa/include/mach/pxa2xx-regs.h b/arch/arm/mach-pxa/include/mach/pxa2xx-regs.h index fa121e135915..f68b573ab4a0 100644 --- a/arch/arm/mach-pxa/include/mach/pxa2xx-regs.h +++ b/arch/arm/mach-pxa/include/mach/pxa2xx-regs.h @@ -11,7 +11,7 @@ #ifndef __PXA2XX_REGS_H #define __PXA2XX_REGS_H -#include +#include "pxa-regs.h" /* * Power Manager diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h index 070f6c74196e..8eb1ba533e1c 100644 --- a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h +++ b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h @@ -10,7 +10,7 @@ #ifndef __ASM_ARCH_PXA3XX_REGS_H #define __ASM_ARCH_PXA3XX_REGS_H -#include +#include "pxa-regs.h" /* * Oscillator Configuration Register (OSCC) diff --git a/arch/arm/mach-pxa/include/mach/regs-ac97.h b/arch/arm/mach-pxa/include/mach/regs-ac97.h index 1db96fd4df32..ec09b9635e25 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ac97.h +++ b/arch/arm/mach-pxa/include/mach/regs-ac97.h @@ -2,7 +2,7 @@ #ifndef __ASM_ARCH_REGS_AC97_H #define __ASM_ARCH_REGS_AC97_H -#include +#include "pxa-regs.h" /* * AC97 Controller registers diff --git a/arch/arm/mach-pxa/include/mach/regs-ost.h b/arch/arm/mach-pxa/include/mach/regs-ost.h index deb564ed8ee7..109d0ed264df 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ost.h +++ b/arch/arm/mach-pxa/include/mach/regs-ost.h @@ -2,7 +2,7 @@ #ifndef __ASM_MACH_REGS_OST_H #define __ASM_MACH_REGS_OST_H -#include +#include "pxa-regs.h" /* * OS Timer & Match Registers diff --git a/arch/arm/mach-pxa/include/mach/trizeps4.h b/arch/arm/mach-pxa/include/mach/trizeps4.h index 3cddb1428c5e..27926629f9c6 100644 --- a/arch/arm/mach-pxa/include/mach/trizeps4.h +++ b/arch/arm/mach-pxa/include/mach/trizeps4.h @@ -11,6 +11,7 @@ #ifndef _TRIPEPS4_H_ #define _TRIPEPS4_H_ +#include #include "irqs.h" /* PXA_GPIO_TO_IRQ */ /* physical memory regions */ diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 74efc3ab595f..f25c30e8a834 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -17,13 +17,14 @@ #include #include #include +#include #include -#include #include #include "generic.h" +#include "pxa-regs.h" #define ICIP (0x000) #define ICMR (0x004) diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index 73f5953b3bb6..f98dc61e87af 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index 6fc40bc06910..eac32bd9e385 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -39,6 +38,7 @@ #include "pxa27x.h" #include "lpd270.h" +#include #include #include #include diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index e2411971422d..72816e7c206f 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 200fd35168e0..345a44d15a2c 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -29,12 +29,12 @@ #include #include -#include #include 
#include #include #include "pxa27x.h" +#include #include #include #include diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 997f6e502201..cf74adfe65df 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include #include #include "generic.h" diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c index 6a5451b186c2..6bc7206fd2ac 100644 --- a/arch/arm/mach-pxa/mfp-pxa2xx.c +++ b/arch/arm/mach-pxa/mfp-pxa2xx.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "mfp-pxa2xx.h" diff --git a/arch/arm/mach-pxa/mfp-pxa3xx.c b/arch/arm/mach-pxa/mfp-pxa3xx.c index 56114df9700d..f26b5e5412cf 100644 --- a/arch/arm/mach-pxa/mfp-pxa3xx.c +++ b/arch/arm/mach-pxa/mfp-pxa3xx.c @@ -16,7 +16,6 @@ #include #include -#include #include "mfp-pxa3xx.h" #include diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 58cfa434afde..ca52882433d4 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -30,7 +30,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/mach-pxa/pxa-regs.h b/arch/arm/mach-pxa/pxa-regs.h new file mode 100644 index 000000000000..584d2ac592cc --- /dev/null +++ b/arch/arm/mach-pxa/pxa-regs.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index 0d25cc45f825..305047ebd2f1 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -26,14 +26,15 @@ #include #include #include +#include #include #include -#include #include #include "pxa25x.h" #include #include "pm.h" +#include #include #include "generic.h" diff --git a/arch/arm/mach-pxa/pxa25x.h b/arch/arm/mach-pxa/pxa25x.h index b58d0fbdb4db..403bc16c2ed2 100644 --- a/arch/arm/mach-pxa/pxa25x.h +++ b/arch/arm/mach-pxa/pxa25x.h @@ -2,7 +2,7 @@ #ifndef __MACH_PXA25x_H #define __MACH_PXA25x_H -#include +#include #include #include "mfp-pxa25x.h" #include diff --git a/arch/arm/mach-pxa/pxa27x-udc.h b/arch/arm/mach-pxa/pxa27x-udc.h index faf73804697f..2d3df3b1cb68 100644 --- a/arch/arm/mach-pxa/pxa27x-udc.h +++ b/arch/arm/mach-pxa/pxa27x-udc.h @@ -2,6 +2,8 @@ #ifndef _ASM_ARCH_PXA27X_UDC_H #define _ASM_ARCH_PXA27X_UDC_H +#include "pxa-regs.h" + #ifdef _ASM_ARCH_PXA25X_UDC_H #error You cannot include both PXA25x and PXA27x UDC support #endif diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index f7e89831e85b..a81ac88ecbfd 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -33,6 +33,7 @@ #include #include #include "pm.h" +#include #include #include "generic.h" diff --git a/arch/arm/mach-pxa/pxa27x.h b/arch/arm/mach-pxa/pxa27x.h index abdc02fb4f03..6c99090647d2 100644 --- a/arch/arm/mach-pxa/pxa27x.h +++ b/arch/arm/mach-pxa/pxa27x.h @@ -3,7 +3,7 @@ #define __MACH_PXA27x_H #include -#include +#include #include #include "mfp-pxa27x.h" #include diff --git a/arch/arm/mach-pxa/pxa2xx.c b/arch/arm/mach-pxa/pxa2xx.c index 2d26cd2afbf3..ac72acb43e26 100644 --- a/arch/arm/mach-pxa/pxa2xx.c +++ b/arch/arm/mach-pxa/pxa2xx.c @@ -12,7 +12,6 @@ #include #include -#include #include #include "mfp-pxa25x.h" #include diff --git a/arch/arm/mach-pxa/pxa300.c b/arch/arm/mach-pxa/pxa300.c index 7f2f5a6a2263..f77ec118d5b9 100644 --- a/arch/arm/mach-pxa/pxa300.c +++ 
b/arch/arm/mach-pxa/pxa300.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "pxa300.h" diff --git a/arch/arm/mach-pxa/pxa320.c b/arch/arm/mach-pxa/pxa320.c index 78abcc741df7..e372e6c118de 100644 --- a/arch/arm/mach-pxa/pxa320.c +++ b/arch/arm/mach-pxa/pxa320.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "pxa320.h" diff --git a/arch/arm/mach-pxa/pxa3xx-ulpi.c b/arch/arm/mach-pxa/pxa3xx-ulpi.c index 4bd7da1f8657..c29a7f0fa1b0 100644 --- a/arch/arm/mach-pxa/pxa3xx-ulpi.c +++ b/arch/arm/mach-pxa/pxa3xx-ulpi.c @@ -21,8 +21,8 @@ #include #include #include +#include -#include #include "regs-u2d.h" #include diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 6eb1c24d7395..fc84aed99481 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -24,14 +24,15 @@ #include #include #include +#include #include #include -#include #include #include #include #include "pm.h" +#include #include #include diff --git a/arch/arm/mach-pxa/pxa3xx.h b/arch/arm/mach-pxa/pxa3xx.h index 6d4502aa9d06..22ace053ea25 100644 --- a/arch/arm/mach-pxa/pxa3xx.h +++ b/arch/arm/mach-pxa/pxa3xx.h @@ -2,7 +2,7 @@ #ifndef __MACH_PXA3XX_H #define __MACH_PXA3XX_H -#include +#include #include #include diff --git a/arch/arm/mach-pxa/pxa930.c b/arch/arm/mach-pxa/pxa930.c index bf91de4267e5..b9021a40cbd1 100644 --- a/arch/arm/mach-pxa/pxa930.c +++ b/arch/arm/mach-pxa/pxa930.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "pxa930.h" diff --git a/arch/arm/mach-pxa/regs-rtc.h b/arch/arm/mach-pxa/regs-rtc.h index b1f9ff14e335..96255a0f595e 100644 --- a/arch/arm/mach-pxa/regs-rtc.h +++ b/arch/arm/mach-pxa/regs-rtc.h @@ -2,7 +2,7 @@ #ifndef __ASM_MACH_REGS_RTC_H #define __ASM_MACH_REGS_RTC_H -#include +#include "pxa-regs.h" /* * Real Time Clock diff --git a/arch/arm/mach-pxa/regs-uart.h b/arch/arm/mach-pxa/regs-uart.h index 9a168f83afeb..490e9ca16297 100644 --- a/arch/arm/mach-pxa/regs-uart.h +++ b/arch/arm/mach-pxa/regs-uart.h @@ -2,6 +2,8 @@ #ifndef __ASM_ARCH_REGS_UART_H #define __ASM_ARCH_REGS_UART_H +#include "pxa-regs.h" + /* * UARTs */ diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S index 6c5b3ffd2cd3..272efeb954f4 100644 --- a/arch/arm/mach-pxa/sleep.S +++ b/arch/arm/mach-pxa/sleep.S @@ -13,7 +13,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c index 32e82cc92ea5..47b99549d616 100644 --- a/arch/arm/mach-pxa/smemc.c +++ b/arch/arm/mach-pxa/smemc.c @@ -8,8 +8,8 @@ #include #include #include +#include -#include #include #ifdef CONFIG_PM diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c index 25a1f8c5a738..201dabe883b6 100644 --- a/arch/arm/mach-pxa/spitz_pm.c +++ b/arch/arm/mach-pxa/spitz_pm.c @@ -18,7 +18,6 @@ #include #include -#include #include #include "pxa27x.h" diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S index eab1645bb4ad..626fecdefb1c 100644 --- a/arch/arm/mach-pxa/standby.S +++ b/arch/arm/mach-pxa/standby.S @@ -11,7 +11,6 @@ #include #include -#include #include diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c index f485146b899f..e6ab428287ae 100644 --- a/arch/arm/mach-pxa/xcep.c +++ b/arch/arm/mach-pxa/xcep.c @@ -24,8 +24,8 @@ #include #include -#include #include "pxa25x.h" +#include #include #include "generic.h" diff --git a/arch/arm/mach-pxa/zylonite.c b/arch/arm/mach-pxa/zylonite.c index 79f0025fa17a..9bcb81688201 100644 --- a/arch/arm/mach-pxa/zylonite.c +++ 
b/arch/arm/mach-pxa/zylonite.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-pxa/zylonite.h b/arch/arm/mach-pxa/zylonite.h index 7300ec2aac0d..afe3efcb8e04 100644 --- a/arch/arm/mach-pxa/zylonite.h +++ b/arch/arm/mach-pxa/zylonite.h @@ -2,6 +2,8 @@ #ifndef __ASM_ARCH_ZYLONITE_H #define __ASM_ARCH_ZYLONITE_H +#include + #define ZYLONITE_ETH_PHYS 0x14000000 #define EXT_GPIO(x) (128 + (x)) diff --git a/arch/arm/mach-pxa/zylonite_pxa300.c b/arch/arm/mach-pxa/zylonite_pxa300.c index 956fec1c4940..50a8a3547dbc 100644 --- a/arch/arm/mach-pxa/zylonite_pxa300.c +++ b/arch/arm/mach-pxa/zylonite_pxa300.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "pxa300.h" #include "devices.h" diff --git a/arch/arm/mach-pxa/zylonite_pxa320.c b/arch/arm/mach-pxa/zylonite_pxa320.c index 94cb834f36cd..67cab4f1194b 100644 --- a/arch/arm/mach-pxa/zylonite_pxa320.c +++ b/arch/arm/mach-pxa/zylonite_pxa320.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "pxa320.h" #include "zylonite.h" diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index 60db92772e72..027b78183565 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index f0b6f52eb2c3..0f0e676ff781 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/cpufreq/pxa3xx-cpufreq.c b/drivers/cpufreq/pxa3xx-cpufreq.c index 32f993c94675..d3b398b4aa6a 100644 --- a/drivers/cpufreq/pxa3xx-cpufreq.c +++ b/drivers/cpufreq/pxa3xx-cpufreq.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c index 3332b77eef2a..f04ba12dbfa8 100644 --- a/drivers/input/mouse/pxa930_trkball.c +++ b/drivers/input/mouse/pxa930_trkball.c @@ -15,7 +15,6 @@ #include #include -#include #include /* Trackball Controller Register Definitions */ diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c index 0f4ac7f844ce..f57bdf083188 100644 --- a/drivers/input/touchscreen/zylonite-wm97xx.c +++ b/drivers/input/touchscreen/zylonite-wm97xx.c @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/drivers/leds/leds-locomo.c b/drivers/leds/leds-locomo.c index 42dc46e3f00f..9aa3fccd71fb 100644 --- a/drivers/leds/leds-locomo.c +++ b/drivers/leds/leds-locomo.c @@ -11,7 +11,6 @@ #include #include -#include #include static void locomoled_brightness_set(struct led_classdev *led_cdev, diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 316393c694d7..0db9490dc659 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -31,10 +31,10 @@ #include #include #include +#include #include -#include #include #include "pxamci.h" diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c index 7d96758a8f04..1749dbbacc13 100644 --- a/drivers/mtd/maps/pxa2xx-flash.c +++ b/drivers/mtd/maps/pxa2xx-flash.c @@ -16,8 +16,6 @@ #include #include -#include - #include #define CACHELINESIZE 32 diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index d6d2f75f8f47..7cd1375d6087 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -23,8 
+23,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/drivers/pcmcia/pxa2xx_sharpsl.c b/drivers/pcmcia/pxa2xx_sharpsl.c index 5fdd25a9e28e..66fe1d1af12a 100644 --- a/drivers/pcmcia/pxa2xx_sharpsl.c +++ b/drivers/pcmcia/pxa2xx_sharpsl.c @@ -15,7 +15,6 @@ #include #include -#include #include #include diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index 29fdd174bc23..bce664bbdc98 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -17,7 +17,6 @@ #include -#include #include #include #include diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index 7feb8d61c639..f1b5160cb8fa 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index 3a8c84bb174d..9276a628473d 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -47,8 +47,6 @@ #include #include -#include - #include "soc_common.h" static irqreturn_t soc_common_pcmcia_interrupt(int irq, void *dev); diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index cf8119b6d320..eeacf480cf36 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -16,8 +16,6 @@ #include #include -#include - #include "rtc-sa1100.h" #define RTC_DEF_DIVIDER (32768 - 1) diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c index 54aa5c77e549..ab4f610a0140 100644 --- a/drivers/usb/host/ohci-pxa27x.c +++ b/drivers/usb/host/ohci-pxa27x.c @@ -36,8 +36,7 @@ #include #include #include - -#include +#include #include "ohci.h" diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index f1551e00eb12..e3d1a184d2be 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -57,10 +57,10 @@ #include #include #include +#include #include