From 4946f15e8c334840bf277a0bf924371eae120fcd Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 30 Nov 2021 22:40:43 +0100 Subject: genirq/generic_chip: Constify irq_generic_chip_ops The only usage of irq_generic_chip_ops is to pass its address to irq_domain_add_linear() which takes a pointer to const struct irq_domain_ops. Make it const to allow the compiler to put it in read-only memory. [ tglx: Fixed subject prefix ] Signed-off-by: Rikard Falkeborn Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20211130214043.1257585-1-rikard.falkeborn@gmail.com --- include/linux/irqdomain.h | 2 +- kernel/irq/generic-chip.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 553da4899f55..d476405802e9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -131,7 +131,7 @@ struct irq_domain_ops { #endif }; -extern struct irq_domain_ops irq_generic_chip_ops; +extern const struct irq_domain_ops irq_generic_chip_ops; struct irq_domain_chip_generic; diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 6f29bf4c8515..f0862eb6b506 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -451,7 +451,7 @@ static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) } -struct irq_domain_ops irq_generic_chip_ops = { +const struct irq_domain_ops irq_generic_chip_ops = { .map = irq_map_generic_chip, .unmap = irq_unmap_generic_chip, .xlate = irq_domain_xlate_onetwocell, -- cgit v1.2.3-73-gaa49b From 65c7cdedeb3026fabcc967a7aae2f755ad4d0783 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Sep 2021 11:24:17 -0400 Subject: genirq: Provide new interfaces for affinity hints The discussion about removing the side effect of irq_set_affinity_hint() of actually applying the cpumask (if not NULL) as affinity to the interrupt, unearthed a few unpleasantries: 1) The modular perf drivers rely on the current behaviour for the very wrong reasons. 2) While none of the other drivers prevents user space from changing the affinity, a cursorily inspection shows that there are at least expectations in some drivers. #1 needs to be cleaned up anyway, so that's not a problem #2 might result in subtle regressions especially when irqbalanced (which nowadays ignores the affinity hint) is disabled. Provide new interfaces: irq_update_affinity_hint() - Only sets the affinity hint pointer irq_set_affinity_and_hint() - Set the pointer and apply the affinity to the interrupt Make irq_set_affinity_hint() a wrapper around irq_apply_affinity_hint() and document it to be phased out. Signed-off-by: Thomas Gleixner Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210501021832.743094-1-jesse.brandeburg@intel.com Link: https://lore.kernel.org/r/20210903152430.244937-2-nitesh@redhat.com --- include/linux/interrupt.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++- kernel/irq/manage.c | 8 +++---- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 1f22a30c0963..9367f1cb2e3c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -329,7 +329,46 @@ extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); -extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); +extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity); + +/** + * irq_update_affinity_hint - Update the affinity hint + * @irq: Interrupt to update + * @m: cpumask pointer (NULL to clear the hint) + * + * Updates the affinity hint, but does not change the affinity of the interrupt. + */ +static inline int +irq_update_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + return __irq_apply_affinity_hint(irq, m, false); +} + +/** + * irq_set_affinity_and_hint - Update the affinity hint and apply the provided + * cpumask to the interrupt + * @irq: Interrupt to update + * @m: cpumask pointer (NULL to clear the hint) + * + * Updates the affinity hint and if @m is not NULL it applies it as the + * affinity of that interrupt. + */ +static inline int +irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m) +{ + return __irq_apply_affinity_hint(irq, m, true); +} + +/* + * Deprecated. Use irq_update_affinity_hint() or irq_set_affinity_and_hint() + * instead. + */ +static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + return irq_set_affinity_and_hint(irq, m); +} + extern int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity); @@ -361,6 +400,18 @@ static inline int irq_can_set_affinity(unsigned int irq) static inline int irq_select_affinity(unsigned int irq) { return 0; } +static inline int irq_update_affinity_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} + +static inline int irq_set_affinity_and_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} + static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7405e384e5ed..f23ffd30385b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -486,7 +486,8 @@ int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) } EXPORT_SYMBOL_GPL(irq_force_affinity); -int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); @@ -495,12 +496,11 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) return -EINVAL; desc->affinity_hint = m; irq_put_desc_unlock(desc, flags); - /* set the initial affinity to prevent every interrupt being on CPU0 */ - if (m) + if (m && setaffinity) __irq_set_affinity(irq, m, false); return 0; } -EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); static void irq_affinity_notify(struct work_struct *work) { -- cgit v1.2.3-73-gaa49b From 0f9744f4ed539f2e847d7ed41993b243e3ba5cff Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:18 -0400 Subject: iavf: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() for two purposes: - To set the affinity_hint which is consumed by the userspace for distributing the interrupts - To apply an affinity that it provides for the iavf interrupts The latter is done to ensure that all the interrupts are evenly spread across all available CPUs. However, since commit a0c9259dc4e1 ("irq/matrix: Spread interrupts on allocation") the spreading of interrupts is dynamically performed at the time of allocation. Hence, there is no need for the drivers to enforce their own affinity for the spreading of interrupts. Also, irq_set_affinity_hint() applying the provided cpumask as an affinity for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only sets the pointer for the affinity_hint. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Acked-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210903152430.244937-3-nitesh@redhat.com --- drivers/net/ethernet/intel/iavf/iavf_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 14934a7a13ef..1980e62a644a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -492,10 +492,10 @@ iavf_request_traffic_irqs(struct iavf_adapter *adapter, char *basename) irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); /* Spread the IRQ affinity hints across online CPUs. Note that * get_cpu_mask returns a mask with a permanent lifetime so - * it's safe to use as a hint for irq_set_affinity_hint. + * it's safe to use as a hint for irq_update_affinity_hint. */ cpu = cpumask_local_spread(q_vector->v_idx, -1); - irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); + irq_update_affinity_hint(irq_num, get_cpu_mask(cpu)); } return 0; @@ -505,7 +505,7 @@ free_queue_irqs: vector--; irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; irq_set_affinity_notifier(irq_num, NULL); - irq_set_affinity_hint(irq_num, NULL); + irq_update_affinity_hint(irq_num, NULL); free_irq(irq_num, &adapter->q_vectors[vector]); } return err; @@ -557,7 +557,7 @@ static void iavf_free_traffic_irqs(struct iavf_adapter *adapter) for (vector = 0; vector < q_vectors; vector++) { irq_num = adapter->msix_entries[vector + NONQ_VECS].vector; irq_set_affinity_notifier(irq_num, NULL); - irq_set_affinity_hint(irq_num, NULL); + irq_update_affinity_hint(irq_num, NULL); free_irq(irq_num, &adapter->q_vectors[vector]); } } -- cgit v1.2.3-73-gaa49b From d34c54d1739c2cdf2e4437b74e6da269147f4987 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:19 -0400 Subject: i40e: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() for two purposes: - To set the affinity_hint which is consumed by the userspace for distributing the interrupts - To apply an affinity that it provides for the i40e interrupts The latter is done to ensure that all the interrupts are evenly spread across all available CPUs. However, since commit a0c9259dc4e1 ("irq/matrix: Spread interrupts on allocation") the spreading of interrupts is dynamically performed at the time of allocation. Hence, there is no need for the drivers to enforce their own affinity for the spreading of interrupts. Also, irq_set_affinity_hint() applying the provided cpumask as an affinity for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only sets the pointer for the affinity_hint. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Acked-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210903152430.244937-4-nitesh@redhat.com --- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e118cf9265c7..ef3375e84821 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3891,10 +3891,10 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) * * get_cpu_mask returns a static constant mask with * a permanent lifetime so it's ok to pass to - * irq_set_affinity_hint without making a copy. + * irq_update_affinity_hint without making a copy. */ cpu = cpumask_local_spread(q_vector->v_idx, -1); - irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); + irq_update_affinity_hint(irq_num, get_cpu_mask(cpu)); } vsi->irqs_ready = true; @@ -3905,7 +3905,7 @@ free_queue_irqs: vector--; irq_num = pf->msix_entries[base + vector].vector; irq_set_affinity_notifier(irq_num, NULL); - irq_set_affinity_hint(irq_num, NULL); + irq_update_affinity_hint(irq_num, NULL); free_irq(irq_num, &vsi->q_vectors[vector]); } return err; @@ -4726,7 +4726,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) /* clear the affinity notifier in the IRQ descriptor */ irq_set_affinity_notifier(irq_num, NULL); /* remove our suggested affinity mask for this IRQ */ - irq_set_affinity_hint(irq_num, NULL); + irq_update_affinity_hint(irq_num, NULL); synchronize_irq(irq_num); free_irq(irq_num, vsi->q_vectors[i]); -- cgit v1.2.3-73-gaa49b From 8049da6f3943d0ac51931b8064b2e4769a69a967 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:20 -0400 Subject: scsi: megaraid_sas: Use irq_set_affinity_and_hint() The driver uses irq_set_affinity_hint() specifically for the high IOPS queue interrupts for two purposes: - To set the affinity_hint which is consumed by the userspace for distributing the interrupts - To apply an affinity that it provides The driver enforces its own affinity to bind the high IOPS queue interrupts to the local NUMA node. However, irq_set_affinity_hint() applying the provided cpumask as an affinity for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_set_affinity_and_hint() where the provided mask needs to be applied as the affinity and affinity_hint pointer needs to be set and replace with irq_update_affinity_hint() where only affinity_hint needs to be updated. Change the megasas_set_high_iops_queue_affinity_hint function name to megasas_set_high_iops_queue_affinity_and_hint to clearly indicate that the function is setting both affinity and affinity_hint. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Acked-by: Sumit Saxena Link: https://lore.kernel.org/r/20210903152430.244937-5-nitesh@redhat.com --- drivers/scsi/megaraid/megaraid_sas_base.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index aeb95f409826..82e1e24257bc 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5720,7 +5720,7 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) "Failed to register IRQ for vector %d.\n", i); for (j = 0; j < i; j++) { if (j < instance->low_latency_index_start) - irq_set_affinity_hint( + irq_update_affinity_hint( pci_irq_vector(pdev, j), NULL); free_irq(pci_irq_vector(pdev, j), &instance->irq_context[j]); @@ -5763,7 +5763,7 @@ megasas_destroy_irqs(struct megasas_instance *instance) { if (instance->msix_vectors) for (i = 0; i < instance->msix_vectors; i++) { if (i < instance->low_latency_index_start) - irq_set_affinity_hint( + irq_update_affinity_hint( pci_irq_vector(instance->pdev, i), NULL); free_irq(pci_irq_vector(instance->pdev, i), &instance->irq_context[i]); @@ -5894,22 +5894,25 @@ int megasas_get_device_list(struct megasas_instance *instance) } /** - * megasas_set_high_iops_queue_affinity_hint - Set affinity hint for high IOPS queues - * @instance: Adapter soft state - * return: void + * megasas_set_high_iops_queue_affinity_and_hint - Set affinity and hint + * for high IOPS queues + * @instance: Adapter soft state + * return: void */ static inline void -megasas_set_high_iops_queue_affinity_hint(struct megasas_instance *instance) +megasas_set_high_iops_queue_affinity_and_hint(struct megasas_instance *instance) { int i; - int local_numa_node; + unsigned int irq; + const struct cpumask *mask; if (instance->perf_mode == MR_BALANCED_PERF_MODE) { - local_numa_node = dev_to_node(&instance->pdev->dev); + mask = cpumask_of_node(dev_to_node(&instance->pdev->dev)); - for (i = 0; i < instance->low_latency_index_start; i++) - irq_set_affinity_hint(pci_irq_vector(instance->pdev, i), - cpumask_of_node(local_numa_node)); + for (i = 0; i < instance->low_latency_index_start; i++) { + irq = pci_irq_vector(instance->pdev, i); + irq_set_affinity_and_hint(irq, mask); + } } } @@ -5998,7 +6001,7 @@ megasas_alloc_irq_vectors(struct megasas_instance *instance) instance->msix_vectors = 0; if (instance->smp_affinity_enable) - megasas_set_high_iops_queue_affinity_hint(instance); + megasas_set_high_iops_queue_affinity_and_hint(instance); } /** -- cgit v1.2.3-73-gaa49b From fdb8ed13a77270c8e6e05b3ff9f4cb2f57e16d6a Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:21 -0400 Subject: scsi: mpt3sas: Use irq_set_affinity_and_hint() The driver uses irq_set_affinity_hint() specifically for the high IOPS queue interrupts for two purposes: - To set the affinity_hint which is consumed by the userspace for distributing the interrupts - To apply an affinity that it provides The driver enforces its own affinity to bind the high IOPS queue interrupts to the local NUMA node. However, irq_set_affinity_hint() applying the provided cpumask as an affinity (if not NULL) for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_set_affinity_and_hint() where the provided mask needs to be applied as the affinity and affinity_hint pointer needs to be set and replace with irq_update_affinity_hint() where only affinity_hint needs to be updated. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: Sreekanth Reddy Link: https://lore.kernel.org/r/20210903152430.244937-6-nitesh@redhat.com --- drivers/scsi/mpt3sas/mpt3sas_base.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 81dab9b82f79..511726f92d9a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3086,6 +3086,7 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc) void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc) { + unsigned int irq; struct adapter_reply_queue *reply_q, *next; if (list_empty(&ioc->reply_queue_list)) @@ -3098,9 +3099,10 @@ mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc) continue; } - if (ioc->smp_affinity_enable) - irq_set_affinity_hint(pci_irq_vector(ioc->pdev, - reply_q->msix_index), NULL); + if (ioc->smp_affinity_enable) { + irq = pci_irq_vector(ioc->pdev, reply_q->msix_index); + irq_update_affinity_hint(irq, NULL); + } free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index), reply_q); kfree(reply_q); @@ -3167,18 +3169,15 @@ out: * @ioc: per adapter object * * The enduser would need to set the affinity via /proc/irq/#/smp_affinity - * - * It would nice if we could call irq_set_affinity, however it is not - * an exported symbol */ static void _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) { - unsigned int cpu, nr_cpus, nr_msix, index = 0; + unsigned int cpu, nr_cpus, nr_msix, index = 0, irq; struct adapter_reply_queue *reply_q; - int local_numa_node; int iopoll_q_count = ioc->reply_queue_count - ioc->iopoll_q_start_index; + const struct cpumask *mask; if (!_base_is_controller_msix_enabled(ioc)) return; @@ -3201,11 +3200,11 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) * corresponding to high iops queues. */ if (ioc->high_iops_queues) { - local_numa_node = dev_to_node(&ioc->pdev->dev); + mask = cpumask_of_node(dev_to_node(&ioc->pdev->dev)); for (index = 0; index < ioc->high_iops_queues; index++) { - irq_set_affinity_hint(pci_irq_vector(ioc->pdev, - index), cpumask_of_node(local_numa_node)); + irq = pci_irq_vector(ioc->pdev, index); + irq_set_affinity_and_hint(irq, mask); } } -- cgit v1.2.3-73-gaa49b From fb5bd854710e410e5544f8ba348a5be6541e4939 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:22 -0400 Subject: RDMA/irdma: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() to update the affinity_hint mask that is consumed by the userspace to distribute the interrupts. However, under the hood irq_set_affinity_hint() also applies the provided cpumask (if not NULL) as the affinity for the given interrupt which is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only updates the affinity_hint pointer. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Acked-by: Tatyana Nikolova Link: https://lore.kernel.org/r/20210903152430.244937-7-nitesh@redhat.com --- drivers/infiniband/hw/irdma/hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 4108dcabece2..367f121b500f 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -545,7 +545,7 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, struct irdma_sc_dev *dev = &rf->sc_dev; dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); - irq_set_affinity_hint(msix_vec->irq, NULL); + irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); } @@ -1095,7 +1095,7 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, } cpumask_clear(&msix_vec->mask); cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask); - irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask); + irq_update_affinity_hint(msix_vec->irq, &msix_vec->mask); if (status) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: ceq irq config fail\n"); return IRDMA_ERR_CFG; -- cgit v1.2.3-73-gaa49b From cb39ca92eb74d00b5b2e1debdba1f33e6e4c8ebd Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:23 -0400 Subject: enic: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() to update the affinity_hint mask that is consumed by the userspace to distribute the interrupts. However, under the hood irq_set_affinity_hint() also applies the provided cpumask (if not NULL) as the affinity for the given interrupt which is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only updates the affinity_hint pointer. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: Christian Benvenuti Link: https://lore.kernel.org/r/20210903152430.244937-8-nitesh@redhat.com --- drivers/net/ethernet/cisco/enic/enic_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index aacf141986d5..2faba079b4fb 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -150,10 +150,10 @@ static void enic_set_affinity_hint(struct enic *enic) !cpumask_available(enic->msix[i].affinity_mask) || cpumask_empty(enic->msix[i].affinity_mask)) continue; - err = irq_set_affinity_hint(enic->msix_entry[i].vector, - enic->msix[i].affinity_mask); + err = irq_update_affinity_hint(enic->msix_entry[i].vector, + enic->msix[i].affinity_mask); if (err) - netdev_warn(enic->netdev, "irq_set_affinity_hint failed, err %d\n", + netdev_warn(enic->netdev, "irq_update_affinity_hint failed, err %d\n", err); } @@ -173,7 +173,7 @@ static void enic_unset_affinity_hint(struct enic *enic) int i; for (i = 0; i < enic->intr_count; i++) - irq_set_affinity_hint(enic->msix_entry[i].vector, NULL); + irq_update_affinity_hint(enic->msix_entry[i].vector, NULL); } static int enic_udp_tunnel_set_port(struct net_device *netdev, -- cgit v1.2.3-73-gaa49b From b8b9dd525203304d36bd123fca3ce0c3b318339b Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:24 -0400 Subject: be2net: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() to update the affinity_hint mask that is consumed by the userspace to distribute the interrupts. However, under the hood irq_set_affinity_hint() also applies the provided cpumask (if not NULL) as the affinity for the given interrupt which is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only updates the affinity_hint pointer. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210903152430.244937-9-nitesh@redhat.com --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d51f24c9e1b8..84b3ba9bdb18 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3491,7 +3491,7 @@ static int be_msix_register(struct be_adapter *adapter) if (status) goto err_msix; - irq_set_affinity_hint(vec, eqo->affinity_mask); + irq_update_affinity_hint(vec, eqo->affinity_mask); } return 0; @@ -3552,7 +3552,7 @@ static void be_irq_unregister(struct be_adapter *adapter) /* MSIx */ for_all_evt_queues(adapter, eqo, i) { vec = be_msix_vec_get(adapter, eqo); - irq_set_affinity_hint(vec, NULL); + irq_update_affinity_hint(vec, NULL); free_irq(vec, eqo); } -- cgit v1.2.3-73-gaa49b From cc493264c01d055742a34cfbaecaffb258dcc58c Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:25 -0400 Subject: ixgbe: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() to update the affinity_hint mask that is consumed by the userspace to distribute the interrupts. However, under the hood irq_set_affinity_hint() also applies the provided cpumask (if not NULL) as the affinity for the given interrupt which is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only updates the affinity_hint pointer. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Acked-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210903152430.244937-10-nitesh@redhat.com --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0f9f022260d7..66068e17ee1f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3247,8 +3247,8 @@ static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter) /* If Flow Director is enabled, set interrupt affinity */ if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { /* assign the mask for this irq */ - irq_set_affinity_hint(entry->vector, - &q_vector->affinity_mask); + irq_update_affinity_hint(entry->vector, + &q_vector->affinity_mask); } } @@ -3264,8 +3264,8 @@ static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter) free_queue_irqs: while (vector) { vector--; - irq_set_affinity_hint(adapter->msix_entries[vector].vector, - NULL); + irq_update_affinity_hint(adapter->msix_entries[vector].vector, + NULL); free_irq(adapter->msix_entries[vector].vector, adapter->q_vector[vector]); } @@ -3398,7 +3398,7 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) continue; /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(entry->vector, NULL); + irq_update_affinity_hint(entry->vector, NULL); free_irq(entry->vector, q_vector); } -- cgit v1.2.3-73-gaa49b From bf886e1ef11a0155714ca93c975bb3f770e79d72 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:26 -0400 Subject: mailbox: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() to: - Set the affinity_hint which is consumed by the userspace for distributing the interrupts - Enforce affinity As per commit 6ac17fe8c14a ("mailbox: bcm-flexrm-mailbox: Set IRQ affinity hint for FlexRM ring IRQs") the latter is done to ensure that the FlexRM ring interrupts are evenly spread across all available CPUs. However, since commit a0c9259dc4e1 ("irq/matrix: Spread interrupts on allocation") the spreading of interrupts is dynamically performed at the time of allocation. Hence, there is no need for the drivers to enforce their own affinity for the spreading of interrupts. Also, irq_set_affinity_hint() applying the provided cpumask as an affinity for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only sets the affinity_hint pointer. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Acked-by: Jassi Brar Link: https://lore.kernel.org/r/20210903152430.244937-11-nitesh@redhat.com --- drivers/mailbox/bcm-flexrm-mailbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c index 78073ad1f2f1..16982c13d323 100644 --- a/drivers/mailbox/bcm-flexrm-mailbox.c +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -1298,7 +1298,7 @@ static int flexrm_startup(struct mbox_chan *chan) val = (num_online_cpus() < val) ? val / num_online_cpus() : 1; cpumask_set_cpu((ring->num / val) % num_online_cpus(), &ring->irq_aff_hint); - ret = irq_set_affinity_hint(ring->irq, &ring->irq_aff_hint); + ret = irq_update_affinity_hint(ring->irq, &ring->irq_aff_hint); if (ret) { dev_err(ring->mbox->dev, "failed to set IRQ affinity hint for ring%d\n", @@ -1425,7 +1425,7 @@ static void flexrm_shutdown(struct mbox_chan *chan) /* Release IRQ */ if (ring->irq_requested) { - irq_set_affinity_hint(ring->irq, NULL); + irq_update_affinity_hint(ring->irq, NULL); free_irq(ring->irq, ring); ring->irq_requested = false; } -- cgit v1.2.3-73-gaa49b From ce5a58a96ccc241c6e5cbb3360494ddfaf118afe Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:27 -0400 Subject: scsi: lpfc: Use irq_set_affinity() The driver uses irq_set_affinity_hint to set the affinity for the lpfc interrupts to a mask corresponding to the local NUMA node to avoid performance overhead on AMD architectures. However, irq_set_affinity_hint() setting the affinity is an undocumented side effect that this function also sets the affinity under the hood. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Also, as per the commit dcaa21367938 ("scsi: lpfc: Change default IRQ model on AMD architectures"): "On AMD architecture, revert the irq allocation to the normal style (non-managed) and then use irq_set_affinity_hint() to set the cpu affinity and disable user-space rebalancing." we don't really need to set the affinity_hint as user-space rebalancing for the lpfc interrupts is not desired. Hence, replace the irq_set_affinity_hint() with irq_set_affinity() which only applies the affinity for the interrupts. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: James Smart Link: https://lore.kernel.org/r/20210903152430.244937-12-nitesh@redhat.com --- drivers/scsi/lpfc/lpfc_init.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index ba17a8f740a9..945755266c49 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -12709,7 +12709,7 @@ lpfc_irq_set_aff(struct lpfc_hba_eq_hdl *eqhdl, unsigned int cpu) cpumask_clear(&eqhdl->aff_mask); cpumask_set_cpu(cpu, &eqhdl->aff_mask); irq_set_status_flags(eqhdl->irq, IRQ_NO_BALANCING); - irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); + irq_set_affinity(eqhdl->irq, &eqhdl->aff_mask); } /** @@ -12998,7 +12998,6 @@ cfg_fail_out: for (--index; index >= 0; index--) { eqhdl = lpfc_get_eq_hdl(index); lpfc_irq_clear_aff(eqhdl); - irq_set_affinity_hint(eqhdl->irq, NULL); free_irq(eqhdl->irq, eqhdl); } @@ -13159,7 +13158,6 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba) for (index = 0; index < phba->cfg_irq_chann; index++) { eqhdl = lpfc_get_eq_hdl(index); lpfc_irq_clear_aff(eqhdl); - irq_set_affinity_hint(eqhdl->irq, NULL); free_irq(eqhdl->irq, eqhdl); } } else { -- cgit v1.2.3-73-gaa49b From 2d1e72f235d665aa699fb821f6da6e8bde84cbde Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:28 -0400 Subject: hinic: Use irq_set_affinity_and_hint() The driver uses irq_set_affinity_hint() to: - Set the affinity_hint which is consumed by the userspace for distributing the interrupts - Enforce affinity As per commit 352f58b0d9f2 ("net-next/hinic: Set Rxq irq to specific cpu for NUMA"), the hinic driver enforces its own affinity to bind IRQs to the local NUMA node. However, irq_set_affinity_hint() applying the provided cpumask as an affinity for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_set_affinity_and_hint() where the provided mask needs to be applied as the affinity and affinity_hint pointer needs to be set and replace with irq_update_affinity_hint() where only affinity_hint needs to be updated. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210903152430.244937-13-nitesh@redhat.com --- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index fed3b6bc0d76..b33ed4d92b71 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -548,7 +548,7 @@ static int rx_request_irq(struct hinic_rxq *rxq) goto err_req_irq; cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask); - err = irq_set_affinity_hint(rq->irq, &rq->affinity_mask); + err = irq_set_affinity_and_hint(rq->irq, &rq->affinity_mask); if (err) goto err_irq_affinity; @@ -565,7 +565,7 @@ static void rx_free_irq(struct hinic_rxq *rxq) { struct hinic_rq *rq = rxq->rq; - irq_set_affinity_hint(rq->irq, NULL); + irq_update_affinity_hint(rq->irq, NULL); free_irq(rq->irq, rxq); rx_del_napi(rxq); } -- cgit v1.2.3-73-gaa49b From 7451e9ea8e2055af39afe7ff39a5f68d8ec6b98d Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:29 -0400 Subject: net/mlx5: Use irq_set_affinity_and_hint() The driver uses irq_set_affinity_hint() to update the affinity_hint mask that is consumed by the userspace to distribute the interrupts and to apply the provided mask as the affinity for the mlx5 interrupts. However, irq_set_affinity_hint() applying the provided cpumask as an affinity for the interrupt is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_set_affinity_and_hint() where the provided mask needs to be applied as the affinity and affinity_hint pointer needs to be set and replace with irq_update_affinity_hint() where only affinity_hint needs to be updated. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20210903152430.244937-14-nitesh@redhat.com --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..54fb67cec544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -143,11 +143,11 @@ static void irq_release(struct mlx5_irq *irq) struct mlx5_irq_pool *pool = irq->pool; xa_erase(&pool->irqs, irq->index); - /* free_irq requires that affinity and rmap will be cleared + /* free_irq requires that affinity_hint and rmap will be cleared * before calling it. This is why there is asymmetry with set_rmap * which should be called after alloc_irq but before request_irq. */ - irq_set_affinity_hint(irq->irqn, NULL); + irq_update_affinity_hint(irq->irqn, NULL); free_cpumask_var(irq->mask); free_irq(irq->irqn, &irq->nh); kfree(irq); @@ -316,7 +316,7 @@ static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool, if (IS_ERR(irq)) return irq; cpumask_copy(irq->mask, affinity); - irq_set_affinity_hint(irq->irqn, irq->mask); + irq_set_affinity_and_hint(irq->irqn, irq->mask); return irq; } @@ -399,7 +399,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && cpumask_empty(irq->mask)) cpumask_set_cpu(0, irq->mask); - irq_set_affinity_hint(irq->irqn, irq->mask); + irq_set_affinity_and_hint(irq->irqn, irq->mask); unlock: mutex_unlock(&pool->lock); return irq; -- cgit v1.2.3-73-gaa49b From 4b3ddc6462e83452182177b48c4bc53607acd68e Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 3 Sep 2021 11:24:30 -0400 Subject: net/mlx4: Use irq_update_affinity_hint() The driver uses irq_set_affinity_hint() to update the affinity_hint mask that is consumed by the userspace to distribute the interrupts. However, under the hood irq_set_affinity_hint() also applies the provided cpumask (if not NULL) as the affinity for the given interrupt which is an undocumented side effect. To remove this side effect irq_set_affinity_hint() has been marked as deprecated and new interfaces have been introduced. Hence, replace the irq_set_affinity_hint() with the new interface irq_update_affinity_hint() that only updates the affinity_hint pointer. Signed-off-by: Nitesh Narayan Lal Signed-off-by: Thomas Gleixner Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20210903152430.244937-15-nitesh@redhat.com --- drivers/net/ethernet/mellanox/mlx4/eq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 9e48509ed3b2..414e390e6b48 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -244,9 +244,9 @@ static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec) cpumask_empty(eq->affinity_mask)) return; - hint_err = irq_set_affinity_hint(eq->irq, eq->affinity_mask); + hint_err = irq_update_affinity_hint(eq->irq, eq->affinity_mask); if (hint_err) - mlx4_warn(dev, "irq_set_affinity_hint failed, err %d\n", hint_err); + mlx4_warn(dev, "irq_update_affinity_hint failed, err %d\n", hint_err); } #endif @@ -1123,9 +1123,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { free_cpumask_var(eq_table->eq[i].affinity_mask); -#if defined(CONFIG_SMP) - irq_set_affinity_hint(eq_table->eq[i].irq, NULL); -#endif + irq_update_affinity_hint(eq_table->eq[i].irq, NULL); free_irq(eq_table->eq[i].irq, eq_table->eq + i); eq_table->eq[i].have_irq = 0; } -- cgit v1.2.3-73-gaa49b From c0cdc89072a3e1ae3981437f385de14b7bba8fd8 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 27 Oct 2021 16:15:04 +0100 Subject: irqchip/gic-v3-its: Give the percpu rdist struct its own flags field Later patches will require tracking some per-rdist status. Reuse the bytes "lost" to padding within the __percpu rdist struct as a flags field, and re-encode ->lpi_enabled within said flags. No change in functionality intended. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211027151506.2085066-2-valentin.schneider@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 8 +++++--- include/linux/irqchip/arm-gic-v3.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index eb0882d15366..74c22741f3ce 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -46,6 +46,8 @@ #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) #define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) +#define RD_LOCAL_LPI_ENABLED BIT(0) + static u32 lpi_id_bits; /* @@ -3044,7 +3046,7 @@ static void its_cpu_init_lpis(void) phys_addr_t paddr; u64 val, tmp; - if (gic_data_rdist()->lpi_enabled) + if (gic_data_rdist()->flags & RD_LOCAL_LPI_ENABLED) return; val = readl_relaxed(rbase + GICR_CTLR); @@ -3158,7 +3160,7 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); out: - gic_data_rdist()->lpi_enabled = true; + gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), gic_data_rdist()->pend_page ? "allocated" : "reserved", @@ -5138,7 +5140,7 @@ static int redist_disable_lpis(void) * * If running with preallocated tables, there is nothing to do. */ - if (gic_data_rdist()->lpi_enabled || + if ((gic_data_rdist()->flags & RD_LOCAL_LPI_ENABLED) || (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED)) return 0; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 81cbf85f73de..0dc34d7d735a 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -615,7 +615,7 @@ struct rdists { void __iomem *rd_base; struct page *pend_page; phys_addr_t phys_base; - bool lpi_enabled; + u64 flags; cpumask_t *vpe_table_mask; void *vpe_l1_base; } __percpu *rdist; -- cgit v1.2.3-73-gaa49b From d23bc2bc1d634658d7fa96395419c1c553a784f0 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 27 Oct 2021 16:15:05 +0100 Subject: irqchip/gic-v3-its: Postpone LPI pending table freeing and memreserve Memory used by the LPI tables have to be made persistent for kexec to have a chance to work, as explained in [1]. If they have been made persistent and we are booting into a kexec'd kernel, we also need to free the pages that were preemptively allocated by the new kernel for those tables. Both of those operations currently happen during its_cpu_init(), which happens in a _STARTING (IOW atomic) cpuhp callback for secondary CPUs. efi_mem_reserve_iomem() issues a GFP_ATOMIC allocation, which unfortunately doesn't work under PREEMPT_RT (this ends up grabbing a non-raw spinlock, which can sleep under PREEMPT_RT). Similarly, freeing the pages ends up grabbing a sleepable spinlock. Since the memreserve is only required by kexec, it doesn't have to be done so early in the secondary boot process. Issue the reservation in a new CPUHP_AP_ONLINE_DYN cpuhp callback, and piggy-back the page freeing on top of it. A CPU gets to run the body of this new callback exactly once. As kexec issues a machine_shutdown() prior to machine_kexec(), it will be serialized vs a CPU being plugged to life by the hotplug machinery - either the CPU will have been brought up and have had its redistributor's pending table memreserved, or it never went online and will have its table allocated by the new kernel. [1]: https://lore.kernel.org/lkml/20180921195954.21574-1-marc.zyngier@arm.com/ Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211027151506.2085066-3-valentin.schneider@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 58 +++++++++++++++++++++++++++++++++++--- drivers/irqchip/irq-gic-v3.c | 1 + include/linux/irqchip/arm-gic-v3.h | 1 + 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 74c22741f3ce..f860733d3e4e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -47,6 +47,8 @@ #define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) #define RD_LOCAL_LPI_ENABLED BIT(0) +#define RD_LOCAL_PENDTABLE_PREALLOCATED BIT(1) +#define RD_LOCAL_MEMRESERVE_DONE BIT(2) static u32 lpi_id_bits; @@ -3065,15 +3067,13 @@ static void its_cpu_init_lpis(void) paddr &= GENMASK_ULL(51, 16); WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ)); - its_free_pending_table(gic_data_rdist()->pend_page); - gic_data_rdist()->pend_page = NULL; + gic_data_rdist()->flags |= RD_LOCAL_PENDTABLE_PREALLOCATED; goto out; } pend_page = gic_data_rdist()->pend_page; paddr = page_to_phys(pend_page); - WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ)); /* set PROPBASE */ val = (gic_rdists->prop_table_pa | @@ -3163,7 +3163,8 @@ out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), - gic_data_rdist()->pend_page ? "allocated" : "reserved", + gic_data_rdist()->flags & RD_LOCAL_PENDTABLE_PREALLOCATED ? + "reserved" : "allocated", &paddr); } @@ -5202,6 +5203,38 @@ int its_cpu_init(void) return 0; } +static int its_cpu_memreserve_lpi(unsigned int cpu) +{ + struct page *pend_page; + int ret = 0; + + /* This gets to run exactly once per CPU */ + if (gic_data_rdist()->flags & RD_LOCAL_MEMRESERVE_DONE) + return 0; + + pend_page = gic_data_rdist()->pend_page; + if (WARN_ON(!pend_page)) { + ret = -ENOMEM; + goto out; + } + /* + * If the pending table was pre-programmed, free the memory we + * preemptively allocated. Otherwise, reserve that memory for + * later kexecs. + */ + if (gic_data_rdist()->flags & RD_LOCAL_PENDTABLE_PREALLOCATED) { + its_free_pending_table(pend_page); + gic_data_rdist()->pend_page = NULL; + } else { + phys_addr_t paddr = page_to_phys(pend_page); + WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ)); + } + +out: + gic_data_rdist()->flags |= RD_LOCAL_MEMRESERVE_DONE; + return ret; +} + static const struct of_device_id its_device_id[] = { { .compatible = "arm,gic-v3-its", }, {}, @@ -5385,6 +5418,23 @@ static void __init its_acpi_probe(void) static void __init its_acpi_probe(void) { } #endif +int __init its_lpi_memreserve_init(void) +{ + int state; + + if (!efi_enabled(EFI_CONFIG_TABLES)) + return 0; + + state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "irqchip/arm/gicv3/memreserve:online", + its_cpu_memreserve_lpi, + NULL); + if (state < 0) + return state; + + return 0; +} + int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *parent_domain) { diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index daec3309b014..9fa3e1d16721 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1802,6 +1802,7 @@ static int __init gic_init_bases(void __iomem *dist_base, if (gic_dist_supports_lpis()) { its_init(handle, &gic_data.rdists, gic_data.domain); its_cpu_init(); + its_lpi_memreserve_init(); } else { if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) gicv2m_init(handle, gic_data.domain); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 0dc34d7d735a..51b85506ae90 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -632,6 +632,7 @@ struct rdists { struct irq_domain; struct fwnode_handle; +int __init its_lpi_memreserve_init(void); int its_cpu_init(void); int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain); -- cgit v1.2.3-73-gaa49b From 835f442fdbce33a47a6bde356643fd7e3ef7ec1b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 27 Oct 2021 16:15:06 +0100 Subject: irqchip/gic-v3-its: Limit memreserve cpuhp state lifetime The new memreserve cpuhp callback only needs to survive up until a point where every CPU in the system has booted once. Beyond that, it becomes a no-op and can be put in the bin. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211027151506.2085066-4-valentin.schneider@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++++++ include/linux/irqchip/arm-gic-v3.h | 1 + 2 files changed, 17 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f860733d3e4e..ee83eb377d7e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -5203,6 +5203,15 @@ int its_cpu_init(void) return 0; } +static void rdist_memreserve_cpuhp_cleanup_workfn(struct work_struct *work) +{ + cpuhp_remove_state_nocalls(gic_rdists->cpuhp_memreserve_state); + gic_rdists->cpuhp_memreserve_state = CPUHP_INVALID; +} + +static DECLARE_WORK(rdist_memreserve_cpuhp_cleanup_work, + rdist_memreserve_cpuhp_cleanup_workfn); + static int its_cpu_memreserve_lpi(unsigned int cpu) { struct page *pend_page; @@ -5231,6 +5240,10 @@ static int its_cpu_memreserve_lpi(unsigned int cpu) } out: + /* Last CPU being brought up gets to issue the cleanup */ + if (cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask)) + schedule_work(&rdist_memreserve_cpuhp_cleanup_work); + gic_data_rdist()->flags |= RD_LOCAL_MEMRESERVE_DONE; return ret; } @@ -5425,6 +5438,7 @@ int __init its_lpi_memreserve_init(void) if (!efi_enabled(EFI_CONFIG_TABLES)) return 0; + gic_rdists->cpuhp_memreserve_state = CPUHP_INVALID; state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/arm/gicv3/memreserve:online", its_cpu_memreserve_lpi, @@ -5432,6 +5446,8 @@ int __init its_lpi_memreserve_init(void) if (state < 0) return state; + gic_rdists->cpuhp_memreserve_state = state; + return 0; } diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 51b85506ae90..12d91f0dedf9 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -624,6 +624,7 @@ struct rdists { u64 flags; u32 gicd_typer; u32 gicd_typer2; + int cpuhp_memreserve_state; bool has_vlpis; bool has_rvpeid; bool has_direct_lpi; -- cgit v1.2.3-73-gaa49b From f63c862587c9ba1c62de8c549e8f2fbe0fc88b6b Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 2 Dec 2021 10:52:54 +0100 Subject: irqchip/spear-shirq: Add support for IRQ 0..6 IRQ 0..7 are not supported by the driver for SPEAr320 SOC family. IRQ 0 is not reserved in SPEAr320 SOC (assigned to GPIOINT). Furthermore, in SPEAr320s SOC variant, IRQ 0..6 are assigned as follow: IRQ 6 - NGPIO_INTR: Combined status of edge programmable interrupts from GPIO ports IRQ 5 - TX_OR_INTR: I2S interrupt on Transmit FIFO overrun IRQ 4 - TX_EMP_INTR: I2S interrupt on Transmit FIFO empty IRQ 3 - RX_OR_INTR: I2S interrupt on Receive FIFO overrun IRQ 2 - RX_DA_INTR: I2S interrupt on data available in Receive FIFO IRQ 1 - Reserved IRQ 0 - GPIO_INTR: Legacy interrupt from GPIO ports Add support for these IRQs in SPEAr320 SOC family. Signed-off-by: Herve Codina Acked-by: Linus Walleij Acked-by: Arnd Bergmann Acked-by: Viresh Kumar Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211202095255.165797-6-herve.codina@bootlin.com --- drivers/irqchip/spear-shirq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 1518ba31a80c..7c17a6f643ef 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -149,6 +149,8 @@ static struct spear_shirq spear320_shirq_ras3 = { .offset = 0, .nr_irqs = 7, .mask = ((0x1 << 7) - 1) << 0, + .irq_chip = &dummy_irq_chip, + .status_reg = SPEAR320_INT_STS_MASK_REG, }; static struct spear_shirq spear320_shirq_ras1 = { -- cgit v1.2.3-73-gaa49b From 29e525cc825e33ea0da2ac0fbf951e418fc28c69 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 14 Dec 2021 16:47:11 +0800 Subject: irqchip/imx-gpcv2: Mark imx_gpcv2_instance with __ro_after_init imx_gpcv2_instance will not be updated after init, so mark it with __ro_after_init. Signed-off-by: Peng Fan Reviewed-by: Lucas Stach Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211214084711.1357325-1-peng.fan@oss.nxp.com --- drivers/irqchip/irq-imx-gpcv2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 5b5a365dbd5e..b9c22f764b4d 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -26,7 +26,7 @@ struct gpcv2_irqchip_data { u32 cpu2wakeup; }; -static struct gpcv2_irqchip_data *imx_gpcv2_instance; +static struct gpcv2_irqchip_data *imx_gpcv2_instance __ro_after_init; static void __iomem *gpcv2_idx_to_reg(struct gpcv2_irqchip_data *cd, int i) { -- cgit v1.2.3-73-gaa49b From c10f2f8b5d8027c1ea77f777f2d16cb9043a6c09 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Thu, 9 Dec 2021 21:24:53 +0800 Subject: irqchip/gic-v2m: Add const to of_device_id struct of_device_id should normally be const. Signed-off-by: Xiang wangx Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211209132453.25623-1-wangxiang@cdjrlc.com --- drivers/irqchip/irq-gic-v2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 9349fc68b81a..f2d252dff5f3 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -405,7 +405,7 @@ err_free_v2m: return ret; } -static struct of_device_id gicv2m_device_id[] = { +static const struct of_device_id gicv2m_device_id[] = { { .compatible = "arm,gic-v2m-frame", }, {}, }; -- cgit v1.2.3-73-gaa49b From 0859bbb07d06d1357734496ff198319f9bfcf1bf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 15 Dec 2021 15:24:57 -0800 Subject: irqchip/ingenic-tcu: Use correctly sized arguments for bit field The find.h APIs are designed to be used only on unsigned long arguments. This can technically result in a over-read, but it is harmless in this case. Regardless, fix it to avoid the warning seen under -Warray-bounds, which we'd like to enable globally: In file included from ./include/linux/bitmap.h:9, from ./include/linux/cpumask.h:12, from ./include/linux/smp.h:13, from ./include/linux/lockdep.h:14, from ./include/linux/mutex.h:17, from ./include/linux/notifier.h:14, from ./include/linux/clk.h:14, from drivers/irqchip/irq-ingenic-tcu.c:7: drivers/irqchip/irq-ingenic-tcu.c: In function 'ingenic_tcu_intc_cascade': ./include/linux/find.h:40:23: warning: array subscript 'long unsigned int[0]' is partly outside array bounds of 'uint32_t[1]' {aka 'unsigned int[1]'} [-Warray-bounds] 40 | val = *addr & GENMASK(size - 1, offset); | ^~~~~ drivers/irqchip/irq-ingenic-tcu.c:30:18: note: while referencing 'irq_reg' 30 | uint32_t irq_reg, irq_mask; | ^~~~~~~ Signed-off-by: Kees Cook Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215232457.2069969-1-keescook@chromium.org --- drivers/irqchip/irq-ingenic-tcu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c index 34a7d261b710..3363f83bd7e9 100644 --- a/drivers/irqchip/irq-ingenic-tcu.c +++ b/drivers/irqchip/irq-ingenic-tcu.c @@ -28,6 +28,7 @@ static void ingenic_tcu_intc_cascade(struct irq_desc *desc) struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0); struct regmap *map = gc->private; uint32_t irq_reg, irq_mask; + unsigned long bits; unsigned int i; regmap_read(map, TCU_REG_TFR, &irq_reg); @@ -36,8 +37,9 @@ static void ingenic_tcu_intc_cascade(struct irq_desc *desc) chained_irq_enter(irq_chip, desc); irq_reg &= ~irq_mask; + bits = irq_reg; - for_each_set_bit(i, (unsigned long *)&irq_reg, 32) + for_each_set_bit(i, &bits, 32) generic_handle_domain_irq(domain, i); chained_irq_exit(irq_chip, desc); -- cgit v1.2.3-73-gaa49b From 79a7f77b9b154d572bd9d2f1eecf58c4d018d8e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Dec 2021 14:32:27 +0000 Subject: irqchip/gic-v4: Disable redistributors' view of the VPE table at boot time Jay Chen reported that using a kdump kernel on a GICv4.1 system results in a RAS error being delivered when the secondary kernel configures the ITS's view of the new VPE table. As it turns out, that's because each RD still has a pointer to the previous instance of the VPE table, and that particular implementation is very upset by seeing two bits of the HW that should point to the same table with different values. To solve this, let's invalidate any reference that any RD has to the VPE table when discovering the RDs. The ITS can then be programmed as expected. Reported-by: Jay Chen Signed-off-by: Marc Zyngier Cc: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20211214064716.21407-1-jkchen@linux.alibaba.com Link: https://lore.kernel.org/r/20211216144804.1578566-1-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index daec3309b014..86397522e786 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -920,6 +920,22 @@ static int __gic_update_rdist_properties(struct redist_region *region, { u64 typer = gic_read_typer(ptr + GICR_TYPER); + /* Boot-time cleanip */ + if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) { + u64 val; + + /* Deactivate any present vPE */ + val = gicr_read_vpendbaser(ptr + SZ_128K + GICR_VPENDBASER); + if (val & GICR_VPENDBASER_Valid) + gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast, + ptr + SZ_128K + GICR_VPENDBASER); + + /* Mark the VPE table as invalid */ + val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER); + val &= ~GICR_VPROPBASER_4_1_VALID; + gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER); + } + gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */ -- cgit v1.2.3-73-gaa49b From befbfe6f8f744acb65c4334cc224b855d31aff1b Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 16 Dec 2021 18:21:21 +0000 Subject: irqchip/renesas-irqc: Use platform_get_irq_optional() to get the interrupt platform_get_resource(pdev, IORESOURCE_IRQ, ..) relies on static allocation of IRQ resources in DT core code, this causes an issue when using hierarchical interrupt domains using "interrupts" property in the node as this bypassed the hierarchical setup and messed up the irq chaining. In preparation for removal of static setup of IRQ resource from DT core code use platform_get_irq_optional(). Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211216182121.5323-2-prabhakar.mahadev-lad.rj@bp.renesas.com --- drivers/irqchip/irq-renesas-irqc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 07a6d8b42b63..909325f88239 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -126,7 +126,6 @@ static int irqc_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const char *name = dev_name(dev); struct irqc_priv *p; - struct resource *irq; int ret; int k; @@ -142,13 +141,15 @@ static int irqc_probe(struct platform_device *pdev) /* allow any number of IRQs between 1 and IRQC_IRQ_MAX */ for (k = 0; k < IRQC_IRQ_MAX; k++) { - irq = platform_get_resource(pdev, IORESOURCE_IRQ, k); - if (!irq) + ret = platform_get_irq_optional(pdev, k); + if (ret == -ENXIO) break; + if (ret < 0) + goto err_runtime_pm_disable; p->irq[k].p = p; p->irq[k].hw_irq = k; - p->irq[k].requested_irq = irq->start; + p->irq[k].requested_irq = ret; } p->number_of_irqs = k; -- cgit v1.2.3-73-gaa49b From 31bd548f40cd0b754ed80a372bc2019e7c4fa31d Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 16 Dec 2021 18:21:20 +0000 Subject: irqchip/renesas-intc-irqpin: Use platform_get_irq_optional() to get the interrupt platform_get_resource(pdev, IORESOURCE_IRQ, ..) relies on static allocation of IRQ resources in DT core code, this causes an issue when using hierarchical interrupt domains using "interrupts" property in the node as this bypassed the hierarchical setup and messed up the irq chaining. In preparation for removal of static setup of IRQ resource from DT core code use platform_get_irq_optional(). Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211216182121.5323-1-prabhakar.mahadev-lad.rj@bp.renesas.com --- drivers/irqchip/irq-renesas-intc-irqpin.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index cb7f60b3b4a9..37f9a4499fdb 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -375,7 +375,6 @@ static int intc_irqpin_probe(struct platform_device *pdev) struct intc_irqpin_priv *p; struct intc_irqpin_iomem *i; struct resource *io[INTC_IRQPIN_REG_NR]; - struct resource *irq; struct irq_chip *irq_chip; void (*enable_fn)(struct irq_data *d); void (*disable_fn)(struct irq_data *d); @@ -418,12 +417,14 @@ static int intc_irqpin_probe(struct platform_device *pdev) /* allow any number of IRQs between 1 and INTC_IRQPIN_MAX */ for (k = 0; k < INTC_IRQPIN_MAX; k++) { - irq = platform_get_resource(pdev, IORESOURCE_IRQ, k); - if (!irq) + ret = platform_get_irq_optional(pdev, k); + if (ret == -ENXIO) break; + if (ret < 0) + goto err0; p->irq[k].p = p; - p->irq[k].requested_irq = irq->start; + p->irq[k].requested_irq = ret; } nirqs = k; -- cgit v1.2.3-73-gaa49b