anolis: genirq/affinity: add support for limiting managed interrupts
ANBZ: #10929
Commit c410abbbac ("genirq/affinity: Add is_managed to struct irq_affinity_desc")
introduced the is_managed bit to struct irq_affinity_desc. Because that commit
treats queue interrupts as managed interrupts, a system with a large number of
devices (and hence a massive number of MSI-X queue interrupts) reserves an
excessive number of IRQ matrix bits during interrupt allocation. This subsequently
leads to a situation where interrupts for some devices cannot be properly
allocated.

Add support for limiting the number of managed interrupts on every node.
Signed-off-by: Guanjun <guanjun@linux.alibaba.com>
Reviewed-by: Zelin Deng <zelin.deng@linux.alibaba.com>
Reviewed-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/3856
parent 3ebad7e2c0
commit c6f538a97a
@@ -2717,6 +2717,15 @@
 			different yeeloong laptops.
 			Example: machtype=lemote-yeeloong-2f-7inch

+	managed_irqs_per_node=
+			[KNL] Limit the number of managed interrupts on every
+			node, to prevent the case where interrupts cannot be
+			properly allocated when a large number of devices are
+			present. The default is 0, which means the number of
+			managed irqs is not limited.
+			Format: integer between 0 and num_possible_cpus() / num_possible_nodes()
+			Default: 0
+
 	max_addr=nn[KMG]	[KNL,BOOT,ia64] All physical memory greater
 			than or equal to this physical address is ignored.

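For illustration only (the value and topology below are hypothetical, not part of
the patch): the parameter is passed on the kernel command line like any other boot
option, and the Format line above caps it at num_possible_cpus() /
num_possible_nodes(), so a machine with 128 possible CPUs spread across 2 nodes
would accept values from 0 to 64.

			Example: managed_irqs_per_node=8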
@@ -648,6 +648,7 @@ static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif

 extern int no_irq_affinity;
+extern unsigned int managed_irqs_per_node;

 #ifdef CONFIG_HARDIRQS_SW_RESEND
 int irq_set_parent(int irq, int parent_irq);

@@ -9,6 +9,31 @@
 #include <linux/cpu.h>
 #include <linux/sort.h>

+unsigned int __read_mostly managed_irqs_per_node;
+
+static struct cpumask managed_irqs_free_cpumsk[MAX_NUMNODES] __cacheline_aligned_in_smp = {
+	[0 ... MAX_NUMNODES-1] = {CPU_BITS_ALL}
+};
+
+static int __init irq_managed_setup(char *str)
+{
+	int ret;
+
+	ret = kstrtouint(str, 10, &managed_irqs_per_node);
+	if (ret < 0) {
+		pr_warn("managed_irqs_per_node= cannot parse, ignored\n");
+		return 0;
+	}
+
+	if (managed_irqs_per_node * num_possible_nodes() > num_possible_cpus()) {
+		managed_irqs_per_node = num_possible_cpus() / num_possible_nodes();
+		pr_warn("managed_irqs_per_node= cannot be larger than %u\n",
+				managed_irqs_per_node);
+	}
+	return 1;
+}
+__setup("managed_irqs_per_node=", irq_managed_setup);
+
 static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
 				unsigned int cpus_per_vec)
 {

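The parsing path above clamps an oversized value instead of rejecting it. A
minimal userspace sketch of that clamping rule (not kernel code; the helper name
and the topology numbers are invented for illustration):

/*
 * Sketch of the clamp in irq_managed_setup(): the per-node limit can never
 * exceed num_possible_cpus() / num_possible_nodes().
 */
#include <stdio.h>

static unsigned int clamp_managed_irqs_per_node(unsigned int requested,
						unsigned int possible_cpus,
						unsigned int possible_nodes)
{
	if (requested * possible_nodes > possible_cpus)
		return possible_cpus / possible_nodes;
	return requested;
}

int main(void)
{
	/* Hypothetical topology: 2 nodes, 64 possible CPUs. */
	printf("%u\n", clamp_managed_irqs_per_node(48, 64, 2));	/* clamped to 32 */
	printf("%u\n", clamp_managed_irqs_per_node(8, 64, 2));	/* kept at 8 */
	return 0;
}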
@@ -244,6 +269,39 @@ static void alloc_nodes_vectors(unsigned int numvecs,
 	}
 }

+static void __irq_prepare_affinity_mask(struct cpumask *premask,
+					cpumask_var_t *node_to_cpumask)
+{
+	nodemask_t nodemsk = NODE_MASK_NONE;
+	unsigned int ncpus, n;
+
+	get_nodes_in_cpumask(node_to_cpumask, cpu_present_mask, &nodemsk);
+
+	for_each_node_mask(n, nodemsk) {
+		/*
+		 * Try to allocate managed_irqs_per_node CPU bits on each NUMA
+		 * node. If an insufficient number can be allocated, the free
+		 * CPU bits will be reset to CPU_BITS_ALL for the next
+		 * allocation. This design keeps the path lockless and
+		 * balances the load.
+		 */
+		cpumask_and(&managed_irqs_free_cpumsk[n],
+				&managed_irqs_free_cpumsk[n], cpu_present_mask);
+		cpumask_and(&managed_irqs_free_cpumsk[n],
+				&managed_irqs_free_cpumsk[n], node_to_cpumask[n]);
+
+		ncpus = cpumask_weight(&managed_irqs_free_cpumsk[n]);
+		if (ncpus < managed_irqs_per_node) {
+			/* Reset node n to current node cpumask */
+			cpumask_copy(&managed_irqs_free_cpumsk[n], node_to_cpumask[n]);
+			continue;
+		}
+
+		irq_spread_init_one(premask,
+				&managed_irqs_free_cpumsk[n], managed_irqs_per_node);
+	}
+}
+
 static int __irq_build_affinity_masks(unsigned int startvec,
 				      unsigned int numvecs,
 				      unsigned int firstvec,

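The comment in the hunk above describes a take-or-refill scheme: each managed
allocation takes managed_irqs_per_node CPUs out of the node's free pool, and once
fewer than that remain, the pool is reset to the node's full cpumask and the
current allocation simply skips that node. A small userspace sketch of that
bookkeeping (not kernel code; the mask values, LIMIT and names are invented, and a
uint64_t stands in for a struct cpumask, so at most 64 CPUs per node):

#include <stdio.h>
#include <stdint.h>

#define LIMIT 2				/* stands in for managed_irqs_per_node */

static uint64_t node_cpus = 0x0F;	/* the node owns CPUs 0-3 */
static uint64_t free_cpus = 0x0F;	/* free pool, initially all of them */

/* Hand out up to LIMIT CPUs from the free pool, or refill the pool and skip. */
static uint64_t take_from_node(void)
{
	uint64_t picked = 0;
	int taken = 0;

	if (__builtin_popcountll(free_cpus) < LIMIT) {
		free_cpus = node_cpus;	/* reset for the next allocation */
		return 0;		/* this allocation skips the node */
	}

	for (int cpu = 0; cpu < 64 && taken < LIMIT; cpu++) {
		if (free_cpus & (1ull << cpu)) {
			free_cpus &= ~(1ull << cpu);
			picked |= 1ull << cpu;
			taken++;
		}
	}
	return picked;
}

int main(void)
{
	/* Three successive "device probes" against the same node:
	 * 0x3 (CPUs 0,1), then 0xc (CPUs 2,3), then 0x0 after the refill. */
	for (int i = 0; i < 3; i++)
		printf("probe %d gets CPU mask 0x%llx\n",
		       i, (unsigned long long)take_from_node());
	return 0;
}

Because the reset simply refills the pool rather than taking a lock, successive
devices end up rotated onto different CPUs of the node, which appears to be the
"lockless" and "load balancing" property the code comment refers to.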
@@ -359,9 +417,14 @@ static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
 	get_online_cpus();
 	build_node_to_cpumask(node_to_cpumask);

+	/* Limit the count of managed interrupts on every node */
+	if (masks[startvec].is_managed && managed_irqs_per_node)
+		__irq_prepare_affinity_mask(npresmsk, node_to_cpumask);
+
 	/* Spread on present CPUs starting from affd->pre_vectors */
 	ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
-					 node_to_cpumask, cpu_present_mask,
+					 node_to_cpumask,
+					 cpumask_empty(npresmsk) ? cpu_present_mask : npresmsk,
 					 nmsk, masks);
 	if (ret < 0)
 		goto fail_build_affinity;

@@ -455,6 +518,10 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
 		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

+	/* Mark the managed interrupts */
+	for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
+		masks[i].is_managed = 1;
+
 	/*
 	 * Spread on present CPUs starting from affd->pre_vectors. If we
 	 * have multiple sets, build each sets affinity mask separately.

@@ -481,10 +548,6 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 	for (; curvec < nvecs; curvec++)
 		cpumask_copy(&masks[curvec].mask, irq_default_affinity);

-	/* Mark the managed interrupts */
-	for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
-		masks[i].is_managed = 1;
-
 	return masks;
 }