Commit 880cb3c

Author: Marc Zyngier
irqchip/gic-v3-its: Refactor LPI allocator
Our current LPI allocator relies on a bitmap, each bit representing a chunk of 32 LPIs, meaning that each device gets allocated LPIs in multiples of 32. It has served us well so far, but new use cases now require much finer-grained allocations, down to the individual LPI.

Given the size of the IntID space (up to 32 bits), it isn't practical to continue using a bitmap, so let's use a different data structure altogether.

We switch to a list, where each element represents a contiguous range of LPIs. On allocation, we simply grab the first group big enough to satisfy the allocation and subtract what we need from it. If the group becomes empty, we just remove it. On freeing interrupts, we insert a new group of interrupts into the list, sort it, and fuse the adjacent groups.

This makes freeing interrupts much more expensive than allocating them (an unusual behaviour), but that's fine as long as we consider freeing interrupts to be an extremely rare event.

We still allocate interrupts in blocks of 32 for the time being, but subsequent patches will relax this.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
1 parent 9d3cce1 commit 880cb3c
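
The scheme described in the message is easy to model outside the kernel. The following is a minimal, self-contained user-space C sketch of the same idea, for illustration only: it swaps <linux/list.h>, list_sort() and kzalloc() for a hand-rolled singly-linked list and malloc()/free(), and the identifiers merely mirror those introduced by the patch below.

/*
 * User-space sketch of the range-list allocator: a list of free LPI
 * ranges sorted by base_id, first-fit allocation, freeing by sorted
 * insertion plus a merge pass. Illustration only, not the kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

struct lpi_range {
	unsigned int base_id;		/* first LPI in the range */
	unsigned int span;		/* number of LPIs in the range */
	struct lpi_range *next;
};

static struct lpi_range *lpi_range_list;

/* Allocation: take IDs from the head of the first range big enough. */
static int alloc_lpi_range(unsigned int nr_lpis, unsigned int *base)
{
	struct lpi_range **prev, *range;

	for (prev = &lpi_range_list; (range = *prev); prev = &range->next) {
		if (range->span < nr_lpis)
			continue;

		*base = range->base_id;
		range->base_id += nr_lpis;
		range->span -= nr_lpis;

		if (!range->span) {	/* range exhausted: unlink and drop it */
			*prev = range->next;
			free(range);
		}
		return 0;
	}
	return -1;			/* nothing big enough: -ENOSPC */
}

/* Fuse adjacent ranges; the moral equivalent of merge_lpi_ranges(). */
static void merge_lpi_ranges(void)
{
	struct lpi_range *range = lpi_range_list;

	while (range && range->next) {
		struct lpi_range *n = range->next;

		if (range->base_id + range->span == n->base_id) {
			range->span += n->span;	/* fuse and stay put */
			range->next = n->next;
			free(n);
		} else {
			range = n;
		}
	}
}

/* Freeing: insert a new range in sorted position, then merge. */
static int free_lpi_range(unsigned int base, unsigned int nr_lpis)
{
	struct lpi_range **prev, *new;

	new = malloc(sizeof(*new));
	if (!new)
		return -1;		/* -ENOMEM */
	new->base_id = base;
	new->span = nr_lpis;

	/* Sorted insert keeps the list ordered by base_id, standing in
	 * for the list_sort() call in the patch. */
	for (prev = &lpi_range_list; *prev && (*prev)->base_id < base;
	     prev = &(*prev)->next)
		;
	new->next = *prev;
	*prev = new;

	merge_lpi_ranges();
	return 0;
}

int main(void)
{
	unsigned int a, b;

	/* Initialization == freeing the whole LPI space (IDs 8192 and up). */
	free_lpi_range(8192, (1U << 16) - 8192);

	alloc_lpi_range(32, &a);	/* a == 8192 */
	alloc_lpi_range(32, &b);	/* b == 8224 */
	printf("allocated %u and %u\n", a, b);

	free_lpi_range(a, 32);		/* re-inserted, then fused */
	free_lpi_range(b, 32);		/* back to one contiguous range */
	return 0;
}

Note the asymmetry the commit message calls out: allocation is a single walk that stops at the first fit, while freeing pays for a sorted insert plus a merge pass.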

File tree

1 file changed: +129, -62 lines

drivers/irqchip/irq-gic-v3-its.c

Lines changed: 129 additions & 62 deletions
@@ -23,6 +23,8 @@
 #include <linux/dma-iommu.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
 #include <linux/log2.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
@@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = {
 	.irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
 };
 
+
 /*
  * How we allocate LPIs:
  *
- * The GIC has id_bits bits for interrupt identifiers. From there, we
- * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as
- * we allocate LPIs by chunks of 32, we can shift the whole thing by 5
- * bits to the right.
+ * lpi_range_list contains ranges of LPIs that are available to
+ * allocate from. To allocate LPIs, just pick the first range that
+ * fits the required allocation, and reduce it by the required
+ * amount. Once empty, remove the range from the list.
+ *
+ * To free a range of LPIs, add a free range to the list, sort it and
+ * merge the result if the new range happens to be adjacent to an
+ * already free block.
  *
- * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
+ * The consequence of the above is that allocation cost is low, but
+ * freeing is expensive. We assume that freeing rarely occurs.
+ */
+
+/*
+ * Compatibility defines until we fully refactor the allocator
  */
 #define IRQS_PER_CHUNK_SHIFT	5
 #define IRQS_PER_CHUNK		(1UL << IRQS_PER_CHUNK_SHIFT)
 #define ITS_MAX_LPI_NRBITS	16 /* 64K LPIs */
 
-static unsigned long *lpi_bitmap;
-static u32 lpi_chunks;
-static DEFINE_SPINLOCK(lpi_lock);
+static DEFINE_MUTEX(lpi_range_lock);
+static LIST_HEAD(lpi_range_list);
 
-static int its_lpi_to_chunk(int lpi)
+struct lpi_range {
+	struct list_head	entry;
+	u32			base_id;
+	u32			span;
+};
+
+static struct lpi_range *mk_lpi_range(u32 base, u32 span)
 {
-	return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT;
+	struct lpi_range *range;
+
+	range = kzalloc(sizeof(*range), GFP_KERNEL);
+	if (range) {
+		INIT_LIST_HEAD(&range->entry);
+		range->base_id = base;
+		range->span = span;
+	}
+
+	return range;
 }
 
-static int its_chunk_to_lpi(int chunk)
+static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
-	return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
+	struct lpi_range *ra, *rb;
+
+	ra = container_of(a, struct lpi_range, entry);
+	rb = container_of(b, struct lpi_range, entry);
+
+	return rb->base_id - ra->base_id;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static void merge_lpi_ranges(void)
 {
-	lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+	struct lpi_range *range, *tmp;
 
-	lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long),
-			     GFP_KERNEL);
-	if (!lpi_bitmap) {
-		lpi_chunks = 0;
-		return -ENOMEM;
+	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
+		if (!list_is_last(&range->entry, &lpi_range_list) &&
+		    (tmp->base_id == (range->base_id + range->span))) {
+			tmp->base_id = range->base_id;
+			tmp->span += range->span;
+			list_del(&range->entry);
+			kfree(range);
+		}
 	}
+}
 
-	pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks);
-	return 0;
+static int alloc_lpi_range(u32 nr_lpis, u32 *base)
+{
+	struct lpi_range *range, *tmp;
+	int err = -ENOSPC;
+
+	mutex_lock(&lpi_range_lock);
+
+	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
+		if (range->span >= nr_lpis) {
+			*base = range->base_id;
+			range->base_id += nr_lpis;
+			range->span -= nr_lpis;
+
+			if (range->span == 0) {
+				list_del(&range->entry);
+				kfree(range);
+			}
+
+			err = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&lpi_range_lock);
+
+	pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);
+	return err;
 }
 
-static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
+static int free_lpi_range(u32 base, u32 nr_lpis)
 {
-	unsigned long *bitmap = NULL;
-	int chunk_id;
-	int nr_chunks;
-	int i;
+	struct lpi_range *new;
+	int err = 0;
 
-	nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK);
+	mutex_lock(&lpi_range_lock);
 
-	spin_lock(&lpi_lock);
+	new = mk_lpi_range(base, nr_lpis);
+	if (!new) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	list_add(&new->entry, &lpi_range_list);
+	list_sort(NULL, &lpi_range_list, lpi_range_cmp);
+	merge_lpi_ranges();
+out:
+	mutex_unlock(&lpi_range_lock);
+	return err;
+}
+
+static int __init its_lpi_init(u32 id_bits)
+{
+	u32 lpis = (1UL << id_bits) - 8192;
+	int err;
+
+	/*
+	 * Initializing the allocator is just the same as freeing the
+	 * full range of LPIs.
+	 */
+	err = free_lpi_range(8192, lpis);
+	pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
+	return err;
+}
+
+static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids)
+{
+	unsigned long *bitmap = NULL;
+	int err = 0;
+	int nr_lpis;
+
+	nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK);
 
 	do {
-		chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks,
-						      0, nr_chunks, 0);
-		if (chunk_id < lpi_chunks)
+		err = alloc_lpi_range(nr_lpis, base);
+		if (!err)
 			break;
 
-		nr_chunks--;
-	} while (nr_chunks > 0);
+		nr_lpis -= IRQS_PER_CHUNK;
+	} while (nr_lpis > 0);
 
-	if (!nr_chunks)
+	if (err)
 		goto out;
 
-	bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK),
-			 sizeof(long),
-			 GFP_ATOMIC);
+	bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC);
 	if (!bitmap)
 		goto out;
 
-	for (i = 0; i < nr_chunks; i++)
-		set_bit(chunk_id + i, lpi_bitmap);
-
-	*base = its_chunk_to_lpi(chunk_id);
-	*nr_ids = nr_chunks * IRQS_PER_CHUNK;
+	*nr_ids = nr_lpis;
 
 out:
-	spin_unlock(&lpi_lock);
-
 	if (!bitmap)
 		*base = *nr_ids = 0;
 
 	return bitmap;
 }
 
-static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids)
+static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids)
 {
-	int lpi;
-
-	spin_lock(&lpi_lock);
-
-	for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
-		int chunk = its_lpi_to_chunk(lpi);
-
-		BUG_ON(chunk > lpi_chunks);
-		if (test_bit(chunk, lpi_bitmap)) {
-			clear_bit(chunk, lpi_bitmap);
-		} else {
-			pr_err("Bad LPI chunk %d\n", chunk);
-		}
-	}
-
-	spin_unlock(&lpi_lock);
-
+	WARN_ON(free_lpi_range(base, nr_ids));
 	kfree(bitmap);
 }
 
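One behaviour of its_lpi_alloc_chunks() above is worth spelling out: the request is rounded up to a multiple of IRQS_PER_CHUNK, and each failed attempt retries with one chunk less, so the caller can end up with fewer LPIs than requested (reported back through *nr_ids). Here is a standalone sketch of just that retry policy, reusing alloc_lpi_range() from the user-space sketch near the top; again an illustration, not the kernel code.

/* Retry policy of its_lpi_alloc_chunks(): round the request up to a
 * multiple of 32, then shrink by one chunk per failed attempt.
 * Depends on alloc_lpi_range() from the earlier sketch. */
#define IRQS_PER_CHUNK 32U

static int alloc_chunks(unsigned int nr_irqs, unsigned int *base,
			unsigned int *nr_ids)
{
	unsigned int nr_lpis =
		(nr_irqs + IRQS_PER_CHUNK - 1) / IRQS_PER_CHUNK * IRQS_PER_CHUNK;

	do {
		if (!alloc_lpi_range(nr_lpis, base)) {
			*nr_ids = nr_lpis;	/* may be fewer than asked for */
			return 0;
		}
		nr_lpis -= IRQS_PER_CHUNK;	/* shrink and retry */
	} while (nr_lpis > 0);

	return -1;
}

With nr_irqs = 45, the first attempt asks for 64 LPIs; if the free ranges are too fragmented, it falls back to 32 before giving up.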