Commit b1516a1

iommu/amd: Implement flush queue
With the flush queue, the IOMMU TLBs are no longer flushed on every dma-ops unmap operation. Instead, unmapped ranges are queued and flushed all at once when the queue is full. This makes unmap operations a lot faster on average and restores the performance of the old address allocator.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Parent commit: f1eae7c
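
The two helpers added below, queue_add() and __queue_flush(), operate on a per-CPU flush queue whose definition is not part of this diff (it comes from elsewhere in the series). As context, here is a minimal sketch of what those structures presumably look like; the field names are taken from how the patch uses them, while the queue depth, the exact types and the per-CPU declaration are assumptions:

/*
 * Sketch only: the real definitions live outside this diff.
 * Field names mirror their use in queue_add()/__queue_flush().
 */
#define FLUSH_QUEUE_SIZE	256	/* assumed queue depth */

struct flush_queue_entry {
	unsigned long iova_pfn;			/* page frame of the unmapped IOVA range */
	unsigned long pages;			/* size in pages, rounded up to a power of two */
	struct dma_ops_domain *dma_dom;		/* domain the range was mapped in */
};

struct flush_queue {
	spinlock_t lock;			/* protects next and entries[] */
	unsigned next;				/* next free slot in entries[] */
	struct flush_queue_entry *entries;	/* array of FLUSH_QUEUE_SIZE entries */
};

static DEFINE_PER_CPU(struct flush_queue, flush_queue);	/* assumed per-CPU instance */

Keeping one queue per CPU keeps queue_add() free of cross-CPU contention; the irqsave spinlock is still needed because dma-ops unmaps, and therefore queue_add(), can run in interrupt context.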

1 file changed, 67 insertions(+), 4 deletions(-)

drivers/iommu/amd_iommu.c

@@ -2120,6 +2120,66 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
+static void __queue_flush(struct flush_queue *queue)
+{
+	struct protection_domain *domain;
+	unsigned long flags;
+	int idx;
+
+	/* First flush TLB of all known domains */
+	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+	list_for_each_entry(domain, &amd_iommu_pd_list, list)
+		domain_flush_tlb(domain);
+	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
+
+	/* Wait until flushes have completed */
+	domain_flush_complete(NULL);
+
+	for (idx = 0; idx < queue->next; ++idx) {
+		struct flush_queue_entry *entry;
+
+		entry = queue->entries + idx;
+
+		free_iova_fast(&entry->dma_dom->iovad,
+			       entry->iova_pfn,
+			       entry->pages);
+
+		/* Not really necessary, just to make sure we catch any bugs */
+		entry->dma_dom = NULL;
+	}
+
+	queue->next = 0;
+}
+
+static void queue_add(struct dma_ops_domain *dma_dom,
+		      unsigned long address, unsigned long pages)
+{
+	struct flush_queue_entry *entry;
+	struct flush_queue *queue;
+	unsigned long flags;
+	int idx;
+
+	pages = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
+
+	queue = get_cpu_ptr(&flush_queue);
+	spin_lock_irqsave(&queue->lock, flags);
+
+	if (queue->next == FLUSH_QUEUE_SIZE)
+		__queue_flush(queue);
+
+	idx = queue->next++;
+	entry = queue->entries + idx;
+
+	entry->iova_pfn = address;
+	entry->pages = pages;
+	entry->dma_dom = dma_dom;
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+	put_cpu_ptr(&flush_queue);
+}
+
+
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2258,10 +2318,13 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 		start += PAGE_SIZE;
 	}
 
-	domain_flush_tlb(&dma_dom->domain);
-	domain_flush_complete(&dma_dom->domain);
-
-	dma_ops_free_iova(dma_dom, dma_addr, pages);
+	if (amd_iommu_unmap_flush) {
+		dma_ops_free_iova(dma_dom, dma_addr, pages);
+		domain_flush_tlb(&dma_dom->domain);
+		domain_flush_complete(&dma_dom->domain);
+	} else {
+		queue_add(dma_dom, dma_addr, pages);
+	}
 }
 
 /*
