
Commit d75b1ad

Eric Dumazet authored and davem330 committed
net: less interrupt masking in NAPI
net_rx_action() can mask irqs a single time to transfer sd->poll_list into a private list, for a very short duration.

Then, napi_complete() can avoid masking irqs again, and net_rx_action() only needs to mask irqs again in the slow path.

This patch removes two pairs of irq mask/unmask per typical NAPI run, more if multiple NAPI instances were triggered.

Note this also allows giving control back to the caller (do_softirq()) more often, so that other softirq handlers can be called a bit earlier, or ksoftirqd can be woken up earlier under pressure.

This was developed while testing an alternative to RX interrupt mitigation, to reduce latencies while keeping or improving GRO aggregation on fast NICs.

The idea is to test napi->gro_list at the end of a napi->poll() and reschedule one NAPI poll, but only after servicing a full round of softirqs (timers, TX, RCU, ...). This would be allowed only if the softirq is currently serviced by the idle task or ksoftirqd, and no reschedule is needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
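The heart of the change is a splice-then-process pattern: one short irq-masked window moves the whole of sd->poll_list onto a private list, and the napi instances are then polled with interrupts enabled. Below is a minimal userspace sketch of that pattern, not the kernel code itself: a pthread mutex stands in for local_irq_disable()/local_irq_enable(), and producer_add()/consume_all() are invented names.

#include <pthread.h>
#include <stdio.h>

struct node {
        struct node *next;
        int id;
};

static struct node *shared_head;        /* stands in for sd->poll_list */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for irq masking */

/* invented name: the driver irq path queueing a napi */
static void producer_add(struct node *n)
{
        pthread_mutex_lock(&list_lock);
        n->next = shared_head;          /* LIFO for brevity; the kernel list is FIFO */
        shared_head = n;
        pthread_mutex_unlock(&list_lock);
}

/* invented name: the softirq side */
static void consume_all(void)
{
        struct node *list;

        pthread_mutex_lock(&list_lock); /* one short critical section ... */
        list = shared_head;             /* ... splices the whole list out */
        shared_head = NULL;
        pthread_mutex_unlock(&list_lock);

        while (list) {                  /* polling runs with the lock dropped */
                struct node *n = list;

                list = n->next;
                printf("polling napi %d\n", n->id);
        }
}

int main(void)
{
        struct node a = { .next = NULL, .id = 1 };
        struct node b = { .next = NULL, .id = 2 };

        producer_add(&a);
        producer_add(&b);
        consume_all();                  /* prints napi 2, then napi 1 */
        return 0;
}

The lock is taken once per batch rather than once per entry, which is exactly the saving the commit message counts in irq mask/unmask pairs.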
1 parent 4cdb1e2 commit d75b1ad

1 file changed: 43 additions, 25 deletions

net/core/dev.c

Lines changed: 43 additions & 25 deletions
@@ -4316,20 +4316,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
         local_irq_enable();
 }
 
+static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+        return sd->rps_ipi_list != NULL;
+#else
+        return false;
+#endif
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
         int work = 0;
         struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
-#ifdef CONFIG_RPS
         /* Check if we have pending ipi, its better to send them now,
          * not waiting net_rx_action() end.
          */
-        if (sd->rps_ipi_list) {
+        if (sd_has_rps_ipi_waiting(sd)) {
                 local_irq_disable();
                 net_rps_action_and_irq_enable(sd);
         }
-#endif
+
         napi->weight = weight_p;
         local_irq_disable();
         while (1) {
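The new sd_has_rps_ipi_waiting() helper above also lifts an #ifdef out of process_backlog(): callers stay ifdef-free, and the compiler drops the constant-false branch when CONFIG_RPS is off. A self-contained illustration of the same pattern, with a hypothetical reduced struct and invented names:

#include <stdbool.h>
#include <stdio.h>

/* #define CONFIG_RPS 1 */              /* toggle the feature at build time */

struct softnet_data_sketch {            /* hypothetical reduced struct */
#ifdef CONFIG_RPS
        void *rps_ipi_list;
#endif
        int dummy;
};

static bool has_ipi_waiting(const struct softnet_data_sketch *sd)
{
#ifdef CONFIG_RPS
        return sd->rps_ipi_list != NULL;
#else
        return false;
#endif
}

int main(void)
{
        struct softnet_data_sketch sd = { 0 };

        if (has_ipi_waiting(&sd))       /* dead code when CONFIG_RPS is off */
                puts("flush ipis");
        return 0;
}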
@@ -4356,7 +4364,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
                                  * We can use a plain write instead of clear_bit(),
                                  * and we dont need an smp_mb() memory barrier.
                                  */
-                                list_del(&napi->poll_list);
                                 napi->state = 0;
                                 rps_unlock(sd);
 
@@ -4406,7 +4413,7 @@ void __napi_complete(struct napi_struct *n)
         BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
         BUG_ON(n->gro_list);
 
-        list_del(&n->poll_list);
+        list_del_init(&n->poll_list);
         smp_mb__before_atomic();
         clear_bit(NAPI_STATE_SCHED, &n->state);
 }
@@ -4424,9 +4431,15 @@ void napi_complete(struct napi_struct *n)
                 return;
 
         napi_gro_flush(n, false);
-        local_irq_save(flags);
-        __napi_complete(n);
-        local_irq_restore(flags);
+
+        if (likely(list_empty(&n->poll_list))) {
+                WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+        } else {
+                /* If n->poll_list is not empty, we need to mask irqs */
+                local_irq_save(flags);
+                __napi_complete(n);
+                local_irq_restore(flags);
+        }
 }
 EXPORT_SYMBOL(napi_complete);
 
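Because net_rx_action() now removes each napi from the list before invoking ->poll() (see list_del_init() in the hunks below), an instance that completes normally is no longer on any list, and napi_complete() can finish with a single atomic bit clear instead of an irq-masked list_del(). A userspace sketch of that fast path using C11 atomics; test_and_clear_bit_sketch() is an invented stand-in for the kernel's arch-specific test_and_clear_bit():

#include <assert.h>
#include <stdatomic.h>

#define NAPI_STATE_SCHED 0

static _Atomic unsigned long state;

/* invented stand-in for the kernel's test_and_clear_bit() */
static int test_and_clear_bit_sketch(int nr, _Atomic unsigned long *addr)
{
        unsigned long mask = 1UL << nr;

        return (atomic_fetch_and(addr, ~mask) & mask) != 0;
}

int main(void)
{
        atomic_store(&state, 1UL << NAPI_STATE_SCHED); /* napi_schedule() set it */

        /* fast path: off every list, so one atomic op completes the napi */
        assert(test_and_clear_bit_sketch(NAPI_STATE_SCHED, &state));
        /* a second completion would trip the patch's WARN_ON_ONCE() */
        assert(!test_and_clear_bit_sketch(NAPI_STATE_SCHED, &state));
        return 0;
}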

@@ -4520,29 +4533,28 @@ static void net_rx_action(struct softirq_action *h)
         struct softnet_data *sd = this_cpu_ptr(&softnet_data);
         unsigned long time_limit = jiffies + 2;
         int budget = netdev_budget;
+        LIST_HEAD(list);
+        LIST_HEAD(repoll);
         void *have;
 
         local_irq_disable();
+        list_splice_init(&sd->poll_list, &list);
+        local_irq_enable();
 
-        while (!list_empty(&sd->poll_list)) {
+        while (!list_empty(&list)) {
                 struct napi_struct *n;
                 int work, weight;
 
-                /* If softirq window is exhuasted then punt.
+                /* If softirq window is exhausted then punt.
                  * Allow this to run for 2 jiffies since which will allow
                  * an average latency of 1.5/HZ.
                  */
                 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
                         goto softnet_break;
 
-                local_irq_enable();
 
-                /* Even though interrupts have been re-enabled, this
-                 * access is safe because interrupts can only add new
-                 * entries to the tail of this list, and only ->poll()
-                 * calls can remove this head entry from the list.
-                 */
-                n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+                n = list_first_entry(&list, struct napi_struct, poll_list);
+                list_del_init(&n->poll_list);
 
                 have = netpoll_poll_lock(n);
 
@@ -4564,41 +4576,47 @@ static void net_rx_action(struct softirq_action *h)
 
                 budget -= work;
 
-                local_irq_disable();
-
                 /* Drivers must not modify the NAPI state if they
                  * consume the entire weight. In such cases this code
                  * still "owns" the NAPI instance and therefore can
                  * move the instance around on the list at-will.
                  */
                 if (unlikely(work == weight)) {
                         if (unlikely(napi_disable_pending(n))) {
-                                local_irq_enable();
                                 napi_complete(n);
-                                local_irq_disable();
                         } else {
                                 if (n->gro_list) {
                                         /* flush too old packets
                                          * If HZ < 1000, flush all packets.
                                          */
-                                        local_irq_enable();
                                         napi_gro_flush(n, HZ >= 1000);
-                                        local_irq_disable();
                                 }
-                                list_move_tail(&n->poll_list, &sd->poll_list);
+                                list_add_tail(&n->poll_list, &repoll);
                         }
                 }
 
                 netpoll_poll_unlock(have);
         }
+
+        if (!sd_has_rps_ipi_waiting(sd) &&
+            list_empty(&list) &&
+            list_empty(&repoll))
+                return;
 out:
+        local_irq_disable();
+
+        list_splice_tail_init(&sd->poll_list, &list);
+        list_splice_tail(&repoll, &list);
+        list_splice(&list, &sd->poll_list);
+        if (!list_empty(&sd->poll_list))
+                __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+
         net_rps_action_and_irq_enable(sd);
 
         return;
 
 softnet_break:
         sd->time_squeeze++;
-        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
         goto out;
 }
 
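The subtle piece is the slow path at the out: label, which rebuilds sd->poll_list from three sources: entries this run never reached, napis that hard irqs queued while we polled unlocked, and instances that consumed their full weight and landed on repoll. A toy array-based sketch of the resulting order, assuming a simplified splice_tail_init() helper (invented here; the kernel uses the <linux/list.h> primitives shown in the diff):

#include <stdio.h>
#include <string.h>

#define MAXQ 16

struct queue {
        const char *items[MAXQ];
        int len;
};

/* append every entry of src to dst and empty src,
 * mimicking list_splice_tail_init() */
static void splice_tail_init(struct queue *src, struct queue *dst)
{
        for (int i = 0; i < src->len; i++)
                dst->items[dst->len++] = src->items[i];
        src->len = 0;
}

int main(void)
{
        struct queue list      = { { "unpolled0", "unpolled1" }, 2 }; /* squeezed leftovers */
        struct queue repoll    = { { "heavy0" }, 1 };  /* exhausted full weight */
        struct queue poll_list = { { "fresh0" }, 1 };  /* raised by irqs meanwhile */

        /* same order as the patch: leftovers, then fresh napis, then repoll */
        splice_tail_init(&poll_list, &list);
        splice_tail_init(&repoll, &list);
        memcpy(&poll_list, &list, sizeof(list));       /* final list_splice() back */

        for (int i = 0; i < poll_list.len; i++)
                printf("%s\n", poll_list.items[i]);
        /* prints: unpolled0 unpolled1 fresh0 heavy0 */
        return 0;
}

Starved instances thus keep their place at the head of the queue, ahead of anything freshly scheduled or re-polled.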
