
Commit 8b16cef

Hoang-Nam Nguyen authored and Roland Dreier committed
IB/ehca: Fix race condition/locking issues in scaling code
Fix a race condition in find_next_online_cpu() and some other locking issues in ehca scaling code.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
1 parent 78d8d5f commit 8b16cef

File tree

1 file changed (+33, −35 lines)

drivers/infiniband/hw/ehca/ehca_irq.c

Lines changed: 33 additions & 35 deletions
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)
 
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-	unsigned long flags_last_cpu;
+	int cpu;
+	unsigned long flags;
 
+	WARN_ON_ONCE(!in_interrupt());
 	if (ehca_debug_level)
 		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
 
-	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-	if (pool->last_cpu == NR_CPUS)
-		pool->last_cpu = first_cpu(cpu_online_map);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+	spin_lock_irqsave(&pool->last_cpu_lock, flags);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
-	return pool->last_cpu;
+	return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
 			      struct ehca_cpu_comp_task *cct)
 {
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
-	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+	spin_lock_irqsave(&cct->task_lock, flags);
+	spin_lock(&__cq->task_lock);
 
 	if (__cq->nr_callbacks == 0) {
 		__cq->nr_callbacks++;
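The race fixed in find_next_online_cpu() above: the old code released last_cpu_lock and then returned pool->last_cpu, a field a concurrent caller could already have advanced, so the returned CPU was not necessarily the one this caller selected. The rewrite computes the CPU into a local under the lock, publishes it, and returns the local. A minimal userspace sketch of the same pattern, with hypothetical names (an analogue, not the ehca code):

/*
 * comp_pool and find_next_cpu() are hypothetical; they mirror the
 * round-robin cursor in the patch, not the real ehca structures.
 */
#include <pthread.h>

struct comp_pool {
	pthread_mutex_t last_cpu_lock;
	int last_cpu;
	int num_cpus;
};

static int find_next_cpu(struct comp_pool *pool)
{
	int cpu;

	pthread_mutex_lock(&pool->last_cpu_lock);
	cpu = pool->last_cpu + 1;	/* next_cpu() analogue */
	if (cpu >= pool->num_cpus)	/* wrap around, like first_cpu() */
		cpu = 0;
	pool->last_cpu = cpu;		/* publish while still holding the lock */
	pthread_mutex_unlock(&pool->last_cpu_lock);

	/*
	 * Return the local copy. The buggy version returned pool->last_cpu
	 * here, a value another thread may already have changed.
	 */
	return cpu;
}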
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
 	else
 		__cq->nr_callbacks++;
 
-	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+	spin_unlock(&__cq->task_lock);
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static void queue_comp_task(struct ehca_cq *__cq)
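The __queue_comp_task() change in the two hunks above drops the second spin_lock_irqsave() and its separate flags word: once the outer spin_lock_irqsave() has disabled local interrupts, the inner lock only needs a plain spin_lock(), and the unlocks mirror the lock order so only the outermost one restores flags. A minimal kernel-style sketch of the idiom (helper name hypothetical):

#include <linux/spinlock.h>

/* with_both_locks() is a made-up helper illustrating the idiom. */
static void with_both_locks(spinlock_t *outer, spinlock_t *inner)
{
	unsigned long flags;

	spin_lock_irqsave(outer, flags);	/* local IRQs off from here */
	spin_lock(inner);			/* no second flags word needed */

	/* ... touch state guarded by both locks ... */

	spin_unlock(inner);			/* release in reverse order */
	spin_unlock_irqrestore(outer, flags);	/* IRQs back on */
}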
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
 
 	cpu = get_cpu();
 	cpu_id = find_next_online_cpu(pool);
-
 	BUG_ON(!cpu_online(cpu_id));
 
 	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	BUG_ON(!cct);
 
 	if (cct->cq_jobs > 0) {
 		cpu_id = find_next_online_cpu(pool);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+		BUG_ON(!cct);
 	}
 
 	__queue_comp_task(__cq, cct);
-
-	put_cpu();
-
-	return;
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
 	struct ehca_cq *cq;
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&cct->task_lock, flags);
 
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		spin_unlock_irqrestore(&cct->task_lock, flags);
 		comp_event_callback(cq);
-		spin_lock_irqsave(&cct->task_lock, flags_cct);
+		spin_lock_irqsave(&cct->task_lock, flags);
 
-		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		spin_lock(&cq->task_lock);
 		cq->nr_callbacks--;
 		if (cq->nr_callbacks == 0) {
 			list_del_init(cct->cq_list.next);
 			cct->cq_jobs--;
 		}
-		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+		spin_unlock(&cq->task_lock);
 	}
 
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-	return;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static int comp_task(void *__cct)
 {
 	struct ehca_cpu_comp_task* cct = __cct;
+	int cql_empty;
 	DECLARE_WAITQUEUE(wait, current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while(!kthread_should_stop()) {
 		add_wait_queue(&cct->wait_queue, &wait);
 
-		if (list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (cql_empty)
 			schedule();
 		else
 			__set_current_state(TASK_RUNNING);
 
 		remove_wait_queue(&cct->wait_queue, &wait);
 
-		if (!list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (!cql_empty)
 			run_comp_task(__cct);
 
 		set_current_state(TASK_INTERRUPTIBLE);
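The comp_task() change above stops testing cct->cq_list locklessly: it samples list_empty() under cct->task_lock into cql_empty and then acts on that snapshot. Plain spin_lock_irq()/spin_unlock_irq() suffices here because the kthread runs in process context with interrupts enabled, so unconditionally re-enabling them on unlock is safe. A minimal kernel-style sketch of the snapshot-under-lock pattern (helper name hypothetical):

#include <linux/list.h>
#include <linux/spinlock.h>

/* queue_is_empty() is a made-up helper showing the pattern. */
static int queue_is_empty(spinlock_t *lock, struct list_head *list)
{
	int empty;

	spin_lock_irq(lock);		/* exclude the concurrent writers */
	empty = list_empty(list);	/* snapshot while protected */
	spin_unlock_irq(lock);		/* unconditionally re-enable IRQs */

	return empty;
}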
@@ -693,8 +695,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
 
 	if (task)
 		kthread_stop(task);
-
-	return;
 }
 
 static void take_over_work(struct ehca_comp_pool *pool,
@@ -815,6 +815,4 @@ void ehca_destroy_comp_pool(void)
 	free_percpu(pool->cpu_comp_tasks);
 	kfree(pool);
 #endif
-
-	return;
 }
