Commit cb8c65c

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
Pull sparc fixes from David Miller:

- Fix DMA regression in 4.13 merge window, only certain chips can do
  64-bit DMA. From Dave Dushar.

- Correct cpu cross-call algorithm to correctly detect stalled or stuck
  remote cpus, from Jane Chu.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Measure receiver forward progress to avoid send mondo timeout
  SPARC64: Fix sun4v DMA panic
2 parents 935acd3 + 9d53cae commit cb8c65c

5 files changed: +139 -75 lines

arch/sparc/include/asm/trap_block.h

Lines changed: 1 addition & 0 deletions
@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS];
 void init_cur_cpu_trap(struct thread_info *);
 void setup_tba(void);
 extern int ncpus_probed;
+extern u64 cpu_mondo_counter[NR_CPUS];
 
 unsigned long real_hard_smp_processor_id(void);

arch/sparc/kernel/pci_sun4v.c

Lines changed: 7 additions & 5 deletions
@@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 static int dma_4v_supported(struct device *dev, u64 device_mask)
 {
 	struct iommu *iommu = dev->archdata.iommu;
-	u64 dma_addr_mask;
+	u64 dma_addr_mask = iommu->dma_addr_mask;
 
-	if (device_mask > DMA_BIT_MASK(32) && iommu->atu)
-		dma_addr_mask = iommu->atu->dma_addr_mask;
-	else
-		dma_addr_mask = iommu->dma_addr_mask;
+	if (device_mask > DMA_BIT_MASK(32)) {
+		if (iommu->atu)
+			dma_addr_mask = iommu->atu->dma_addr_mask;
+		else
+			return 0;
+	}
 
 	if ((device_mask & dma_addr_mask) == dma_addr_mask)
 		return 1;

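In effect, dma_4v_supported() now rejects a mask wider than 32 bits when no ATU is present, instead of silently falling back to the 32-bit IOMMU mask and panicking later during DMA mapping. A minimal standalone sketch of the corrected decision, using hypothetical names (fake_iommu, dma_mask_supported) rather than the kernel's structures, might look like:

#include <stdbool.h>
#include <stdint.h>

#define DMA_BIT_MASK32 0xffffffffULL	/* lower 32 bits set */

/* Hypothetical, simplified model of the per-device IOMMU state. */
struct fake_iommu {
	uint64_t dma_addr_mask;		/* mask the legacy IOMMU supports */
	bool has_atu;			/* ATU present => 64-bit DMA capable */
	uint64_t atu_dma_addr_mask;	/* mask the ATU supports */
};

/* Return true if the device's requested DMA mask can be satisfied. */
static bool dma_mask_supported(const struct fake_iommu *iommu, uint64_t device_mask)
{
	uint64_t dma_addr_mask = iommu->dma_addr_mask;

	if (device_mask > DMA_BIT_MASK32) {
		if (!iommu->has_atu)
			return false;	/* no ATU: reject, don't fall back */
		dma_addr_mask = iommu->atu_dma_addr_mask;
	}

	return (device_mask & dma_addr_mask) == dma_addr_mask;
}

With has_atu false, any >32-bit request is refused up front, so the driver drops back to 32-bit DMA rather than handing the IOMMU addresses it cannot translate.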
arch/sparc/kernel/smp_64.c

Lines changed: 115 additions & 70 deletions
@@ -622,117 +622,162 @@ static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 	}
 }
 
-/* Multi-cpu list version. */
+#define	CPU_MONDO_COUNTER(cpuid)	(cpu_mondo_counter[cpuid])
+#define	MONDO_USEC_WAIT_MIN		2
+#define	MONDO_USEC_WAIT_MAX		100
+#define	MONDO_RETRY_LIMIT		500000
+
+/* Multi-cpu list version.
+ *
+ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
+ * Sometimes not all cpus receive the mondo, requiring us to re-send
+ * the mondo until all cpus have received, or cpus are truly stuck
+ * unable to receive mondo, and we timeout.
+ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
+ * perform guest service, such as PCIe error handling. Consider the
+ * service time, 1 second overall wait is reasonable for 1 cpu.
+ * Here two in-between mondo check wait time are defined: 2 usec for
+ * single cpu quick turn around and up to 100usec for large cpu count.
+ * Deliver mondo to large number of cpus could take longer, we adjusts
+ * the retry count as long as target cpus are making forward progress.
+ */
 static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	int this_cpu, tot_cpus, prev_sent, i, rem;
+	int usec_wait, retries, tot_retries;
+	u16 first_cpu = 0xffff;
+	unsigned long xc_rcvd = 0;
 	unsigned long status;
+	int ecpuerror_id = 0;
+	int enocpu_id = 0;
 	u16 *cpu_list;
+	u16 cpu;
 
 	this_cpu = smp_processor_id();
-
 	cpu_list = __va(tb->cpu_list_pa);
-
-	saw_cpu_error = 0;
-	retries = 0;
+	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
+	if (usec_wait > MONDO_USEC_WAIT_MAX)
+		usec_wait = MONDO_USEC_WAIT_MAX;
+	retries = tot_retries = 0;
+	tot_cpus = cnt;
 	prev_sent = 0;
+
 	do {
-		int forward_progress, n_sent;
+		int n_sent, mondo_delivered, target_cpu_busy;
 
 		status = sun4v_cpu_mondo_send(cnt,
 					      tb->cpu_list_pa,
 					      tb->cpu_mondo_block_pa);
 
 		/* HV_EOK means all cpus received the xcall, we're done. */
 		if (likely(status == HV_EOK))
-			break;
+			goto xcall_done;
+
+		/* If not these non-fatal errors, panic */
+		if (unlikely((status != HV_EWOULDBLOCK) &&
+			     (status != HV_ECPUERROR) &&
+			     (status != HV_ENOCPU)))
+			goto fatal_errors;
 
 		/* First, see if we made any forward progress.
+		 *
+		 * Go through the cpu_list, count the target cpus that have
+		 * received our mondo (n_sent), and those that did not (rem).
+		 * Re-pack cpu_list with the cpus remain to be retried in the
+		 * front - this simplifies tracking the truly stalled cpus.
 		 *
 		 * The hypervisor indicates successful sends by setting
 		 * cpu list entries to the value 0xffff.
+		 *
+		 * EWOULDBLOCK means some target cpus did not receive the
+		 * mondo and retry usually helps.
+		 *
+		 * ECPUERROR means at least one target cpu is in error state,
+		 * it's usually safe to skip the faulty cpu and retry.
+		 *
+		 * ENOCPU means one of the target cpu doesn't belong to the
+		 * domain, perhaps offlined which is unexpected, but not
+		 * fatal and it's okay to skip the offlined cpu.
 		 */
+		rem = 0;
 		n_sent = 0;
 		for (i = 0; i < cnt; i++) {
-			if (likely(cpu_list[i] == 0xffff))
+			cpu = cpu_list[i];
+			if (likely(cpu == 0xffff)) {
 				n_sent++;
+			} else if ((status == HV_ECPUERROR) &&
+				(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
+				ecpuerror_id = cpu + 1;
+			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
+				enocpu_id = cpu + 1;
+			} else {
+				cpu_list[rem++] = cpu;
+			}
 		}
 
-		forward_progress = 0;
-		if (n_sent > prev_sent)
-			forward_progress = 1;
+		/* No cpu remained, we're done. */
+		if (rem == 0)
+			break;
 
-		prev_sent = n_sent;
+		/* Otherwise, update the cpu count for retry. */
+		cnt = rem;
 
-		/* If we get a HV_ECPUERROR, then one or more of the cpus
-		 * in the list are in error state.  Use the cpu_state()
-		 * hypervisor call to find out which cpus are in error state.
+		/* Record the overall number of mondos received by the
+		 * first of the remaining cpus.
 		 */
-		if (unlikely(status == HV_ECPUERROR)) {
-			for (i = 0; i < cnt; i++) {
-				long err;
-				u16 cpu;
+		if (first_cpu != cpu_list[0]) {
+			first_cpu = cpu_list[0];
+			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+		}
 
-				cpu = cpu_list[i];
-				if (cpu == 0xffff)
-					continue;
+		/* Was any mondo delivered successfully? */
+		mondo_delivered = (n_sent > prev_sent);
+		prev_sent = n_sent;
 
-				err = sun4v_cpu_state(cpu);
-				if (err == HV_CPU_STATE_ERROR) {
-					saw_cpu_error = (cpu + 1);
-					cpu_list[i] = 0xffff;
-				}
-			}
-		} else if (unlikely(status != HV_EWOULDBLOCK))
-			goto fatal_mondo_error;
+		/* or, was any target cpu busy processing other mondos? */
+		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
+		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
 
-		/* Don't bother rewriting the CPU list, just leave the
-		 * 0xffff and non-0xffff entries in there and the
-		 * hypervisor will do the right thing.
-		 *
-		 * Only advance timeout state if we didn't make any
-		 * forward progress.
+		/* Retry count is for no progress. If we're making progress,
+		 * reset the retry count.
 		 */
-		if (unlikely(!forward_progress)) {
-			if (unlikely(++retries > 10000))
-				goto fatal_mondo_timeout;
-
-			/* Delay a little bit to let other cpus catch up
-			 * on their cpu mondo queue work.
-			 */
-			udelay(2 * cnt);
+		if (likely(mondo_delivered || target_cpu_busy)) {
+			tot_retries += retries;
+			retries = 0;
+		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
+			goto fatal_mondo_timeout;
 		}
-	} while (1);
 
-	if (unlikely(saw_cpu_error))
-		goto fatal_mondo_cpu_error;
+		/* Delay a little bit to let other cpus catch up on
+		 * their cpu mondo queue work.
+		 */
+		if (!mondo_delivered)
+			udelay(usec_wait);
 
-	return;
+		retries++;
+	} while (1);
 
-fatal_mondo_cpu_error:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
-	       "(including %d) were in error state\n",
-	       this_cpu, saw_cpu_error - 1);
+xcall_done:
+	if (unlikely(ecpuerror_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
+			this_cpu, ecpuerror_id - 1);
+	} else if (unlikely(enocpu_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
+			this_cpu, enocpu_id - 1);
+	}
 	return;
 
+fatal_errors:
+	/* fatal errors include bad alignment, etc */
+	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
+		this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+	panic("Unexpected SUN4V mondo error %lu\n", status);
+
 fatal_mondo_timeout:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
-	       " progress after %d retries.\n",
-	       this_cpu, retries);
-	goto dump_cpu_list_and_out;
-
-fatal_mondo_error:
-	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
-	       this_cpu, status);
-	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
-	       "mondo_block_pa(%lx)\n",
-	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
-
-dump_cpu_list_and_out:
-	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
-	for (i = 0; i < cnt; i++)
-		printk("%u ", cpu_list[i]);
-	printk("]\n");
+	/* some cpus being non-responsive to the cpu mondo */
+	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
+		this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
+	panic("SUN4V mondo timeout panic\n");
 }
 
 static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

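The core of the new algorithm is the retry policy: the fixed 10000-retry cap is replaced by a budget (MONDO_RETRY_LIMIT) that is reset whenever the sender observes forward progress, either more cpu_list entries marked delivered, or the first still-pending cpu's mondo counter advancing (busy but alive). A standalone sketch of that decision, with hypothetical names (send_progress, keep_retrying) rather than the kernel's, could be:

#include <stdbool.h>
#include <stdint.h>

#define RETRY_LIMIT 500000	/* mirrors MONDO_RETRY_LIMIT in the patch */

/* Hypothetical per-iteration state for one send attempt. */
struct send_progress {
	int n_sent;		/* entries the hypervisor marked 0xffff this pass */
	int prev_sent;		/* entries delivered as of the previous pass */
	uint64_t rcvd_now;	/* mondo counter of the first still-pending cpu */
	uint64_t rcvd_before;	/* that counter as of the previous pass */
};

/*
 * Return true if we should keep retrying: either more mondos were
 * delivered, or the stalled cpu is demonstrably working through other
 * mondo traffic. Otherwise burn one retry from the budget and give up
 * once the budget is exhausted.
 */
static bool keep_retrying(const struct send_progress *p, int *retries)
{
	bool delivered = p->n_sent > p->prev_sent;
	bool target_busy = p->rcvd_now > p->rcvd_before;

	if (delivered || target_busy) {
		*retries = 0;		/* forward progress: reset the budget */
		return true;
	}
	return ++(*retries) <= RETRY_LIMIT;
}

The counter check is what distinguishes a cpu temporarily borrowed by the hypervisor (its counter keeps moving) from one that is genuinely wedged, which is the case the old code could not tell apart from simple slowness.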
arch/sparc/kernel/sun4v_ivec.S

Lines changed: 15 additions & 0 deletions
@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
 	ldxa	[%g0] ASI_SCRATCHPAD, %g4
 	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
 
+	/* Get smp_processor_id() into %g3 */
+	sethi	%hi(trap_block), %g5
+	or	%g5, %lo(trap_block), %g5
+	sub	%g4, %g5, %g3
+	srlx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
+
+	/* Increment cpu_mondo_counter[smp_processor_id()] */
+	sethi	%hi(cpu_mondo_counter), %g5
+	or	%g5, %lo(cpu_mondo_counter), %g5
+	sllx	%g3, 3, %g3
+	add	%g5, %g3, %g5
+	ldx	[%g5], %g3
+	add	%g3, 1, %g3
+	stx	%g3, [%g5]
+
 	/* Get CPU mondo queue base phys address into %g7. */
 	ldx	[%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7

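The added assembly only bumps a per-cpu 64-bit counter on entry to the CPU-mondo trap handler; the sending cpu later reads it to tell a busy receiver from a stuck one. A rough C equivalent (hypothetical, since the real increment must run in the trap vector with only global registers available) would be:

#include <stdint.h>

#define NR_CPUS 4096	/* illustrative; the kernel value is config-dependent */

/* One slot per cpu, incremented on every CPU-mondo trap taken. */
uint64_t cpu_mondo_counter[NR_CPUS];

/* Hypothetical C rendering of the trap-entry increment. */
static inline void count_cpu_mondo(int cpuid)
{
	cpu_mondo_counter[cpuid]++;	/* sllx %g3, 3 indexes 8-byte slots */
}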
arch/sparc/kernel/traps_64.c

Lines changed: 1 addition & 0 deletions
@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
 	}
 }
 
+u64 cpu_mondo_counter[NR_CPUS] = {0};
 struct trap_per_cpu trap_block[NR_CPUS];
 EXPORT_SYMBOL(trap_block);
