@@ -622,117 +622,162 @@ static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 	}
 }
 
-/* Multi-cpu list version. */
+#define	CPU_MONDO_COUNTER(cpuid)	(cpu_mondo_counter[cpuid])
+#define	MONDO_USEC_WAIT_MIN		2
+#define	MONDO_USEC_WAIT_MAX		100
+#define	MONDO_RETRY_LIMIT		500000
+
+/* Multi-cpu list version.
+ *
+ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
+ * Sometimes not all cpus receive the mondo, requiring us to re-send
+ * the mondo until all cpus have received it, or cpus are truly stuck
+ * unable to receive the mondo, and we time out.
+ * Occasionally a target cpu strand is borrowed briefly by the hypervisor
+ * to perform guest service, such as PCIe error handling.  Considering
+ * that service time, an overall wait of 1 second for 1 cpu is reasonable.
+ * Two in-between mondo check wait times are defined here: 2 usec for a
+ * single cpu (quick turnaround) and up to 100 usec for a large cpu count.
+ * Delivering a mondo to many cpus can take longer, so we adjust the retry
+ * count as long as the target cpus are making forward progress.
+ */
 static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	int this_cpu, tot_cpus, prev_sent, i, rem;
+	int usec_wait, retries, tot_retries;
+	u16 first_cpu = 0xffff;
+	unsigned long xc_rcvd = 0;
 	unsigned long status;
+	int ecpuerror_id = 0;
+	int enocpu_id = 0;
 	u16 *cpu_list;
+	u16 cpu;
 
 	this_cpu = smp_processor_id();
-
 	cpu_list = __va(tb->cpu_list_pa);
-
-	saw_cpu_error = 0;
-	retries = 0;
+	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
+	if (usec_wait > MONDO_USEC_WAIT_MAX)
+		usec_wait = MONDO_USEC_WAIT_MAX;
+	retries = tot_retries = 0;
+	tot_cpus = cnt;
 	prev_sent = 0;
+
 	do {
-		int forward_progress, n_sent;
+		int n_sent, mondo_delivered, target_cpu_busy;
 
 		status = sun4v_cpu_mondo_send(cnt,
 					      tb->cpu_list_pa,
 					      tb->cpu_mondo_block_pa);
 
 		/* HV_EOK means all cpus received the xcall, we're done. */
 		if (likely(status == HV_EOK))
-			break;
+			goto xcall_done;
+
+		/* If not one of these non-fatal errors, panic. */
+		if (unlikely((status != HV_EWOULDBLOCK) &&
+			     (status != HV_ECPUERROR) &&
+			     (status != HV_ENOCPU)))
+			goto fatal_errors;
 
 		/* First, see if we made any forward progress.
+		 *
+		 * Go through the cpu_list, count the target cpus that have
+		 * received our mondo (n_sent) and those that did not (rem).
+		 * Re-pack cpu_list with the cpus that remain to be retried at
+		 * the front - this simplifies tracking the truly stalled cpus.
 		 *
 		 * The hypervisor indicates successful sends by setting
 		 * cpu list entries to the value 0xffff.
+		 *
+		 * EWOULDBLOCK means some target cpus did not receive the
+		 * mondo and a retry usually helps.
+		 *
+		 * ECPUERROR means at least one target cpu is in the error
+		 * state; it's usually safe to skip the faulty cpu and retry.
+		 *
+		 * ENOCPU means one of the target cpus doesn't belong to the
+		 * domain, perhaps offlined, which is unexpected but not
+		 * fatal, and it's okay to skip the offlined cpu.
 		 */
+		rem = 0;
 		n_sent = 0;
 		for (i = 0; i < cnt; i++) {
-			if (likely(cpu_list[i] == 0xffff))
+			cpu = cpu_list[i];
+			if (likely(cpu == 0xffff)) {
 				n_sent++;
+			} else if ((status == HV_ECPUERROR) &&
+				   (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
+				ecpuerror_id = cpu + 1;
+			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
+				enocpu_id = cpu + 1;
+			} else {
+				cpu_list[rem++] = cpu;
+			}
 		}
 
-		forward_progress = 0;
-		if (n_sent > prev_sent)
-			forward_progress = 1;
+		/* No cpu remained, we're done. */
+		if (rem == 0)
+			break;
 
-		prev_sent = n_sent;
+		/* Otherwise, update the cpu count for retry. */
+		cnt = rem;
 
-		/* If we get a HV_ECPUERROR, then one or more of the cpus
-		 * in the list are in error state.  Use the cpu_state()
-		 * hypervisor call to find out which cpus are in error state.
+		/* Record the overall number of mondos received by the
+		 * first of the remaining cpus.
 		 */
-		if (unlikely(status == HV_ECPUERROR)) {
-			for (i = 0; i < cnt; i++) {
-				long err;
-				u16 cpu;
+		if (first_cpu != cpu_list[0]) {
+			first_cpu = cpu_list[0];
+			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+		}
 
-				cpu = cpu_list[i];
-				if (cpu == 0xffff)
-					continue;
+		/* Was any mondo delivered successfully? */
+		mondo_delivered = (n_sent > prev_sent);
+		prev_sent = n_sent;
 
-				err = sun4v_cpu_state(cpu);
-				if (err == HV_CPU_STATE_ERROR) {
-					saw_cpu_error = (cpu + 1);
-					cpu_list[i] = 0xffff;
-				}
-			}
-		} else if (unlikely(status != HV_EWOULDBLOCK))
-			goto fatal_mondo_error;
+		/* or, was any target cpu busy processing other mondos? */
+		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
+		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
 
-		/* Don't bother rewriting the CPU list, just leave the
-		 * 0xffff and non-0xffff entries in there and the
-		 * hypervisor will do the right thing.
-		 *
-		 * Only advance timeout state if we didn't make any
-		 * forward progress.
+		/* The retry count only tracks rounds with no progress.  If
+		 * we're making progress, reset the retry count.
 		 */
-		if (unlikely(!forward_progress)) {
-			if (unlikely(++retries > 10000))
-				goto fatal_mondo_timeout;
-
-			/* Delay a little bit to let other cpus catch up
-			 * on their cpu mondo queue work.
-			 */
-			udelay(2 * cnt);
+		if (likely(mondo_delivered || target_cpu_busy)) {
+			tot_retries += retries;
+			retries = 0;
+		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
+			goto fatal_mondo_timeout;
 		}
-	} while (1);
 
-	if (unlikely(saw_cpu_error))
-		goto fatal_mondo_cpu_error;
+		/* Delay a little bit to let other cpus catch up on
+		 * their cpu mondo queue work.
+		 */
+		if (!mondo_delivered)
+			udelay(usec_wait);
 
-	return;
+		retries++;
+	} while (1);
 
-fatal_mondo_cpu_error:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
-	       "(including %d) were in error state\n",
-	       this_cpu, saw_cpu_error - 1);
+xcall_done:
+	if (unlikely(ecpuerror_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
+			this_cpu, ecpuerror_id - 1);
+	} else if (unlikely(enocpu_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
+			this_cpu, enocpu_id - 1);
+	}
 	return;
 
+fatal_errors:
+	/* fatal errors include bad alignment, etc */
+	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
+		this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+	panic("Unexpected SUN4V mondo error %lu\n", status);
+
 fatal_mondo_timeout:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
-	       " progress after %d retries.\n",
-	       this_cpu, retries);
-	goto dump_cpu_list_and_out;
-
-fatal_mondo_error:
-	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
-	       this_cpu, status);
-	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
-	       "mondo_block_pa(%lx)\n",
-	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
-
-dump_cpu_list_and_out:
-	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
-	for (i = 0; i < cnt; i++)
-		printk("%u ", cpu_list[i]);
-	printk("]\n");
+	/* Some cpus are non-responsive to the cpu mondo. */
+	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
+		this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
+	panic("SUN4V mondo timeout panic\n");
 }
 
 static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
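The heart of this change is the retry policy: a send round only counts against MONDO_RETRY_LIMIT when no new target acknowledged the mondo (mondo_delivered is false) and the mondo counter of the first still-pending cpu did not move (target_cpu_busy is false), i.e. that target is neither receiving our xcall nor processing anyone else's. The sketch below is a self-contained user-space model of that policy, for illustration only: send_one_round(), mondo_counter[], rounds_needed[] and the simulated targets are names invented here, standing in for sun4v_cpu_mondo_send() and cpu_mondo_counter[], not the kernel's actual API.

/*
 * Illustrative model only -- NOT kernel code.  Build with a C99 compiler,
 * e.g. "cc -std=c99 -o mondo_model mondo_model.c".
 */
#include <stdbool.h>
#include <stdio.h>

#define NTARGETS	4
#define RETRY_LIMIT	500000

static int rounds_needed[NTARGETS] = { 0, 2, 6, 6 };	/* rounds before each target acks */
static bool acked[NTARGETS];
static unsigned long mondo_counter[NTARGETS];		/* "mondos processed" per target */

/* One send attempt: returns how many targets have acked so far and stores
 * the first still-pending target in *first_pending.  A pending target bumps
 * its counter, modelling "busy processing other mondos".
 */
static int send_one_round(int *first_pending)
{
	int n = 0;

	*first_pending = -1;
	for (int i = 0; i < NTARGETS; i++) {
		if (!acked[i] && rounds_needed[i]-- <= 0)
			acked[i] = true;
		if (acked[i]) {
			n++;
		} else {
			mondo_counter[i]++;
			if (*first_pending < 0)
				*first_pending = i;
		}
	}
	return n;
}

/* The policy from the patch: a round counts against the retry limit only
 * when nothing new was delivered AND the first pending cpu's counter did
 * not advance.  Returns true when every target acked, false on timeout.
 */
static bool deliver_all(void)
{
	unsigned long seen = 0;
	int prev_acked = 0, retries = 0, first, last_first = -1;

	for (;;) {
		int n = send_one_round(&first);
		bool delivered, busy;

		if (n == NTARGETS)
			return true;

		if (first != last_first) {	/* new "first pending" target */
			last_first = first;
			seen = mondo_counter[first];
		}

		delivered = (n > prev_acked);
		busy = (mondo_counter[first] > seen);
		prev_acked = n;
		seen = mondo_counter[first];

		if (delivered || busy)
			retries = 0;		/* forward progress: keep trying */
		else if (++retries > RETRY_LIMIT)
			return false;		/* truly stuck: give up */
	}
}

int main(void)
{
	printf("delivered to all targets: %s\n", deliver_all() ? "yes" : "no");
	return 0;
}

The wait tuning itself is pinned down by the constants in the patch: usec_wait is cnt * MONDO_USEC_WAIT_MIN (2 usec) capped at MONDO_USEC_WAIT_MAX (100 usec), so a single-cpu cross call polls every 2 usec, and MONDO_RETRY_LIMIT (500000) no-progress polls bound the single-cpu case at roughly 1 second, matching the comment at the top of hypervisor_xcall_deliver().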