@@ -46,10 +46,9 @@ static const struct kvm_irq_level default_vtimer_irq = {
 	.level	= 1,
 };
 
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
-	vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+				 struct arch_timer_context *timer_ctx);
 
 u64 kvm_phys_timer_read(void)
 {
@@ -69,17 +68,45 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
 	cancel_work_sync(work);
 }
 
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
-	 * We disable the timer in the world switch and let it be
-	 * handled by kvm_timer_sync_hwstate(). Getting a timer
-	 * interrupt at this point is a sure sign of some major
-	 * breakage.
+	 * When using a userspace irqchip with the architected timers, we must
+	 * prevent continuously exiting from the guest, and therefore mask the
+	 * physical interrupt by disabling it on the host interrupt controller
+	 * when the virtual level is high, such that the guest can make
+	 * forward progress.  Once we detect the output level being
+	 * de-asserted, we unmask the interrupt again so that we exit from the
+	 * guest when the timer fires.
 	 */
-	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+	if (vtimer->irq.level)
+		disable_percpu_irq(host_vtimer_irq);
+	else
+		enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer;
+
+	if (!vcpu) {
+		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
+		return IRQ_NONE;
+	}
+	vtimer = vcpu_vtimer(vcpu);
+
+	if (!vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		if (kvm_timer_irq_can_fire(vtimer))
+			kvm_timer_update_irq(vcpu, true, vtimer);
+	}
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_vtimer_update_mask_user(vcpu);
+
 	return IRQ_HANDLED;
 }
 
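The rewritten handler raises the virtual line from hard-IRQ context, but only when the sampled CNTV_CTL says the timer may legally fire. For reference, kvm_timer_irq_can_fire() is a pre-existing helper in this file; a sketch of its logic (reproduced from memory, so treat the exact body as an assumption) checks the ENABLE and IMASK bits:

	static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
	{
		/* Fire only if the timer is enabled and its output unmasked. */
		return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
			(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
	}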
@@ -215,7 +242,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 {
 	int ret;
 
-	timer_ctx->active_cleared_last = false;
 	timer_ctx->irq.level = new_level;
 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
 				   timer_ctx->irq.level);
@@ -271,10 +297,16 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu,
 	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx));
 }
 
-static void timer_save_state(struct kvm_vcpu *vcpu)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
@@ -283,6 +315,10 @@ static void timer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+
+	vtimer->loaded = false;
+out:
+	local_irq_restore(flags);
 }
 
 /*
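Together, the local_irq_save() section and the loaded flag make the save path safe against kvm_arch_timer_handler() running on the same CPU and idempotent across callers. A minimal usage sketch (the back-to-back ordering is illustrative, not a real call site):

	vtimer_save_state(vcpu);	/* saves cnt_ctl/cnt_cval, clears ->loaded */
	vtimer_save_state(vcpu);	/* ->loaded already false: falls through to out */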
@@ -296,6 +332,8 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
+	vtimer_save_state(vcpu);
+
 	/*
 	 * No need to schedule a background timer if any guest timer has
 	 * already expired, because kvm_vcpu_block will return before putting
@@ -318,22 +356,34 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void timer_restore_state(struct kvm_vcpu *vcpu)
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
 		isb();
 		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 	}
+
+	vtimer->loaded = true;
+out:
+	local_irq_restore(flags);
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
+	vtimer_restore_state(vcpu);
+
 	soft_timer_cancel(&timer->bg_timer, &timer->expired);
 }
 
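kvm_timer_schedule() and kvm_timer_unschedule() now bracket a blocked vcpu with an explicit save/restore pair, letting the soft bg_timer stand in while the hardware state is off the CPU. A sketch of the blocking sequence (the exact call sites in virt/kvm/arm are an assumption here):

	kvm_timer_schedule(vcpu);	/* vtimer_save_state() + arm bg_timer */
	/* ...thread sleeps in kvm_vcpu_block()... */
	kvm_timer_unschedule(vcpu);	/* cancel bg_timer + vtimer_restore_state() */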
@@ -352,61 +402,45 @@ static void set_cntvoff(u64 cntvoff)
 	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	bool phys_active;
 	int ret;
 
-	/*
-	 * If we enter the guest with the virtual input level to the VGIC
-	 * asserted, then we have already told the VGIC what we need to, and
-	 * we don't need to exit from the guest until the guest deactivates
-	 * the already injected interrupt, so therefore we should set the
-	 * hardware active state to prevent unnecessary exits from the guest.
-	 *
-	 * Also, if we enter the guest with the virtual timer interrupt active,
-	 * then it must be active on the physical distributor, because we set
-	 * the HW bit and the guest must be able to deactivate the virtual and
-	 * physical interrupt at the same time.
-	 *
-	 * Conversely, if the virtual input level is deasserted and the virtual
-	 * interrupt is not active, then always clear the hardware active state
-	 * to ensure that hardware interrupts from the timer triggers a guest
-	 * exit.
-	 */
 	phys_active = vtimer->irq.level ||
-			kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
-	/*
-	 * We want to avoid hitting the (re)distributor as much as
-	 * possible, as this is a potentially expensive MMIO access
-	 * (not to mention locks in the irq layer), and a solution for
-	 * this is to cache the "active" state in memory.
-	 *
-	 * Things to consider: we cannot cache an "active set" state,
-	 * because the HW can change this behind our back (it becomes
-	 * "clear" in the HW).  We must then restrict the caching to
-	 * the "clear" state.
-	 *
-	 * The cache is invalidated on:
-	 * - vcpu put, indicating that the HW cannot be trusted to be
-	 *   in a sane state on the next vcpu load,
-	 * - any change in the interrupt state
-	 *
-	 * Usage conditions:
-	 * - cached value is "active clear"
-	 * - value to be programmed is "active clear"
-	 */
-	if (vtimer->active_cleared_last && !phys_active)
-		return;
+		      kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
 	ret = irq_set_irqchip_state(host_vtimer_irq,
 				    IRQCHIP_STATE_ACTIVE,
 				    phys_active);
 	WARN_ON(ret);
+}
 
-	vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+	kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_timer_vcpu_load_user(vcpu);
+	else
+		kvm_timer_vcpu_load_vgic(vcpu);
+
+	set_cntvoff(vtimer->cntvoff);
+
+	vtimer_restore_state(vcpu);
+
+	if (has_vhe())
+		disable_el1_phys_timer_access();
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
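The net effect is that GIC active-state programming, cntvoff setup, and the vtimer sysreg restore move from the per-entry flush path to vcpu_load time. A condensed view of the new lifecycle (a sketch; the surrounding run loop lives in virt/kvm/arm/arm.c and is paraphrased):

	kvm_timer_vcpu_load(vcpu);		/* phys_active, cntvoff, restore vtimer */
	while (ret > 0) {
		kvm_timer_flush_hwstate(vcpu);	/* only ptimer emulation remains */
		/* enter/exit guest */
		kvm_timer_sync_hwstate(vcpu);	/* detect a falling vtimer line */
	}
	kvm_timer_vcpu_put(vcpu);		/* save vtimer, set_cntvoff(0) */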
@@ -426,23 +460,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 		ptimer->irq.level != plevel;
 }
 
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-	/*
-	 * To prevent continuously exiting from the guest, we mask the
-	 * physical interrupt such that the guest can make forward progress.
-	 * Once we detect the output level being deasserted, we unmask the
-	 * interrupt again so that we exit from the guest when the timer
-	 * fires.
-	 */
-	if (vtimer->irq.level)
-		disable_percpu_irq(host_vtimer_irq);
-	else
-		enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 /**
  * kvm_timer_flush_hwstate - prepare timers before running the vcpu
  * @vcpu: The vcpu pointer
@@ -455,23 +472,61 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
 
-	kvm_timer_update_state(vcpu);
+	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+}
 
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-		kvm_timer_flush_hwstate_user(vcpu);
-	else
-		kvm_timer_flush_hwstate_vgic(vcpu);
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	set_cntvoff(vtimer->cntvoff);
-	timer_restore_state(vcpu);
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (has_vhe())
+		enable_el1_phys_timer_access();
+
+	vtimer_save_state(vcpu);
+
+	/*
+	 * The kernel may decide to run userspace after calling vcpu_put, so
+	 * we reset cntvoff to 0 to ensure a consistent read between user
+	 * accesses to the virtual counter and kernel access to the physical
+	 * counter.
+	 */
+	set_cntvoff(0);
+}
+
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+		kvm_vtimer_update_mask_user(vcpu);
+		return;
+	}
+
+	/*
+	 * If the guest disabled the timer without acking the interrupt, then
+	 * we must make sure the physical and virtual active states are in
+	 * sync by deactivating the physical interrupt, because otherwise we
+	 * wouldn't see the next timer interrupt in the host.
+	 */
+	if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+		int ret;
+		ret = irq_set_irqchip_state(host_vtimer_irq,
+					    IRQCHIP_STATE_ACTIVE,
+					    false);
+		WARN_ON(ret);
+	}
 }
 
 /**
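unmask_vtimer_irq() covers the case where we entered the guest with the line asserted and the guest then quiesced the timer instead of acking the interrupt. A concrete trigger, as illustrative guest code:

	/* Guest disables the vtimer rather than acking the interrupt: */
	msr	cntv_ctl_el0, xzr	/* ENABLE=0, so the output de-asserts */

On the next exit the host sees the level fall and must also deactivate the physical interrupt, since no EOI from the guest will ever do it.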
@@ -484,21 +539,27 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
 	 * This is to cancel the background timer for the physical timer
 	 * emulation if it is set.
 	 */
 	soft_timer_cancel(&timer->phys_timer, NULL);
 
-	timer_save_state(vcpu);
-	set_cntvoff(0);
-
 	/*
-	 * The guest could have modified the timer registers or the timer
-	 * could have expired, update the timer state.
+	 * If we entered the guest with the vtimer output asserted we have to
+	 * check if the guest has modified the timer so that we should lower
+	 * the line at this point.
 	 */
-	kvm_timer_update_state(vcpu);
+	if (vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+		if (!kvm_timer_should_fire(vtimer)) {
+			kvm_timer_update_irq(vcpu, false, vtimer);
+			unmask_vtimer_irq(vcpu);
+		}
+	}
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
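The lowering check reuses the pre-existing kvm_timer_should_fire() predicate against the freshly saved cnt_ctl/cnt_cval. Roughly (a sketch from memory, not part of this patch), it compares the saved compare value against the offset-adjusted counter:

	static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
	{
		u64 cval, now;

		if (!kvm_timer_irq_can_fire(timer_ctx))
			return false;

		cval = timer_ctx->cnt_cval;
		now = kvm_phys_timer_read() - timer_ctx->cntvoff;

		return cval <= now;
	}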