
Commit b103cc3

Christoffer Dall authored and committed
KVM: arm/arm64: Avoid timer save/restore in vcpu entry/exit
We don't need to save and restore the hardware timer state and examine
if it generates interrupts on every entry/exit to the guest. The timer
hardware is perfectly capable of telling us when it has expired by
signaling interrupts.

When taking a vtimer interrupt in the host, we don't want to mess with
the timer configuration, we just want to forward the physical interrupt
to the guest as a virtual interrupt. We can use the split priority drop
and deactivate feature of the GIC to do this, which leaves an EOI'ed
interrupt active on the physical distributor, making sure we don't keep
taking timer interrupts which would prevent the guest from running. We
can then forward the physical interrupt to the VM using the HW bit in
the LR of the GIC, like we do already, which lets the guest directly
deactivate both the physical and virtual timer simultaneously, allowing
the timer hardware to exit the VM and generate a new physical interrupt
when the timer output is again asserted later on.

We do need to capture this state when migrating VCPUs between physical
CPUs, however, which we use the vcpu put/load functions for, which are
called through preempt notifiers whenever the thread is scheduled away
from the CPU or called directly if we return from the ioctl to
userspace.

One caveat is that we have to save and restore the timer state in both
kvm_timer_vcpu_[put/load] and kvm_timer_[schedule/unschedule], because
we can have the following flows:

  1. kvm_vcpu_block
  2. kvm_timer_schedule
  3. schedule
  4. kvm_timer_vcpu_put (preempt notifier)
  5. schedule (vcpu thread gets scheduled back)
  6. kvm_timer_vcpu_load (preempt notifier)
  7. kvm_timer_unschedule

And a version where we don't actually call schedule:

  1. kvm_vcpu_block
  2. kvm_timer_schedule
  7. kvm_timer_unschedule

Since kvm_timer_[schedule/unschedule] may not be followed by put/load,
but put/load also may be called independently, we call the timer
save/restore functions from both paths. Since they rely on the loaded
flag to never save/restore when unnecessary, this doesn't cause any
harm, and we ensure that all invocations of either set of functions
work as intended.

An added benefit beyond not having to read and write the timer sysregs
on every entry and exit is that we no longer have to actively write the
active state to the physical distributor, because we configured the irq
for the vtimer to only get a priority drop when handling the interrupt
in the GIC driver (we called irq_set_vcpu_affinity()), and the
interrupt stays active after firing on the host.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
1 parent 40f4cba commit b103cc3
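
The interplay of the two save/restore paths can be modeled outside the
kernel. Below is a minimal standalone sketch (hypothetical names, not
kernel code) of the loaded-flag protocol from the commit message:
whichever of schedule/unschedule and put/load runs first does the real
work, and the redundant call on the other path becomes a harmless no-op,
so both flows above preserve the timer state.

#include <stdbool.h>
#include <stdio.h>

struct timer_ctx {
        bool loaded;        /* true: state lives in hw regs; false: in memory */
        unsigned hw_reg;    /* stand-in for cntv_ctl/cntv_cval */
        unsigned saved;     /* stand-in for the in-memory copy */
};

static void timer_save(struct timer_ctx *t)
{
        if (!t->loaded)
                return;             /* already in memory: nothing to do */
        t->saved = t->hw_reg;
        t->hw_reg = 0;              /* model "disable the virtual timer" */
        t->loaded = false;
}

static void timer_restore(struct timer_ctx *t)
{
        if (t->loaded)
                return;             /* already on hardware: nothing to do */
        t->hw_reg = t->saved;
        t->loaded = true;
}

int main(void)
{
        struct timer_ctx t = { .loaded = true, .hw_reg = 42 };

        /* Flow with a real context switch (steps from the commit message). */
        timer_save(&t);             /* 2. kvm_timer_schedule */
        timer_save(&t);             /* 4. kvm_timer_vcpu_put: no-op, already saved */
        timer_restore(&t);          /* 6. kvm_timer_vcpu_load */
        timer_restore(&t);          /* 7. kvm_timer_unschedule: no-op, already loaded */

        /* Flow without schedule: only steps 2 and 7 run, put/load never do. */
        timer_save(&t);
        timer_restore(&t);

        printf("state intact: %u, loaded=%d\n", t.hw_reg, t.loaded);
        return 0;
}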

3 files changed (+178, -94 lines)


include/kvm/arm_arch_timer.h
Lines changed: 14 additions & 2 deletions

@@ -31,8 +31,15 @@ struct arch_timer_context {
 	/* Timer IRQ */
 	struct kvm_irq_level irq;
 
-	/* Active IRQ state caching */
-	bool active_cleared_last;
+	/*
+	 * We have multiple paths which can save/restore the timer state
+	 * onto the hardware, so we need some way of keeping track of
+	 * where the latest state is.
+	 *
+	 * loaded == true:  State is loaded on the hardware registers.
+	 * loaded == false: State is stored in memory.
+	 */
+	bool loaded;
 
 	/* Virtual offset */
 	u64 cntvoff;
@@ -78,10 +85,15 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
 
 u64 kvm_phys_timer_read(void);
 
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
 
 void kvm_timer_init_vhe(void);
 
 #define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.vtimer)
 #define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.ptimer)
+
+void enable_el1_phys_timer_access(void);
+void disable_el1_phys_timer_access(void);
+
 #endif

virt/kvm/arm/arch_timer.c
Lines changed: 149 additions & 88 deletions

@@ -46,10 +46,9 @@ static const struct kvm_irq_level default_vtimer_irq = {
 	.level = 1,
 };
 
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
-	vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+				 struct arch_timer_context *timer_ctx);
 
 u64 kvm_phys_timer_read(void)
 {
@@ -69,17 +68,45 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
 	cancel_work_sync(work);
 }
 
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
-	 * We disable the timer in the world switch and let it be
-	 * handled by kvm_timer_sync_hwstate(). Getting a timer
-	 * interrupt at this point is a sure sign of some major
-	 * breakage.
+	 * When using a userspace irqchip with the architected timers, we must
+	 * prevent continuously exiting from the guest, and therefore mask the
+	 * physical interrupt by disabling it on the host interrupt controller
+	 * when the virtual level is high, such that the guest can make
+	 * forward progress. Once we detect the output level being
+	 * de-asserted, we unmask the interrupt again so that we exit from the
+	 * guest when the timer fires.
 	 */
-	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+	if (vtimer->irq.level)
+		disable_percpu_irq(host_vtimer_irq);
+	else
+		enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+	struct arch_timer_context *vtimer;
+
+	if (!vcpu) {
+		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
+		return IRQ_NONE;
+	}
+	vtimer = vcpu_vtimer(vcpu);
+
+	if (!vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		if (kvm_timer_irq_can_fire(vtimer))
+			kvm_timer_update_irq(vcpu, true, vtimer);
+	}
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_vtimer_update_mask_user(vcpu);
+
 	return IRQ_HANDLED;
 }
 
@@ -215,7 +242,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 {
 	int ret;
 
-	timer_ctx->active_cleared_last = false;
 	timer_ctx->irq.level = new_level;
 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
 				   timer_ctx->irq.level);
@@ -271,10 +297,16 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu,
 	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx));
 }
 
-static void timer_save_state(struct kvm_vcpu *vcpu)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (!vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
@@ -283,6 +315,10 @@ static void timer_save_state(struct kvm_vcpu *vcpu)
 
 	/* Disable the virtual timer */
 	write_sysreg_el0(0, cntv_ctl);
+
+	vtimer->loaded = false;
+out:
+	local_irq_restore(flags);
 }
 
 /*
@@ -296,6 +332,8 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
+	vtimer_save_state(vcpu);
+
 	/*
 	 * No need to schedule a background timer if any guest timer has
	 * already expired, because kvm_vcpu_block will return before putting
@@ -318,22 +356,34 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void timer_restore_state(struct kvm_vcpu *vcpu)
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (vtimer->loaded)
+		goto out;
 
 	if (timer->enabled) {
 		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
 		isb();
 		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 	}
+
+	vtimer->loaded = true;
+out:
+	local_irq_restore(flags);
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
+	vtimer_restore_state(vcpu);
+
 	soft_timer_cancel(&timer->bg_timer, &timer->expired);
 }
 
@@ -352,61 +402,45 @@ static void set_cntvoff(u64 cntvoff)
 	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	bool phys_active;
 	int ret;
 
-	/*
-	 * If we enter the guest with the virtual input level to the VGIC
-	 * asserted, then we have already told the VGIC what we need to, and
-	 * we don't need to exit from the guest until the guest deactivates
-	 * the already injected interrupt, so therefore we should set the
-	 * hardware active state to prevent unnecessary exits from the guest.
-	 *
-	 * Also, if we enter the guest with the virtual timer interrupt active,
-	 * then it must be active on the physical distributor, because we set
-	 * the HW bit and the guest must be able to deactivate the virtual and
-	 * physical interrupt at the same time.
-	 *
-	 * Conversely, if the virtual input level is deasserted and the virtual
-	 * interrupt is not active, then always clear the hardware active state
-	 * to ensure that hardware interrupts from the timer triggers a guest
-	 * exit.
-	 */
 	phys_active = vtimer->irq.level ||
-			kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
-	/*
-	 * We want to avoid hitting the (re)distributor as much as
-	 * possible, as this is a potentially expensive MMIO access
-	 * (not to mention locks in the irq layer), and a solution for
-	 * this is to cache the "active" state in memory.
-	 *
-	 * Things to consider: we cannot cache an "active set" state,
-	 * because the HW can change this behind our back (it becomes
-	 * "clear" in the HW). We must then restrict the caching to
-	 * the "clear" state.
-	 *
-	 * The cache is invalidated on:
-	 * - vcpu put, indicating that the HW cannot be trusted to be
-	 *   in a sane state on the next vcpu load,
-	 * - any change in the interrupt state
-	 *
-	 * Usage conditions:
-	 * - cached value is "active clear"
-	 * - value to be programmed is "active clear"
-	 */
-	if (vtimer->active_cleared_last && !phys_active)
-		return;
+		      kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
 	ret = irq_set_irqchip_state(host_vtimer_irq,
 				    IRQCHIP_STATE_ACTIVE,
 				    phys_active);
 	WARN_ON(ret);
+}
 
-	vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+	kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		kvm_timer_vcpu_load_user(vcpu);
+	else
+		kvm_timer_vcpu_load_vgic(vcpu);
+
+	set_cntvoff(vtimer->cntvoff);
+
+	vtimer_restore_state(vcpu);
+
+	if (has_vhe())
+		disable_el1_phys_timer_access();
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -426,23 +460,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 		ptimer->irq.level != plevel;
 }
 
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-	/*
-	 * To prevent continuously exiting from the guest, we mask the
-	 * physical interrupt such that the guest can make forward progress.
-	 * Once we detect the output level being deasserted, we unmask the
-	 * interrupt again so that we exit from the guest when the timer
-	 * fires.
-	 */
-	if (vtimer->irq.level)
-		disable_percpu_irq(host_vtimer_irq);
-	else
-		enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 /**
  * kvm_timer_flush_hwstate - prepare timers before running the vcpu
  * @vcpu: The vcpu pointer
@@ -455,23 +472,61 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
 
-	kvm_timer_update_state(vcpu);
+	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+}
 
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-		kvm_timer_flush_hwstate_user(vcpu);
-	else
-		kvm_timer_flush_hwstate_vgic(vcpu);
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-	set_cntvoff(vtimer->cntvoff);
-	timer_restore_state(vcpu);
+	if (unlikely(!timer->enabled))
+		return;
+
+	if (has_vhe())
+		enable_el1_phys_timer_access();
+
+	vtimer_save_state(vcpu);
+
+	/*
+	 * The kernel may decide to run userspace after calling vcpu_put, so
+	 * we reset cntvoff to 0 to ensure a consistent read between user
+	 * accesses to the virtual counter and kernel access to the physical
+	 * counter.
+	 */
+	set_cntvoff(0);
+}
+
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+		kvm_vtimer_update_mask_user(vcpu);
+		return;
+	}
+
+	/*
+	 * If the guest disabled the timer without acking the interrupt, then
+	 * we must make sure the physical and virtual active states are in
+	 * sync by deactivating the physical interrupt, because otherwise we
+	 * wouldn't see the next timer interrupt in the host.
+	 */
+	if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+		int ret;
+		ret = irq_set_irqchip_state(host_vtimer_irq,
+					    IRQCHIP_STATE_ACTIVE,
+					    false);
+		WARN_ON(ret);
+	}
 }
 
 /**
@@ -484,21 +539,27 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
 	/*
 	 * This is to cancel the background timer for the physical timer
 	 * emulation if it is set.
 	 */
 	soft_timer_cancel(&timer->phys_timer, NULL);
 
-	timer_save_state(vcpu);
-	set_cntvoff(0);
-
 	/*
-	 * The guest could have modified the timer registers or the timer
-	 * could have expired, update the timer state.
+	 * If we entered the guest with the vtimer output asserted we have to
+	 * check if the guest has modified the timer so that we should lower
+	 * the line at this point.
 	 */
-	kvm_timer_update_state(vcpu);
+	if (vtimer->irq.level) {
+		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+		if (!kvm_timer_should_fire(vtimer)) {
+			kvm_timer_update_irq(vcpu, false, vtimer);
+			unmask_vtimer_irq(vcpu);
+		}
+	}
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)