Skip to content

Commit d0659d9

Browse files
matosatti authored and bonzini committed
KVM: x86: add option to advance tscdeadline hrtimer expiration
For the hrtimer which emulates the tscdeadline timer in the guest, add an option to advance expiration, and busy spin on VM-entry waiting for the actual expiration time to elapse.

This allows achieving low latencies in cyclictest (or any scenario which requires strict timing regarding timer expiration).

Reduces average cyclictest latency from 12us to 8us on a Core i5 desktop.

Note: this option requires tuning to find the appropriate value for a particular hardware/guest combination. One method is to measure the average delay between apic_timer_fn and VM-entry. Another method is to start with 1000ns, and increase the value in, say, 500ns increments until the average cyclictest numbers stop decreasing.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 7c6a98d commit d0659d9

File tree

4 files changed

+66
-1
lines changed

4 files changed

+66
-1
lines changed

arch/x86/kvm/lapic.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <asm/page.h>
3434
#include <asm/current.h>
3535
#include <asm/apicdef.h>
36+
#include <asm/delay.h>
3637
#include <linux/atomic.h>
3738
#include <linux/jump_label.h>
3839
#include "kvm_cache_regs.h"
@@ -1073,6 +1074,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
10731074
{
10741075
struct kvm_vcpu *vcpu = apic->vcpu;
10751076
wait_queue_head_t *q = &vcpu->wq;
1077+
struct kvm_timer *ktimer = &apic->lapic_timer;
10761078

10771079
/*
10781080
* Note: KVM_REQ_PENDING_TIMER is implicitly checked in
@@ -1087,11 +1089,61 @@ static void apic_timer_expired(struct kvm_lapic *apic)
10871089

10881090
if (waitqueue_active(q))
10891091
wake_up_interruptible(q);
1092+
1093+
if (apic_lvtt_tscdeadline(apic))
1094+
ktimer->expired_tscdeadline = ktimer->tscdeadline;
1095+
}
1096+
1097+
/*
1098+
* On APICv, this test will cause a busy wait
1099+
* during a higher-priority task.
1100+
*/
1101+
1102+
/*
 * Check whether the vector programmed in this vcpu's LVT timer register
 * is currently reported as pending/injected.
 *
 * Returns false when the local APIC is not hardware-enabled.  Otherwise,
 * if the kvm_x86_ops->test_posted_interrupt hook is set, its result for
 * the timer vector is returned; if not, the result is whether the timer
 * vector's bit is set in the APIC_ISR register page.
 */
static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1103+
{
1104+
struct kvm_lapic *apic = vcpu->arch.apic;
1105+
u32 reg = kvm_apic_get_reg(apic, APIC_LVTT);
1106+
1107+
if (kvm_apic_hw_enabled(apic)) {
1108+
/* The low bits of LVTT selected by APIC_VECTOR_MASK hold the timer vector. */
int vec = reg & APIC_VECTOR_MASK;
1109+
1110+
/* Prefer the vendor hook when one is provided. */
if (kvm_x86_ops->test_posted_interrupt)
1111+
return kvm_x86_ops->test_posted_interrupt(vcpu, vec);
1112+
else {
1113+
/* No hook: fall back to testing the vector bit in the ISR. */
if (apic_test_vector(vec, apic->regs + APIC_ISR))
1114+
return true;
1115+
}
1116+
}
1117+
return false;
1118+
}
1119+
1120+
/*
 * Busy-wait until the guest TSC reaches the tscdeadline recorded in
 * lapic_timer.expired_tscdeadline.
 *
 * Returns immediately when the vcpu has no local APIC, when no expired
 * deadline is recorded (expired_tscdeadline == 0), or when
 * lapic_timer_int_injected() reports that the timer interrupt has not
 * been injected.  Otherwise the recorded deadline is consumed (reset to
 * 0) and, if the current guest TSC is still below it, the function
 * spins in __delay() for the remaining difference.
 */
void wait_lapic_expire(struct kvm_vcpu *vcpu)
1121+
{
1122+
struct kvm_lapic *apic = vcpu->arch.apic;
1123+
u64 guest_tsc, tsc_deadline;
1124+
1125+
if (!kvm_vcpu_has_lapic(vcpu))
1126+
return;
1127+
1128+
/* Zero means no expired deadline is waiting to be handled. */
if (apic->lapic_timer.expired_tscdeadline == 0)
1129+
return;
1130+
1131+
/* Keep the recorded deadline for a later call if nothing was injected yet. */
if (!lapic_timer_int_injected(vcpu))
1132+
return;
1133+
1134+
/* Consume the recorded deadline so the wait happens at most once per expiry. */
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1135+
apic->lapic_timer.expired_tscdeadline = 0;
1136+
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
1137+
1138+
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
/*
 * NOTE(review): the delta below is computed from guest TSC values but is
 * passed to __delay() unscaled; presumably this assumes guest and host
 * TSC tick at the same rate -- confirm.
 */
1139+
if (guest_tsc < tsc_deadline)
1140+
__delay(tsc_deadline - guest_tsc);
10901141
}
10911142

10921143
static void start_apic_timer(struct kvm_lapic *apic)
10931144
{
10941145
ktime_t now;
1146+
10951147
atomic_set(&apic->lapic_timer.pending, 0);
10961148

10971149
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
@@ -1137,6 +1189,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
11371189
/* lapic timer in tsc deadline mode */
11381190
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
11391191
u64 ns = 0;
1192+
ktime_t expire;
11401193
struct kvm_vcpu *vcpu = apic->vcpu;
11411194
unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
11421195
unsigned long flags;
@@ -1151,8 +1204,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
11511204
if (likely(tscdeadline > guest_tsc)) {
11521205
ns = (tscdeadline - guest_tsc) * 1000000ULL;
11531206
do_div(ns, this_tsc_khz);
1207+
expire = ktime_add_ns(now, ns);
1208+
expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
11541209
hrtimer_start(&apic->lapic_timer.timer,
1155-
ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
1210+
expire, HRTIMER_MODE_ABS);
11561211
} else
11571212
apic_timer_expired(apic);
11581213

arch/x86/kvm/lapic.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ struct kvm_timer {
1414
u32 timer_mode;
1515
u32 timer_mode_mask;
1616
u64 tscdeadline;
17+
u64 expired_tscdeadline;
1718
atomic_t pending; /* accumulated triggered timers */
1819
};
1920

@@ -170,4 +171,6 @@ static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
170171

171172
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
172173

174+
void wait_lapic_expire(struct kvm_vcpu *vcpu);
175+
173176
#endif

arch/x86/kvm/x86.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
108108
static u32 tsc_tolerance_ppm = 250;
109109
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
110110

111+
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
112+
unsigned int lapic_timer_advance_ns = 0;
113+
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
114+
111115
static bool backwards_tsc_observed = false;
112116

113117
#define KVM_NR_SHARED_MSRS 16
@@ -6312,6 +6316,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
63126316
}
63136317

63146318
trace_kvm_entry(vcpu->vcpu_id);
6319+
wait_lapic_expire(vcpu);
63156320
kvm_x86_ops->run(vcpu);
63166321

63176322
/*

arch/x86/kvm/x86.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,5 +170,7 @@ extern u64 kvm_supported_xcr0(void);
170170

171171
extern unsigned int min_timer_period_us;
172172

173+
extern unsigned int lapic_timer_advance_ns;
174+
173175
extern struct static_key kvm_no_apic_vcpu;
174176
#endif

0 commit comments

Comments
 (0)