
Commit f21dd49

vittyvk authored and bonzini committed
KVM: x86: hyperv: optimize sparse VP set processing
Rewrite kvm_hv_flush_tlb()/kvm_hv_send_ipi(), making them cleaner and somewhat more optimal.

hv_vcpu_in_sparse_set() is converted to sparse_set_to_vcpu_mask(), which copies sparse banks u64-at-a-time and then, depending on the num_mismatched_vp_indexes value, either returns immediately or performs the vp index to vcpu index conversion by walking all vCPUs.

To support the change and make kvm_hv_send_ipi() look similar to kvm_hv_flush_tlb(), kvm_send_ipi_to_many() is introduced.

Suggested-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent e6b6c48 commit f21dd49
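
For readers unfamiliar with the Hyper-V "sparse" VP set format this commit optimizes: valid_bank_mask is a 64-bit mask saying which 64-VP banks are present, and sparse_banks[] stores only the present banks, packed in ascending bank order. The standalone C sketch below (a userspace model with illustrative names, not the kernel code itself) shows the u64-at-a-time expansion into a flat per-VP bitmap that the new sparse_set_to_vcpu_mask() performs:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_BANKS 64 /* valid_bank_mask is one u64, so at most 64 banks */

/*
 * Expand a Hyper-V sparse VP set into a flat per-bank bitmap.
 * sparse_banks[] only stores banks whose bit is set in
 * valid_bank_mask, in ascending bank order, so bank k's payload
 * sits at index popcount(valid_bank_mask below k). Copying one
 * u64 per present bank avoids any per-VP work.
 */
static void sparse_set_to_vp_bitmap(uint64_t valid_bank_mask,
                                    const uint64_t *sparse_banks,
                                    uint64_t *vp_bitmap /* MAX_BANKS words */)
{
        int sbank = 0;

        memset(vp_bitmap, 0, MAX_BANKS * sizeof(*vp_bitmap));
        for (int bank = 0; bank < MAX_BANKS; bank++)
                if (valid_bank_mask & (1ULL << bank))
                        vp_bitmap[bank] = sparse_banks[sbank++];
}

int main(void)
{
        /* Banks 0 and 2 present: VPs 0 and 5, plus VP 130 (= 2*64 + 2). */
        uint64_t valid_bank_mask = 0x5;
        uint64_t sparse_banks[] = { (1ULL << 0) | (1ULL << 5), 1ULL << 2 };
        uint64_t vp_bitmap[MAX_BANKS];

        sparse_set_to_vp_bitmap(valid_bank_mask, sparse_banks, vp_bitmap);

        for (int bank = 0; bank < MAX_BANKS; bank++)
                for (int bit = 0; bit < 64; bit++)
                        if (vp_bitmap[bank] & (1ULL << bit))
                                printf("VP %d selected\n", bank * 64 + bit);
        return 0;
}
```

Running it prints VP 0, VP 5 and VP 130: bank 2 starts at VP 128, so bit 2 of the second stored bank maps to 2 * 64 + 2. The inner copy loop mirrors the kernel's `for_each_set_bit(bank, ...) vp_bitmap[bank] = sparse_banks[sbank++];`, after which the kernel either uses vp_bitmap directly (when every vCPU has vp_index == vcpu_idx) or translates it by walking all vCPUs.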

File tree

1 file changed (+67, -98 lines)


arch/x86/kvm/hyperv.c

Lines changed: 67 additions & 98 deletions
```diff
@@ -36,6 +36,8 @@
 
 #include "trace.h"
 
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+
 static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
 {
         return atomic64_read(&synic->sint[sint]);
@@ -1277,37 +1279,47 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
         return kvm_hv_get_msr(vcpu, msr, pdata, host);
 }
 
-static __always_inline bool hv_vcpu_in_sparse_set(struct kvm_vcpu_hv *hv_vcpu,
-                                                  u64 sparse_banks[],
-                                                  u64 valid_bank_mask)
+static __always_inline unsigned long *sparse_set_to_vcpu_mask(
+        struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
+        u64 *vp_bitmap, unsigned long *vcpu_bitmap)
 {
-        int bank = hv_vcpu->vp_index / 64, sbank;
-
-        if (bank >= 64)
-                return false;
+        struct kvm_hv *hv = &kvm->arch.hyperv;
+        struct kvm_vcpu *vcpu;
+        int i, bank, sbank = 0;
 
-        if (!(valid_bank_mask & BIT_ULL(bank)))
-                return false;
+        memset(vp_bitmap, 0,
+               KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+        for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
+                         KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
+                vp_bitmap[bank] = sparse_banks[sbank++];
 
-        /* Sparse bank number equals to the number of set bits before it */
-        sbank = bitmap_weight((unsigned long *)&valid_bank_mask, bank);
+        if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
+                /* for all vcpus vp_index == vcpu_idx */
+                return (unsigned long *)vp_bitmap;
+        }
 
-        return !!(sparse_banks[sbank] & BIT_ULL(hv_vcpu->vp_index % 64));
+        bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+        kvm_for_each_vcpu(i, vcpu, kvm) {
+                if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
+                             (unsigned long *)vp_bitmap))
+                        __set_bit(i, vcpu_bitmap);
+        }
+        return vcpu_bitmap;
 }
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
                             u16 rep_cnt, bool ex)
 {
         struct kvm *kvm = current_vcpu->kvm;
-        struct kvm_hv *hv = &kvm->arch.hyperv;
         struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
         struct hv_tlb_flush_ex flush_ex;
         struct hv_tlb_flush flush;
-        struct kvm_vcpu *vcpu;
-        unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
+        u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+        DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+        unsigned long *vcpu_mask;
         u64 valid_bank_mask;
         u64 sparse_banks[64];
-        int sparse_banks_len, i, bank, sbank;
+        int sparse_banks_len;
         bool all_cpus;
 
         if (!ex) {
@@ -1350,73 +1362,58 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
                 return HV_STATUS_INVALID_HYPERCALL_INPUT;
         }
 
-        /*
-         * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
-         * analyze it here, flush TLB regardless of the specified address space.
-         */
         cpumask_clear(&hv_vcpu->tlb_flush);
 
-        if (all_cpus) {
-                kvm_make_vcpus_request_mask(kvm,
-                                    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
-                                    NULL, &hv_vcpu->tlb_flush);
-                goto ret_success;
-        }
-
-        if (atomic_read(&hv->num_mismatched_vp_indexes)) {
-                kvm_for_each_vcpu(i, vcpu, kvm) {
-                        if (hv_vcpu_in_sparse_set(&vcpu->arch.hyperv,
-                                                  sparse_banks,
-                                                  valid_bank_mask))
-                                __set_bit(i, vcpu_bitmap);
-                }
-                goto flush_request;
-        }
+        vcpu_mask = all_cpus ? NULL :
+                sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
                                        vp_bitmap, vcpu_bitmap);
 
         /*
-         * num_mismatched_vp_indexes is zero so every vcpu has
-         * vp_index == vcpu_idx.
+         * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
+         * analyze it here, flush TLB regardless of the specified address space.
          */
-        sbank = 0;
-        for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
-                         BITS_PER_LONG) {
-                for_each_set_bit(i,
-                                 (unsigned long *)&sparse_banks[sbank],
-                                 BITS_PER_LONG) {
-                        u32 vp_index = bank * 64 + i;
-
-                        /* A non-existent vCPU was specified */
-                        if (vp_index >= KVM_MAX_VCPUS)
-                                return HV_STATUS_INVALID_HYPERCALL_INPUT;
-
-                        __set_bit(vp_index, vcpu_bitmap);
-                }
-                sbank++;
-        }
-
-flush_request:
         kvm_make_vcpus_request_mask(kvm,
                                     KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
-                                    vcpu_bitmap, &hv_vcpu->tlb_flush);
+                                    vcpu_mask, &hv_vcpu->tlb_flush);
 
 ret_success:
         /* We always do full TLB flush, set rep_done = rep_cnt. */
         return (u64)HV_STATUS_SUCCESS |
                 ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
+static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
+                                 unsigned long *vcpu_bitmap)
+{
+        struct kvm_lapic_irq irq = {
+                .delivery_mode = APIC_DM_FIXED,
+                .vector = vector
+        };
+        struct kvm_vcpu *vcpu;
+        int i;
+
+        kvm_for_each_vcpu(i, vcpu, kvm) {
+                if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+                        continue;
+
+                /* We fail only when APIC is disabled */
+                kvm_apic_set_irq(vcpu, &irq, NULL);
+        }
+}
+
 static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
                            bool ex, bool fast)
 {
         struct kvm *kvm = current_vcpu->kvm;
-        struct kvm_hv *hv = &kvm->arch.hyperv;
         struct hv_send_ipi_ex send_ipi_ex;
         struct hv_send_ipi send_ipi;
-        struct kvm_vcpu *vcpu;
+        u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+        DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+        unsigned long *vcpu_mask;
         unsigned long valid_bank_mask;
         u64 sparse_banks[64];
-        int sparse_banks_len, bank, i, sbank;
-        struct kvm_lapic_irq irq = {.delivery_mode = APIC_DM_FIXED};
+        int sparse_banks_len;
+        u32 vector;
         bool all_cpus;
 
         if (!ex) {
@@ -1425,18 +1422,18 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
                                                     sizeof(send_ipi))))
                                 return HV_STATUS_INVALID_HYPERCALL_INPUT;
                         sparse_banks[0] = send_ipi.cpu_mask;
-                        irq.vector = send_ipi.vector;
+                        vector = send_ipi.vector;
                 } else {
                         /* 'reserved' part of hv_send_ipi should be 0 */
                         if (unlikely(ingpa >> 32 != 0))
                                 return HV_STATUS_INVALID_HYPERCALL_INPUT;
                         sparse_banks[0] = outgpa;
-                        irq.vector = (u32)ingpa;
+                        vector = (u32)ingpa;
                 }
                 all_cpus = false;
                 valid_bank_mask = BIT_ULL(0);
 
-                trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]);
+                trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
         } else {
                 if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
                                             sizeof(send_ipi_ex))))
@@ -1446,7 +1443,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
                                        send_ipi_ex.vp_set.format,
                                        send_ipi_ex.vp_set.valid_bank_mask);
 
-                irq.vector = send_ipi_ex.vector;
+                vector = send_ipi_ex.vector;
                 valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
                 sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
                         sizeof(sparse_banks[0]);
@@ -1465,42 +1462,14 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
                 return HV_STATUS_INVALID_HYPERCALL_INPUT;
         }
 
-        if ((irq.vector < HV_IPI_LOW_VECTOR) ||
-            (irq.vector > HV_IPI_HIGH_VECTOR))
+        if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
                 return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-        if (all_cpus || atomic_read(&hv->num_mismatched_vp_indexes)) {
-                kvm_for_each_vcpu(i, vcpu, kvm) {
-                        if (all_cpus || hv_vcpu_in_sparse_set(
-                                    &vcpu->arch.hyperv, sparse_banks,
-                                    valid_bank_mask)) {
-                                /* We fail only when APIC is disabled */
-                                kvm_apic_set_irq(vcpu, &irq, NULL);
-                        }
-                }
-                goto ret_success;
-        }
+        vcpu_mask = all_cpus ? NULL :
+                sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+                                        vp_bitmap, vcpu_bitmap);
 
-        /*
-         * num_mismatched_vp_indexes is zero so every vcpu has
-         * vp_index == vcpu_idx.
-         */
-        sbank = 0;
-        for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64) {
-                for_each_set_bit(i, (unsigned long *)&sparse_banks[sbank], 64) {
-                        u32 vp_index = bank * 64 + i;
-                        struct kvm_vcpu *vcpu =
-                                get_vcpu_by_vpidx(kvm, vp_index);
-
-                        /* Unknown vCPU specified */
-                        if (!vcpu)
-                                continue;
-
-                        /* We fail only when APIC is disabled */
-                        kvm_apic_set_irq(vcpu, &irq, NULL);
-                }
-                sbank++;
-        }
+        kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
 
 ret_success:
         return HV_STATUS_SUCCESS;
```
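
One design choice worth noting in the new code: both kvm_make_vcpus_request_mask() and kvm_send_ipi_to_many() accept a NULL mask to mean "all vCPUs", which is what lets the all_cpus case collapse into the same call instead of keeping a separate branch. A minimal userspace model of that convention (hypothetical names, illustrative only):

```c
#include <stdio.h>

#define NVCPUS 4
#define BITS_PER_LONG (8 * sizeof(unsigned long))

/*
 * Model of the NULL-means-everyone convention: a NULL mask runs the
 * callback on every vCPU; otherwise only on vCPUs whose bit is set.
 */
static void for_each_selected(const unsigned long *mask,
                              void (*fn)(int vcpu_idx))
{
        for (int i = 0; i < NVCPUS; i++) {
                if (mask && !(mask[i / BITS_PER_LONG] &
                              (1UL << (i % BITS_PER_LONG))))
                        continue;
                fn(i);
        }
}

static void send_ipi(int vcpu_idx)
{
        printf("IPI -> vCPU %d\n", vcpu_idx);
}

int main(void)
{
        unsigned long mask[1] = { 0x5 }; /* vCPUs 0 and 2 */

        for_each_selected(mask, send_ipi); /* delivers to vCPUs 0 and 2 */
        for_each_selected(NULL, send_ipi); /* delivers to all vCPUs */
        return 0;
}
```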

0 commit comments
