@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
+#include <linux/syscore_ops.h>

 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -1471,38 +1472,130 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }

-void __ref xen_hvm_init_shared_info(void)
+#ifdef CONFIG_XEN_PVHVM
+/*
+ * The pfn containing the shared_info is located somewhere in RAM. This
+ * will cause trouble if the current kernel is doing a kexec boot into a
+ * new kernel. The new kernel (and its startup code) cannot know where
+ * the pfn is, so it cannot reserve the page. The hypervisor will
+ * continue to update the pfn, and as a result memory corruption occurs
+ * in the new kernel.
+ *
+ * One way to work around this issue is to allocate a page in the
+ * xen-platform pci device's BAR memory range. But pci init is done very
+ * late and the shared_info page is already in use very early to read
+ * the pvclock. So moving the pfn from RAM to MMIO is racy because some
+ * code paths on other vcpus could access the pfn during the small
+ * window when the old pfn is moved to the new pfn. There is even a
+ * small window where the old pfn is not backed by an mfn, and during
+ * that time all reads return -1.
+ *
+ * Because it is not known upfront where the MMIO region is located, it
+ * cannot be used right from the start in xen_hvm_init_shared_info.
+ *
+ * To minimise trouble, the move of the pfn is done shortly before kexec.
+ * This does not eliminate the race because all vcpus are still online
+ * when the syscore_ops are called. But hopefully there is no work
+ * pending at this point in time. Also, the syscore_op is run last,
+ * which reduces the risk further.
+ */
+
+static struct shared_info *xen_hvm_shared_info;
+
+static void xen_hvm_connect_shared_info(unsigned long pfn)
 {
-	int cpu;
 	struct xen_add_to_physmap xatp;
-	static struct shared_info *shared_info_page = 0;

-	if (!shared_info_page)
-		shared_info_page = (struct shared_info *)
-			extend_brk(PAGE_SIZE, PAGE_SIZE);
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+	xatp.gpfn = pfn;
 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 		BUG();

-	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+}
+static void xen_hvm_set_shared_info(struct shared_info *sip)
+{
+	int cpu;
+
+	HYPERVISOR_shared_info = sip;

 	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
 	 * page, we use it in the event channel upcall and in some pvclock
 	 * related functions. We don't need the vcpu_info placement
 	 * optimizations because we don't use any pv_mmu or pv_irq op on
 	 * HVM.
-	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
-	 * online but xen_hvm_init_shared_info is run at resume time too and
+	 * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
+	 * online but xen_hvm_set_shared_info is run at resume time too and
 	 * in that case multiple vcpus might be online. */
 	for_each_online_cpu(cpu) {
 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 	}
 }
 
-#ifdef CONFIG_XEN_PVHVM
+/* Reconnect the shared_info pfn to an mfn */
+void xen_hvm_resume_shared_info(void)
+{
+	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
+}
+
+#ifdef CONFIG_KEXEC
+static struct shared_info *xen_hvm_shared_info_kexec;
+static unsigned long xen_hvm_shared_info_pfn_kexec;
+
+/* Remember a pfn in MMIO space for kexec reboot */
+void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
+{
+	xen_hvm_shared_info_kexec = sip;
+	xen_hvm_shared_info_pfn_kexec = pfn;
+}
+
+static void xen_hvm_syscore_shutdown(void)
+{
+	struct xen_memory_reservation reservation = {
+		.domid = DOMID_SELF,
+		.nr_extents = 1,
+	};
+	unsigned long prev_pfn;
+	int rc;
+
+	if (!xen_hvm_shared_info_kexec)
+		return;
+
+	prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
+	set_xen_guest_handle(reservation.extent_start, &prev_pfn);
+
+	/* Move the pfn to MMIO, disconnecting the previous pfn from its mfn */
+	xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
+
+	/* Update pointers; the following hypercall is also a memory barrier */
+	xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
+
+	/* Allocate a new mfn for the previous pfn */
+	do {
+		rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+		if (rc == 0)
+			msleep(123);
+	} while (rc == 0);
+
+	/* Make sure the previous pfn is really connected to a (new) mfn */
+	BUG_ON(rc != 1);
+}
+
+static struct syscore_ops xen_hvm_syscore_ops = {
+	.shutdown = xen_hvm_syscore_shutdown,
+};
+#endif
+
+/* Use a pfn in RAM; it may be moved to MMIO before kexec. */
+static void __init xen_hvm_init_shared_info(void)
+{
+	/* Remember the pointer for resume */
+	xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
+	xen_hvm_set_shared_info(xen_hvm_shared_info);
+}
+
 static void __init init_hvm_pv_info(void)
 {
 	int major, minor;
@@ -1553,6 +1646,9 @@ static void __init xen_hvm_guest_init(void)
 	init_hvm_pv_info();

 	xen_hvm_init_shared_info();
+#ifdef CONFIG_KEXEC
+	register_syscore_ops(&xen_hvm_syscore_ops);
+#endif

 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
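
The patch only adds xen_hvm_prepare_kexec(); the MMIO page it remembers has to be supplied by the xen-platform PCI device, as the comment block in the patch explains. A minimal sketch of what such a caller could look like is below. The helper name, the choice of BAR 1, and the use of the BAR's last page are assumptions for illustration, not part of this patch.

/* Illustrative only: how the xen-platform PCI driver might hand one page
 * of its MMIO BAR to xen_hvm_prepare_kexec(). The BAR index and the use
 * of the BAR's last page are assumptions, not taken from this patch. */
static void __devinit platform_pci_prepare_kexec(struct pci_dev *pdev)
{
	resource_size_t addr;
	struct shared_info *sip;

	/* Last page of the (assumed) MMIO BAR 1 of the xen-platform device */
	addr = pci_resource_start(pdev, 1) + pci_resource_len(pdev, 1) - PAGE_SIZE;

	/* Map the MMIO page so the kernel can use it as shared_info later */
	sip = (struct shared_info *)ioremap(addr, PAGE_SIZE);
	if (!sip)
		return;

	/* Remember the mapping and its pfn; the syscore shutdown hook added
	 * above moves shared_info here shortly before kexec. */
	xen_hvm_prepare_kexec(sip, addr >> PAGE_SHIFT);
}

Because xen_hvm_syscore_shutdown() returns early when xen_hvm_prepare_kexec() was never called, guests without such a caller simply keep the shared_info page in RAM.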