Skip to content

Commit d2f7cbe

Browse files
suryasaimadhu authored and Matt Fleming committed
x86/efi: Runtime services virtual mapping
We map the EFI regions needed for runtime services non-contiguously, with preserved alignment on virtual addresses starting from -4G down for a total max space of 64G. This way, we provide for stable runtime services addresses across kernels so that a kexec'd kernel can still use them.

Thus, they're mapped in a separate pagetable so that we don't pollute the kernel namespace.

Add an efi= kernel command line parameter for passing miscellaneous options and chicken bits from the command line.

While at it, add a chicken bit called "efi=old_map" which can be used as a fallback to the old runtime services mapping method in case there's some b0rkage with a particular EFI implementation (haha, it is hard to hold up the sarcasm here...).

Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
1 parent 82f0712 commit d2f7cbe

File tree

9 files changed

+300
-47
lines changed

9 files changed

+300
-47
lines changed

Documentation/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
835835
edd= [EDD]
836836
Format: {"off" | "on" | "skip[mbr]"}
837837

838+
efi= [EFI]
839+
Format: { "old_map" }
840+
old_map [X86-64]: switch to the old ioremap-based EFI
841+
runtime services mapping. 32-bit still uses this one by
842+
default.
843+
838844
efi_no_storage_paranoia [EFI; X86]
839845
Using this parameter you can use more than 50% of
840846
your efi variable storage. Use this parameter only if

Documentation/x86/x86_64/mm.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,11 @@ reference.
2828
Current X86-64 implementations only support 40 bits of address space,
2929
but we support up to 46 bits. This expands into MBZ space in the page tables.
3030

31+
->trampoline_pgd:
32+
33+
We map EFI runtime services in the aforementioned PGD in the virtual
34+
range of 64GB (arbitrarily set, can be raised if needed)
35+
36+
0xffffffef00000000 - 0xffffffff00000000
37+
3138
-Andi Kleen, Jul 2004

arch/x86/include/asm/efi.h

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
11
#ifndef _ASM_X86_EFI_H
22
#define _ASM_X86_EFI_H
33

4+
/*
5+
* We map the EFI regions needed for runtime services non-contiguously,
6+
* with preserved alignment on virtual addresses starting from -4G down
7+
* for a total max space of 64G. This way, we provide for stable runtime
8+
* services addresses across kernels so that a kexec'd kernel can still
9+
* use them.
10+
*
11+
* This is the main reason why we're doing stable VA mappings for RT
12+
* services.
13+
*
14+
* This flag is used in conjunction with a chicken bit called
15+
* "efi=old_map" which can be used as a fallback to the old runtime
16+
* services mapping method in case there's some b0rkage with a
17+
* particular EFI implementation (haha, it is hard to hold up the
18+
* sarcasm here...).
19+
*/
20+
#define EFI_OLD_MEMMAP EFI_ARCH_1
21+
422
#ifdef CONFIG_X86_32
523

624
#define EFI_LOADER_SIGNATURE "EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
6987
efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
7088
(u64)(a4), (u64)(a5), (u64)(a6))
7189

90+
#define _efi_call_virtX(x, f, ...) \
91+
({ \
92+
efi_status_t __s; \
93+
\
94+
efi_sync_low_kernel_mappings(); \
95+
preempt_disable(); \
96+
__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
97+
preempt_enable(); \
98+
__s; \
99+
})
100+
72101
#define efi_call_virt0(f) \
73-
efi_call0((efi.systab->runtime->f))
74-
#define efi_call_virt1(f, a1) \
75-
efi_call1((efi.systab->runtime->f), (u64)(a1))
76-
#define efi_call_virt2(f, a1, a2) \
77-
efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
78-
#define efi_call_virt3(f, a1, a2, a3) \
79-
efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
80-
(u64)(a3))
81-
#define efi_call_virt4(f, a1, a2, a3, a4) \
82-
efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
83-
(u64)(a3), (u64)(a4))
84-
#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
85-
efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
86-
(u64)(a3), (u64)(a4), (u64)(a5))
87-
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
88-
efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
89-
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
102+
_efi_call_virtX(0, f)
103+
#define efi_call_virt1(f, a1) \
104+
_efi_call_virtX(1, f, (u64)(a1))
105+
#define efi_call_virt2(f, a1, a2) \
106+
_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
107+
#define efi_call_virt3(f, a1, a2, a3) \
108+
_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
109+
#define efi_call_virt4(f, a1, a2, a3, a4) \
110+
_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
111+
#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
112+
_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
113+
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
114+
_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
90115

91116
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
92117
u32 type, u64 attribute);
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
95120

96121
extern int add_efi_memmap;
97122
extern unsigned long x86_efi_facility;
123+
extern struct efi_scratch efi_scratch;
98124
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
99125
extern int efi_memblock_x86_reserve_range(void);
100126
extern void efi_call_phys_prelog(void);
101127
extern void efi_call_phys_epilog(void);
102128
extern void efi_unmap_memmap(void);
103129
extern void efi_memory_uc(u64 addr, unsigned long size);
130+
extern void __init efi_map_region(efi_memory_desc_t *md);
131+
extern void efi_sync_low_kernel_mappings(void);
132+
extern void efi_setup_page_tables(void);
133+
extern void __init old_map_region(efi_memory_desc_t *md);
104134

105135
#ifdef CONFIG_EFI
106136

arch/x86/include/asm/pgtable_types.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
379379
*/
380380
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
381381
extern phys_addr_t slow_virt_to_phys(void *__address);
382-
382+
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
383+
unsigned numpages, unsigned long page_flags);
383384
#endif /* !__ASSEMBLY__ */
384385

385386
#endif /* _ASM_X86_PGTABLE_DEFS_H */

arch/x86/platform/efi/efi.c

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
* Bibo Mao <bibo.mao@intel.com>
1313
* Chandramouli Narayanan <mouli@linux.intel.com>
1414
* Huang Ying <ying.huang@intel.com>
15+
* Copyright (C) 2013 SuSE Labs
16+
* Borislav Petkov <bp@suse.de> - runtime services VA mapping
1517
*
1618
* Copied from efi_32.c to eliminate the duplicated code between EFI
1719
* 32/64 support code. --ying 2007-10-26
@@ -745,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
745747
set_memory_uc(addr, npages);
746748
}
747749

750+
void __init old_map_region(efi_memory_desc_t *md)
751+
{
752+
u64 start_pfn, end_pfn, end;
753+
unsigned long size;
754+
void *va;
755+
756+
start_pfn = PFN_DOWN(md->phys_addr);
757+
size = md->num_pages << PAGE_SHIFT;
758+
end = md->phys_addr + size;
759+
end_pfn = PFN_UP(end);
760+
761+
if (pfn_range_is_mapped(start_pfn, end_pfn)) {
762+
va = __va(md->phys_addr);
763+
764+
if (!(md->attribute & EFI_MEMORY_WB))
765+
efi_memory_uc((u64)(unsigned long)va, size);
766+
} else
767+
va = efi_ioremap(md->phys_addr, size,
768+
md->type, md->attribute);
769+
770+
md->virt_addr = (u64) (unsigned long) va;
771+
if (!va)
772+
pr_err("ioremap of 0x%llX failed!\n",
773+
(unsigned long long)md->phys_addr);
774+
}
775+
748776
/*
749777
* This function will switch the EFI runtime services to virtual mode.
750-
* Essentially, look through the EFI memmap and map every region that
751-
* has the runtime attribute bit set in its memory descriptor and update
752-
* that memory descriptor with the virtual address obtained from ioremap().
753-
* This enables the runtime services to be called without having to
778+
* Essentially, we look through the EFI memmap and map every region that
779+
* has the runtime attribute bit set in its memory descriptor into the
780+
* ->trampoline_pgd page table using a top-down VA allocation scheme.
781+
*
782+
* The old method which used to update that memory descriptor with the
783+
* virtual address obtained from ioremap() is still supported when the
784+
* kernel is booted with efi=old_map on its command line. That old
785+
* method enabled the runtime services to be called without having to
754786
* thunk back into physical mode for every invocation.
787+
*
788+
* The new method does a pagetable switch in a preemption-safe manner
789+
* so that we're in a different address space when calling a runtime
790+
* function. For function argument passing, we copy the PGDs of the
791+
* kernel page table into ->trampoline_pgd prior to each call.
755792
*/
756793
void __init efi_enter_virtual_mode(void)
757794
{
758795
efi_memory_desc_t *md, *prev_md = NULL;
759-
efi_status_t status;
796+
void *p, *new_memmap = NULL;
760797
unsigned long size;
761-
u64 end, systab, start_pfn, end_pfn;
762-
void *p, *va, *new_memmap = NULL;
798+
efi_status_t status;
799+
u64 end, systab;
763800
int count = 0;
764801

765802
efi.systab = NULL;
@@ -768,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
768805
* We don't do virtual mode, since we don't do runtime services, on
769806
* non-native EFI
770807
*/
771-
772808
if (!efi_is_native()) {
773809
efi_unmap_memmap();
774810
return;
@@ -799,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
799835
continue;
800836
}
801837
prev_md = md;
838+
802839
}
803840

804841
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -808,33 +845,18 @@ void __init efi_enter_virtual_mode(void)
808845
md->type != EFI_BOOT_SERVICES_DATA)
809846
continue;
810847

848+
efi_map_region(md);
849+
811850
size = md->num_pages << EFI_PAGE_SHIFT;
812851
end = md->phys_addr + size;
813852

814-
start_pfn = PFN_DOWN(md->phys_addr);
815-
end_pfn = PFN_UP(end);
816-
if (pfn_range_is_mapped(start_pfn, end_pfn)) {
817-
va = __va(md->phys_addr);
818-
819-
if (!(md->attribute & EFI_MEMORY_WB))
820-
efi_memory_uc((u64)(unsigned long)va, size);
821-
} else
822-
va = efi_ioremap(md->phys_addr, size,
823-
md->type, md->attribute);
824-
825-
md->virt_addr = (u64) (unsigned long) va;
826-
827-
if (!va) {
828-
pr_err("ioremap of 0x%llX failed!\n",
829-
(unsigned long long)md->phys_addr);
830-
continue;
831-
}
832-
833853
systab = (u64) (unsigned long) efi_phys.systab;
834854
if (md->phys_addr <= systab && systab < end) {
835855
systab += md->virt_addr - md->phys_addr;
856+
836857
efi.systab = (efi_system_table_t *) (unsigned long) systab;
837858
}
859+
838860
new_memmap = krealloc(new_memmap,
839861
(count + 1) * memmap.desc_size,
840862
GFP_KERNEL);
@@ -845,6 +867,9 @@ void __init efi_enter_virtual_mode(void)
845867

846868
BUG_ON(!efi.systab);
847869

870+
efi_setup_page_tables();
871+
efi_sync_low_kernel_mappings();
872+
848873
status = phys_efi_set_virtual_address_map(
849874
memmap.desc_size * count,
850875
memmap.desc_size,
@@ -877,7 +902,8 @@ void __init efi_enter_virtual_mode(void)
877902
efi.query_variable_info = virt_efi_query_variable_info;
878903
efi.update_capsule = virt_efi_update_capsule;
879904
efi.query_capsule_caps = virt_efi_query_capsule_caps;
880-
if (__supported_pte_mask & _PAGE_NX)
905+
906+
if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
881907
runtime_code_page_mkexec();
882908

883909
kfree(new_memmap);
@@ -1007,3 +1033,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
10071033
return EFI_SUCCESS;
10081034
}
10091035
EXPORT_SYMBOL_GPL(efi_query_variable_store);
1036+
1037+
static int __init parse_efi_cmdline(char *str)
1038+
{
1039+
if (*str == '=')
1040+
str++;
1041+
1042+
if (!strncmp(str, "old_map", 7))
1043+
set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
1044+
1045+
return 0;
1046+
}
1047+
early_param("efi", parse_efi_cmdline);

arch/x86/platform/efi/efi_32.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,16 @@
3737
* claim EFI runtime service handler exclusively and to duplicate a memory in
3838
* low memory space say 0 - 3G.
3939
*/
40-
4140
static unsigned long efi_rt_eflags;
4241

42+
void efi_sync_low_kernel_mappings(void) {}
43+
void efi_setup_page_tables(void) {}
44+
45+
void __init efi_map_region(efi_memory_desc_t *md)
46+
{
47+
old_map_region(md);
48+
}
49+
4350
void efi_call_phys_prelog(void)
4451
{
4552
struct desc_ptr gdt_descr;

0 commit comments

Comments
 (0)