
Commit 139ec7c

rustyrussell authored and Andi Kleen committed
[PATCH] paravirt: Patch inline replacements for paravirt intercepts
It turns out that the most called ops, by several orders of magnitude, are the
interrupt manipulation ops.  These are obvious candidates for patching, so mark
them up and create infrastructure for it.

The method used is that the ops structure has a patch function, which is called
for each place which needs to be patched: this returns a number of instructions
(the rest are NOP-padded).

Usually we can spare a register (%eax) for the binary patched code to use, but
in a couple of critical places in entry.S we can't: we make the clobbers
explicit at the call site, and manually clobber the allowed registers in debug
mode as an extra check.

And:

Don't abuse CONFIG_DEBUG_KERNEL, add CONFIG_DEBUG_PARAVIRT.

And:

AK: Fix warnings in x86-64 alternative.c build

And:

AK: Fix compilation with defconfig

And:

From: Andrew Morton <akpm@osdl.org>

Some versions of binutils still like to emit references to
__stop_parainstructions and __start_parainstructions.

And:

AK: Fix warnings about unused variables when PARAVIRT is disabled.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
1 parent d3561b7 commit 139ec7c
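
As a reading aid (an illustrative sketch, not part of the commit): paravirt_ops.patch is handed each call site's instruction type, allowed-clobber mask, buffer and length, and returns how many bytes it wrote; the patcher NOP-pads the rest. A hypothetical backend hook matching the native_patch() signature added in arch/i386/kernel/paravirt.c below could look like this; the name demo_patch and its single-case logic are assumptions for illustration, and kernel context (u8/u16, memcpy, the PARAVIRT_* constants) is assumed.

    /* Hypothetical sketch only -- not part of this commit. */
    static unsigned demo_patch(u8 type, u16 clobbers, void *insns, unsigned len)
    {
            /* Pretend this backend has a one-byte inline form for
             * interrupt-disable and nothing else. */
            static const unsigned char demo_cli[] = { 0xfa };       /* "cli" */

            if (type != PARAVIRT_IRQ_DISABLE || len < sizeof(demo_cli))
                    return len;     /* unknown or doesn't fit: keep the call */

            memcpy(insns, demo_cli, sizeof(demo_cli));
            return sizeof(demo_cli);        /* caller NOP-pads the remainder */
    }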

File tree

14 files changed: 459 additions, 188 deletions


arch/i386/Kconfig.debug

Lines changed: 10 additions & 0 deletions
@@ -85,4 +85,14 @@ config DOUBLEFAULT
 	  option saves about 4k and might cause you much additional grey
 	  hair.
 
+config DEBUG_PARAVIRT
+	bool "Enable some paravirtualization debugging"
+	default y
+	depends on PARAVIRT && DEBUG_KERNEL
+	help
+	  Currently deliberately clobbers regs which are allowed to be
+	  clobbered in inlined paravirt hooks, even in native mode.
+	  If turning this off solves a problem, then DISABLE_INTERRUPTS() or
+	  ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
+
 endmenu

arch/i386/kernel/alternative.c

Lines changed: 52 additions & 11 deletions
@@ -124,6 +124,20 @@ static unsigned char** find_nop_table(void)
 
 #endif /* CONFIG_X86_64 */
 
+static void nop_out(void *insns, unsigned int len)
+{
+	unsigned char **noptable = find_nop_table();
+
+	while (len > 0) {
+		unsigned int noplen = len;
+		if (noplen > ASM_NOP_MAX)
+			noplen = ASM_NOP_MAX;
+		memcpy(insns, noptable[noplen], noplen);
+		insns += noplen;
+		len -= noplen;
+	}
+}
+
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -138,10 +152,9 @@ extern u8 __smp_alt_begin[], __smp_alt_end[];
 
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
-	unsigned char **noptable = find_nop_table();
 	struct alt_instr *a;
 	u8 *instr;
-	int diff, i, k;
+	int diff;
 
 	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
 	for (a = start; a < end; a++) {
@@ -159,13 +172,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 #endif
 		memcpy(instr, a->replacement, a->replacementlen);
 		diff = a->instrlen - a->replacementlen;
-		/* Pad the rest with nops */
-		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-			k = diff;
-			if (k > ASM_NOP_MAX)
-				k = ASM_NOP_MAX;
-			memcpy(a->instr + i, noptable[k], k);
-		}
+		nop_out(instr + a->replacementlen, diff);
 	}
 }
 
@@ -209,15 +216,14 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
-	unsigned char **noptable = find_nop_table();
 	u8 **ptr;
 
 	for (ptr = start; ptr < end; ptr++) {
 		if (*ptr < text)
 			continue;
 		if (*ptr > text_end)
 			continue;
-		**ptr = noptable[1][0];
+		nop_out(*ptr, 1);
 	};
 }
 
@@ -343,6 +349,40 @@ void alternatives_smp_switch(int smp)
 
 #endif
 
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{
+	struct paravirt_patch *p;
+
+	for (p = start; p < end; p++) {
+		unsigned int used;
+
+		used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
+					  p->len);
+#ifdef CONFIG_DEBUG_PARAVIRT
+		{
+		int i;
+		/* Deliberately clobber regs using "not %reg" to find bugs. */
+		for (i = 0; i < 3; i++) {
+			if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
+				memcpy(p->instr + used, "\xf7\xd0", 2);
+				p->instr[used+1] |= i;
+				used += 2;
+			}
+		}
+		}
+#endif
+		/* Pad the rest with nops */
+		nop_out(p->instr + used, p->len - used);
+	}
+
+	/* Sync to be conservative, in case we patched following instructions */
+	sync_core();
+}
+extern struct paravirt_patch __start_parainstructions[],
+	__stop_parainstructions[];
+#endif	/* CONFIG_PARAVIRT */
+
 void __init alternative_instructions(void)
 {
 	unsigned long flags;
@@ -390,5 +430,6 @@ void __init alternative_instructions(void)
 		alternatives_smp_switch(0);
 	}
 #endif
+	apply_paravirt(__start_parainstructions, __stop_parainstructions);
 	local_irq_restore(flags);
 }
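
A note on the CONFIG_DEBUG_PARAVIRT bytes in apply_paravirt() above: "\xf7\xd0" encodes "not %eax" (0xf7 is the NOT r/m32 opcode group, 0xd0 the ModRM byte), and OR-ing the loop index into the low bits of the ModRM byte selects %eax, %ecx or %edx, which lines up with the bit tested via (1 << i). A small stand-alone illustration in ordinary user-space C (not kernel code):

    #include <stdio.h>

    /* Prints the "not %reg" encodings used by the DEBUG_PARAVIRT clobber
     * check: 0xf7 is the NOT r/m32 opcode group, 0xd0 | i is the ModRM
     * byte, and i = 0, 1, 2 selects %eax, %ecx, %edx respectively. */
    int main(void)
    {
            static const char *reg[] = { "eax", "ecx", "edx" };
            int i;

            for (i = 0; i < 3; i++)
                    printf("f7 %02x -> not %%%s\n", 0xd0 | i, reg[i]);
            return 0;
    }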

arch/i386/kernel/entry.S

Lines changed: 26 additions & 13 deletions
@@ -53,6 +53,19 @@
 #include <asm/dwarf2.h>
 #include "irq_vectors.h"
 
+/*
+ * We use macros for low-level operations which need to be overridden
+ * for paravirtualization.  The following will never clobber any registers:
+ *   INTERRUPT_RETURN (aka. "iret")
+ *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
+ * Allowing a register to be clobbered can shrink the paravirt replacement
+ * enough to patch inline, increasing performance.
+ */
+
 #define nr_syscalls ((syscall_table_size)/4)
 
 CF_MASK		= 0x00000001
@@ -63,9 +76,9 @@ NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
 #ifdef CONFIG_PREEMPT
-#define preempt_stop		DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
-#define preempt_stop
+#define preempt_stop(clobbers)
 #define resume_kernel		restore_nocheck
 #endif
 
@@ -226,7 +239,7 @@ ENTRY(ret_from_fork)
 	ALIGN
 	RING0_PTREGS_FRAME
 ret_from_exception:
-	preempt_stop
+	preempt_stop(CLBR_ANY)
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
 check_userspace:
@@ -237,7 +250,7 @@ check_userspace:
 	jb resume_kernel		# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
-	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	movl TI_flags(%ebp), %ecx
@@ -248,7 +261,7 @@ ENTRY(resume_userspace)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-	DISABLE_INTERRUPTS
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_nocheck
 need_resched:
@@ -277,7 +290,7 @@ sysenter_past_esp:
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
 	 */
-	ENABLE_INTERRUPTS
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ss, 0*/
@@ -322,7 +335,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
-	DISABLE_INTERRUPTS
+	DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
@@ -364,7 +377,7 @@ syscall_call:
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
-	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
@@ -393,7 +406,7 @@ restore_nocheck_notrace:
 .section .fixup,"ax"
 iret_exc:
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushl $0			# no error code
 	pushl $do_iret_error
 	jmp error_code
@@ -436,7 +449,7 @@ ldt_ss:
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	DISABLE_INTERRUPTS
+	DISABLE_INTERRUPTS(CLBR_EAX)
 	TRACE_IRQS_OFF
 	lss (%esp), %esp
 	CFI_ADJUST_CFA_OFFSET -8
@@ -451,7 +464,7 @@ work_pending:
 	jz work_notifysig
 work_resched:
 	call schedule
-	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
@@ -509,7 +522,7 @@ syscall_exit_work:
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	jz work_pending
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
+	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
 					# schedule() instead
 	movl %esp, %eax
 	movl $1, %edx
@@ -693,7 +706,7 @@ ENTRY(device_not_available)
 	GET_CR0_INTO_EAX
 	testl $0x4, %eax		# EM (math emulation bit)
 	jne device_not_available_emulate
-	preempt_stop
+	preempt_stop(CLBR_ANY)
 	call math_state_restore
 	jmp ret_from_exception
 device_not_available_emulate:
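
For orientation, an inferred sketch (not text from this commit): each annotated DISABLE_INTERRUPTS/ENABLE_INTERRUPTS site above ends up described by one record in the .parainstructions section that apply_paravirt() walks. The fields below are reconstructed from that loop's uses of p->instr, p->instrtype, p->len and p->clobbers; the real definition lives in include/asm-i386/paravirt.h, which is part of this series but not shown in this excerpt, so field order and widths here are assumptions.

    /* Inferred sketch of a .parainstructions record, reconstructed from
     * how apply_paravirt() uses it; field order and widths are assumptions. */
    struct paravirt_patch {
            u8 *instr;      /* start of the patchable instruction sequence    */
            u8 instrtype;   /* PARAVIRT_IRQ_DISABLE, PARAVIRT_IRQ_ENABLE, ... */
            u8 len;         /* bytes reserved at the site for inline code     */
            u16 clobbers;   /* CLBR_* mask the call site permits               */
    };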

arch/i386/kernel/module.c

Lines changed: 10 additions & 1 deletion
@@ -108,7 +108,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+		*para = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -118,6 +119,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 			alt = s;
 		if (!strcmp(".smp_locks", secstrings + s->sh_name))
 			locks= s;
+		if (!strcmp(".parainstructions", secstrings + s->sh_name))
+			para = s;
 	}
 
 	if (alt) {
@@ -132,6 +135,12 @@ int module_finalize(const Elf_Ehdr *hdr,
 					    lseg, lseg + locks->sh_size,
 					    tseg, tseg + text->sh_size);
 	}
+
+	if (para) {
+		void *pseg = (void *)para->sh_addr;
+		apply_paravirt(pseg, pseg + para->sh_size);
+	}
+
 	return 0;
 }

arch/i386/kernel/paravirt.c

Lines changed: 44 additions & 0 deletions
@@ -45,6 +45,49 @@ char *memory_setup(void)
 	return paravirt_ops.memory_setup();
 }
 
+/* Simple instruction patching code. */
+#define DEF_NATIVE(name, code)					\
+	extern const char start_##name[], end_##name[];		\
+	asm("start_" #name ": " code "; end_" #name ":")
+DEF_NATIVE(cli, "cli");
+DEF_NATIVE(sti, "sti");
+DEF_NATIVE(popf, "push %eax; popf");
+DEF_NATIVE(pushf, "pushf; pop %eax");
+DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
+DEF_NATIVE(iret, "iret");
+DEF_NATIVE(sti_sysexit, "sti; sysexit");
+
+static const struct native_insns
+{
+	const char *start, *end;
+} native_insns[] = {
+	[PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
+	[PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
+	[PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
+	[PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
+	[PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
+	[PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
+	[PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
+};
+
+static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+{
+	unsigned int insn_len;
+
+	/* Don't touch it if we don't have a replacement */
+	if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
+		return len;
+
+	insn_len = native_insns[type].end - native_insns[type].start;
+
+	/* Similarly if we can't fit replacement. */
+	if (len < insn_len)
+		return len;
+
+	memcpy(insns, native_insns[type].start, insn_len);
+	return insn_len;
+}
+
 static fastcall unsigned long native_get_debugreg(int regno)
 {
 	unsigned long val = 0;	/* Damn you, gcc! */
@@ -349,6 +392,7 @@ struct paravirt_ops paravirt_ops = {
 	.paravirt_enabled = 0,
 	.kernel_rpl = 0,
 
+	.patch = native_patch,
 	.banner = default_banner,
 	.arch_setup = native_nop,
 	.memory_setup = machine_specific_memory_setup,
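
To make the length arithmetic concrete: for an interrupt-disable site, DEF_NATIVE(cli, "cli") yields a one-byte replacement, so native_patch() copies that byte and apply_paravirt() NOP-pads whatever room remains at the site. The stand-alone sketch below mimics that step with an assumed 6-byte site; the site length and the single-byte 0x90 NOPs are simplifications (the kernel picks optimal multi-byte NOP sequences via nop_out()).

    #include <stdio.h>
    #include <string.h>

    /* User-space illustration of the patch-then-pad step, with assumed sizes. */
    int main(void)
    {
            unsigned char site[6];                   /* pretend call site  */
            const unsigned char cli[] = { 0xfa };    /* native "cli" byte  */
            unsigned int len = sizeof(site);
            unsigned int used = sizeof(cli);

            memcpy(site, cli, used);                 /* native_patch() step */
            memset(site + used, 0x90, len - used);   /* nop_out() step, with
                                                      * single-byte NOPs    */

            printf("patched %u byte(s), padded %u with NOPs\n", used, len - used);
            return 0;
    }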

arch/i386/kernel/vmlinux.lds.S

Lines changed: 6 additions & 0 deletions
@@ -165,6 +165,12 @@ SECTIONS
   .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 	*(.altinstr_replacement)
   }
+  . = ALIGN(4);
+  __start_parainstructions = .;
+  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+	*(.parainstructions)
+  }
+  __stop_parainstructions = .;
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
   .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }

include/asm-i386/alternative.h

Lines changed: 12 additions & 1 deletion
@@ -4,7 +4,7 @@
 #ifdef __KERNEL__
 
 #include <asm/types.h>
-
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 struct alt_instr {
@@ -118,4 +118,15 @@ static inline void alternatives_smp_switch(int smp) {}
 #define LOCK_PREFIX ""
 #endif
 
+struct paravirt_patch;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+#else
+static inline void
+apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{}
+#define __start_parainstructions NULL
+#define __stop_parainstructions NULL
+#endif
+
 #endif /* _I386_ALTERNATIVE_H */
