Skip to content

Commit 9e1725b

Browse files
anadav authored and Ingo Molnar committed
x86/refcount: Work around GCC inlining bug
As described in: 77b0bf5: ("kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs") GCC's inlining heuristics are broken with common asm() patterns used in kernel code, resulting in the effective disabling of inlining. The workaround is to set an assembly macro and call it from the inline assembly block. As a result GCC considers the inline assembly block as a single instruction. (Which it isn't, but that's the best we can get.) This patch allows GCC to inline simple functions such as __get_seccomp_filter(). To no-one's surprise the result is that GCC performs more aggressive (read: correct) inlining decisions in these scenarios, which reduces the kernel size and presumably also speeds it up: text data bss dec hex filename 18140970 10225412 2957312 31323694 1ddf62e ./vmlinux before 18140140 10225284 2957312 31322736 1ddf270 ./vmlinux after (-958) 16 fewer static text symbols: Before: 40302 After: 40286 (-16) these got inlined instead. Functions such as kref_get(), free_user(), fuse_file_get() now get inlined. Hurray! [ mingo: Rewrote the changelog. ] Tested-by: Kees Cook <keescook@chromium.org> Signed-off-by: Nadav Amit <namit@vmware.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jan Beulich <JBeulich@suse.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20181003213100.189959-5-namit@vmware.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent c06c4d8 commit 9e1725b

File tree

2 files changed

+46
-29
lines changed

2 files changed

+46
-29
lines changed

arch/x86/include/asm/refcount.h

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,41 @@
44
* x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
55
* PaX/grsecurity.
66
*/
7+
8+
#ifdef __ASSEMBLY__
9+
10+
#include <asm/asm.h>
11+
#include <asm/bug.h>
12+
13+
.macro REFCOUNT_EXCEPTION counter:req
14+
.pushsection .text..refcount
15+
111: lea \counter, %_ASM_CX
16+
112: ud2
17+
ASM_UNREACHABLE
18+
.popsection
19+
113: _ASM_EXTABLE_REFCOUNT(112b, 113b)
20+
.endm
21+
22+
/* Trigger refcount exception if refcount result is negative. */
23+
.macro REFCOUNT_CHECK_LT_ZERO counter:req
24+
js 111f
25+
REFCOUNT_EXCEPTION counter="\counter"
26+
.endm
27+
28+
/* Trigger refcount exception if refcount result is zero or negative. */
29+
.macro REFCOUNT_CHECK_LE_ZERO counter:req
30+
jz 111f
31+
REFCOUNT_CHECK_LT_ZERO counter="\counter"
32+
.endm
33+
34+
/* Trigger refcount exception unconditionally. */
35+
.macro REFCOUNT_ERROR counter:req
36+
jmp 111f
37+
REFCOUNT_EXCEPTION counter="\counter"
38+
.endm
39+
40+
#else /* __ASSEMBLY__ */
41+
742
#include <linux/refcount.h>
843
#include <asm/bug.h>
944

@@ -15,34 +50,11 @@
1550
* central refcount exception. The fixup address for the exception points
1651
* back to the regular execution flow in .text.
1752
*/
18-
#define _REFCOUNT_EXCEPTION \
19-
".pushsection .text..refcount\n" \
20-
"111:\tlea %[counter], %%" _ASM_CX "\n" \
21-
"112:\t" ASM_UD2 "\n" \
22-
ASM_UNREACHABLE \
23-
".popsection\n" \
24-
"113:\n" \
25-
_ASM_EXTABLE_REFCOUNT(112b, 113b)
26-
27-
/* Trigger refcount exception if refcount result is negative. */
28-
#define REFCOUNT_CHECK_LT_ZERO \
29-
"js 111f\n\t" \
30-
_REFCOUNT_EXCEPTION
31-
32-
/* Trigger refcount exception if refcount result is zero or negative. */
33-
#define REFCOUNT_CHECK_LE_ZERO \
34-
"jz 111f\n\t" \
35-
REFCOUNT_CHECK_LT_ZERO
36-
37-
/* Trigger refcount exception unconditionally. */
38-
#define REFCOUNT_ERROR \
39-
"jmp 111f\n\t" \
40-
_REFCOUNT_EXCEPTION
4153

4254
static __always_inline void refcount_add(unsigned int i, refcount_t *r)
4355
{
4456
asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
45-
REFCOUNT_CHECK_LT_ZERO
57+
"REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
4658
: [counter] "+m" (r->refs.counter)
4759
: "ir" (i)
4860
: "cc", "cx");
@@ -51,29 +63,31 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
5163
static __always_inline void refcount_inc(refcount_t *r)
5264
{
5365
asm volatile(LOCK_PREFIX "incl %0\n\t"
54-
REFCOUNT_CHECK_LT_ZERO
66+
"REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
5567
: [counter] "+m" (r->refs.counter)
5668
: : "cc", "cx");
5769
}
5870

5971
static __always_inline void refcount_dec(refcount_t *r)
6072
{
6173
asm volatile(LOCK_PREFIX "decl %0\n\t"
62-
REFCOUNT_CHECK_LE_ZERO
74+
"REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\""
6375
: [counter] "+m" (r->refs.counter)
6476
: : "cc", "cx");
6577
}
6678

6779
static __always_inline __must_check
6880
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
6981
{
70-
GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
82+
GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
83+
"REFCOUNT_CHECK_LT_ZERO counter=\"%0\"",
7184
r->refs.counter, "er", i, "%0", e, "cx");
7285
}
7386

7487
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
7588
{
76-
GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
89+
GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
90+
"REFCOUNT_CHECK_LT_ZERO counter=\"%0\"",
7791
r->refs.counter, "%0", e, "cx");
7892
}
7993

@@ -91,7 +105,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
91105

92106
/* Did we try to increment from/to an undesirable state? */
93107
if (unlikely(c < 0 || c == INT_MAX || result < c)) {
94-
asm volatile(REFCOUNT_ERROR
108+
asm volatile("REFCOUNT_ERROR counter=\"%[counter]\""
95109
: : [counter] "m" (r->refs.counter)
96110
: "cc", "cx");
97111
break;
@@ -107,4 +121,6 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
107121
return refcount_add_not_zero(1, r);
108122
}
109123

124+
#endif /* __ASSEMBLY__ */
125+
110126
#endif

arch/x86/kernel/macros.S

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
*/
88

99
#include <linux/compiler.h>
10+
#include <asm/refcount.h>

0 commit comments

Comments
 (0)