12
12
#include <linux/filter.h>
13
13
#include <linux/if_vlan.h>
14
14
#include <asm/cacheflush.h>
15
+ #include <linux/bpf.h>
15
16
16
17
/* sysctl knob: non-zero enables the BPF JIT compiler */
int bpf_jit_enable __read_mostly;
17
18
@@ -37,7 +38,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
37
38
return ptr + len ;
38
39
}
39
40
/* Append 'len' bytes of encoded instruction data to the image and keep a
 * running byte count in the local 'cnt' so callers (e.g. emit_prologue)
 * can BUILD_BUG_ON against the expected emitted size.
 * Requires locals 'u8 *prog' and 'int cnt' in the enclosing scope.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)	EMIT(b1, 1)
#define EMIT2(b1, b2)	EMIT((b1) + ((b2) << 8), 2)
/* upper bound of bytes a single BPF insn can JIT to, plus slack */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

/* total stack frame: BPF stack + callee-saved spill slots + scratch */
#define STACKSIZE \
	(MAX_BPF_STACK + \
	 32 /* space for rbx, r13, r14, r15 */ + \
	 8 /* space for skb_copy_bits() buffer */)

/* exact byte length of the code emit_prologue() emits; checked with
 * BUILD_BUG_ON and skipped over by the tail-call jump
 */
#define PROLOGUE_SIZE 51
198
+ /* emit x64 prologue code for BPF program and check it's size.
199
+ * bpf_tail_call helper will skip it while jumping into another program
200
+ */
201
+ static void emit_prologue (u8 * * pprog )
191
202
{
192
- struct bpf_insn * insn = bpf_prog -> insnsi ;
193
- int insn_cnt = bpf_prog -> len ;
194
- bool seen_ld_abs = ctx -> seen_ld_abs | (oldproglen == 0 );
195
- bool seen_exit = false;
196
- u8 temp [BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY ];
197
- int i ;
198
- int proglen = 0 ;
199
- u8 * prog = temp ;
200
- int stacksize = MAX_BPF_STACK +
201
- 32 /* space for rbx, r13, r14, r15 */ +
202
- 8 /* space for skb_copy_bits() buffer */ ;
203
+ u8 * prog = * pprog ;
204
+ int cnt = 0 ;
203
205
204
206
EMIT1 (0x55 ); /* push rbp */
205
207
EMIT3 (0x48 , 0x89 , 0xE5 ); /* mov rbp,rsp */
206
208
207
- /* sub rsp, stacksize */
208
- EMIT3_off32 (0x48 , 0x81 , 0xEC , stacksize );
209
+ /* sub rsp, STACKSIZE */
210
+ EMIT3_off32 (0x48 , 0x81 , 0xEC , STACKSIZE );
209
211
210
212
/* all classic BPF filters use R6(rbx) save it */
211
213
212
214
/* mov qword ptr [rbp-X],rbx */
213
- EMIT3_off32 (0x48 , 0x89 , 0x9D , - stacksize );
215
+ EMIT3_off32 (0x48 , 0x89 , 0x9D , - STACKSIZE );
214
216
215
217
/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
216
218
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and
@@ -221,16 +223,112 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
221
223
*/
222
224
223
225
/* mov qword ptr [rbp-X],r13 */
224
- EMIT3_off32 (0x4C , 0x89 , 0xAD , - stacksize + 8 );
226
+ EMIT3_off32 (0x4C , 0x89 , 0xAD , - STACKSIZE + 8 );
225
227
/* mov qword ptr [rbp-X],r14 */
226
- EMIT3_off32 (0x4C , 0x89 , 0xB5 , - stacksize + 16 );
228
+ EMIT3_off32 (0x4C , 0x89 , 0xB5 , - STACKSIZE + 16 );
227
229
/* mov qword ptr [rbp-X],r15 */
228
- EMIT3_off32 (0x4C , 0x89 , 0xBD , - stacksize + 24 );
230
+ EMIT3_off32 (0x4C , 0x89 , 0xBD , - STACKSIZE + 24 );
229
231
230
232
/* clear A and X registers */
231
233
EMIT2 (0x31 , 0xc0 ); /* xor eax, eax */
232
234
EMIT3 (0x4D , 0x31 , 0xED ); /* xor r13, r13 */
233
235
236
+ /* clear tail_cnt: mov qword ptr [rbp-X], rax */
237
+ EMIT3_off32 (0x48 , 0x89 , 0x85 , - STACKSIZE + 32 );
238
+
239
+ BUILD_BUG_ON (cnt != PROLOGUE_SIZE );
240
+ * pprog = prog ;
241
+ }
242
+
243
+ /* generate the following code:
244
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
245
+ * if (index >= array->map.max_entries)
246
+ * goto out;
247
+ * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
248
+ * goto out;
249
+ * prog = array->prog[index];
250
+ * if (prog == NULL)
251
+ * goto out;
252
+ * goto *(prog->bpf_func + prologue_size);
253
+ * out:
254
+ */
255
+ static void emit_bpf_tail_call (u8 * * pprog )
256
+ {
257
+ u8 * prog = * pprog ;
258
+ int label1 , label2 , label3 ;
259
+ int cnt = 0 ;
260
+
261
+ /* rdi - pointer to ctx
262
+ * rsi - pointer to bpf_array
263
+ * rdx - index in bpf_array
264
+ */
265
+
266
+ /* if (index >= array->map.max_entries)
267
+ * goto out;
268
+ */
269
+ EMIT4 (0x48 , 0x8B , 0x46 , /* mov rax, qword ptr [rsi + 16] */
270
+ offsetof(struct bpf_array , map .max_entries ));
271
+ EMIT3 (0x48 , 0x39 , 0xD0 ); /* cmp rax, rdx */
272
+ #define OFFSET1 44 /* number of bytes to jump */
273
+ EMIT2 (X86_JBE , OFFSET1 ); /* jbe out */
274
+ label1 = cnt ;
275
+
276
+ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
277
+ * goto out;
278
+ */
279
+ EMIT2_off32 (0x8B , 0x85 , - STACKSIZE + 36 ); /* mov eax, dword ptr [rbp - 516] */
280
+ EMIT3 (0x83 , 0xF8 , MAX_TAIL_CALL_CNT ); /* cmp eax, MAX_TAIL_CALL_CNT */
281
+ #define OFFSET2 33
282
+ EMIT2 (X86_JA , OFFSET2 ); /* ja out */
283
+ label2 = cnt ;
284
+ EMIT3 (0x83 , 0xC0 , 0x01 ); /* add eax, 1 */
285
+ EMIT2_off32 (0x89 , 0x85 , - STACKSIZE + 36 ); /* mov dword ptr [rbp - 516], eax */
286
+
287
+ /* prog = array->prog[index]; */
288
+ EMIT4 (0x48 , 0x8D , 0x44 , 0xD6 ); /* lea rax, [rsi + rdx * 8 + 0x50] */
289
+ EMIT1 (offsetof(struct bpf_array , prog ));
290
+ EMIT3 (0x48 , 0x8B , 0x00 ); /* mov rax, qword ptr [rax] */
291
+
292
+ /* if (prog == NULL)
293
+ * goto out;
294
+ */
295
+ EMIT4 (0x48 , 0x83 , 0xF8 , 0x00 ); /* cmp rax, 0 */
296
+ #define OFFSET3 10
297
+ EMIT2 (X86_JE , OFFSET3 ); /* je out */
298
+ label3 = cnt ;
299
+
300
+ /* goto *(prog->bpf_func + prologue_size); */
301
+ EMIT4 (0x48 , 0x8B , 0x40 , /* mov rax, qword ptr [rax + 32] */
302
+ offsetof(struct bpf_prog , bpf_func ));
303
+ EMIT4 (0x48 , 0x83 , 0xC0 , PROLOGUE_SIZE ); /* add rax, prologue_size */
304
+
305
+ /* now we're ready to jump into next BPF program
306
+ * rdi == ctx (1st arg)
307
+ * rax == prog->bpf_func + prologue_size
308
+ */
309
+ EMIT2 (0xFF , 0xE0 ); /* jmp rax */
310
+
311
+ /* out: */
312
+ BUILD_BUG_ON (cnt - label1 != OFFSET1 );
313
+ BUILD_BUG_ON (cnt - label2 != OFFSET2 );
314
+ BUILD_BUG_ON (cnt - label3 != OFFSET3 );
315
+ * pprog = prog ;
316
+ }
317
+
318
+ static int do_jit (struct bpf_prog * bpf_prog , int * addrs , u8 * image ,
319
+ int oldproglen , struct jit_context * ctx )
320
+ {
321
+ struct bpf_insn * insn = bpf_prog -> insnsi ;
322
+ int insn_cnt = bpf_prog -> len ;
323
+ bool seen_ld_abs = ctx -> seen_ld_abs | (oldproglen == 0 );
324
+ bool seen_exit = false;
325
+ u8 temp [BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY ];
326
+ int i , cnt = 0 ;
327
+ int proglen = 0 ;
328
+ u8 * prog = temp ;
329
+
330
+ emit_prologue (& prog );
331
+
234
332
if (seen_ld_abs ) {
235
333
/* r9d : skb->len - skb->data_len (headlen)
236
334
* r10 : skb->data
@@ -739,6 +837,10 @@ xadd: if (is_imm8(insn->off))
739
837
}
740
838
break ;
741
839
840
+ case BPF_JMP | BPF_CALL | BPF_X :
841
+ emit_bpf_tail_call (& prog );
842
+ break ;
843
+
742
844
/* cond jump */
743
845
case BPF_JMP | BPF_JEQ | BPF_X :
744
846
case BPF_JMP | BPF_JNE | BPF_X :
@@ -891,13 +993,13 @@ xadd: if (is_imm8(insn->off))
891
993
/* update cleanup_addr */
892
994
ctx -> cleanup_addr = proglen ;
893
995
/* mov rbx, qword ptr [rbp-X] */
894
- EMIT3_off32 (0x48 , 0x8B , 0x9D , - stacksize );
996
+ EMIT3_off32 (0x48 , 0x8B , 0x9D , - STACKSIZE );
895
997
/* mov r13, qword ptr [rbp-X] */
896
- EMIT3_off32 (0x4C , 0x8B , 0xAD , - stacksize + 8 );
998
+ EMIT3_off32 (0x4C , 0x8B , 0xAD , - STACKSIZE + 8 );
897
999
/* mov r14, qword ptr [rbp-X] */
898
- EMIT3_off32 (0x4C , 0x8B , 0xB5 , - stacksize + 16 );
1000
+ EMIT3_off32 (0x4C , 0x8B , 0xB5 , - STACKSIZE + 16 );
899
1001
/* mov r15, qword ptr [rbp-X] */
900
- EMIT3_off32 (0x4C , 0x8B , 0xBD , - stacksize + 24 );
1002
+ EMIT3_off32 (0x4C , 0x8B , 0xBD , - STACKSIZE + 24 );
901
1003
902
1004
EMIT1 (0xC9 ); /* leave */
903
1005
EMIT1 (0xC3 ); /* ret */
0 commit comments