Skip to content

Commit 9c572dc

Browse files
committed
Merge branch 'bpf-helper-improvements'
Daniel Borkmann says: ==================== BPF updates This set contains various updates for eBPF, i.e. the addition of a generic csum helper function and other misc bits that mostly improve existing helpers and ease programming with eBPF on cls_bpf. For more details, please see individual patches. Set is rebased on top of http://patchwork.ozlabs.org/patch/584465/. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 8b393f8 + 6205b9c commit 9c572dc

File tree

7 files changed

+142
-49
lines changed

7 files changed

+142
-49
lines changed

include/linux/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ enum bpf_arg_type {
6565
*/
6666
ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */
6767
ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */
68+
ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */
6869

6970
ARG_PTR_TO_CTX, /* pointer to context */
7071
ARG_ANYTHING, /* any (initialized) argument is ok */

include/linux/skbuff.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2630,6 +2630,13 @@ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len
26302630
skb_headroom(skb) + len <= skb->hdr_len;
26312631
}
26322632

/* Try to get skb into a state where write_len bytes may be written.
 *
 * Evaluates to 0 when skb is not cloned, or when it is cloned but
 * skb_clone_writable(skb, write_len) already holds. Only otherwise is
 * pskb_expand_head(skb, 0, 0, GFP_ATOMIC) called, and the result is then
 * non-zero exactly when that call returned non-zero.
 *
 * Because the value comes out of an && chain it is a 0/1 truth value,
 * not the error code of pskb_expand_head(); callers should only test it
 * for zero / non-zero.
 */
2633+
static inline int skb_try_make_writable(struct sk_buff *skb,
2634+
unsigned int write_len)
2635+
{
2636+
return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
2637+
pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2638+
}
2639+
26332640
static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
26342641
int cloned)
26352642
{

include/uapi/linux/bpf.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,17 @@ enum bpf_func_id {
287287
* Return: >= 0 stackid on success or negative error
288288
*/
289289
BPF_FUNC_get_stackid,
290+
291+
/**
292+
* bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff
293+
* @from: raw from buffer
294+
* @from_size: length of from buffer
295+
* @to: raw to buffer
296+
* @to_size: length of to buffer
297+
* @seed: optional seed
298+
* Return: csum result
299+
*/
300+
BPF_FUNC_csum_diff,
290301
__BPF_FUNC_MAX_ID,
291302
};
292303

@@ -302,6 +313,7 @@ enum bpf_func_id {
302313

303314
/* BPF_FUNC_l4_csum_replace flags. */
304315
#define BPF_F_PSEUDO_HDR (1ULL << 4)
316+
#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
305317

306318
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
307319
#define BPF_F_INGRESS (1ULL << 0)

kernel/bpf/verifier.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -779,15 +779,24 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
779779
* bytes from that pointer, make sure that it's within stack boundary
780780
* and all elements of stack are initialized
781781
*/
782-
static int check_stack_boundary(struct verifier_env *env,
783-
int regno, int access_size)
782+
static int check_stack_boundary(struct verifier_env *env, int regno,
783+
int access_size, bool zero_size_allowed)
784784
{
785785
struct verifier_state *state = &env->cur_state;
786786
struct reg_state *regs = state->regs;
787787
int off, i;
788788

789-
if (regs[regno].type != PTR_TO_STACK)
789+
if (regs[regno].type != PTR_TO_STACK) {
790+
if (zero_size_allowed && access_size == 0 &&
791+
regs[regno].type == CONST_IMM &&
792+
regs[regno].imm == 0)
793+
return 0;
794+
795+
verbose("R%d type=%s expected=%s\n", regno,
796+
reg_type_str[regs[regno].type],
797+
reg_type_str[PTR_TO_STACK]);
790798
return -EACCES;
799+
}
791800

792801
off = regs[regno].imm;
793802
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -830,15 +839,24 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
830839
return 0;
831840
}
832841

833-
if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
842+
if (arg_type == ARG_PTR_TO_MAP_KEY ||
834843
arg_type == ARG_PTR_TO_MAP_VALUE) {
835844
expected_type = PTR_TO_STACK;
836-
} else if (arg_type == ARG_CONST_STACK_SIZE) {
845+
} else if (arg_type == ARG_CONST_STACK_SIZE ||
846+
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
837847
expected_type = CONST_IMM;
838848
} else if (arg_type == ARG_CONST_MAP_PTR) {
839849
expected_type = CONST_PTR_TO_MAP;
840850
} else if (arg_type == ARG_PTR_TO_CTX) {
841851
expected_type = PTR_TO_CTX;
852+
} else if (arg_type == ARG_PTR_TO_STACK) {
853+
expected_type = PTR_TO_STACK;
854+
/* One exception here. In case function allows for NULL to be
855+
* passed in as argument, it's a CONST_IMM type. Final test
856+
* happens during stack boundary checking.
857+
*/
858+
if (reg->type == CONST_IMM && reg->imm == 0)
859+
expected_type = CONST_IMM;
842860
} else {
843861
verbose("unsupported arg_type %d\n", arg_type);
844862
return -EFAULT;
@@ -868,8 +886,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
868886
verbose("invalid map_ptr to access map->key\n");
869887
return -EACCES;
870888
}
871-
err = check_stack_boundary(env, regno, (*mapp)->key_size);
872-
889+
err = check_stack_boundary(env, regno, (*mapp)->key_size,
890+
false);
873891
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
874892
/* bpf_map_xxx(..., map_ptr, ..., value) call:
875893
* check [value, value + map->value_size) validity
@@ -879,9 +897,12 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
879897
verbose("invalid map_ptr to access map->value\n");
880898
return -EACCES;
881899
}
882-
err = check_stack_boundary(env, regno, (*mapp)->value_size);
900+
err = check_stack_boundary(env, regno, (*mapp)->value_size,
901+
false);
902+
} else if (arg_type == ARG_CONST_STACK_SIZE ||
903+
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
904+
bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
883905

884-
} else if (arg_type == ARG_CONST_STACK_SIZE) {
885906
/* bpf_xxx(..., buf, len) call will access 'len' bytes
886907
* from stack pointer 'buf'. Check it
887908
* note: regno == len, regno - 1 == buf
@@ -891,7 +912,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
891912
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
892913
return -EACCES;
893914
}
894-
err = check_stack_boundary(env, regno - 1, reg->imm);
915+
err = check_stack_boundary(env, regno - 1, reg->imm,
916+
zero_size_allowed);
895917
}
896918

897919
return err;

net/core/filter.c

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -530,12 +530,14 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
530530
*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
531531
break;
532532

533-
/* RET_K, RET_A are remaped into 2 insns. */
533+
/* RET_K is remapped into 2 insns. RET_A case doesn't need an
534+
* extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
535+
*/
534536
case BPF_RET | BPF_A:
535537
case BPF_RET | BPF_K:
536-
*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
537-
BPF_K : BPF_X, BPF_REG_0,
538-
BPF_REG_A, fp->k);
538+
if (BPF_RVAL(fp->code) == BPF_K)
539+
*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
540+
0, fp->k);
539541
*insn = BPF_EXIT_INSN();
540542
break;
541543

@@ -1333,15 +1335,22 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
13331335
return 0;
13341336
}
13351337

1336-
#define BPF_LDST_LEN 16U
/* Scratch area of MAX_BPF_STACK bytes. The anonymous union lets the same
 * storage be addressed either as __be32 words (diff) or as raw bytes
 * (buff).
 */
1338+
struct bpf_scratchpad {
1339+
union {
1340+
__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1341+
u8 buff[MAX_BPF_STACK];
1342+
};
1343+
};
1344+
/* One scratchpad instance per CPU; users obtain the local one through
 * this_cpu_ptr(&bpf_sp).
 */
1345+
static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
13371346

13381347
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
13391348
{
1349+
struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
13401350
struct sk_buff *skb = (struct sk_buff *) (long) r1;
13411351
int offset = (int) r2;
13421352
void *from = (void *) (long) r3;
13431353
unsigned int len = (unsigned int) r4;
1344-
char buf[BPF_LDST_LEN];
13451354
void *ptr;
13461355

13471356
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
@@ -1355,14 +1364,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
13551364
*
13561365
* so check for invalid 'offset' and too large 'len'
13571366
*/
1358-
if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
1367+
if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
13591368
return -EFAULT;
1360-
1361-
if (unlikely(skb_cloned(skb) &&
1362-
!skb_clone_writable(skb, offset + len)))
1369+
if (unlikely(skb_try_make_writable(skb, offset + len)))
13631370
return -EFAULT;
13641371

1365-
ptr = skb_header_pointer(skb, offset, len, buf);
1372+
ptr = skb_header_pointer(skb, offset, len, sp->buff);
13661373
if (unlikely(!ptr))
13671374
return -EFAULT;
13681375

@@ -1371,7 +1378,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
13711378

13721379
memcpy(ptr, from, len);
13731380

1374-
if (ptr == buf)
1381+
if (ptr == sp->buff)
13751382
/* skb_store_bits cannot return -EFAULT here */
13761383
skb_store_bits(skb, offset, ptr, len);
13771384

@@ -1400,7 +1407,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
14001407
unsigned int len = (unsigned int) r4;
14011408
void *ptr;
14021409

1403-
if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN))
1410+
if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
14041411
return -EFAULT;
14051412

14061413
ptr = skb_header_pointer(skb, offset, len, to);
@@ -1432,9 +1439,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
14321439
return -EINVAL;
14331440
if (unlikely((u32) offset > 0xffff))
14341441
return -EFAULT;
1435-
1436-
if (unlikely(skb_cloned(skb) &&
1437-
!skb_clone_writable(skb, offset + sizeof(sum))))
1442+
if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
14381443
return -EFAULT;
14391444

14401445
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1474,23 +1479,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
14741479
{
14751480
struct sk_buff *skb = (struct sk_buff *) (long) r1;
14761481
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1482+
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
14771483
int offset = (int) r2;
14781484
__sum16 sum, *ptr;
14791485

1480-
if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
1486+
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
1487+
BPF_F_HDR_FIELD_MASK)))
14811488
return -EINVAL;
14821489
if (unlikely((u32) offset > 0xffff))
14831490
return -EFAULT;
1484-
1485-
if (unlikely(skb_cloned(skb) &&
1486-
!skb_clone_writable(skb, offset + sizeof(sum))))
1491+
if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
14871492
return -EFAULT;
14881493

14891494
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
14901495
if (unlikely(!ptr))
14911496
return -EFAULT;
1497+
if (is_mmzero && !*ptr)
1498+
return 0;
14921499

14931500
switch (flags & BPF_F_HDR_FIELD_MASK) {
1501+
case 0:
1502+
if (unlikely(from != 0))
1503+
return -EINVAL;
1504+
1505+
inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1506+
break;
14941507
case 2:
14951508
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
14961509
break;
@@ -1501,6 +1514,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
15011514
return -EINVAL;
15021515
}
15031516

1517+
if (is_mmzero && !*ptr)
1518+
*ptr = CSUM_MANGLED_0;
15041519
if (ptr == &sum)
15051520
/* skb_store_bits guaranteed to not return -EFAULT here */
15061521
skb_store_bits(skb, offset, ptr, sizeof(sum));
@@ -1519,6 +1534,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
15191534
.arg5_type = ARG_ANYTHING,
15201535
};
15211536

/* bpf_csum_diff - checksum helper over a "from" / "to" buffer pair.
 *
 * r1, from_size: address and byte length of the "from" buffer.
 * r3, to_size:   address and byte length of the "to" buffer.
 * seed:          initial value handed to csum_partial().
 *
 * Both sizes must be multiples of sizeof(__be32) and their sum must not
 * exceed sizeof(sp->diff); otherwise -EINVAL is returned (converted to
 * the u64 return type). On success the bitwise complement of every
 * "from" word, followed by every "to" word unchanged, is copied into
 * this CPU's scratchpad and the result of csum_partial() over those
 * diff_size bytes, seeded with seed, is returned.
 */
1537+
static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
1538+
{
1539+
struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1540+
u64 diff_size = from_size + to_size;
1541+
__be32 *from = (__be32 *) (long) r1;
1542+
__be32 *to = (__be32 *) (long) r3;
1543+
int i, j = 0;
1544+
1545+
/* This is quite flexible, some examples:
1546+
*
1547+
* from_size == 0, to_size > 0, seed := csum --> pushing data
1548+
* from_size > 0, to_size == 0, seed := csum --> pulling data
1549+
* from_size > 0, to_size > 0, seed := 0 --> diffing data
1550+
*
1551+
* Even for diffing, from_size and to_size don't need to be equal.
1552+
*/
1553+
if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1554+
diff_size > sizeof(sp->diff)))
1555+
return -EINVAL;
1556+
/* Complemented "from" words go in first; j keeps counting across both
 * loops so the "to" words are appended directly behind them.
 */
1557+
for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1558+
sp->diff[j] = ~from[i];
1559+
for (i = 0; i < to_size / sizeof(__be32); i++, j++)
1560+
sp->diff[j] = to[i];
1561+
1562+
return csum_partial(sp->diff, diff_size, seed);
1563+
}
1564+
/* Helper prototype for bpf_csum_diff(). Arguments 1/2 and 3/4 form two
 * (stack pointer, size) pairs where the size is allowed to be 0; the
 * fifth argument is unconstrained. The helper returns an integer and is
 * not restricted to GPL programs (gpl_only is false).
 */
1565+
const struct bpf_func_proto bpf_csum_diff_proto = {
1566+
.func = bpf_csum_diff,
1567+
.gpl_only = false,
1568+
.ret_type = RET_INTEGER,
1569+
.arg1_type = ARG_PTR_TO_STACK,
1570+
.arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1571+
.arg3_type = ARG_PTR_TO_STACK,
1572+
.arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1573+
.arg5_type = ARG_ANYTHING,
1574+
};
1575+
15221576
static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
15231577
{
15241578
struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
@@ -1682,6 +1736,13 @@ bool bpf_helper_changes_skb_data(void *func)
16821736
return true;
16831737
if (func == bpf_skb_vlan_pop)
16841738
return true;
1739+
if (func == bpf_skb_store_bytes)
1740+
return true;
1741+
if (func == bpf_l3_csum_replace)
1742+
return true;
1743+
if (func == bpf_l4_csum_replace)
1744+
return true;
1745+
16851746
return false;
16861747
}
16871748

@@ -1849,6 +1910,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
18491910
return &bpf_skb_store_bytes_proto;
18501911
case BPF_FUNC_skb_load_bytes:
18511912
return &bpf_skb_load_bytes_proto;
1913+
case BPF_FUNC_csum_diff:
1914+
return &bpf_csum_diff_proto;
18521915
case BPF_FUNC_l3_csum_replace:
18531916
return &bpf_l3_csum_replace_proto;
18541917
case BPF_FUNC_l4_csum_replace:

net/sched/act_csum.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
105105
int hl = ihl + jhl;
106106

107107
if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
108-
(skb_cloned(skb) &&
109-
!skb_clone_writable(skb, hl + ntkoff) &&
110-
pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
108+
skb_try_make_writable(skb, hl + ntkoff))
111109
return NULL;
112110
else
113111
return (void *)(skb_network_header(skb) + ihl);
@@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
365363
}
366364

367365
if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
368-
if (skb_cloned(skb) &&
369-
!skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
370-
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
366+
if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
371367
goto fail;
372368

373369
ip_send_check(ip_hdr(skb));

0 commit comments

Comments
 (0)