Skip to content

Commit 17266ee

Browse files
ecree-solarflare authored and davem330 committed
net: ipv4: listified version of ip_rcv
Also involved adding a way to run a netfilter hook over a list of packets. Rather than attempting to make netfilter know about lists (which would be a major project in itself) we just let it call the regular okfn (in this case ip_rcv_finish()) for any packets it steals, and have it give us back a list of packets it's synchronously accepted (which normally NF_HOOK would automatically call okfn() on, but we want to be able to potentially pass the list to a listified version of okfn().)

The netfilter hooks themselves are indirect calls that still happen per-packet (see nf_hook_entry_hookfn()), but again, changing that can be left for future work.

There is potential for out-of-order receives if the netfilter hook ends up synchronously stealing packets, as they will be processed before any accepts earlier in the list. However, it was already possible for an asynchronous accept to cause out-of-order receives, so presumably this is considered OK.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 88eb194 commit 17266ee

File tree

6 files changed

+94
-10
lines changed

6 files changed

+94
-10
lines changed

include/linux/netdevice.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2297,6 +2297,9 @@ struct packet_type {
22972297
struct net_device *,
22982298
struct packet_type *,
22992299
struct net_device *);
2300+
void (*list_func) (struct list_head *,
2301+
struct packet_type *,
2302+
struct net_device *);
23002303
bool (*id_match)(struct packet_type *ptype,
23012304
struct sock *sk);
23022305
void *af_packet_priv;

include/linux/netfilter.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,20 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
288288
return ret;
289289
}
290290

291+
static inline void
292+
NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
293+
struct list_head *head, struct net_device *in, struct net_device *out,
294+
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
295+
{
296+
struct sk_buff *skb, *next;
297+
298+
list_for_each_entry_safe(skb, next, head, list) {
299+
int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
300+
if (ret != 1)
301+
list_del(&skb->list);
302+
}
303+
}
304+
291305
/* Call setsockopt() */
292306
int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
293307
unsigned int len);
@@ -369,6 +383,14 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
369383
return okfn(net, sk, skb);
370384
}
371385

386+
static inline void
387+
NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
388+
struct list_head *head, struct net_device *in, struct net_device *out,
389+
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
390+
{
391+
/* nothing to do */
392+
}
393+
372394
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
373395
struct sock *sk, struct sk_buff *skb,
374396
struct net_device *indev, struct net_device *outdev,

include/net/ip.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
138138
struct ip_options_rcu *opt);
139139
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
140140
struct net_device *orig_dev);
141+
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
142+
struct net_device *orig_dev);
141143
int ip_local_deliver(struct sk_buff *skb);
142144
int ip_mr_input(struct sk_buff *skb);
143145
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);

net/core/dev.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4806,9 +4806,11 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
48064806
return;
48074807
if (list_empty(head))
48084808
return;
4809-
4810-
list_for_each_entry_safe(skb, next, head, list)
4811-
pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4809+
if (pt_prev->list_func != NULL)
4810+
pt_prev->list_func(head, pt_prev, orig_dev);
4811+
else
4812+
list_for_each_entry_safe(skb, next, head, list)
4813+
pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
48124814
}
48134815

48144816
static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)

net/ipv4/af_inet.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,6 +1882,7 @@ fs_initcall(ipv4_offload_init);
18821882
/*
 * Receive handlers registered for ETH_P_IP frames: ip_rcv() for a single
 * packet, ip_list_rcv() for a whole list of packets.
 */
static struct packet_type ip_packet_type __read_mostly = {
	.type		= cpu_to_be16(ETH_P_IP),
	.list_func	= ip_list_rcv,
	.func		= ip_rcv,
};
18861887

18871888
static int __init inet_init(void)

net/ipv4/ip_input.c

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -408,10 +408,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
408408
/*
409409
* Main IP Receive routine.
410410
*/
411-
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
411+
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
412412
{
413413
const struct iphdr *iph;
414-
struct net *net;
415414
u32 len;
416415

417416
/* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +420,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
421420
goto drop;
422421

423422

424-
net = dev_net(dev);
425423
__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
426424

427425
skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +487,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
489487
/* Must drop socket now because of tproxy. */
490488
skb_orphan(skb);
491489

492-
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
493-
net, NULL, skb, dev, NULL,
494-
ip_rcv_finish);
490+
return skb;
495491

496492
csum_error:
497493
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +496,63 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
500496
drop:
501497
kfree_skb(skb);
502498
out:
503-
return NET_RX_DROP;
499+
return NULL;
500+
}
501+
502+
/*
503+
* IP receive entry point
504+
*/
505+
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
506+
struct net_device *orig_dev)
507+
{
508+
struct net *net = dev_net(dev);
509+
510+
skb = ip_rcv_core(skb, net);
511+
if (skb == NULL)
512+
return NET_RX_DROP;
513+
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
514+
net, NULL, skb, dev, NULL,
515+
ip_rcv_finish);
516+
}
517+
518+
static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
519+
struct net *net)
520+
{
521+
struct sk_buff *skb, *next;
522+
523+
NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
524+
head, dev, NULL, ip_rcv_finish);
525+
list_for_each_entry_safe(skb, next, head, list)
526+
ip_rcv_finish(net, NULL, skb);
527+
}
528+
529+
/* Receive a list of IP packets */
530+
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
531+
struct net_device *orig_dev)
532+
{
533+
struct net_device *curr_dev = NULL;
534+
struct net *curr_net = NULL;
535+
struct sk_buff *skb, *next;
536+
struct list_head sublist;
537+
538+
list_for_each_entry_safe(skb, next, head, list) {
539+
struct net_device *dev = skb->dev;
540+
struct net *net = dev_net(dev);
541+
542+
skb = ip_rcv_core(skb, net);
543+
if (skb == NULL)
544+
continue;
545+
546+
if (curr_dev != dev || curr_net != net) {
547+
/* dispatch old sublist */
548+
list_cut_before(&sublist, head, &skb->list);
549+
if (!list_empty(&sublist))
550+
ip_sublist_rcv(&sublist, dev, net);
551+
/* start new sublist */
552+
curr_dev = dev;
553+
curr_net = net;
554+
}
555+
}
556+
/* dispatch final sublist */
557+
ip_sublist_rcv(head, curr_dev, curr_net);
504558
}

0 commit comments

Comments
 (0)