Skip to content

Commit b4b9e35

Browse files
Mel Gorman authored and torvalds committed
netvm: set PF_MEMALLOC as appropriate during SKB processing
In order to make sure pfmemalloc packets receive all memory needed to proceed, ensure processing of pfmemalloc SKBs happens under PF_MEMALLOC. This is limited to a subset of protocols that are expected to be used for writing to swap. Taps are not allowed to use PF_MEMALLOC as these are expected to communicate with userspace processes which could be paged out. [a.p.zijlstra@chello.nl: Ideas taken from various patches] [jslaby@suse.cz: Lock imbalance fix] Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: David S. Miller <davem@davemloft.net> Cc: Neil Brown <neilb@suse.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Christie <michaelc@cs.wisc.edu> Cc: Eric B Munson <emunson@mgebm.net> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> Cc: Mel Gorman <mgorman@suse.de> Cc: Christoph Lameter <cl@linux.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 0614002 commit b4b9e35

File tree

3 files changed

+68
-6
lines changed

3 files changed

+68
-6
lines changed

include/net/sock.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,8 +754,13 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s
754754
return 0;
755755
}
756756

757+
extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
758+
757759
static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
758760
{
761+
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
762+
return __sk_backlog_rcv(sk, skb);
763+
759764
return sk->sk_backlog_rcv(sk, skb);
760765
}
761766

net/core/dev.c

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3155,6 +3155,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
31553155
}
31563156
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
31573157

3158+
/*
3159+
* Limit the use of PFMEMALLOC reserves to those protocols that implement
3160+
* the special handling of PFMEMALLOC skbs.
3161+
*/
3162+
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3163+
{
3164+
switch (skb->protocol) {
3165+
case __constant_htons(ETH_P_ARP):
3166+
case __constant_htons(ETH_P_IP):
3167+
case __constant_htons(ETH_P_IPV6):
3168+
case __constant_htons(ETH_P_8021Q):
3169+
return true;
3170+
default:
3171+
return false;
3172+
}
3173+
}
3174+
31583175
static int __netif_receive_skb(struct sk_buff *skb)
31593176
{
31603177
struct packet_type *ptype, *pt_prev;
@@ -3164,14 +3181,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
31643181
bool deliver_exact = false;
31653182
int ret = NET_RX_DROP;
31663183
__be16 type;
3184+
unsigned long pflags = current->flags;
31673185

31683186
net_timestamp_check(!netdev_tstamp_prequeue, skb);
31693187

31703188
trace_netif_receive_skb(skb);
31713189

3190+
/*
3191+
* PFMEMALLOC skbs are special, they should
3192+
* - be delivered to SOCK_MEMALLOC sockets only
3193+
* - stay away from userspace
3194+
* - have bounded memory usage
3195+
*
3196+
* Use PF_MEMALLOC as this saves us from propagating the allocation
3197+
* context down to all allocation sites.
3198+
*/
3199+
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3200+
current->flags |= PF_MEMALLOC;
3201+
31723202
/* if we've gotten here through NAPI, check netpoll */
31733203
if (netpoll_receive_skb(skb))
3174-
return NET_RX_DROP;
3204+
goto out;
31753205

31763206
orig_dev = skb->dev;
31773207

@@ -3191,7 +3221,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
31913221
if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
31923222
skb = vlan_untag(skb);
31933223
if (unlikely(!skb))
3194-
goto out;
3224+
goto unlock;
31953225
}
31963226

31973227
#ifdef CONFIG_NET_CLS_ACT
@@ -3201,6 +3231,9 @@ static int __netif_receive_skb(struct sk_buff *skb)
32013231
}
32023232
#endif
32033233

3234+
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3235+
goto skip_taps;
3236+
32043237
list_for_each_entry_rcu(ptype, &ptype_all, list) {
32053238
if (!ptype->dev || ptype->dev == skb->dev) {
32063239
if (pt_prev)
@@ -3209,13 +3242,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
32093242
}
32103243
}
32113244

3245+
skip_taps:
32123246
#ifdef CONFIG_NET_CLS_ACT
32133247
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
32143248
if (!skb)
3215-
goto out;
3249+
goto unlock;
32163250
ncls:
32173251
#endif
32183252

3253+
if (sk_memalloc_socks() && skb_pfmemalloc(skb)
3254+
&& !skb_pfmemalloc_protocol(skb))
3255+
goto drop;
3256+
32193257
rx_handler = rcu_dereference(skb->dev->rx_handler);
32203258
if (vlan_tx_tag_present(skb)) {
32213259
if (pt_prev) {
@@ -3225,7 +3263,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
32253263
if (vlan_do_receive(&skb, !rx_handler))
32263264
goto another_round;
32273265
else if (unlikely(!skb))
3228-
goto out;
3266+
goto unlock;
32293267
}
32303268

32313269
if (rx_handler) {
@@ -3235,7 +3273,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
32353273
}
32363274
switch (rx_handler(&skb)) {
32373275
case RX_HANDLER_CONSUMED:
3238-
goto out;
3276+
goto unlock;
32393277
case RX_HANDLER_ANOTHER:
32403278
goto another_round;
32413279
case RX_HANDLER_EXACT:
@@ -3268,6 +3306,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
32683306
else
32693307
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
32703308
} else {
3309+
drop:
32713310
atomic_long_inc(&skb->dev->rx_dropped);
32723311
kfree_skb(skb);
32733312
/* Jamal, now you will not able to escape explaining
@@ -3276,8 +3315,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
32763315
ret = NET_RX_DROP;
32773316
}
32783317

3279-
out:
3318+
unlock:
32803319
rcu_read_unlock();
3320+
out:
3321+
tsk_restore_flags(current, pflags, PF_MEMALLOC);
32813322
return ret;
32823323
}
32833324

net/core/sock.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,22 @@ void sk_clear_memalloc(struct sock *sk)
298298
}
299299
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
300300

301+
/*
 * Process a backlogged skb on a SOCK_MEMALLOC socket.
 *
 * Runs sk->sk_backlog_rcv() with PF_MEMALLOC set in current->flags so
 * that allocations performed while handling the packet may dip into the
 * emergency reserves.  The caller's original PF_MEMALLOC state is
 * restored afterwards via tsk_restore_flags().
 *
 * Returns whatever the protocol's backlog receive handler returns.
 */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	/* Snapshot flags so only the PF_MEMALLOC bit is restored on exit. */
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
316+
301317
#if defined(CONFIG_CGROUPS)
302318
#if !defined(CONFIG_NET_CLS_CGROUP)
303319
int net_cls_subsys_id = -1;

0 commit comments

Comments
 (0)