@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+			bool *pfmemalloc)
+{
+	void *obj;
+	bool ret_pfmemalloc = false;
+
+	/*
+	 * Try a regular allocation, when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	obj = kmalloc_node_track_caller(size,
+					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+					node);
+	if (obj || !(gfp_pfmemalloc_allowed(flags)))
+		goto out;
+
+	/* Try again but now we are using pfmemalloc reserves */
+	ret_pfmemalloc = true;
+	obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+	if (pfmemalloc)
+		*pfmemalloc = ret_pfmemalloc;
+
+	return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
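The hunk above is the core of the patch. For readers unfamiliar with the idiom, here is a minimal, self-contained userspace sketch of the same two-phase scheme; every name in it (alloc_reserve, try_normal_pool, reserves_allowed, ...) is an illustrative stand-in, not kernel API:

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	static bool reserves_allowed = true;  /* stands in for gfp_pfmemalloc_allowed() */
	static bool normal_pool_empty = true; /* force the fallback path for the demo   */

	static void *try_normal_pool(size_t size)
	{
		return normal_pool_empty ? NULL : malloc(size);
	}

	static void *try_emergency_pool(size_t size)
	{
		return malloc(size);
	}

	/* Same shape as __kmalloc_reserve(): try the normal pool first; only
	 * if that fails and policy permits, fall back to the reserves, and
	 * report through *pfmemalloc which pool satisfied the request. */
	static void *alloc_reserve(size_t size, bool *pfmemalloc)
	{
		bool ret_pfmemalloc = false;
		void *obj = try_normal_pool(size);

		if (!obj && reserves_allowed) {
			ret_pfmemalloc = true;
			obj = try_emergency_pool(size);
		}
		if (pfmemalloc)
			*pfmemalloc = ret_pfmemalloc;
		return obj;
	}

	int main(void)
	{
		bool pfmemalloc;
		void *p = alloc_reserve(128, &pfmemalloc);

		printf("allocated %p, from reserves: %d\n", p, (int)pfmemalloc);
		free(p);
		return 0;
	}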
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * __alloc_skb - allocate a network buffer
  * @size: size to allocate
  * @gfp_mask: allocation mask
- * @fclone: allocate from fclone cache instead of head cache
- *	and allocate a cloned (child) skb
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *	instead of head cache and allocate a cloned (child) skb.
+ *	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *	allocations in case the data is required for writeback
  * @node: numa node to allocate memory on
  *
  * Allocate a new &sk_buff. The returned buffer has no headroom and a
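The boolean @fclone becomes an ORable flag word, so a caller can request the pfmemalloc-eligible RX behaviour directly; __netdev_alloc_skb() later in this same patch does exactly that:

	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
			  SKB_ALLOC_RX, NUMA_NO_NODE);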
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * %GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int flags, int node)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	bool pfmemalloc;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+	cache = (flags & SKB_ALLOC_FCLONE)
+		? skbuff_fclone_cache : skbuff_head_cache;
+
+	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+		gfp_mask |= __GFP_MEMALLOC;
 
 	/* Get the HEAD */
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (!data)
 		goto nodata;
 	/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
+	skb->pfmemalloc = pfmemalloc;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&shinfo->dataref, 1);
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-	if (fclone) {
+	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
302
348
303
349
#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
304
350
305
- /**
306
- * netdev_alloc_frag - allocate a page fragment
307
- * @fragsz: fragment size
308
- *
309
- * Allocates a frag from a page for receive buffer.
310
- * Uses GFP_ATOMIC allocations.
311
- */
312
- void * netdev_alloc_frag (unsigned int fragsz )
351
+ static void * __netdev_alloc_frag (unsigned int fragsz , gfp_t gfp_mask )
313
352
{
314
353
struct netdev_alloc_cache * nc ;
315
354
void * data = NULL ;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	nc = &__get_cpu_var(netdev_alloc_cache);
 	if (unlikely(!nc->page)) {
 refill:
-		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		nc->page = alloc_page(gfp_mask);
 		if (unlikely(!nc->page))
 			goto end;
 recycle:
@@ -343,6 +382,18 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	local_irq_restore(flags);
 	return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
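This split is the usual kernel pattern for threading an allocation-policy argument through without touching exported users: the worker becomes static and takes gfp_mask explicitly, while the old symbol keeps its signature and its GFP_ATOMIC | __GFP_COLD default, so drivers calling netdev_alloc_frag() see no change. A hypothetical second wrapper (not part of this patch, shown only to illustrate the shape) would be one line in the same style:

	/* illustrative only -- no such helper exists in this patch */
	void *netdev_alloc_frag_nowarn(unsigned int fragsz)
	{
		return __netdev_alloc_frag(fragsz,
					   GFP_ATOMIC | __GFP_COLD | __GFP_NOWARN);
	}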
@@ -366,15 +417,21 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
366
417
SKB_DATA_ALIGN (sizeof (struct skb_shared_info ));
367
418
368
419
if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA ))) {
369
- void * data = netdev_alloc_frag (fragsz );
420
+ void * data ;
421
+
422
+ if (sk_memalloc_socks ())
423
+ gfp_mask |= __GFP_MEMALLOC ;
424
+
425
+ data = __netdev_alloc_frag (fragsz , gfp_mask );
370
426
371
427
if (likely (data )) {
372
428
skb = build_skb (data , fragsz );
373
429
if (unlikely (!skb ))
374
430
put_page (virt_to_head_page (data ));
375
431
}
376
432
} else {
377
- skb = __alloc_skb (length + NET_SKB_PAD , gfp_mask , 0 , NUMA_NO_NODE );
433
+ skb = __alloc_skb (length + NET_SKB_PAD , gfp_mask ,
434
+ SKB_ALLOC_RX , NUMA_NO_NODE );
378
435
}
379
436
if (likely (skb )) {
380
437
skb_reserve (skb , NET_SKB_PAD );
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
 	new->ipvs_property = old->ipvs_property;
 #endif
+	new->pfmemalloc = old->pfmemalloc;
 	new->protocol = old->protocol;
 	new->mark = old->mark;
 	new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		if (skb_pfmemalloc(skb))
+			gfp_mask |= __GFP_MEMALLOC;
+
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+	if (skb_pfmemalloc(skb))
+		return SKB_ALLOC_RX;
+	return 0;
+}
+
 /**
  * skb_copy - create private copy of an sk_buff
  * @skb: buffer to copy
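skb_alloc_rx_flag() translates the per-skb pfmemalloc mark back into an allocation flag, so every copy path below stays a one-liner; skb_copy(), __pskb_copy(), skb_copy_expand() and skb_segment() all switch to the same call shape:

	n = __alloc_skb(size, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE);

The reasoning: a copy of a packet that was allowed to use the emergency reserves must itself be allowed to use them, otherwise the copy could fail precisely when the original was kept alive to guarantee forward progress.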
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int headerlen = skb_headroom(skb);
 	unsigned int size = skb_end_offset(skb) + skb->data_len;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
979
1049
980
1050
size = SKB_DATA_ALIGN (size );
981
1051
982
- data = kmalloc (size + SKB_DATA_ALIGN (sizeof (struct skb_shared_info )),
983
- gfp_mask );
1052
+ if (skb_pfmemalloc (skb ))
1053
+ gfp_mask |= __GFP_MEMALLOC ;
1054
+ data = kmalloc_reserve (size + SKB_DATA_ALIGN (sizeof (struct skb_shared_info )),
1055
+ gfp_mask , NUMA_NO_NODE , NULL );
984
1056
if (!data )
985
1057
goto nodata ;
986
1058
size = SKB_WITH_OVERHEAD (ksize (data ));
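Here pskb_expand_head() passes NULL as the pfmemalloc out-parameter: the skb already carries its mark from allocation time, and the in-place expansion only needs to reassert eligibility via __GFP_MEMALLOC. The argument is explicitly optional thanks to the guard in __kmalloc_reserve() from the first hunk:

	if (pfmemalloc)
		*pfmemalloc = ret_pfmemalloc;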
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-				      gfp_mask);
+	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+					gfp_mask, skb_alloc_rx_flag(skb),
+					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
 	int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			skb_release_head_state(nskb);
 			__skb_push(nskb, doffset);
 		} else {
-			nskb = alloc_skb(hsize + doffset + headroom,
-					 GFP_ATOMIC);
+			nskb = __alloc_skb(hsize + doffset + headroom,
+					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
 				goto err;
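Taken together, the copy/clone/segment hunks make the pfmemalloc mark hereditary: __copy_skb_header() carries the bit across, and skb_alloc_rx_flag() restores reserve eligibility for the new allocation. A compact userspace model of that life cycle (again, every name here is an illustrative stand-in, not kernel API):

	#include <stdbool.h>
	#include <stdlib.h>
	#include <string.h>

	/* A buffer remembers whether it came from reserves; every copy
	 * inherits the mark and regains the right to use the reserves. */
	struct buf {
		bool pfmemalloc;        /* models skb->pfmemalloc */
		size_t len;
		unsigned char data[];
	};

	static struct buf *buf_alloc(size_t len, bool may_use_reserves)
	{
		/* a reserve-aware allocator would honor the flag here */
		struct buf *b = malloc(sizeof(*b) + len);

		if (b) {
			b->pfmemalloc = may_use_reserves;
			b->len = len;
		}
		return b;
	}

	/* Models skb_copy() + skb_alloc_rx_flag(): the source's mark picks
	 * the allocation policy, then the header copy carries it over. */
	static struct buf *buf_copy(const struct buf *old)
	{
		struct buf *new = buf_alloc(old->len, old->pfmemalloc);

		if (new)
			memcpy(new->data, old->data, old->len);
		return new;
	}

	int main(void)
	{
		struct buf *rx = buf_alloc(64, true); /* SKB_ALLOC_RX path */
		struct buf *cp = rx ? buf_copy(rx) : NULL;

		free(cp);
		free(rx);
		return 0;
	}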