@@ -112,9 +112,9 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
112
112
kmem_cache_free (anon_vma_cachep , anon_vma );
113
113
}
114
114
115
- static inline struct anon_vma_chain * anon_vma_chain_alloc (void )
115
+ static inline struct anon_vma_chain * anon_vma_chain_alloc (gfp_t gfp ) /* gfp: allocation flags chosen by the caller */
116
116
{
117
- return kmem_cache_alloc (anon_vma_chain_cachep , GFP_KERNEL );
117
+ return kmem_cache_alloc (anon_vma_chain_cachep , gfp ); /* flags are forwarded unchanged; callers in this patch test the result for NULL */
118
118
}
119
119
120
120
static void anon_vma_chain_free (struct anon_vma_chain * anon_vma_chain )
@@ -159,7 +159,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
159
159
struct mm_struct * mm = vma -> vm_mm ;
160
160
struct anon_vma * allocated ;
161
161
162
- avc = anon_vma_chain_alloc ();
162
+ avc = anon_vma_chain_alloc (GFP_KERNEL );
163
163
if (!avc )
164
164
goto out_enomem ;
165
165
@@ -200,6 +200,32 @@ int anon_vma_prepare(struct vm_area_struct *vma)
200
200
return - ENOMEM ;
201
201
}
202
202
203
+ /*
204
+ * Helper for walking a vma->anon_vma_chain while holding the mutex of the
205
+ * root anon_vma. @root is the root locked so far (NULL if none is held),
206
+ * @anon_vma is the next anon_vma on the chain. Returns the root now locked.
207
+ *
208
+ * The anon_vmas chained to one vma should all share a root, so you'd expect
209
+ * just a single mutex_lock for the whole traversal.
210
+ */
211
+ static inline struct anon_vma * lock_anon_vma_root (struct anon_vma * root , struct anon_vma * anon_vma )
212
+ {
213
+ struct anon_vma * new_root = anon_vma -> root ;
214
+ if (new_root != root ) { /* nothing locked yet (root == NULL) or, unexpectedly, a different root */
215
+ if (WARN_ON_ONCE (root )) /* non-NULL here means the chain spans more than one root */
216
+ mutex_unlock (& root -> mutex ); /* release the previous root before taking the new one */
217
+ root = new_root ;
218
+ mutex_lock (& root -> mutex );
219
+ }
220
+ return root ; /* callers feed this back in as @root on the next iteration */
221
+ }
222
+
223
+ static inline void unlock_anon_vma_root (struct anon_vma * root ) /* counterpart of lock_anon_vma_root(): releases root->mutex if a root is held */
224
+ {
225
+ if (root ) /* NULL means no root was locked (e.g. the chain was empty), so there is nothing to release */
226
+ mutex_unlock (& root -> mutex );
227
+ }
228
+
203
229
static void anon_vma_chain_link (struct vm_area_struct * vma ,
204
230
struct anon_vma_chain * avc ,
205
231
struct anon_vma * anon_vma )
@@ -208,13 +234,11 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
208
234
avc -> anon_vma = anon_vma ;
209
235
list_add (& avc -> same_vma , & vma -> anon_vma_chain );
210
236
211
- anon_vma_lock (anon_vma );
212
237
/*
213
238
* It's critical to add new vmas to the tail of the anon_vma,
214
239
* see comment in huge_memory.c:__split_huge_page().
215
240
*/
216
241
list_add_tail (& avc -> same_anon_vma , & anon_vma -> head );
217
- anon_vma_unlock (anon_vma );
218
242
}
219
243
220
244
/*
@@ -224,13 +248,24 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
224
248
int anon_vma_clone (struct vm_area_struct * dst , struct vm_area_struct * src )
225
249
{
226
250
struct anon_vma_chain * avc , * pavc ;
251
+ struct anon_vma * root = NULL ;
227
252
228
253
list_for_each_entry_reverse (pavc , & src -> anon_vma_chain , same_vma ) {
229
- avc = anon_vma_chain_alloc ();
230
- if (!avc )
231
- goto enomem_failure ;
232
- anon_vma_chain_link (dst , avc , pavc -> anon_vma );
254
+ struct anon_vma * anon_vma ;
255
+
256
+ avc = anon_vma_chain_alloc (GFP_NOWAIT | __GFP_NOWARN );
257
+ if (unlikely (!avc )) {
258
+ unlock_anon_vma_root (root );
259
+ root = NULL ;
260
+ avc = anon_vma_chain_alloc (GFP_KERNEL );
261
+ if (!avc )
262
+ goto enomem_failure ;
263
+ }
264
+ anon_vma = pavc -> anon_vma ;
265
+ root = lock_anon_vma_root (root , anon_vma );
266
+ anon_vma_chain_link (dst , avc , anon_vma );
233
267
}
268
+ unlock_anon_vma_root (root );
234
269
return 0 ;
235
270
236
271
enomem_failure :
@@ -263,7 +298,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
263
298
anon_vma = anon_vma_alloc ();
264
299
if (!anon_vma )
265
300
goto out_error ;
266
- avc = anon_vma_chain_alloc ();
301
+ avc = anon_vma_chain_alloc (GFP_KERNEL );
267
302
if (!avc )
268
303
goto out_error_free_anon_vma ;
269
304
@@ -280,7 +315,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
280
315
get_anon_vma (anon_vma -> root );
281
316
/* Mark this anon_vma as the one where our new (COWed) pages go. */
282
317
vma -> anon_vma = anon_vma ;
318
+ anon_vma_lock (anon_vma );
283
319
anon_vma_chain_link (vma , avc , anon_vma );
320
+ anon_vma_unlock (anon_vma );
284
321
285
322
return 0 ;
286
323
@@ -291,36 +328,43 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
291
328
return - ENOMEM ;
292
329
}
293
330
294
- static void anon_vma_unlink (struct anon_vma_chain * anon_vma_chain )
295
- {
296
- struct anon_vma * anon_vma = anon_vma_chain -> anon_vma ;
297
- int empty ;
298
-
299
- /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
300
- if (!anon_vma )
301
- return ;
302
-
303
- anon_vma_lock (anon_vma );
304
- list_del (& anon_vma_chain -> same_anon_vma );
305
-
306
- /* We must garbage collect the anon_vma if it's empty */
307
- empty = list_empty (& anon_vma -> head );
308
- anon_vma_unlock (anon_vma );
309
-
310
- if (empty )
311
- put_anon_vma (anon_vma );
312
- }
313
-
314
331
void unlink_anon_vmas (struct vm_area_struct * vma )
315
332
{
316
333
struct anon_vma_chain * avc , * next ;
334
+ struct anon_vma * root = NULL ;
317
335
318
336
/*
319
337
* Unlink each anon_vma chained to the VMA. This list is ordered
320
338
* from newest to oldest, ensuring the root anon_vma gets freed last.
321
339
*/
322
340
list_for_each_entry_safe (avc , next , & vma -> anon_vma_chain , same_vma ) {
323
- anon_vma_unlink (avc );
341
+ struct anon_vma * anon_vma = avc -> anon_vma ;
342
+
343
+ root = lock_anon_vma_root (root , anon_vma );
344
+ list_del (& avc -> same_anon_vma );
345
+
346
+ /*
347
+ * Leave empty anon_vmas on the list - we'll need
348
+ * to free them outside the lock.
349
+ */
350
+ if (list_empty (& anon_vma -> head ))
351
+ continue ;
352
+
353
+ list_del (& avc -> same_vma );
354
+ anon_vma_chain_free (avc );
355
+ }
356
+ unlock_anon_vma_root (root );
357
+
358
+ /*
359
+ * Iterate the list once more, it now only contains empty and unlinked
360
+ * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
361
+ * needing to acquire the anon_vma->root->mutex.
362
+ */
363
+ list_for_each_entry_safe (avc , next , & vma -> anon_vma_chain , same_vma ) {
364
+ struct anon_vma * anon_vma = avc -> anon_vma ;
365
+
366
+ put_anon_vma (anon_vma );
367
+
324
368
list_del (& avc -> same_vma );
325
369
anon_vma_chain_free (avc );
326
370
}
0 commit comments