|
8 | 8 | #include <linux/memory.h>
|
9 | 9 | #include <linux/vmalloc.h>
|
10 | 10 | #include <linux/cgroup.h>
|
| 11 | +#include <linux/swapops.h> |
11 | 12 |
|
12 | 13 | static void __meminit
|
13 | 14 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
|
@@ -270,3 +271,199 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
|
270 | 271 | }
|
271 | 272 |
|
272 | 273 | #endif
|
| 274 | + |
| 275 | + |
| 276 | +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
| 277 | + |
| 278 | +static DEFINE_MUTEX(swap_cgroup_mutex); |
| 279 | +struct swap_cgroup_ctrl { |
| 280 | + struct page **map; |
| 281 | + unsigned long length; |
| 282 | +}; |
| 283 | + |
| 284 | +struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; |
| 285 | + |
/*
 * One record per swap entry: the mem_cgroup the entry is accounted to.
 *
 * Storing a full pointer per swap entry is expensive; this could shrink
 * once a cgroup can be referred to by an "id" instead of a pointer.
 */
struct swap_cgroup {
	struct mem_cgroup *val;
};

/* Number of records that fit in one page of the map. */
#define SC_PER_PAGE	(PAGE_SIZE / sizeof(struct swap_cgroup))
/*
 * Mask applied to a swap offset to get the record position inside its page
 * (used as "offset & SC_POS_MASK", which assumes SC_PER_PAGE is a power of
 * two).
 */
#define SC_POS_MASK	(SC_PER_PAGE - 1)
| 295 | + |
| 296 | +/* |
| 297 | + * SwapCgroup implements "lookup" and "exchange" operations. |
| 298 | + * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge |
| 299 | + * against SwapCache. At swap_free(), this is accessed directly from swap. |
| 300 | + * |
| 301 | + * This means: |
| 302 | + * - there is no race in "exchange" when we are accessed via SwapCache, because |
| 303 | + * SwapCache (and its swp_entry) is under lock. |
| 304 | + * - when called via swap_free(), there is no other user of this entry, so no race. |
| 305 | + * Therefore, we don't need a lock around "exchange". |
| 306 | + * |
| 307 | + * TODO: we can push these buffers out to HIGHMEM. |
| 308 | + */ |
| 309 | + |
| 310 | +/* |
| 311 | + * allocate buffer for swap_cgroup. |
| 312 | + */ |
| 313 | +static int swap_cgroup_prepare(int type) |
| 314 | +{ |
| 315 | + struct page *page; |
| 316 | + struct swap_cgroup_ctrl *ctrl; |
| 317 | + unsigned long idx, max; |
| 318 | + |
| 319 | + if (!do_swap_account) |
| 320 | + return 0; |
| 321 | + ctrl = &swap_cgroup_ctrl[type]; |
| 322 | + |
| 323 | + for (idx = 0; idx < ctrl->length; idx++) { |
| 324 | + page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
| 325 | + if (!page) |
| 326 | + goto not_enough_page; |
| 327 | + ctrl->map[idx] = page; |
| 328 | + } |
| 329 | + return 0; |
| 330 | +not_enough_page: |
| 331 | + max = idx; |
| 332 | + for (idx = 0; idx < max; idx++) |
| 333 | + __free_page(ctrl->map[idx]); |
| 334 | + |
| 335 | + return -ENOMEM; |
| 336 | +} |
| 337 | + |
| 338 | +/** |
| 339 | + * swap_cgroup_record - record mem_cgroup for this swp_entry. |
| 340 | + * @ent: swap entry to be recorded into |
| 341 | + * @mem: mem_cgroup to be recorded |
| 342 | + * |
| 343 | + * Returns old value at success, NULL at failure. |
| 344 | + * (Of course, old value can be NULL.) |
| 345 | + */ |
| 346 | +struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) |
| 347 | +{ |
| 348 | + int type = swp_type(ent); |
| 349 | + unsigned long offset = swp_offset(ent); |
| 350 | + unsigned long idx = offset / SC_PER_PAGE; |
| 351 | + unsigned long pos = offset & SC_POS_MASK; |
| 352 | + struct swap_cgroup_ctrl *ctrl; |
| 353 | + struct page *mappage; |
| 354 | + struct swap_cgroup *sc; |
| 355 | + struct mem_cgroup *old; |
| 356 | + |
| 357 | + if (!do_swap_account) |
| 358 | + return NULL; |
| 359 | + |
| 360 | + ctrl = &swap_cgroup_ctrl[type]; |
| 361 | + |
| 362 | + mappage = ctrl->map[idx]; |
| 363 | + sc = page_address(mappage); |
| 364 | + sc += pos; |
| 365 | + old = sc->val; |
| 366 | + sc->val = mem; |
| 367 | + |
| 368 | + return old; |
| 369 | +} |
| 370 | + |
| 371 | +/** |
| 372 | + * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry |
| 373 | + * @ent: swap entry to be looked up. |
| 374 | + * |
| 375 | + * Returns pointer to mem_cgroup at success. NULL at failure. |
| 376 | + */ |
| 377 | +struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) |
| 378 | +{ |
| 379 | + int type = swp_type(ent); |
| 380 | + unsigned long offset = swp_offset(ent); |
| 381 | + unsigned long idx = offset / SC_PER_PAGE; |
| 382 | + unsigned long pos = offset & SC_POS_MASK; |
| 383 | + struct swap_cgroup_ctrl *ctrl; |
| 384 | + struct page *mappage; |
| 385 | + struct swap_cgroup *sc; |
| 386 | + struct mem_cgroup *ret; |
| 387 | + |
| 388 | + if (!do_swap_account) |
| 389 | + return NULL; |
| 390 | + |
| 391 | + ctrl = &swap_cgroup_ctrl[type]; |
| 392 | + mappage = ctrl->map[idx]; |
| 393 | + sc = page_address(mappage); |
| 394 | + sc += pos; |
| 395 | + ret = sc->val; |
| 396 | + return ret; |
| 397 | +} |
| 398 | + |
| 399 | +int swap_cgroup_swapon(int type, unsigned long max_pages) |
| 400 | +{ |
| 401 | + void *array; |
| 402 | + unsigned long array_size; |
| 403 | + unsigned long length; |
| 404 | + struct swap_cgroup_ctrl *ctrl; |
| 405 | + |
| 406 | + if (!do_swap_account) |
| 407 | + return 0; |
| 408 | + |
| 409 | + length = ((max_pages/SC_PER_PAGE) + 1); |
| 410 | + array_size = length * sizeof(void *); |
| 411 | + |
| 412 | + array = vmalloc(array_size); |
| 413 | + if (!array) |
| 414 | + goto nomem; |
| 415 | + |
| 416 | + memset(array, 0, array_size); |
| 417 | + ctrl = &swap_cgroup_ctrl[type]; |
| 418 | + mutex_lock(&swap_cgroup_mutex); |
| 419 | + ctrl->length = length; |
| 420 | + ctrl->map = array; |
| 421 | + if (swap_cgroup_prepare(type)) { |
| 422 | + /* memory shortage */ |
| 423 | + ctrl->map = NULL; |
| 424 | + ctrl->length = 0; |
| 425 | + vfree(array); |
| 426 | + mutex_unlock(&swap_cgroup_mutex); |
| 427 | + goto nomem; |
| 428 | + } |
| 429 | + mutex_unlock(&swap_cgroup_mutex); |
| 430 | + |
| 431 | + printk(KERN_INFO |
| 432 | + "swap_cgroup: uses %ld bytes of vmalloc for pointer array space" |
| 433 | + " and %ld bytes to hold mem_cgroup pointers on swap\n", |
| 434 | + array_size, length * PAGE_SIZE); |
| 435 | + printk(KERN_INFO |
| 436 | + "swap_cgroup can be disabled by noswapaccount boot option.\n"); |
| 437 | + |
| 438 | + return 0; |
| 439 | +nomem: |
| 440 | + printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n"); |
| 441 | + printk(KERN_INFO |
| 442 | + "swap_cgroup can be disabled by noswapaccount boot option\n"); |
| 443 | + return -ENOMEM; |
| 444 | +} |
| 445 | + |
| 446 | +void swap_cgroup_swapoff(int type) |
| 447 | +{ |
| 448 | + int i; |
| 449 | + struct swap_cgroup_ctrl *ctrl; |
| 450 | + |
| 451 | + if (!do_swap_account) |
| 452 | + return; |
| 453 | + |
| 454 | + mutex_lock(&swap_cgroup_mutex); |
| 455 | + ctrl = &swap_cgroup_ctrl[type]; |
| 456 | + if (ctrl->map) { |
| 457 | + for (i = 0; i < ctrl->length; i++) { |
| 458 | + struct page *page = ctrl->map[i]; |
| 459 | + if (page) |
| 460 | + __free_page(page); |
| 461 | + } |
| 462 | + vfree(ctrl->map); |
| 463 | + ctrl->map = NULL; |
| 464 | + ctrl->length = 0; |
| 465 | + } |
| 466 | + mutex_unlock(&swap_cgroup_mutex); |
| 467 | +} |
| 468 | + |
| 469 | +#endif |
0 commit comments