
Commit 9d66e23

Josef Bacik authored and chrismason-xx committed
Btrfs: load free space cache if it exists
This patch actually loads the free space cache if it exists. The only thing that really changes here is that we need to cache the block group if we're going to remove an extent from it. Previously we did not do this since the caching kthread would pick it up. With the on-disk cache we don't have that luxury, so we need to make sure we read the on-disk cache in first and then remove the extent; that way, when the extent is unpinned, the free space is added to the block group. This has been tested with all sorts of things.

Signed-off-by: Josef Bacik <josef@redhat.com>
1 parent 0cb59c9 commit 9d66e23
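
Note: at a glance, the patch threads a transaction handle and a load_cache_only flag through cache_block_group() so callers can pull the on-disk free space cache in directly instead of waiting for the caching kthread. The userspace toy below is only a sketch of that control flow for orientation; the names mirror the diff, but the locking, transaction state, and load_free_space_cache() itself are stubbed out here and are not the kernel implementation.

/* Toy userspace model of the new cache_block_group() flow (not kernel code). */
#include <stdio.h>

enum cache_state { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

struct block_group {
        enum cache_state cached;
        unsigned long long last_byte_to_unpin;
        int cache_on_disk;      /* stand-in for a written free space cache */
};

/* Stub standing in for load_free_space_cache(): 1 on success, 0 otherwise. */
static int load_free_space_cache(struct block_group *bg)
{
        return bg->cache_on_disk ? 1 : 0;
}

static int cache_block_group(struct block_group *bg, int in_commit,
                             int load_cache_only)
{
        if (bg->cached != CACHE_NO)
                return 0;

        /* Outside a commit we may try the on-disk cache first. */
        if (!in_commit) {
                bg->cached = CACHE_STARTED;
                if (load_free_space_cache(bg) == 1) {
                        bg->cached = CACHE_FINISHED;
                        bg->last_byte_to_unpin = ~0ULL;
                        return 0;
                }
                bg->cached = CACHE_NO;
        }

        /* Callers that only wanted the fast load path stop here. */
        if (load_cache_only)
                return 0;

        /* Otherwise the slow path (caching kthread) would be started here. */
        bg->cached = CACHE_STARTED;
        return 0;
}

int main(void)
{
        struct block_group bg = { CACHE_NO, 0, 1 };

        cache_block_group(&bg, 0, 1);
        printf("cached state: %d\n", bg.cached);        /* CACHE_FINISHED == 2 */
        return 0;
}

Built with any plain C compiler, it prints the final cache state after a load-only call, mirroring the update_block_group() use of cache_block_group(cache, trans, 1) in the diff below.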

File tree

3 files changed: +345 −3 lines changed


fs/btrfs/extent-tree.c

Lines changed: 47 additions & 3 deletions
@@ -421,7 +421,9 @@ static int caching_kthread(void *data)
         return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+                             struct btrfs_trans_handle *trans,
+                             int load_cache_only)
 {
         struct btrfs_fs_info *fs_info = cache->fs_info;
         struct btrfs_caching_control *caching_ctl;
@@ -432,6 +434,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
         if (cache->cached != BTRFS_CACHE_NO)
                 return 0;
 
+        /*
+         * We can't do the read from on-disk cache during a commit since we need
+         * to have the normal tree locking.
+         */
+        if (!trans->transaction->in_commit) {
+                spin_lock(&cache->lock);
+                if (cache->cached != BTRFS_CACHE_NO) {
+                        spin_unlock(&cache->lock);
+                        return 0;
+                }
+                cache->cached = BTRFS_CACHE_STARTED;
+                spin_unlock(&cache->lock);
+
+                ret = load_free_space_cache(fs_info, cache);
+
+                spin_lock(&cache->lock);
+                if (ret == 1) {
+                        cache->cached = BTRFS_CACHE_FINISHED;
+                        cache->last_byte_to_unpin = (u64)-1;
+                } else {
+                        cache->cached = BTRFS_CACHE_NO;
+                }
+                spin_unlock(&cache->lock);
+                if (ret == 1)
+                        return 0;
+        }
+
+        if (load_cache_only)
+                return 0;
+
         caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
         BUG_ON(!caching_ctl);
 
@@ -3984,6 +4016,14 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                         factor = 2;
                 else
                         factor = 1;
+                /*
+                 * If this block group has free space cache written out, we
+                 * need to make sure to load it if we are removing space. This
+                 * is because we need the unpinning stage to actually add the
+                 * space back to the block group, otherwise we will leak space.
+                 */
+                if (!alloc && cache->cached == BTRFS_CACHE_NO)
+                        cache_block_group(cache, trans, 1);
 
                 byte_in_group = bytenr - cache->key.objectid;
                 WARN_ON(byte_in_group > cache->key.offset);
@@ -4828,6 +4868,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                         u64 free_percent;
 
+                        ret = cache_block_group(block_group, trans, 1);
+                        if (block_group->cached == BTRFS_CACHE_FINISHED)
+                                goto have_block_group;
+
                         free_percent = btrfs_block_group_used(&block_group->item);
                         free_percent *= 100;
                         free_percent = div64_u64(free_percent,
@@ -4848,7 +4892,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                         if (loop > LOOP_CACHING_NOWAIT ||
                             (loop > LOOP_FIND_IDEAL &&
                              atomic_read(&space_info->caching_threads) < 2)) {
-                                ret = cache_block_group(block_group);
+                                ret = cache_block_group(block_group, trans, 0);
                                 BUG_ON(ret);
                         }
                         found_uncached_bg = true;
@@ -5405,7 +5449,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
         u64 num_bytes = ins->offset;
 
         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-        cache_block_group(block_group);
+        cache_block_group(block_group, trans, 0);
         caching_ctl = get_caching_control(block_group);
 
         if (!caching_ctl) {

fs/btrfs/free-space-cache.c

Lines changed: 296 additions & 0 deletions
@@ -187,6 +187,302 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         return btrfs_update_inode(trans, root, inode);
 }
 
+static int readahead_cache(struct inode *inode)
+{
+        struct file_ra_state *ra;
+        unsigned long last_index;
+
+        ra = kzalloc(sizeof(*ra), GFP_NOFS);
+        if (!ra)
+                return -ENOMEM;
+
+        file_ra_state_init(ra, inode->i_mapping);
+        last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+        page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+        kfree(ra);
+
+        return 0;
+}
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *block_group)
+{
+        struct btrfs_root *root = fs_info->tree_root;
+        struct inode *inode;
+        struct btrfs_free_space_header *header;
+        struct extent_buffer *leaf;
+        struct page *page;
+        struct btrfs_path *path;
+        u32 *checksums = NULL, *crc;
+        char *disk_crcs = NULL;
+        struct btrfs_key key;
+        struct list_head bitmaps;
+        u64 num_entries;
+        u64 num_bitmaps;
+        u64 generation;
+        u32 cur_crc = ~(u32)0;
+        pgoff_t index = 0;
+        unsigned long first_page_offset;
+        int num_checksums;
+        int ret = 0;
+
+        /*
+         * If we're unmounting then just return, since this does a search on the
+         * normal root and not the commit root and we could deadlock.
+         */
+        smp_mb();
+        if (fs_info->closing)
+                return 0;
+
+        /*
+         * If this block group has been marked to be cleared for one reason or
+         * another then we can't trust the on disk cache, so just return.
+         */
+        spin_lock(&block_group->lock);
+        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+                printk(KERN_ERR "not reading block group %llu, dcs is %d\n", block_group->key.objectid,
+                       block_group->disk_cache_state);
+                spin_unlock(&block_group->lock);
+                return 0;
+        }
+        spin_unlock(&block_group->lock);
+
+        INIT_LIST_HEAD(&bitmaps);
+
+        path = btrfs_alloc_path();
+        if (!path)
+                return 0;
+
+        inode = lookup_free_space_inode(root, block_group, path);
+        if (IS_ERR(inode)) {
+                btrfs_free_path(path);
+                return 0;
+        }
+
+        /* Nothing in the space cache, goodbye */
+        if (!i_size_read(inode)) {
+                btrfs_free_path(path);
+                goto out;
+        }
+
+        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+        key.offset = block_group->key.objectid;
+        key.type = 0;
+
+        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+        if (ret) {
+                btrfs_free_path(path);
+                goto out;
+        }
+
+        leaf = path->nodes[0];
+        header = btrfs_item_ptr(leaf, path->slots[0],
+                                struct btrfs_free_space_header);
+        num_entries = btrfs_free_space_entries(leaf, header);
+        num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+        generation = btrfs_free_space_generation(leaf, header);
+        btrfs_free_path(path);
+
+        if (BTRFS_I(inode)->generation != generation) {
+                printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+                       " not match free space cache generation (%llu) for "
+                       "block group %llu\n",
+                       (unsigned long long)BTRFS_I(inode)->generation,
+                       (unsigned long long)generation,
+                       (unsigned long long)block_group->key.objectid);
+                goto out;
+        }
+
+        if (!num_entries)
+                goto out;
+
+        /* Setup everything for doing checksumming */
+        num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+        checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+        if (!checksums)
+                goto out;
+        first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+        disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+        if (!disk_crcs)
+                goto out;
+
+        ret = readahead_cache(inode);
+        if (ret) {
+                ret = 0;
+                goto out;
+        }
+
+        while (1) {
+                struct btrfs_free_space_entry *entry;
+                struct btrfs_free_space *e;
+                void *addr;
+                unsigned long offset = 0;
+                unsigned long start_offset = 0;
+                int need_loop = 0;
+
+                if (!num_entries && !num_bitmaps)
+                        break;
+
+                if (index == 0) {
+                        start_offset = first_page_offset;
+                        offset = start_offset;
+                }
+
+                page = grab_cache_page(inode->i_mapping, index);
+                if (!page) {
+                        ret = 0;
+                        goto free_cache;
+                }
+
+                if (!PageUptodate(page)) {
+                        btrfs_readpage(NULL, page);
+                        lock_page(page);
+                        if (!PageUptodate(page)) {
+                                unlock_page(page);
+                                page_cache_release(page);
+                                printk(KERN_ERR "btrfs: error reading free "
+                                       "space cache: %llu\n",
+                                       (unsigned long long)
+                                       block_group->key.objectid);
+                                goto free_cache;
+                        }
+                }
+                addr = kmap(page);
+
+                if (index == 0) {
+                        u64 *gen;
+
+                        memcpy(disk_crcs, addr, first_page_offset);
+                        gen = addr + (sizeof(u32) * num_checksums);
+                        if (*gen != BTRFS_I(inode)->generation) {
+                                printk(KERN_ERR "btrfs: space cache generation"
+                                       " (%llu) does not match inode (%llu) "
+                                       "for block group %llu\n",
+                                       (unsigned long long)*gen,
+                                       (unsigned long long)
+                                       BTRFS_I(inode)->generation,
+                                       (unsigned long long)
+                                       block_group->key.objectid);
+                                kunmap(page);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                goto free_cache;
+                        }
+                        crc = (u32 *)disk_crcs;
+                }
+                entry = addr + start_offset;
+
+                /* First lets check our crc before we do anything fun */
+                cur_crc = ~(u32)0;
+                cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+                                          PAGE_CACHE_SIZE - start_offset);
+                btrfs_csum_final(cur_crc, (char *)&cur_crc);
+                if (cur_crc != *crc) {
+                        printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+                               "block group %llu\n", index,
+                               (unsigned long long)block_group->key.objectid);
+                        kunmap(page);
+                        unlock_page(page);
+                        page_cache_release(page);
+                        goto free_cache;
+                }
+                crc++;
+
+                while (1) {
+                        if (!num_entries)
+                                break;
+
+                        need_loop = 1;
+                        e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+                        if (!e) {
+                                kunmap(page);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                goto free_cache;
+                        }
+
+                        e->offset = le64_to_cpu(entry->offset);
+                        e->bytes = le64_to_cpu(entry->bytes);
+                        if (!e->bytes) {
+                                kunmap(page);
+                                kfree(e);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                goto free_cache;
+                        }
+
+                        if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+                                spin_lock(&block_group->tree_lock);
+                                ret = link_free_space(block_group, e);
+                                spin_unlock(&block_group->tree_lock);
+                                BUG_ON(ret);
+                        } else {
+                                e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+                                if (!e->bitmap) {
+                                        kunmap(page);
+                                        kfree(e);
+                                        unlock_page(page);
+                                        page_cache_release(page);
+                                        goto free_cache;
+                                }
+                                spin_lock(&block_group->tree_lock);
+                                ret = link_free_space(block_group, e);
+                                block_group->total_bitmaps++;
+                                recalculate_thresholds(block_group);
+                                spin_unlock(&block_group->tree_lock);
+                                list_add_tail(&e->list, &bitmaps);
+                        }
+
+                        num_entries--;
+                        offset += sizeof(struct btrfs_free_space_entry);
+                        if (offset + sizeof(struct btrfs_free_space_entry) >=
+                            PAGE_CACHE_SIZE)
+                                break;
+                        entry++;
+                }
+
+                /*
+                 * We read an entry out of this page, we need to move on to the
+                 * next page.
+                 */
+                if (need_loop) {
+                        kunmap(page);
+                        goto next;
+                }
+
+                /*
+                 * We add the bitmaps at the end of the entries in order that
+                 * the bitmap entries are added to the cache.
+                 */
+                e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+                list_del_init(&e->list);
+                memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+                kunmap(page);
+                num_bitmaps--;
+next:
+                unlock_page(page);
+                page_cache_release(page);
+                index++;
+        }
+
+        ret = 1;
+out:
+        kfree(checksums);
+        kfree(disk_crcs);
+        iput(inode);
+        return ret;
+
+free_cache:
+        /* This cache is bogus, make sure it gets cleared */
+        spin_lock(&block_group->lock);
+        block_group->disk_cache_state = BTRFS_DC_CLEAR;
+        spin_unlock(&block_group->lock);
+        btrfs_remove_free_space_cache(block_group);
+        goto out;
+}
+
 int btrfs_write_out_cache(struct btrfs_root *root,
                           struct btrfs_trans_handle *trans,
                           struct btrfs_block_group_cache *block_group,
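
For orientation, the file layout that load_free_space_cache() walks above can be summarized roughly as follows. This is inferred from the parsing code in this diff (a crc array and the generation at the front of page 0, then packed entries, with bitmap pages following the entry pages); it is an illustrative sketch, not the canonical btrfs on-disk definitions, and the identifiers below are local stand-ins.

/* Illustrative stand-in for the cache file layout as read above; inferred
 * from the parser in this commit, not copied from the btrfs headers. */
#include <stdint.h>
#include <stdio.h>

#define CACHE_PAGE_SIZE 4096    /* stand-in for PAGE_CACHE_SIZE */

/* Entry kinds as the parser distinguishes them: plain extents are linked
 * straight into the free space tree; each bitmap entry is followed by one
 * full bitmap page later in the file. Values here are assumptions. */
enum { CACHE_ENTRY_EXTENT = 1, CACHE_ENTRY_BITMAP = 2 };

/* One packed entry, matching the fields the loop copies out:
 * le64_to_cpu(entry->offset), le64_to_cpu(entry->bytes), entry->type. */
struct cache_entry {
        uint64_t offset;        /* little-endian on disk */
        uint64_t bytes;
        uint8_t type;
} __attribute__((packed));

/*
 * Page 0:      [u32 crc for each page of the file][u64 generation][entries...]
 * Pages 1..n:  more packed entries, or whole bitmap pages, each page checked
 *              against its crc from the array on page 0.
 */

int main(void)
{
        printf("packed entry size: %zu bytes\n", sizeof(struct cache_entry));
        return 0;
}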
