Skip to content

Commit 3530c18

Browse files
committed
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (64 commits) ext4: Update documentation about quota mount options ext4: replace MAX_DEFRAG_SIZE with EXT_MAX_BLOCK ext4: Fix the alloc on close after a truncate heuristic ext4: Add a tracepoint for ext4_alloc_da_blocks() ext4: store EXT4_EXT_MIGRATE in i_state instead of i_flags ext4: limit block allocations for indirect-block files to < 2^32 ext4: Fix different block exchange issue in EXT4_IOC_MOVE_EXT ext4: Add null extent check to ext_get_path ext4: Replace BUG_ON() with ext4_error() in move_extents.c ext4: Replace get_ext_path macro with an inline function ext4: Fix include/trace/events/ext4.h to work with Systemtap ext4: Fix initialization of s_flex_groups ext4: Always set dx_node's fake_dirent explicitly. ext4: Fix async commit mode to be safe by using a barrier ext4: Don't update superblock write time when filesystem is read-only ext4: Clarify the locking details in mballoc ext4: check for need init flag in ext4_mb_load_buddy ext4: move ext4_mb_init_group() function earlier in the mballoc.c ext4: Make non-journal fsync work properly ext4: Assure that metadata blocks are written during fsync in no journal mode ...
2 parents 6952b61 + 1358870 commit 3530c18

File tree

25 files changed

+1000
-601
lines changed

25 files changed

+1000
-601
lines changed

Documentation/filesystems/ext4.txt

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will
134134
mount options "ro,noload" can be used to prevent
135135
writes to the filesystem.
136136

137-
journal_checksum Enable checksumming of the journal transactions.
138-
This will allow the recovery code in e2fsck and the
139-
kernel to detect corruption in the kernel. It is a
140-
compatible change and will be ignored by older kernels.
141-
142137
journal_async_commit Commit block can be written to disk without waiting
143138
for descriptor blocks. If enabled older kernels cannot
144-
mount the device. This will enable 'journal_checksum'
145-
internally.
139+
mount the device.
146140

147141
journal=update Update the ext4 file system's journal to the current
148142
format.
@@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks.
263257

264258
sb=n Use alternate superblock at this location.
265259

266-
quota
267-
noquota
268-
grpquota
269-
usrquota
260+
quota These options are ignored by the filesystem. They
261+
noquota are used only by quota tools to recognize volumes
262+
grpquota where quota should be turned on. See documentation
263+
usrquota in the quota-tools package for more details
264+
(http://sourceforge.net/projects/linuxquota).
265+
266+
jqfmt=<quota type> These options tell filesystem details about quota
267+
usrjquota=<file> so that quota information can be properly updated
268+
grpjquota=<file> during journal replay. They replace the above
269+
quota options. See documentation in the quota-tools
270+
package for more details
271+
(http://sourceforge.net/projects/linuxquota).
270272

271273
bh (*) ext4 associates buffer heads to data pages to
272274
nobh (a) cache disk block mapping information

fs/ext4/Kconfig

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ config EXT4DEV_COMPAT
3737

3838
To enable backwards compatibility so that systems that are
3939
still expecting to mount ext4 filesystems using ext4dev,
40-
chose Y here. This feature will go away by 2.6.31, so
40+
choose Y here. This feature will go away by 2.6.31, so
4141
please arrange to get your userspace programs fixed!
4242

4343
config EXT4_FS_XATTR
@@ -77,3 +77,12 @@ config EXT4_FS_SECURITY
7777

7878
If you are not using a security module that requires using
7979
extended attributes for file security labels, say N.
80+
81+
config EXT4_DEBUG
82+
bool "EXT4 debugging support"
83+
depends on EXT4_FS
84+
help
85+
Enables run-time debugging support for the ext4 filesystem.
86+
87+
If you select Y here, then you will be able to turn on debugging
88+
with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"

fs/ext4/balloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
478478
* new bitmap information
479479
*/
480480
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
481-
ext4_mb_update_group_info(grp, blocks_freed);
481+
grp->bb_free += blocks_freed;
482482
up_write(&grp->alloc_sem);
483483

484484
/* We dirtied the bitmap block */

fs/ext4/ext4.h

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t;
6767

6868

6969
/* prefer goal again. length */
70-
#define EXT4_MB_HINT_MERGE 1
70+
#define EXT4_MB_HINT_MERGE 0x0001
7171
/* blocks already reserved */
72-
#define EXT4_MB_HINT_RESERVED 2
72+
#define EXT4_MB_HINT_RESERVED 0x0002
7373
/* metadata is being allocated */
74-
#define EXT4_MB_HINT_METADATA 4
74+
#define EXT4_MB_HINT_METADATA 0x0004
7575
/* first blocks in the file */
76-
#define EXT4_MB_HINT_FIRST 8
76+
#define EXT4_MB_HINT_FIRST 0x0008
7777
/* search for the best chunk */
78-
#define EXT4_MB_HINT_BEST 16
78+
#define EXT4_MB_HINT_BEST 0x0010
7979
/* data is being allocated */
80-
#define EXT4_MB_HINT_DATA 32
80+
#define EXT4_MB_HINT_DATA 0x0020
8181
/* don't preallocate (for tails) */
82-
#define EXT4_MB_HINT_NOPREALLOC 64
82+
#define EXT4_MB_HINT_NOPREALLOC 0x0040
8383
/* allocate for locality group */
84-
#define EXT4_MB_HINT_GROUP_ALLOC 128
84+
#define EXT4_MB_HINT_GROUP_ALLOC 0x0080
8585
/* allocate goal blocks or none */
86-
#define EXT4_MB_HINT_GOAL_ONLY 256
86+
#define EXT4_MB_HINT_GOAL_ONLY 0x0100
8787
/* goal is meaningful */
88-
#define EXT4_MB_HINT_TRY_GOAL 512
88+
#define EXT4_MB_HINT_TRY_GOAL 0x0200
8989
/* blocks already pre-reserved by delayed allocation */
90-
#define EXT4_MB_DELALLOC_RESERVED 1024
90+
#define EXT4_MB_DELALLOC_RESERVED 0x0400
91+
/* We are doing stream allocation */
92+
#define EXT4_MB_STREAM_ALLOC 0x0800
9193

9294

9395
struct ext4_allocation_request {
@@ -111,6 +113,21 @@ struct ext4_allocation_request {
111113
unsigned int flags;
112114
};
113115

116+
/*
117+
* For delayed allocation tracking
118+
*/
119+
struct mpage_da_data {
120+
struct inode *inode;
121+
sector_t b_blocknr; /* start block number of extent */
122+
size_t b_size; /* size of extent */
123+
unsigned long b_state; /* state of the extent */
124+
unsigned long first_page, next_page; /* extent of pages */
125+
struct writeback_control *wbc;
126+
int io_done;
127+
int pages_written;
128+
int retval;
129+
};
130+
114131
/*
115132
* Special inodes numbers
116133
*/
@@ -251,7 +268,6 @@ struct flex_groups {
251268
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
252269
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
253270
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
254-
#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
255271
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
256272

257273
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
@@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
289305
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
290306
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
291307
#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
308+
#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */
292309

293310
/* Used to pass group descriptor data when online resize is done */
294311
struct ext4_new_group_input {
@@ -386,6 +403,9 @@ struct ext4_mount_options {
386403
#endif
387404
};
388405

406+
/* Max physical block we can address w/o extents */
407+
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
408+
389409
/*
390410
* Structure of an inode on the disk
391411
*/
@@ -456,7 +476,6 @@ struct move_extent {
456476
__u64 len; /* block length to be moved */
457477
__u64 moved_len; /* moved block length */
458478
};
459-
#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
460479

461480
#define EXT4_EPOCH_BITS 2
462481
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@@ -694,7 +713,6 @@ struct ext4_inode_info {
694713
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
695714
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
696715
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
697-
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
698716
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
699717
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
700718
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
@@ -841,6 +859,7 @@ struct ext4_sb_info {
841859
unsigned long s_gdb_count; /* Number of group descriptor blocks */
842860
unsigned long s_desc_per_block; /* Number of group descriptors per block */
843861
ext4_group_t s_groups_count; /* Number of groups in the fs */
862+
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
844863
unsigned long s_overhead_last; /* Last calculated overhead */
845864
unsigned long s_blocks_last; /* Last seen block count */
846865
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
@@ -950,6 +969,7 @@ struct ext4_sb_info {
950969
atomic_t s_mb_lost_chunks;
951970
atomic_t s_mb_preallocated;
952971
atomic_t s_mb_discarded;
972+
atomic_t s_lock_busy;
953973

954974
/* locality groups */
955975
struct ext4_locality_group *s_locality_groups;
@@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *,
13401360
ext4_fsblk_t, unsigned long, int, unsigned long *);
13411361
extern int ext4_mb_add_groupinfo(struct super_block *sb,
13421362
ext4_group_t i, struct ext4_group_desc *desc);
1343-
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1344-
ext4_grpblk_t add);
13451363
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
13461364
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
13471365
ext4_group_t, int);
@@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
13671385
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
13681386
extern int ext4_can_truncate(struct inode *inode);
13691387
extern void ext4_truncate(struct inode *);
1388+
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
13701389
extern void ext4_set_inode_flags(struct inode *);
13711390
extern void ext4_get_inode_flags(struct ext4_inode_info *);
13721391
extern int ext4_alloc_da_blocks(struct inode *inode);
@@ -1575,31 +1594,61 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
15751594
struct ext4_group_info {
15761595
unsigned long bb_state;
15771596
struct rb_root bb_free_root;
1578-
unsigned short bb_first_free;
1579-
unsigned short bb_free;
1580-
unsigned short bb_fragments;
1597+
ext4_grpblk_t bb_first_free; /* first free block */
1598+
ext4_grpblk_t bb_free; /* total free blocks */
1599+
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
15811600
struct list_head bb_prealloc_list;
15821601
#ifdef DOUBLE_CHECK
15831602
void *bb_bitmap;
15841603
#endif
15851604
struct rw_semaphore alloc_sem;
1586-
unsigned short bb_counters[];
1605+
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
1606+
* regions, index is order.
1607+
* bb_counters[3] = 5 means
1608+
* 5 free 8-block regions. */
15871609
};
15881610

15891611
#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
15901612

15911613
#define EXT4_MB_GRP_NEED_INIT(grp) \
15921614
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
15931615

1616+
#define EXT4_MAX_CONTENTION 8
1617+
#define EXT4_CONTENTION_THRESHOLD 2
1618+
15941619
static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
15951620
ext4_group_t group)
15961621
{
15971622
return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
15981623
}
15991624

1625+
/*
1626+
* Returns true if the filesystem is busy enough that attempts to
1627+
* access the block group locks have run into contention.
1628+
*/
1629+
static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
1630+
{
1631+
return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
1632+
}
1633+
16001634
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
16011635
{
1602-
spin_lock(ext4_group_lock_ptr(sb, group));
1636+
spinlock_t *lock = ext4_group_lock_ptr(sb, group);
1637+
if (spin_trylock(lock))
1638+
/*
1639+
* We're able to grab the lock right away, so drop the
1640+
* lock contention counter.
1641+
*/
1642+
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
1643+
else {
1644+
/*
1645+
* The lock is busy, so bump the contention counter,
1646+
* and then wait on the spin lock.
1647+
*/
1648+
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
1649+
EXT4_MAX_CONTENTION);
1650+
spin_lock(lock);
1651+
}
16031652
}
16041653

16051654
static inline void ext4_unlock_group(struct super_block *sb,

fs/ext4/ext4_extents.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@
4343
#define CHECK_BINSEARCH__
4444

4545
/*
46-
* If EXT_DEBUG is defined you can use the 'extdebug' mount option
47-
* to get lots of info about what's going on.
46+
* Turn on EXT_DEBUG to get lots of info about extents operations.
4847
*/
4948
#define EXT_DEBUG__
5049
#ifdef EXT_DEBUG
@@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
138137
#define EXT_BREAK 1
139138
#define EXT_REPEAT 2
140139

140+
/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
141141
#define EXT_MAX_BLOCK 0xffffffff
142142

143143
/*

fs/ext4/ext4_jbd2.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
4444
handle, err);
4545
}
4646
else
47-
brelse(bh);
47+
bforget(bh);
4848
return err;
4949
}
5050

@@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
6060
handle, err);
6161
}
6262
else
63-
brelse(bh);
63+
bforget(bh);
6464
return err;
6565
}
6666

@@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
8989
ext4_journal_abort_handle(where, __func__, bh,
9090
handle, err);
9191
} else {
92-
mark_buffer_dirty(bh);
92+
if (inode && bh)
93+
mark_buffer_dirty_inode(bh, inode);
94+
else
95+
mark_buffer_dirty(bh);
9396
if (inode && inode_needs_sync(inode)) {
9497
sync_dirty_buffer(bh);
9598
if (buffer_req(bh) && !buffer_uptodate(bh)) {

0 commit comments

Comments
 (0)