@@ -3602,6 +3602,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3602
3602
* b> Splits in two extents: Write is happening at either end of the extent
3603
3603
* c> Splits in three extents: Someone is writing in middle of the extent
3604
3604
*
3605
+ * This works the same way in the case of initialized -> unwritten conversion.
3606
+ *
3605
3607
* One or more index blocks may be needed if the extent tree grows after
3606
3608
* the uninitialized extent split. To prevent ENOSPC occur at the IO
3607
3609
* complete, we need to split the uninitialized extent before DIO submit
@@ -3612,7 +3614,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3612
3614
*
3613
3615
* Returns the size of uninitialized extent to be written on success.
3614
3616
*/
3615
- static int ext4_split_unwritten_extents (handle_t * handle ,
3617
+ static int ext4_split_convert_extents (handle_t * handle ,
3616
3618
struct inode * inode ,
3617
3619
struct ext4_map_blocks * map ,
3618
3620
struct ext4_ext_path * path ,
@@ -3624,9 +3626,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3624
3626
unsigned int ee_len ;
3625
3627
int split_flag = 0 , depth ;
3626
3628
3627
- ext_debug ("ext4_split_unwritten_extents : inode %lu, logical"
3628
- "block %llu, max_blocks %u\n" , inode -> i_ino ,
3629
- (unsigned long long )map -> m_lblk , map -> m_len );
3629
+ ext_debug ("%s : inode %lu, logical block %llu, max_blocks %u\n" ,
3630
+ __func__ , inode -> i_ino ,
3631
+ (unsigned long long )map -> m_lblk , map -> m_len );
3630
3632
3631
3633
eof_block = (inode -> i_size + inode -> i_sb -> s_blocksize - 1 ) >>
3632
3634
inode -> i_sb -> s_blocksize_bits ;
@@ -3641,14 +3643,73 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3641
3643
ee_block = le32_to_cpu (ex -> ee_block );
3642
3644
ee_len = ext4_ext_get_actual_len (ex );
3643
3645
3644
- split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0 ;
3645
- split_flag |= EXT4_EXT_MARK_UNINIT2 ;
3646
- if (flags & EXT4_GET_BLOCKS_CONVERT )
3647
- split_flag |= EXT4_EXT_DATA_VALID2 ;
3646
+ /* Convert to unwritten */
3647
+ if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN ) {
3648
+ split_flag |= EXT4_EXT_DATA_VALID1 ;
3649
+ /* Convert to initialized */
3650
+ } else if (flags & EXT4_GET_BLOCKS_CONVERT ) {
3651
+ split_flag |= ee_block + ee_len <= eof_block ?
3652
+ EXT4_EXT_MAY_ZEROOUT : 0 ;
3653
+ split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2 );
3654
+ }
3648
3655
flags |= EXT4_GET_BLOCKS_PRE_IO ;
3649
3656
return ext4_split_extent (handle , inode , path , map , split_flag , flags );
3650
3657
}
3651
3658
3659
/*
 * Convert the initialized extent covering map->m_lblk into an unwritten
 * (uninitialized) extent.
 *
 * If the extent found in @path does not start exactly at map->m_lblk, or is
 * longer than the requested range, it is first trimmed with
 * ext4_split_convert_extents(EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) so that only
 * the requested portion is converted; the extent path is then re-looked-up,
 * since the split may have reshaped the tree.
 *
 * Returns 0 on success or a negative error code.
 */
static int ext4_convert_initialized_extents(handle_t *handle,
					    struct inode *inode,
					    struct ext4_map_blocks *map,
					    struct ext4_ext_path *path)
{
	struct ext4_extent *ex;
	ext4_lblk_t ee_block;
	unsigned int ee_len;
	int depth;
	int err = 0;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);

	ext_debug("%s: inode %lu, logical"
		"block %llu, max_blocks %u\n", __func__, inode->i_ino,
		(unsigned long long)ee_block, ee_len);

	/*
	 * Split first if the found extent is not an exact fit for the
	 * requested range, then re-find the extent at m_lblk because the
	 * split invalidates the cached path.
	 */
	if (ee_block != map->m_lblk || ee_len > map->m_len) {
		err = ext4_split_convert_extents(handle, inode, map, path,
				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
		if (err < 0)
			goto out;
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out;
		}
		depth = ext_depth(inode);
		ex = path[depth].p_ext;
	}

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto out;
	/* first mark the extent as uninitialized */
	ext4_ext_mark_uninitialized(ex);

	/* note: ext4_ext_correct_indexes() isn't needed here because
	 * borders are not changed
	 */
	ext4_ext_try_to_merge(handle, inode, path, ex);

	/* Mark modified extent as dirty */
	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
	ext4_ext_show_leaf(inode, path);
	return err;
}
3711
+
3712
+
3652
3713
static int ext4_convert_unwritten_extents_endio (handle_t * handle ,
3653
3714
struct inode * inode ,
3654
3715
struct ext4_map_blocks * map ,
@@ -3682,8 +3743,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3682
3743
inode -> i_ino , (unsigned long long )ee_block , ee_len ,
3683
3744
(unsigned long long )map -> m_lblk , map -> m_len );
3684
3745
#endif
3685
- err = ext4_split_unwritten_extents (handle , inode , map , path ,
3686
- EXT4_GET_BLOCKS_CONVERT );
3746
+ err = ext4_split_convert_extents (handle , inode , map , path ,
3747
+ EXT4_GET_BLOCKS_CONVERT );
3687
3748
if (err < 0 )
3688
3749
goto out ;
3689
3750
ext4_ext_drop_refs (path );
@@ -3883,6 +3944,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3883
3944
return allocated_clusters ;
3884
3945
}
3885
3946
3947
+ static int
3948
+ ext4_ext_convert_initialized_extent (handle_t * handle , struct inode * inode ,
3949
+ struct ext4_map_blocks * map ,
3950
+ struct ext4_ext_path * path , int flags ,
3951
+ unsigned int allocated , ext4_fsblk_t newblock )
3952
+ {
3953
+ int ret = 0 ;
3954
+ int err = 0 ;
3955
+
3956
+ /*
3957
+ * Make sure that the extent is no bigger than we support with
3958
+ * uninitialized extent
3959
+ */
3960
+ if (map -> m_len > EXT_UNINIT_MAX_LEN )
3961
+ map -> m_len = EXT_UNINIT_MAX_LEN / 2 ;
3962
+
3963
+ ret = ext4_convert_initialized_extents (handle , inode , map ,
3964
+ path );
3965
+ if (ret >= 0 ) {
3966
+ ext4_update_inode_fsync_trans (handle , inode , 1 );
3967
+ err = check_eofblocks_fl (handle , inode , map -> m_lblk ,
3968
+ path , map -> m_len );
3969
+ } else
3970
+ err = ret ;
3971
+ map -> m_flags |= EXT4_MAP_UNWRITTEN ;
3972
+ if (allocated > map -> m_len )
3973
+ allocated = map -> m_len ;
3974
+ map -> m_len = allocated ;
3975
+
3976
+ return err ? err : allocated ;
3977
+ }
3978
+
3886
3979
static int
3887
3980
ext4_ext_handle_uninitialized_extents (handle_t * handle , struct inode * inode ,
3888
3981
struct ext4_map_blocks * map ,
@@ -3910,8 +4003,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3910
4003
3911
4004
/* get_block() before submit the IO, split the extent */
3912
4005
if ((flags & EXT4_GET_BLOCKS_PRE_IO )) {
3913
- ret = ext4_split_unwritten_extents (handle , inode , map ,
3914
- path , flags );
4006
+ ret = ext4_split_convert_extents (handle , inode , map ,
4007
+ path , flags | EXT4_GET_BLOCKS_CONVERT );
3915
4008
if (ret <= 0 )
3916
4009
goto out ;
3917
4010
/*
@@ -4199,6 +4292,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4199
4292
ext4_fsblk_t ee_start = ext4_ext_pblock (ex );
4200
4293
unsigned short ee_len ;
4201
4294
4295
+
4202
4296
/*
4203
4297
* Uninitialized extents are treated as holes, except that
4204
4298
* we split out initialized portions during a write.
@@ -4215,7 +4309,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4215
4309
ext_debug ("%u fit into %u:%d -> %llu\n" , map -> m_lblk ,
4216
4310
ee_block , ee_len , newblock );
4217
4311
4218
- if (!ext4_ext_is_uninitialized (ex ))
4312
+ /*
4313
+ * If the extent is initialized check whether the
4314
+ * caller wants to convert it to unwritten.
4315
+ */
4316
+ if ((!ext4_ext_is_uninitialized (ex )) &&
4317
+ (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN )) {
4318
+ allocated = ext4_ext_convert_initialized_extent (
4319
+ handle , inode , map , path , flags ,
4320
+ allocated , newblock );
4321
+ goto out2 ;
4322
+ } else if (!ext4_ext_is_uninitialized (ex ))
4219
4323
goto out ;
4220
4324
4221
4325
ret = ext4_ext_handle_uninitialized_extents (
@@ -4604,6 +4708,144 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4604
4708
return ret > 0 ? ret2 : ret ;
4605
4709
}
4606
4710
4711
/*
 * ext4_zero_range() - implement the FALLOC_FL_ZERO_RANGE fallocate mode.
 *
 * The block-aligned interior of [offset, offset + len) is converted to
 * unwritten extents (allocating blocks where needed), so reads return
 * zeroes without writing zero data; the unaligned head/tail parts are
 * zeroed explicitly via ext4_zero_partial_blocks(). Unless
 * FALLOC_FL_KEEP_SIZE is set, i_size/i_disksize grow to cover the range.
 *
 * Lock ordering: i_mutex -> block unlocked DIO -> journal handle.
 *
 * Returns 0 on success or a negative error code.
 */
static long ext4_zero_range(struct file *file, loff_t offset,
			    loff_t len, int mode)
{
	struct inode *inode = file_inode(file);
	handle_t *handle = NULL;
	unsigned int max_blocks;
	loff_t new_size = 0;
	int ret = 0;
	int flags;
	int partial;
	loff_t start, end;
	ext4_lblk_t lblk;
	struct address_space *mapping = inode->i_mapping;
	unsigned int blkbits = inode->i_blkbits;

	trace_ext4_zero_range(inode, offset, len, mode);

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		ret = filemap_write_and_wait_range(mapping, offset,
						   offset + len - 1);
		if (ret)
			return ret;
	}

	/*
	 * Round up offset. This is not fallocate, we need to zero out
	 * blocks, so convert interior block aligned part of the range to
	 * unwritten and possibly manually zero out unaligned parts of the
	 * range.
	 */
	start = round_up(offset, 1 << blkbits);
	end = round_down((offset + len), 1 << blkbits);

	/* Sanity check: the rounded range must lie inside the request. */
	if (start < offset || end > offset + len)
		return -EINVAL;
	/* Non-zero iff the range ends mid-block (partial tail block). */
	partial = (offset + len) & ((1 << blkbits) - 1);

	lblk = start >> blkbits;
	max_blocks = (end >> blkbits);
	/* Guard against an interior range smaller than one block. */
	if (max_blocks < lblk)
		max_blocks = 0;
	else
		max_blocks -= lblk;

	flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
	if (mode & FALLOC_FL_KEEP_SIZE)
		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;

	mutex_lock(&inode->i_mutex);

	/*
	 * Indirect files do not support unwritten extents
	 */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		ret = -EOPNOTSUPP;
		goto out_mutex;
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
	     offset + len > i_size_read(inode)) {
		new_size = offset + len;
		ret = inode_newsize_ok(inode, new_size);
		if (ret)
			goto out_mutex;
		/*
		 * If we have a partial block after EOF we have to allocate
		 * the entire block.
		 */
		if (partial)
			max_blocks += 1;
	}

	if (max_blocks > 0) {

		/* Now release the pages and zero block aligned part of pages*/
		truncate_pagecache_range(inode, start, end - 1);

		/* Wait all existing dio workers, newcomers will block on i_mutex */
		ext4_inode_block_unlocked_dio(inode);
		inode_dio_wait(inode);

		/*
		 * Remove entire range from the extent status tree.
		 */
		ret = ext4_es_remove_extent(inode, lblk, max_blocks);
		if (ret)
			goto out_dio;

		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
					     mode);
		if (ret)
			goto out_dio;
	}

	/*
	 * NOTE(review): 4 credits assumed sufficient for the inode
	 * timestamp/size updates below — confirm the credit math.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		ext4_std_error(inode->i_sb, ret);
		goto out_dio;
	}

	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);

	if (!ret && new_size) {
		if (new_size > i_size_read(inode))
			i_size_write(inode, new_size);
		if (new_size > EXT4_I(inode)->i_disksize)
			ext4_update_i_disksize(inode, new_size);
	} else if (!ret && !new_size) {
		/*
		 * Mark that we allocate beyond EOF so the subsequent truncate
		 * can proceed even if the new size is the same as i_size.
		 */
		if ((offset + len) > i_size_read(inode))
			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
	}

	ext4_mark_inode_dirty(handle, inode);

	/* Zero out partial block at the edges of the range */
	ret = ext4_zero_partial_blocks(handle, inode, offset, len);

	if (file->f_flags & O_SYNC)
		ext4_handle_sync(handle);

	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
4848
+
4607
4849
/*
4608
4850
* preallocate space for a file. This implements ext4's fallocate file
4609
4851
* operation, which gets called from sys_fallocate system call.
@@ -4625,7 +4867,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4625
4867
4626
4868
/* Return error if mode is not supported */
4627
4869
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4628
- FALLOC_FL_COLLAPSE_RANGE ))
4870
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE ))
4629
4871
return - EOPNOTSUPP ;
4630
4872
4631
4873
if (mode & FALLOC_FL_PUNCH_HOLE )
@@ -4645,6 +4887,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4645
4887
if (!(ext4_test_inode_flag (inode , EXT4_INODE_EXTENTS )))
4646
4888
return - EOPNOTSUPP ;
4647
4889
4890
+ if (mode & FALLOC_FL_ZERO_RANGE )
4891
+ return ext4_zero_range (file , offset , len , mode );
4892
+
4648
4893
trace_ext4_fallocate_enter (inode , offset , len , mode );
4649
4894
lblk = offset >> blkbits ;
4650
4895
/*
0 commit comments