43
43
#include <linux/blkdev.h>
44
44
#include <linux/uuid.h>
45
45
#include <linux/btrfs.h>
46
+ #include <linux/uaccess.h>
46
47
#include "compat.h"
47
48
#include "ctree.h"
48
49
#include "disk-io.h"
57
58
#include "send.h"
58
59
#include "dev-replace.h"
59
60
61
+ static int btrfs_clone (struct inode * src , struct inode * inode ,
62
+ u64 off , u64 olen , u64 olen_aligned , u64 destoff );
63
+
60
64
/* Mask out flags that are inappropriate for the given type of inode. */
61
65
static inline __u32 btrfs_mask_flags (umode_t mode , __u32 flags )
62
66
{
@@ -2470,6 +2474,34 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2470
2474
return ret ;
2471
2475
}
2472
2476
2477
+ static struct page * extent_same_get_page (struct inode * inode , u64 off )
2478
+ {
2479
+ struct page * page ;
2480
+ pgoff_t index ;
2481
+ struct extent_io_tree * tree = & BTRFS_I (inode )-> io_tree ;
2482
+
2483
+ index = off >> PAGE_CACHE_SHIFT ;
2484
+
2485
+ page = grab_cache_page (inode -> i_mapping , index );
2486
+ if (!page )
2487
+ return NULL ;
2488
+
2489
+ if (!PageUptodate (page )) {
2490
+ if (extent_read_full_page_nolock (tree , page , btrfs_get_extent ,
2491
+ 0 ))
2492
+ return NULL ;
2493
+ lock_page (page );
2494
+ if (!PageUptodate (page )) {
2495
+ unlock_page (page );
2496
+ page_cache_release (page );
2497
+ return NULL ;
2498
+ }
2499
+ }
2500
+ unlock_page (page );
2501
+
2502
+ return page ;
2503
+ }
2504
+
2473
2505
static inline void lock_extent_range (struct inode * inode , u64 off , u64 len )
2474
2506
{
2475
2507
/* do any pending delalloc/csum calc on src, one way or
@@ -2490,6 +2522,251 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2490
2522
}
2491
2523
}
2492
2524
2525
+ static void btrfs_double_unlock (struct inode * inode1 , u64 loff1 ,
2526
+ struct inode * inode2 , u64 loff2 , u64 len )
2527
+ {
2528
+ unlock_extent (& BTRFS_I (inode1 )-> io_tree , loff1 , loff1 + len - 1 );
2529
+ unlock_extent (& BTRFS_I (inode2 )-> io_tree , loff2 , loff2 + len - 1 );
2530
+
2531
+ mutex_unlock (& inode1 -> i_mutex );
2532
+ mutex_unlock (& inode2 -> i_mutex );
2533
+ }
2534
+
2535
+ static void btrfs_double_lock (struct inode * inode1 , u64 loff1 ,
2536
+ struct inode * inode2 , u64 loff2 , u64 len )
2537
+ {
2538
+ if (inode1 < inode2 ) {
2539
+ swap (inode1 , inode2 );
2540
+ swap (loff1 , loff2 );
2541
+ }
2542
+
2543
+ mutex_lock_nested (& inode1 -> i_mutex , I_MUTEX_PARENT );
2544
+ lock_extent_range (inode1 , loff1 , len );
2545
+ if (inode1 != inode2 ) {
2546
+ mutex_lock_nested (& inode2 -> i_mutex , I_MUTEX_CHILD );
2547
+ lock_extent_range (inode2 , loff2 , len );
2548
+ }
2549
+ }
2550
+
2551
+ static int btrfs_cmp_data (struct inode * src , u64 loff , struct inode * dst ,
2552
+ u64 dst_loff , u64 len )
2553
+ {
2554
+ int ret = 0 ;
2555
+ struct page * src_page , * dst_page ;
2556
+ unsigned int cmp_len = PAGE_CACHE_SIZE ;
2557
+ void * addr , * dst_addr ;
2558
+
2559
+ while (len ) {
2560
+ if (len < PAGE_CACHE_SIZE )
2561
+ cmp_len = len ;
2562
+
2563
+ src_page = extent_same_get_page (src , loff );
2564
+ if (!src_page )
2565
+ return - EINVAL ;
2566
+ dst_page = extent_same_get_page (dst , dst_loff );
2567
+ if (!dst_page ) {
2568
+ page_cache_release (src_page );
2569
+ return - EINVAL ;
2570
+ }
2571
+ addr = kmap_atomic (src_page );
2572
+ dst_addr = kmap_atomic (dst_page );
2573
+
2574
+ flush_dcache_page (src_page );
2575
+ flush_dcache_page (dst_page );
2576
+
2577
+ if (memcmp (addr , dst_addr , cmp_len ))
2578
+ ret = BTRFS_SAME_DATA_DIFFERS ;
2579
+
2580
+ kunmap_atomic (addr );
2581
+ kunmap_atomic (dst_addr );
2582
+ page_cache_release (src_page );
2583
+ page_cache_release (dst_page );
2584
+
2585
+ if (ret )
2586
+ break ;
2587
+
2588
+ loff += cmp_len ;
2589
+ dst_loff += cmp_len ;
2590
+ len -= cmp_len ;
2591
+ }
2592
+
2593
+ return ret ;
2594
+ }
2595
+
2596
+ static int extent_same_check_offsets (struct inode * inode , u64 off , u64 len )
2597
+ {
2598
+ u64 bs = BTRFS_I (inode )-> root -> fs_info -> sb -> s_blocksize ;
2599
+
2600
+ if (off + len > inode -> i_size || off + len < off )
2601
+ return - EINVAL ;
2602
+ /* Check that we are block aligned - btrfs_clone() requires this */
2603
+ if (!IS_ALIGNED (off , bs ) || !IS_ALIGNED (off + len , bs ))
2604
+ return - EINVAL ;
2605
+
2606
+ return 0 ;
2607
+ }
2608
+
2609
+ static int btrfs_extent_same (struct inode * src , u64 loff , u64 len ,
2610
+ struct inode * dst , u64 dst_loff )
2611
+ {
2612
+ int ret ;
2613
+
2614
+ /*
2615
+ * btrfs_clone() can't handle extents in the same file
2616
+ * yet. Once that works, we can drop this check and replace it
2617
+ * with a check for the same inode, but overlapping extents.
2618
+ */
2619
+ if (src == dst )
2620
+ return - EINVAL ;
2621
+
2622
+ btrfs_double_lock (src , loff , dst , dst_loff , len );
2623
+
2624
+ ret = extent_same_check_offsets (src , loff , len );
2625
+ if (ret )
2626
+ goto out_unlock ;
2627
+
2628
+ ret = extent_same_check_offsets (dst , dst_loff , len );
2629
+ if (ret )
2630
+ goto out_unlock ;
2631
+
2632
+ /* don't make the dst file partly checksummed */
2633
+ if ((BTRFS_I (src )-> flags & BTRFS_INODE_NODATASUM ) !=
2634
+ (BTRFS_I (dst )-> flags & BTRFS_INODE_NODATASUM )) {
2635
+ ret = - EINVAL ;
2636
+ goto out_unlock ;
2637
+ }
2638
+
2639
+ ret = btrfs_cmp_data (src , loff , dst , dst_loff , len );
2640
+ if (ret == 0 )
2641
+ ret = btrfs_clone (src , dst , loff , len , len , dst_loff );
2642
+
2643
+ out_unlock :
2644
+ btrfs_double_unlock (src , loff , dst , dst_loff , len );
2645
+
2646
+ return ret ;
2647
+ }
2648
+
2649
+ #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
2650
+
2651
+ static long btrfs_ioctl_file_extent_same (struct file * file ,
2652
+ void __user * argp )
2653
+ {
2654
+ struct btrfs_ioctl_same_args * args = argp ;
2655
+ struct btrfs_ioctl_same_args same ;
2656
+ struct btrfs_ioctl_same_extent_info info ;
2657
+ struct inode * src = file -> f_dentry -> d_inode ;
2658
+ struct file * dst_file = NULL ;
2659
+ struct inode * dst ;
2660
+ u64 off ;
2661
+ u64 len ;
2662
+ int i ;
2663
+ int ret ;
2664
+ u64 bs = BTRFS_I (src )-> root -> fs_info -> sb -> s_blocksize ;
2665
+ bool is_admin = capable (CAP_SYS_ADMIN );
2666
+
2667
+ if (!(file -> f_mode & FMODE_READ ))
2668
+ return - EINVAL ;
2669
+
2670
+ ret = mnt_want_write_file (file );
2671
+ if (ret )
2672
+ return ret ;
2673
+
2674
+ if (copy_from_user (& same ,
2675
+ (struct btrfs_ioctl_same_args __user * )argp ,
2676
+ sizeof (same ))) {
2677
+ ret = - EFAULT ;
2678
+ goto out ;
2679
+ }
2680
+
2681
+ off = same .logical_offset ;
2682
+ len = same .length ;
2683
+
2684
+ /*
2685
+ * Limit the total length we will dedupe for each operation.
2686
+ * This is intended to bound the total time spent in this
2687
+ * ioctl to something sane.
2688
+ */
2689
+ if (len > BTRFS_MAX_DEDUPE_LEN )
2690
+ len = BTRFS_MAX_DEDUPE_LEN ;
2691
+
2692
+ if (WARN_ON_ONCE (bs < PAGE_CACHE_SIZE )) {
2693
+ /*
2694
+ * Btrfs does not support blocksize < page_size. As a
2695
+ * result, btrfs_cmp_data() won't correctly handle
2696
+ * this situation without an update.
2697
+ */
2698
+ ret = - EINVAL ;
2699
+ goto out ;
2700
+ }
2701
+
2702
+ ret = - EISDIR ;
2703
+ if (S_ISDIR (src -> i_mode ))
2704
+ goto out ;
2705
+
2706
+ ret = - EACCES ;
2707
+ if (!S_ISREG (src -> i_mode ))
2708
+ goto out ;
2709
+
2710
+ ret = 0 ;
2711
+ for (i = 0 ; i < same .dest_count ; i ++ ) {
2712
+ if (copy_from_user (& info , & args -> info [i ], sizeof (info ))) {
2713
+ ret = - EFAULT ;
2714
+ goto out ;
2715
+ }
2716
+
2717
+ info .bytes_deduped = 0 ;
2718
+
2719
+ dst_file = fget (info .fd );
2720
+ if (!dst_file ) {
2721
+ info .status = - EBADF ;
2722
+ goto next ;
2723
+ }
2724
+
2725
+ if (!(is_admin || (dst_file -> f_mode & FMODE_WRITE ))) {
2726
+ info .status = - EINVAL ;
2727
+ goto next ;
2728
+ }
2729
+
2730
+ info .status = - EXDEV ;
2731
+ if (file -> f_path .mnt != dst_file -> f_path .mnt )
2732
+ goto next ;
2733
+
2734
+ dst = dst_file -> f_dentry -> d_inode ;
2735
+ if (src -> i_sb != dst -> i_sb )
2736
+ goto next ;
2737
+
2738
+ if (S_ISDIR (dst -> i_mode )) {
2739
+ info .status = - EISDIR ;
2740
+ goto next ;
2741
+ }
2742
+
2743
+ if (!S_ISREG (dst -> i_mode )) {
2744
+ info .status = - EACCES ;
2745
+ goto next ;
2746
+ }
2747
+
2748
+ info .status = btrfs_extent_same (src , off , len , dst ,
2749
+ info .logical_offset );
2750
+ if (info .status == 0 )
2751
+ info .bytes_deduped += len ;
2752
+
2753
+ next :
2754
+ if (dst_file )
2755
+ fput (dst_file );
2756
+
2757
+ if (__put_user_unaligned (info .status , & args -> info [i ].status ) ||
2758
+ __put_user_unaligned (info .bytes_deduped ,
2759
+ & args -> info [i ].bytes_deduped )) {
2760
+ ret = - EFAULT ;
2761
+ goto out ;
2762
+ }
2763
+ }
2764
+
2765
+ out :
2766
+ mnt_drop_write_file (file );
2767
+ return ret ;
2768
+ }
2769
+
2493
2770
/**
2494
2771
* btrfs_clone() - clone a range from inode file to another
2495
2772
*
@@ -4242,6 +4519,8 @@ long btrfs_ioctl(struct file *file, unsigned int
4242
4519
return btrfs_ioctl_get_fslabel (file , argp );
4243
4520
case BTRFS_IOC_SET_FSLABEL :
4244
4521
return btrfs_ioctl_set_fslabel (file , argp );
4522
+ case BTRFS_IOC_FILE_EXTENT_SAME :
4523
+ return btrfs_ioctl_file_extent_same (file , argp );
4245
4524
}
4246
4525
4247
4526
return - ENOTTY ;
0 commit comments