 #include <linux/iomap.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
@@ -104,6 +105,138 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static struct iomap_page *
+iomap_page_create(struct inode *inode, struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (iop || i_blocksize(inode) == PAGE_SIZE)
+		return iop;
+
+	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
+	atomic_set(&iop->read_count, 0);
+	atomic_set(&iop->write_count, 0);
+	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+	set_page_private(page, (unsigned long)iop);
+	SetPagePrivate(page);
+	return iop;
+}
+
+static void
+iomap_page_release(struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (!iop)
+		return;
+	WARN_ON_ONCE(atomic_read(&iop->read_count));
+	WARN_ON_ONCE(atomic_read(&iop->write_count));
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	kfree(iop);
+}
+
+/*
+ * Calculate the range inside the page that we actually need to read.
+ */
+static void
+iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
+		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
+{
+	unsigned block_bits = inode->i_blkbits;
+	unsigned block_size = (1 << block_bits);
+	unsigned poff = *pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	unsigned first = poff >> block_bits;
+	unsigned last = (poff + plen - 1) >> block_bits;
+	unsigned end = (i_size_read(inode) & (PAGE_SIZE - 1)) >> block_bits;
+
+	/*
+	 * If the block size is smaller than the page size we need to check the
+	 * per-block uptodate status and adjust the offset and length if needed
+	 * to avoid reading in already uptodate ranges.
+	 */
+	if (iop) {
+		unsigned int i;
+
+		/* move forward for each leading block marked uptodate */
+		for (i = first; i <= last; i++) {
+			if (!test_bit(i, iop->uptodate))
+				break;
+			*pos += block_size;
+			poff += block_size;
+			plen -= block_size;
+			first++;
+		}
+
+		/* truncate len if we find any trailing uptodate block(s) */
+		for ( ; i <= last; i++) {
+			if (test_bit(i, iop->uptodate)) {
+				plen -= (last - i + 1) * block_size;
+				last = i - 1;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If the extent spans the block that contains the i_size we need to
+	 * handle both halves separately so that we properly zero data in the
+	 * page cache for blocks that are entirely outside of i_size.
+	 */
+	if (first <= end && last > end)
+		plen -= (last - end) * block_size;
+
+	*offp = poff;
+	*lenp = plen;
+}
+
+static void
+iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = off >> inode->i_blkbits;
+	unsigned last = (off + len - 1) >> inode->i_blkbits;
+	unsigned int i;
+	bool uptodate = true;
+
+	if (iop) {
+		for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
+			if (i >= first && i <= last)
+				set_bit(i, iop->uptodate);
+			else if (!test_bit(i, iop->uptodate))
+				uptodate = false;
+		}
+	}
+
+	if (uptodate && !PageError(page))
+		SetPageUptodate(page);
+}
+
+static void
+iomap_read_finish(struct iomap_page *iop, struct page *page)
+{
+	if (!iop || atomic_dec_and_test(&iop->read_count))
+		unlock_page(page);
+}
+
+static void
+iomap_read_page_end_io(struct bio_vec *bvec, int error)
+{
+	struct page *page = bvec->bv_page;
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (unlikely(error)) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
+	}
+
+	iomap_read_finish(iop, page);
+}
+
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
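Note: the struct iomap_page tracking structure and the to_iomap_page() helper used by the code above are not part of this hunk; they come from the companion include/linux/iomap.h change in this series. A minimal sketch of the definitions this code assumes (approximate, for orientation only):

struct iomap_page {
	atomic_t		read_count;	/* in-flight read segments */
	atomic_t		write_count;	/* in-flight write segments */
	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);	/* per-block uptodate bits */
};

static inline struct iomap_page *
to_iomap_page(struct page *page)
{
	/* page_private() holds the iomap_page once PagePrivate is set */
	if (page_has_private(page))
		return (struct iomap_page *)page_private(page);
	return NULL;
}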
@@ -132,7 +265,7 @@ iomap_read_end_io(struct bio *bio)
 	int i;
 
 	bio_for_each_segment_all(bvec, bio, i)
-		page_endio(bvec->bv_page, false, error);
+		iomap_read_page_end_io(bvec, error);
 	bio_put(bio);
 }
 
@@ -150,9 +283,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 {
 	struct iomap_readpage_ctx *ctx = data;
 	struct page *page = ctx->cur_page;
-	unsigned poff = pos & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	bool is_contig = false;
+	loff_t orig_pos = pos;
+	unsigned poff, plen;
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE) {
@@ -161,13 +295,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		return PAGE_SIZE;
 	}
 
-	/* we don't support blocksize < PAGE_SIZE quite yet. */
-	WARN_ON_ONCE(pos != page_offset(page));
-	WARN_ON_ONCE(plen != PAGE_SIZE);
+	/* zero post-eof blocks as the page may be mapped */
+	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+	if (plen == 0)
+		goto done;
 
 	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
 		zero_user(page, poff, plen);
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, poff, plen);
 		goto done;
 	}
 
@@ -183,6 +318,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		is_contig = true;
 	}
 
+	/*
+	 * If we start a new segment we need to increase the read count, and we
+	 * need to do so before submitting any previous full bio to make sure
+	 * that we don't prematurely unlock the page.
+	 */
+	if (iop)
+		atomic_inc(&iop->read_count);
+
 	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -203,7 +346,13 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 	__bio_add_page(ctx->bio, page, plen, poff);
 done:
-	return plen;
+	/*
+	 * Move the caller beyond our range so that it keeps making progress.
+	 * For that we have to include any leading non-uptodate ranges, but
+	 * we can skip trailing ones as they will be handled in the next
+	 * iteration.
+	 */
+	return pos - orig_pos + plen;
 }
 
 int
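A hypothetical walk-through of the adjusted range and the new return value (illustrative numbers, not part of the patch): assume PAGE_SIZE is 4096, i_blocksize() is 1024, i_size is 2600, and iomap_readpage() is reading page 0 of a fully mapped extent.

/*
 * First actor call:  pos = 0, length = 4096.
 *   iomap_adjust_read_range() gives poff = 0, plen = 4096; the i_size
 *   check then trims the block that lies entirely beyond EOF, so
 *   plen = 3072.  Blocks 0-2 are added to the bio and the actor returns
 *   pos - orig_pos + plen = 3072.
 *
 * Second actor call: pos = 3072, length = 1024.
 *   poff = 3072, plen = 1024; pos >= i_size, so the range is zeroed via
 *   zero_user() and marked uptodate without issuing any I/O.
 */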
@@ -214,8 +363,6 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
 	unsigned poff;
 	loff_t ret;
 
-	WARN_ON_ONCE(page_has_buffers(page));
-
 	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
 		ret = iomap_apply(inode, page_offset(page) + poff,
 				PAGE_SIZE - poff, 0, ops, &ctx,
@@ -341,6 +488,84 @@ iomap_readpages(struct address_space *mapping, struct list_head *pages,
 }
 EXPORT_SYMBOL_GPL(iomap_readpages);
 
+int
+iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = from >> inode->i_blkbits;
+	unsigned last = (from + count - 1) >> inode->i_blkbits;
+	unsigned i;
+
+	if (iop) {
+		for (i = first; i <= last; i++)
+			if (!test_bit(i, iop->uptodate))
+				return 0;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
+
+int
+iomap_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	/*
+	 * mm accommodates an old ext3 case where clean pages might not have had
+	 * the dirty bit cleared. Thus, it can send actual dirty pages to
+	 * ->releasepage() via shrink_active_list(), skip those here.
+	 */
+	if (PageDirty(page) || PageWriteback(page))
+		return 0;
+	iomap_page_release(page);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(iomap_releasepage);
+
+void
+iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
+{
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == PAGE_SIZE) {
+		WARN_ON_ONCE(PageWriteback(page));
+		cancel_dirty_page(page);
+		iomap_page_release(page);
+	}
+}
+EXPORT_SYMBOL_GPL(iomap_invalidatepage);
+
+#ifdef CONFIG_MIGRATION
+int
+iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode)
+{
+	int ret;
+
+	ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	if (ret != MIGRATEPAGE_SUCCESS)
+		return ret;
+
+	if (page_has_private(page)) {
+		ClearPagePrivate(page);
+		set_page_private(newpage, page_private(page));
+		set_page_private(page, 0);
+		SetPagePrivate(newpage);
+	}
+
+	if (mode != MIGRATE_SYNC_NO_COPY)
+		migrate_page_copy(newpage, page);
+	else
+		migrate_page_states(newpage, page);
+	return MIGRATEPAGE_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(iomap_migrate_page);
+#endif /* CONFIG_MIGRATION */
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
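These exports are meant to be wired into a filesystem's address_space_operations alongside its existing iomap read/write entry points. A rough sketch of such a hookup (the example_* callbacks are hypothetical placeholders; the iomap_* entries are the helpers exported above):

static const struct address_space_operations example_aops = {
	.readpage		= example_readpage,	/* wraps iomap_readpage() */
	.readpages		= example_readpages,	/* wraps iomap_readpages() */
	.is_partially_uptodate	= iomap_is_partially_uptodate,
	.releasepage		= iomap_releasepage,
	.invalidatepage		= iomap_invalidatepage,
#ifdef CONFIG_MIGRATION
	.migratepage		= iomap_migrate_page,
#endif
};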
@@ -364,6 +589,7 @@ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
 
 	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
 		zero_user_segments(page, poff, from, to, poff + plen);
+		iomap_set_range_uptodate(page, poff, plen);
 		return 0;
 	}
 
@@ -379,21 +605,33 @@ static int
 __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 		struct page *page, struct iomap *iomap)
 {
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned poff = block_start & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
-	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
-
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len, poff, plen;
+	int status = 0;
 
 	if (PageUptodate(page))
 		return 0;
-	if (from <= poff && to >= poff + plen)
-		return 0;
-	return iomap_read_page_sync(inode, block_start, page,
-			poff, plen, from, to, iomap);
+
+	do {
+		iomap_adjust_read_range(inode, iop, &block_start,
+				block_end - block_start, &poff, &plen);
+		if (plen == 0)
+			break;
+
+		if ((from > poff && from < poff + plen) ||
+		    (to > poff && to < poff + plen)) {
+			status = iomap_read_page_sync(inode, block_start, page,
+					poff, plen, from, to, iomap);
+			if (status)
+				break;
+		}
+
+	} while ((block_start += plen) < block_end);
+
+	return status;
 }
 
 static int
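To illustrate the new per-block behaviour of __iomap_write_begin() (hypothetical numbers, assuming 1024-byte blocks, a 4096-byte page that is not yet uptodate, and a write well inside i_size):

/*
 * Example: pos = 1536, len = 100, block_size = 1024, PAGE_SIZE = 4096
 *
 *   block_start = pos & ~(block_size - 1)                      = 1024
 *   block_end   = (pos + len + block_size - 1)
 *                   & ~(block_size - 1)                        = 2048
 *   from        = pos & (PAGE_SIZE - 1)                        = 1536
 *   to          = from + len                                   = 1636
 *
 * iomap_adjust_read_range() yields poff = 1024, plen = 1024.  Both write
 * boundaries fall strictly inside that block, so it is read in via
 * iomap_read_page_sync(); blocks that are fully overwritten or already
 * uptodate are skipped, and the loop ends once block_start reaches
 * block_end.
 */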
@@ -476,7 +714,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	if (unlikely(copied < len && !PageUptodate(page))) {
 		copied = 0;
 	} else {
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, pos & (PAGE_SIZE - 1), len);
 		iomap_set_page_dirty(page);
 	}
 	return __generic_write_end(inode, pos, copied, page);
@@ -812,7 +1050,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 		block_commit_write(page, 0, length);
 	} else {
 		WARN_ON_ONCE(!PageUptodate(page));
-		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+		iomap_page_create(inode, page);
 	}
 
 	return length;