Skip to content

Commit 399254a

Browse files
committed
block: add BIO_NO_PAGE_REF flag
If bio_iov_iter_get_pages() is called on an iov_iter that is flagged with NO_REF, then we don't need to take a page reference for the pages that we add. Add BIO_NO_PAGE_REF to track this in the bio, so that IO completion knows not to drop a reference to these pages.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 875f1d0 commit 399254a

File tree

4 files changed

+39
-29
lines changed

4 files changed

+39
-29
lines changed

block/bio.c

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
849849
size = bio_add_page(bio, bv->bv_page, len,
850850
bv->bv_offset + iter->iov_offset);
851851
if (size == len) {
852-
struct page *page;
853-
int i;
852+
if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
853+
struct page *page;
854+
int i;
855+
856+
mp_bvec_for_each_page(page, bv, i)
857+
get_page(page);
858+
}
854859

855-
/*
856-
* For the normal O_DIRECT case, we could skip grabbing this
857-
* reference and then not have to put them again when IO
858-
* completes. But this breaks some in-kernel users, like
859-
* splicing to/from a loop device, where we release the pipe
860-
* pages unconditionally. If we can fix that case, we can
861-
* get rid of the get here and the need to call
862-
* bio_release_pages() at IO completion time.
863-
*/
864-
mp_bvec_for_each_page(page, bv, i)
865-
get_page(page);
866860
iov_iter_advance(iter, size);
867861
return 0;
868862
}
@@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
925919
* This takes either an iterator pointing to user memory, or one pointing to
926920
* kernel pages (BVEC iterator). If we're adding user pages, we pin them and
927921
* map them into the kernel. On IO completion, the caller should put those
928-
* pages. For now, when adding kernel pages, we still grab a reference to the
929-
* page. This isn't strictly needed for the common case, but some call paths
930-
* end up releasing pages from eg a pipe and we can't easily control these.
931-
* See comment in __bio_iov_bvec_add_pages().
922+
* pages. If we're adding kernel pages, and the caller told us it's safe to
923+
* do so, we just have to add the pages to the bio directly. We don't grab an
924+
* extra reference to those pages (the user should already have that), and we
925+
* don't put the page on IO completion. The caller needs to check if the bio is
926+
* flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
927+
* released.
932928
*
933929
* The function tries, but does not guarantee, to pin as many pages as
934930
* fit into the bio, or are requested in *iter, whatever is smaller. If
@@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
940936
const bool is_bvec = iov_iter_is_bvec(iter);
941937
unsigned short orig_vcnt = bio->bi_vcnt;
942938

939+
/*
940+
* If this is a BVEC iter, then the pages are kernel pages. Don't
941+
* release them on IO completion, if the caller asked us to.
942+
*/
943+
if (is_bvec && iov_iter_bvec_no_ref(iter))
944+
bio_set_flag(bio, BIO_NO_PAGE_REF);
945+
943946
do {
944947
int ret;
945948

@@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work)
16961699
next = bio->bi_private;
16971700

16981701
bio_set_pages_dirty(bio);
1699-
bio_release_pages(bio);
1702+
if (!bio_flagged(bio, BIO_NO_PAGE_REF))
1703+
bio_release_pages(bio);
17001704
bio_put(bio);
17011705
}
17021706
}
@@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio)
17131717
goto defer;
17141718
}
17151719

1716-
bio_release_pages(bio);
1720+
if (!bio_flagged(bio, BIO_NO_PAGE_REF))
1721+
bio_release_pages(bio);
17171722
bio_put(bio);
17181723
return;
17191724
defer:

fs/block_dev.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
336336
if (should_dirty) {
337337
bio_check_pages_dirty(bio);
338338
} else {
339-
struct bio_vec *bvec;
340-
int i;
341-
struct bvec_iter_all iter_all;
339+
if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
340+
struct bvec_iter_all iter_all;
341+
struct bio_vec *bvec;
342+
int i;
342343

343-
bio_for_each_segment_all(bvec, bio, i, iter_all)
344-
put_page(bvec->bv_page);
344+
bio_for_each_segment_all(bvec, bio, i, iter_all)
345+
put_page(bvec->bv_page);
346+
}
345347
bio_put(bio);
346348
}
347349
}

fs/iomap.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
15891589
if (should_dirty) {
15901590
bio_check_pages_dirty(bio);
15911591
} else {
1592-
struct bio_vec *bvec;
1593-
int i;
1594-
struct bvec_iter_all iter_all;
1592+
if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
1593+
struct bvec_iter_all iter_all;
1594+
struct bio_vec *bvec;
1595+
int i;
15951596

1596-
bio_for_each_segment_all(bvec, bio, i, iter_all)
1597-
put_page(bvec->bv_page);
1597+
bio_for_each_segment_all(bvec, bio, i, iter_all)
1598+
put_page(bvec->bv_page);
1599+
}
15981600
bio_put(bio);
15991601
}
16001602
}

include/linux/blk_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ struct bio {
215215
/*
216216
* bio flags
217217
*/
218+
#define BIO_NO_PAGE_REF 0 /* don't drop page references on IO completion */
218219
#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
219220
#define BIO_CLONED 2 /* doesn't own data */
220221
#define BIO_BOUNCED 3 /* bio is a bounce bio */

0 commit comments

Comments
 (0)