Skip to content

Commit 9a56a23

Browse files
committed
io_uring: use fget/fput_many() for file references
Add a separate io_submit_state structure, to cache some of the things we need for IO submission. One such example is file reference batching: we get as many references as the number of sqes we are submitting, and drop unused ones if we end up switching files. The assumption here is that we're usually only dealing with one fd, and if there are multiple, hopefully they are at least somewhat ordered. Could trivially be extended to cover multiple fds, if needed. On the completion side we do the same thing, except this is trivially done just locally in io_iopoll_complete(). Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 091141a commit 9a56a23

File tree

1 file changed

+121
-21
lines changed

1 file changed

+121
-21
lines changed

fs/io_uring.c

Lines changed: 121 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,19 @@ struct io_kiocb {
165165
#define IO_PLUG_THRESHOLD 2
166166
#define IO_IOPOLL_BATCH 8
167167

168+
struct io_submit_state {
169+
struct blk_plug plug;
170+
171+
/*
172+
* File reference cache
173+
*/
174+
struct file *file;
175+
unsigned int fd;
176+
unsigned int has_refs;
177+
unsigned int used_refs;
178+
unsigned int ios_left;
179+
};
180+
168181
static struct kmem_cache *req_cachep;
169182

170183
static const struct file_operations io_uring_fops;
@@ -332,9 +345,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
332345
struct list_head *done)
333346
{
334347
void *reqs[IO_IOPOLL_BATCH];
348+
int file_count, to_free;
349+
struct file *file = NULL;
335350
struct io_kiocb *req;
336-
int to_free = 0;
337351

352+
file_count = to_free = 0;
338353
while (!list_empty(done)) {
339354
req = list_first_entry(done, struct io_kiocb, list);
340355
list_del(&req->list);
@@ -344,12 +359,28 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
344359
reqs[to_free++] = req;
345360
(*nr_events)++;
346361

347-
fput(req->rw.ki_filp);
362+
/*
363+
* Batched puts of the same file, to avoid dirtying the
364+
* file usage count multiple times, if avoidable.
365+
*/
366+
if (!file) {
367+
file = req->rw.ki_filp;
368+
file_count = 1;
369+
} else if (file == req->rw.ki_filp) {
370+
file_count++;
371+
} else {
372+
fput_many(file, file_count);
373+
file = req->rw.ki_filp;
374+
file_count = 1;
375+
}
376+
348377
if (to_free == ARRAY_SIZE(reqs))
349378
io_free_req_many(ctx, reqs, &to_free);
350379
}
351380
io_commit_cqring(ctx);
352381

382+
if (file)
383+
fput_many(file, file_count);
353384
io_free_req_many(ctx, reqs, &to_free);
354385
}
355386

@@ -530,6 +561,48 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
530561
list_add_tail(&req->list, &ctx->poll_list);
531562
}
532563

564+
static void io_file_put(struct io_submit_state *state, struct file *file)
565+
{
566+
if (!state) {
567+
fput(file);
568+
} else if (state->file) {
569+
int diff = state->has_refs - state->used_refs;
570+
571+
if (diff)
572+
fput_many(state->file, diff);
573+
state->file = NULL;
574+
}
575+
}
576+
577+
/*
578+
* Get as many references to a file as we have IOs left in this submission,
579+
* assuming most submissions are for one file, or at least that each file
580+
* has more than one submission.
581+
*/
582+
static struct file *io_file_get(struct io_submit_state *state, int fd)
583+
{
584+
if (!state)
585+
return fget(fd);
586+
587+
if (state->file) {
588+
if (state->fd == fd) {
589+
state->used_refs++;
590+
state->ios_left--;
591+
return state->file;
592+
}
593+
io_file_put(state, NULL);
594+
}
595+
state->file = fget_many(fd, state->ios_left);
596+
if (!state->file)
597+
return NULL;
598+
599+
state->fd = fd;
600+
state->has_refs = state->ios_left;
601+
state->used_refs = 1;
602+
state->ios_left--;
603+
return state->file;
604+
}
605+
533606
/*
534607
* If we tracked the file through the SCM inflight mechanism, we could support
535608
* any file. For now, just ensure that anything potentially problematic is done
@@ -548,7 +621,7 @@ static bool io_file_supports_async(struct file *file)
548621
}
549622

550623
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
551-
bool force_nonblock)
624+
bool force_nonblock, struct io_submit_state *state)
552625
{
553626
struct io_ring_ctx *ctx = req->ctx;
554627
struct kiocb *kiocb = &req->rw;
@@ -560,7 +633,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
560633
return 0;
561634

562635
fd = READ_ONCE(sqe->fd);
563-
kiocb->ki_filp = fget(fd);
636+
kiocb->ki_filp = io_file_get(state, fd);
564637
if (unlikely(!kiocb->ki_filp))
565638
return -EBADF;
566639
if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
@@ -604,7 +677,10 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
604677
}
605678
return 0;
606679
out_fput:
607-
fput(kiocb->ki_filp);
680+
/* in case of error, we didn't use this file reference. drop it. */
681+
if (state)
682+
state->used_refs--;
683+
io_file_put(state, kiocb->ki_filp);
608684
return ret;
609685
}
610686

@@ -650,15 +726,15 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
650726
}
651727

652728
static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
653-
bool force_nonblock)
729+
bool force_nonblock, struct io_submit_state *state)
654730
{
655731
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
656732
struct kiocb *kiocb = &req->rw;
657733
struct iov_iter iter;
658734
struct file *file;
659735
ssize_t ret;
660736

661-
ret = io_prep_rw(req, s->sqe, force_nonblock);
737+
ret = io_prep_rw(req, s->sqe, force_nonblock, state);
662738
if (ret)
663739
return ret;
664740
file = kiocb->ki_filp;
@@ -694,15 +770,15 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
694770
}
695771

696772
static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
697-
bool force_nonblock)
773+
bool force_nonblock, struct io_submit_state *state)
698774
{
699775
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
700776
struct kiocb *kiocb = &req->rw;
701777
struct iov_iter iter;
702778
struct file *file;
703779
ssize_t ret;
704780

705-
ret = io_prep_rw(req, s->sqe, force_nonblock);
781+
ret = io_prep_rw(req, s->sqe, force_nonblock, state);
706782
if (ret)
707783
return ret;
708784
/* Hold on to the file for -EAGAIN */
@@ -826,7 +902,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
826902
}
827903

828904
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
829-
const struct sqe_submit *s, bool force_nonblock)
905+
const struct sqe_submit *s, bool force_nonblock,
906+
struct io_submit_state *state)
830907
{
831908
ssize_t ret;
832909
int opcode;
@@ -841,10 +918,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
841918
ret = io_nop(req, req->user_data);
842919
break;
843920
case IORING_OP_READV:
844-
ret = io_read(req, s, force_nonblock);
921+
ret = io_read(req, s, force_nonblock, state);
845922
break;
846923
case IORING_OP_WRITEV:
847-
ret = io_write(req, s, force_nonblock);
924+
ret = io_write(req, s, force_nonblock, state);
848925
break;
849926
case IORING_OP_FSYNC:
850927
ret = io_fsync(req, s->sqe, force_nonblock);
@@ -896,7 +973,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
896973
s->needs_lock = true;
897974

898975
do {
899-
ret = __io_submit_sqe(ctx, req, s, false);
976+
ret = __io_submit_sqe(ctx, req, s, false, NULL);
900977
/*
901978
* We can get EAGAIN for polled IO even though we're forcing
902979
* a sync submission from here, since we can't wait for
@@ -920,7 +997,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)
920997
kfree(sqe);
921998
}
922999

923-
static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
1000+
static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
1001+
struct io_submit_state *state)
9241002
{
9251003
struct io_kiocb *req;
9261004
ssize_t ret;
@@ -935,7 +1013,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
9351013

9361014
req->rw.ki_filp = NULL;
9371015

938-
ret = __io_submit_sqe(ctx, req, s, true);
1016+
ret = __io_submit_sqe(ctx, req, s, true, state);
9391017
if (ret == -EAGAIN) {
9401018
struct io_uring_sqe *sqe_copy;
9411019

@@ -956,6 +1034,26 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
9561034
return ret;
9571035
}
9581036

1037+
/*
1038+
* Batched submission is done, ensure local IO is flushed out.
1039+
*/
1040+
static void io_submit_state_end(struct io_submit_state *state)
1041+
{
1042+
blk_finish_plug(&state->plug);
1043+
io_file_put(state, NULL);
1044+
}
1045+
1046+
/*
1047+
* Start submission side cache.
1048+
*/
1049+
static void io_submit_state_start(struct io_submit_state *state,
1050+
struct io_ring_ctx *ctx, unsigned max_ios)
1051+
{
1052+
blk_start_plug(&state->plug);
1053+
state->file = NULL;
1054+
state->ios_left = max_ios;
1055+
}
1056+
9591057
static void io_commit_sqring(struct io_ring_ctx *ctx)
9601058
{
9611059
struct io_sq_ring *ring = ctx->sq_ring;
@@ -1029,11 +1127,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
10291127

10301128
static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
10311129
{
1130+
struct io_submit_state state, *statep = NULL;
10321131
int i, ret = 0, submit = 0;
1033-
struct blk_plug plug;
10341132

1035-
if (to_submit > IO_PLUG_THRESHOLD)
1036-
blk_start_plug(&plug);
1133+
if (to_submit > IO_PLUG_THRESHOLD) {
1134+
io_submit_state_start(&state, ctx, to_submit);
1135+
statep = &state;
1136+
}
10371137

10381138
for (i = 0; i < to_submit; i++) {
10391139
struct sqe_submit s;
@@ -1044,7 +1144,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
10441144
s.has_user = true;
10451145
s.needs_lock = false;
10461146

1047-
ret = io_submit_sqe(ctx, &s);
1147+
ret = io_submit_sqe(ctx, &s, statep);
10481148
if (ret) {
10491149
io_drop_sqring(ctx);
10501150
break;
@@ -1054,8 +1154,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
10541154
}
10551155
io_commit_sqring(ctx);
10561156

1057-
if (to_submit > IO_PLUG_THRESHOLD)
1058-
blk_finish_plug(&plug);
1157+
if (statep)
1158+
io_submit_state_end(statep);
10591159

10601160
return submit ? submit : ret;
10611161
}

0 commit comments

Comments
 (0)