@@ -165,6 +165,19 @@ struct io_kiocb {
 #define IO_PLUG_THRESHOLD	2
 #define IO_IOPOLL_BATCH		8
 
+struct io_submit_state {
+	struct blk_plug		plug;
+
+	/*
+	 * File reference cache
+	 */
+	struct file		*file;
+	unsigned int		fd;
+	unsigned int		has_refs;
+	unsigned int		used_refs;
+	unsigned int		ios_left;
+};
+
 static struct kmem_cache *req_cachep;
 
 static const struct file_operations io_uring_fops;
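The struct added above is the per-batch submission cache: plug batches block-layer submission, and the remaining fields cache one file reference across consecutive SQEs. Its key invariant is that has_refs counts references taken up front while used_refs counts those actually handed to requests, so a flush must return the difference. A minimal userspace sketch of just that accounting (fake_file and all counts are illustrative stand-ins, not from the patch):

    #include <assert.h>
    #include <stdio.h>

    struct fake_file { int refcount; };          /* stand-in for struct file */

    int main(void)
    {
        struct fake_file f = { .refcount = 9 };  /* 1 owner + 8 cached refs */
        unsigned int has_refs = 8, used_refs = 3;

        /* flush: return the references no request ended up consuming */
        unsigned int diff = has_refs - used_refs;
        f.refcount -= diff;                      /* models fput_many(file, diff) */

        printf("returned %u refs, %d remain\n", diff, f.refcount);
        assert(f.refcount == 4);                 /* 1 owner + 3 consumed by requests */
        return 0;
    }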
@@ -332,9 +345,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			       struct list_head *done)
 {
 	void *reqs[IO_IOPOLL_BATCH];
+	int file_count, to_free;
+	struct file *file = NULL;
 	struct io_kiocb *req;
-	int to_free = 0;
 
+	file_count = to_free = 0;
 	while (!list_empty(done)) {
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
@@ -344,12 +359,28 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		reqs[to_free++] = req;
 		(*nr_events)++;
 
-		fput(req->rw.ki_filp);
+		/*
+		 * Batched puts of the same file, to avoid dirtying the
+		 * file usage count multiple times, if avoidable.
+		 */
+		if (!file) {
+			file = req->rw.ki_filp;
+			file_count = 1;
+		} else if (file == req->rw.ki_filp) {
+			file_count++;
+		} else {
+			fput_many(file, file_count);
+			file = req->rw.ki_filp;
+			file_count = 1;
+		}
+
 		if (to_free == ARRAY_SIZE(reqs))
 			io_free_req_many(ctx, reqs, &to_free);
 	}
 	io_commit_cqring(ctx);
 
+	if (file)
+		fput_many(file, file_count);
 	io_free_req_many(ctx, reqs, &to_free);
 }
 
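The hunk above replaces one fput() per completion with run-length batching: consecutive completions against the same file collapse into a single fput_many() call, with a final flush after the loop. A userspace sketch of the same pattern, assuming a hypothetical put_many() in place of fput_many():

    #include <stdio.h>

    /* hypothetical stand-in for fput_many(file, count) */
    static void put_many(int file, int count)
    {
        printf("put fd %d, %d refs\n", file, count);
    }

    int main(void)
    {
        int done[] = { 7, 7, 7, 4, 4, 7 };   /* files, in completion order */
        int cur = -1, count = 0;

        for (int i = 0; i < 6; i++) {
            if (count && cur == done[i]) {
                count++;                     /* same file: extend the run */
            } else {
                if (count)
                    put_many(cur, count);    /* file changed: flush the run */
                cur = done[i];
                count = 1;
            }
        }
        if (count)
            put_many(cur, count);            /* final flush, as after the loop */
        return 0;
    }

Note that only consecutive runs are batched: the trailing 7 in the example still costs its own put, which is exactly the "if avoidable" caveat in the patch's comment.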
@@ -530,6 +561,48 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	list_add_tail(&req->list, &ctx->poll_list);
 }
 
+static void io_file_put(struct io_submit_state *state, struct file *file)
+{
+	if (!state) {
+		fput(file);
+	} else if (state->file) {
+		int diff = state->has_refs - state->used_refs;
+
+		if (diff)
+			fput_many(state->file, diff);
+		state->file = NULL;
+	}
+}
+
+/*
+ * Get as many references to a file as we have IOs left in this submission,
+ * assuming most submissions are for one file, or at least that each file
+ * has more than one submission.
+ */
+static struct file *io_file_get(struct io_submit_state *state, int fd)
+{
+	if (!state)
+		return fget(fd);
+
+	if (state->file) {
+		if (state->fd == fd) {
+			state->used_refs++;
+			state->ios_left--;
+			return state->file;
+		}
+		io_file_put(state, NULL);
+	}
+	state->file = fget_many(fd, state->ios_left);
+	if (!state->file)
+		return NULL;
+
+	state->fd = fd;
+	state->has_refs = state->ios_left;
+	state->used_refs = 1;
+	state->ios_left--;
+	return state->file;
+}
+
 /*
  * If we tracked the file through the SCM inflight mechanism, we could support
  * any file. For now, just ensure that anything potentially problematic is done
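A worked trace of the two helpers above for a four-IO batch, sketched as comments; the fds and counts are illustrative, not from the patch:

    /*
     * Assumed sequence for a batch of 4 SQEs (ios_left starts at 4);
     * the state fields are shown after each call:
     *
     *   io_file_get(state, 5) -> fget_many(5, 4); has_refs=4 used_refs=1 ios_left=3
     *   io_file_get(state, 5) -> cache hit;       has_refs=4 used_refs=2 ios_left=2
     *   io_file_get(state, 5) -> cache hit;       has_refs=4 used_refs=3 ios_left=1
     *   io_file_get(state, 6) -> io_file_put() flushes 4-3=1 unused ref,
     *                            then fget_many(6, 1) for the new file
     *
     * One atomic bump of fd 5's refcount covers three requests; without the
     * cache, every io_file_get() would be a separate fget().
     */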
@@ -548,7 +621,7 @@ static bool io_file_supports_async(struct file *file)
 }
 
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-		      bool force_nonblock)
+		      bool force_nonblock, struct io_submit_state *state)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
@@ -560,7 +633,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		return 0;
 
 	fd = READ_ONCE(sqe->fd);
-	kiocb->ki_filp = fget(fd);
+	kiocb->ki_filp = io_file_get(state, fd);
 	if (unlikely(!kiocb->ki_filp))
 		return -EBADF;
 	if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
@@ -604,7 +677,10 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	}
 	return 0;
 out_fput:
-	fput(kiocb->ki_filp);
+	/* in case of error, we didn't use this file reference. drop it. */
+	if (state)
+		state->used_refs--;
+	io_file_put(state, kiocb->ki_filp);
 	return ret;
 }
 
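The error path above first rolls back the reference that io_file_get() charged to this request, then flushes the cache. A short trace of that bookkeeping, with illustrative counts:

    /*
     * Assume io_prep_rw() fails after io_file_get() returned a cached ref
     * (has_refs=4, used_refs=2). The rollback makes used_refs=1, so the
     * io_file_put(state, file) flush returns 4-1=3 refs via fput_many()
     * instead of leaking the reference this request never consumed.
     */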
@@ -650,15 +726,15 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 }
 
 static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
-		       bool force_nonblock)
+		       bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
 	struct iov_iter iter;
 	struct file *file;
 	ssize_t ret;
 
-	ret = io_prep_rw(req, s->sqe, force_nonblock);
+	ret = io_prep_rw(req, s->sqe, force_nonblock, state);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
@@ -694,15 +770,15 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 }
 
 static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
-			bool force_nonblock)
+			bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
 	struct iov_iter iter;
 	struct file *file;
 	ssize_t ret;
 
-	ret = io_prep_rw(req, s->sqe, force_nonblock);
+	ret = io_prep_rw(req, s->sqe, force_nonblock, state);
 	if (ret)
 		return ret;
 	/* Hold on to the file for -EAGAIN */
@@ -826,7 +902,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-			   const struct sqe_submit *s, bool force_nonblock)
+			   const struct sqe_submit *s, bool force_nonblock,
+			   struct io_submit_state *state)
 {
 	ssize_t ret;
 	int opcode;
@@ -841,10 +918,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		ret = io_nop(req, req->user_data);
 		break;
 	case IORING_OP_READV:
-		ret = io_read(req, s, force_nonblock);
+		ret = io_read(req, s, force_nonblock, state);
 		break;
 	case IORING_OP_WRITEV:
-		ret = io_write(req, s, force_nonblock);
+		ret = io_write(req, s, force_nonblock, state);
 		break;
 	case IORING_OP_FSYNC:
 		ret = io_fsync(req, s->sqe, force_nonblock);
@@ -896,7 +973,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 	s->needs_lock = true;
 
 	do {
-		ret = __io_submit_sqe(ctx, req, s, false);
+		ret = __io_submit_sqe(ctx, req, s, false, NULL);
 		/*
 		 * We can get EAGAIN for polled IO even though we're forcing
 		 * a sync submission from here, since we can't wait for
@@ -920,7 +997,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 	kfree(sqe);
 }
 
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
+static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
+			 struct io_submit_state *state)
 {
 	struct io_kiocb *req;
 	ssize_t ret;
@@ -935,7 +1013,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
 
 	req->rw.ki_filp = NULL;
 
-	ret = __io_submit_sqe(ctx, req, s, true);
+	ret = __io_submit_sqe(ctx, req, s, true, state);
 	if (ret == -EAGAIN) {
 		struct io_uring_sqe *sqe_copy;
 
@@ -956,6 +1034,26 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	return ret;
 }
 
+/*
+ * Batched submission is done, ensure local IO is flushed out.
+ */
+static void io_submit_state_end(struct io_submit_state *state)
+{
+	blk_finish_plug(&state->plug);
+	io_file_put(state, NULL);
+}
+
+/*
+ * Start submission side cache.
+ */
+static void io_submit_state_start(struct io_submit_state *state,
+				  struct io_ring_ctx *ctx, unsigned max_ios)
+{
+	blk_start_plug(&state->plug);
+	state->file = NULL;
+	state->ios_left = max_ios;
+}
+
 static void io_commit_sqring(struct io_ring_ctx *ctx)
 {
 	struct io_sq_ring *ring = ctx->sq_ring;
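The two helpers above frame a submission batch: start plugs the block layer and arms the file cache, end unplugs and returns whatever references went unused. The intended pairing, sketched as a fragment mirroring the io_ring_submit() changes below (error handling elided):

    struct io_submit_state state;

    io_submit_state_start(&state, ctx, to_submit);  /* plug + arm the file cache */
    /* ... io_submit_sqe(ctx, &s, &state) for each SQE ... */
    io_submit_state_end(&state);                    /* unplug + drop unused refs */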
@@ -1029,11 +1127,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 
 static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 {
+	struct io_submit_state state, *statep = NULL;
 	int i, ret = 0, submit = 0;
-	struct blk_plug plug;
 
-	if (to_submit > IO_PLUG_THRESHOLD)
-		blk_start_plug(&plug);
+	if (to_submit > IO_PLUG_THRESHOLD) {
+		io_submit_state_start(&state, ctx, to_submit);
+		statep = &state;
+	}
 
 	for (i = 0; i < to_submit; i++) {
 		struct sqe_submit s;
@@ -1044,7 +1144,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 		s.has_user = true;
 		s.needs_lock = false;
 
-		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret) {
 			io_drop_sqring(ctx);
 			break;
@@ -1054,8 +1154,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 	}
 	io_commit_sqring(ctx);
 
-	if (to_submit > IO_PLUG_THRESHOLD)
-		blk_finish_plug(&plug);
+	if (statep)
+		io_submit_state_end(statep);
 
 	return submit ? submit : ret;
 }