@@ -35,6 +35,9 @@
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
 #include <linux/compat.h>
+#include <linux/anon_inodes.h>
+#include <linux/migrate.h>
+#include <linux/ramfs.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -110,6 +113,7 @@ struct kioctx {
 	} ____cacheline_aligned_in_smp;
 
 	struct page		*internal_pages[AIO_RING_PAGES];
+	struct file		*aio_ring_file;
 };
 
 /*------ sysctl variables----*/
@@ -138,36 +142,124 @@ __initcall(aio_setup);
 
 static void aio_free_ring(struct kioctx *ctx)
 {
-	long i;
+	int i;
+	struct file *aio_ring_file = ctx->aio_ring_file;
 
-	for (i = 0; i < ctx->nr_pages; i++)
+	for (i = 0; i < ctx->nr_pages; i++) {
+		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
+				page_count(ctx->ring_pages[i]));
 		put_page(ctx->ring_pages[i]);
+	}
 
 	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
 		kfree(ctx->ring_pages);
+
+	if (aio_ring_file) {
+		truncate_setsize(aio_ring_file->f_inode, 0);
+		pr_debug("pid(%d) i_nlink=%u d_count=%d d_unhashed=%d i_count=%d\n",
+			current->pid, aio_ring_file->f_inode->i_nlink,
+			aio_ring_file->f_path.dentry->d_count,
+			d_unhashed(aio_ring_file->f_path.dentry),
+			atomic_read(&aio_ring_file->f_inode->i_count));
+		fput(aio_ring_file);
+		ctx->aio_ring_file = NULL;
+	}
 }
 
+static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &generic_file_vm_ops;
+	return 0;
+}
+
+static const struct file_operations aio_ring_fops = {
+	.mmap = aio_ring_mmap,
+};
+
+static int aio_set_page_dirty(struct page *page)
+{
+	return 0;
+}
+
+static int aio_migratepage(struct address_space *mapping, struct page *new,
+			   struct page *old, enum migrate_mode mode)
+{
+	struct kioctx *ctx = mapping->private_data;
+	unsigned long flags;
+	unsigned idx = old->index;
+	int rc;
+
+	/* Writeback must be complete */
+	BUG_ON(PageWriteback(old));
+	put_page(old);
+
+	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+	if (rc != MIGRATEPAGE_SUCCESS) {
+		get_page(old);
+		return rc;
+	}
+
+	get_page(new);
+
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	migrate_page_copy(new, old);
+	ctx->ring_pages[idx] = new;
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+	return rc;
+}
+
+static const struct address_space_operations aio_ctx_aops = {
+	.set_page_dirty	= aio_set_page_dirty,
+	.migratepage	= aio_migratepage,
+};
+
 static int aio_setup_ring(struct kioctx *ctx)
 {
 	struct aio_ring *ring;
 	unsigned nr_events = ctx->max_reqs;
 	struct mm_struct *mm = current->mm;
 	unsigned long size, populate;
 	int nr_pages;
+	int i;
+	struct file *file;
 
 	/* Compensate for the ring buffer's head/tail overlap entry */
 	nr_events += 2;	/* 1 is required, 2 for good luck */
 
 	size = sizeof(struct aio_ring);
 	size += sizeof(struct io_event) * nr_events;
-	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
+	nr_pages = PFN_UP(size);
 	if (nr_pages < 0)
 		return -EINVAL;
 
-	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
+	file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+	if (IS_ERR(file)) {
+		ctx->aio_ring_file = NULL;
+		return -EAGAIN;
+	}
+
+	file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
+	file->f_inode->i_mapping->private_data = ctx;
+	file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page;
+		page = find_or_create_page(file->f_inode->i_mapping,
+					   i, GFP_HIGHUSER | __GFP_ZERO);
+		if (!page)
+			break;
+		pr_debug("pid(%d) page[%d]->count=%d\n",
+			 current->pid, i, page_count(page));
+		SetPageUptodate(page);
+		SetPageDirty(page);
+		unlock_page(page);
+	}
+	ctx->aio_ring_file = file;
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+			/ sizeof(struct io_event);
 
-	ctx->nr_events = 0;
 	ctx->ring_pages = ctx->internal_pages;
 	if (nr_pages > AIO_RING_PAGES) {
 		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
@@ -178,28 +270,31 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	ctx->mmap_size = nr_pages * PAGE_SIZE;
 	pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
+
 	down_write(&mm->mmap_sem);
-	ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
-				       PROT_READ | PROT_WRITE,
-				       MAP_ANONYMOUS | MAP_PRIVATE, 0, &populate);
+	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
+				       PROT_READ | PROT_WRITE,
+				       MAP_SHARED | MAP_POPULATE, 0, &populate);
 	if (IS_ERR((void *)ctx->mmap_base)) {
 		up_write(&mm->mmap_sem);
 		ctx->mmap_size = 0;
 		aio_free_ring(ctx);
 		return -EAGAIN;
 	}
+	up_write(&mm->mmap_sem);
+
+	mm_populate(ctx->mmap_base, populate);
 
 	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
 	ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
 				       1, 0, ctx->ring_pages, NULL);
-	up_write(&mm->mmap_sem);
+	for (i = 0; i < ctx->nr_pages; i++)
+		put_page(ctx->ring_pages[i]);
 
 	if (unlikely(ctx->nr_pages != nr_pages)) {
 		aio_free_ring(ctx);
 		return -EAGAIN;
 	}
-	if (populate)
-		mm_populate(ctx->mmap_base, populate);
 
 	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events; /* trusted copy */
@@ -399,6 +494,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	err = -EAGAIN;
 	aio_free_ring(ctx);
 out_freectx:
+	if (ctx->aio_ring_file)
+		fput(ctx->aio_ring_file);
 	kmem_cache_free(kioctx_cachep, ctx);
 	pr_debug("error allocating ioctx %d\n", err);
 	return ERR_PTR(err);
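For context on the ring this patch re-homes (not part of the commit itself): aio_setup_ring() stores the ring's mapped address in ctx->user_id, and io_setup(2) hands that value back to userspace as the aio_context_t handle, so the completion ring header is directly readable from the process. The sketch below illustrates that under one assumption: the local aio_ring_hdr mirror matches the struct aio_ring layout in fs/aio.c of this era (it is not an exported header).

/* aio_ring_peek.c - illustrative userspace sketch, not part of this patch.
 * Assumes the kernel's struct aio_ring layout from fs/aio.c of this era.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>	/* aio_context_t */

/* Hypothetical userspace mirror of the kernel's struct aio_ring header. */
struct aio_ring_hdr {
	unsigned id;		/* kernel internal index */
	unsigned nr;		/* number of io_events in the ring */
	unsigned head;
	unsigned tail;
	unsigned magic;		/* AIO_RING_MAGIC == 0xa10a10a1 */
	unsigned compat_features;
	unsigned incompat_features;
	unsigned header_length;	/* size of this header */
};

int main(void)
{
	aio_context_t ctx = 0;

	if (syscall(SYS_io_setup, 128, &ctx) < 0) {
		perror("io_setup");
		return 1;
	}

	/* aio_setup_ring() sets ctx->user_id = ctx->mmap_base, so the
	 * handle doubles as the address of the mapped ring header. */
	struct aio_ring_hdr *ring = (struct aio_ring_hdr *)ctx;
	printf("magic=%#x nr=%u head=%u tail=%u\n",
	       ring->magic, ring->nr, ring->head, ring->tail);

	syscall(SYS_io_destroy, ctx);
	return 0;
}

Because the ring pages now sit behind an anon-inode file whose address_space uses aio_ctx_aops, compaction can move them through aio_migratepage() while the userspace view above stays valid: the kernel swaps the physical page and updates ctx->ring_pages[idx] under completion_lock, and the process keeps reading the same virtual address.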