37
37
#include <linux/pagevec.h>
38
38
#include <linux/writeback.h>
39
39
40
- /* flags for direct write completions */
41
- #define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
42
- #define XFS_DIO_FLAG_APPEND (1 << 1)
43
- #define XFS_DIO_FLAG_COW (1 << 2)
44
-
45
40
/*
46
41
* structure owned by writepages passed to individual writepage calls
47
42
*/
@@ -1175,45 +1170,6 @@ xfs_vm_releasepage(
1175
1170
return try_to_free_buffers (page );
1176
1171
}
1177
1172
1178
- /*
1179
- * When we map a DIO buffer, we may need to pass flags to
1180
- * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
1181
- *
1182
- * Note that for DIO, an IO to the highest supported file block offset (i.e.
1183
- * 2^63 - 1 FSB bytes) will result in the offset + count overflowing a signed 64
1184
- * bit variable. Hence if we see this overflow, we have to assume that the IO is
1185
- * extending the file size. We won't know for sure until IO completion is run
1186
- * and the actual max write offset is communicated to the IO completion
1187
- * routine.
1188
- */
1189
- static void
1190
- xfs_map_direct (
1191
- struct inode * inode ,
1192
- struct buffer_head * bh_result ,
1193
- struct xfs_bmbt_irec * imap ,
1194
- xfs_off_t offset ,
1195
- bool is_cow )
1196
- {
1197
- uintptr_t * flags = (uintptr_t * )& bh_result -> b_private ;
1198
- xfs_off_t size = bh_result -> b_size ;
1199
-
1200
- trace_xfs_get_blocks_map_direct (XFS_I (inode ), offset , size ,
1201
- ISUNWRITTEN (imap ) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
1202
- XFS_IO_OVERWRITE , imap );
1203
-
1204
- if (ISUNWRITTEN (imap )) {
1205
- * flags |= XFS_DIO_FLAG_UNWRITTEN ;
1206
- set_buffer_defer_completion (bh_result );
1207
- } else if (is_cow ) {
1208
- * flags |= XFS_DIO_FLAG_COW ;
1209
- set_buffer_defer_completion (bh_result );
1210
- }
1211
- if (offset + size > i_size_read (inode ) || offset + size < 0 ) {
1212
- * flags |= XFS_DIO_FLAG_APPEND ;
1213
- set_buffer_defer_completion (bh_result );
1214
- }
1215
- }
1216
-
1217
1173
/*
1218
1174
* If this is O_DIRECT or the mpage code calling tell them how large the mapping
1219
1175
* is, so that we can avoid repeated get_blocks calls.
@@ -1254,51 +1210,12 @@ xfs_map_trim_size(
1254
1210
bh_result -> b_size = mapping_size ;
1255
1211
}
1256
1212
1257
- /* Bounce unaligned directio writes to the page cache. */
1258
1213
static int
1259
- xfs_bounce_unaligned_dio_write (
1260
- struct xfs_inode * ip ,
1261
- xfs_fileoff_t offset_fsb ,
1262
- struct xfs_bmbt_irec * imap )
1263
- {
1264
- struct xfs_bmbt_irec irec ;
1265
- xfs_fileoff_t delta ;
1266
- bool shared ;
1267
- bool x ;
1268
- int error ;
1269
-
1270
- irec = * imap ;
1271
- if (offset_fsb > irec .br_startoff ) {
1272
- delta = offset_fsb - irec .br_startoff ;
1273
- irec .br_blockcount -= delta ;
1274
- irec .br_startblock += delta ;
1275
- irec .br_startoff = offset_fsb ;
1276
- }
1277
- error = xfs_reflink_trim_around_shared (ip , & irec , & shared , & x );
1278
- if (error )
1279
- return error ;
1280
-
1281
- /*
1282
- * We're here because we're trying to do a directio write to a
1283
- * region that isn't aligned to a filesystem block. If any part
1284
- * of the extent is shared, fall back to buffered mode to handle
1285
- * the RMW. This is done by returning -EREMCHG ("remote addr
1286
- * changed"), which is caught further up the call stack.
1287
- */
1288
- if (shared ) {
1289
- trace_xfs_reflink_bounce_dio_write (ip , imap );
1290
- return - EREMCHG ;
1291
- }
1292
- return 0 ;
1293
- }
1294
-
1295
- STATIC int
1296
- __xfs_get_blocks (
1214
+ xfs_get_blocks (
1297
1215
struct inode * inode ,
1298
1216
sector_t iblock ,
1299
1217
struct buffer_head * bh_result ,
1300
- int create ,
1301
- bool direct )
1218
+ int create )
1302
1219
{
1303
1220
struct xfs_inode * ip = XFS_I (inode );
1304
1221
struct xfs_mount * mp = ip -> i_mount ;
@@ -1309,10 +1226,8 @@ __xfs_get_blocks(
1309
1226
int nimaps = 1 ;
1310
1227
xfs_off_t offset ;
1311
1228
ssize_t size ;
1312
- int new = 0 ;
1313
- bool is_cow = false;
1314
1229
1315
- BUG_ON (create && ! direct );
1230
+ BUG_ON (create );
1316
1231
1317
1232
if (XFS_FORCED_SHUTDOWN (mp ))
1318
1233
return - EIO ;
@@ -1321,7 +1236,7 @@ __xfs_get_blocks(
1321
1236
ASSERT (bh_result -> b_size >= (1 << inode -> i_blkbits ));
1322
1237
size = bh_result -> b_size ;
1323
1238
1324
- if (! create && offset >= i_size_read (inode ))
1239
+ if (offset >= i_size_read (inode ))
1325
1240
return 0 ;
1326
1241
1327
1242
/*
@@ -1336,73 +1251,12 @@ __xfs_get_blocks(
1336
1251
end_fsb = XFS_B_TO_FSB (mp , (xfs_ufsize_t )offset + size );
1337
1252
offset_fsb = XFS_B_TO_FSBT (mp , offset );
1338
1253
1339
- if (create && direct && xfs_is_reflink_inode (ip )) {
1340
- is_cow = xfs_reflink_find_cow_mapping (ip , offset , & imap );
1341
- ASSERT (!is_cow || !isnullstartblock (imap .br_startblock ));
1342
- }
1343
-
1344
- if (!is_cow ) {
1345
- error = xfs_bmapi_read (ip , offset_fsb , end_fsb - offset_fsb ,
1346
- & imap , & nimaps , XFS_BMAPI_ENTIRE );
1347
- /*
1348
- * Truncate an overwrite extent if there's a pending CoW
1349
- * reservation before the end of this extent. This
1350
- * forces us to come back to get_blocks to take care of
1351
- * the CoW.
1352
- */
1353
- if (create && direct && nimaps &&
1354
- imap .br_startblock != HOLESTARTBLOCK &&
1355
- imap .br_startblock != DELAYSTARTBLOCK &&
1356
- !ISUNWRITTEN (& imap ))
1357
- xfs_reflink_trim_irec_to_next_cow (ip , offset_fsb ,
1358
- & imap );
1359
- }
1254
+ error = xfs_bmapi_read (ip , offset_fsb , end_fsb - offset_fsb ,
1255
+ & imap , & nimaps , XFS_BMAPI_ENTIRE );
1360
1256
if (error )
1361
1257
goto out_unlock ;
1362
1258
1363
- /*
1364
- * The only time we can ever safely find delalloc blocks on direct I/O
1365
- * is a dio write to post-eof speculative preallocation. All other
1366
- * scenarios are indicative of a problem or misuse (such as mixing
1367
- * direct and mapped I/O).
1368
- *
1369
- * The file may be unmapped by the time we get here so we cannot
1370
- * reliably fail the I/O based on mapping. Instead, fail the I/O if this
1371
- * is a read or a write within eof. Otherwise, carry on but warn as a
1372
- * precuation if the file happens to be mapped.
1373
- */
1374
- if (direct && imap .br_startblock == DELAYSTARTBLOCK ) {
1375
- if (!create || offset < i_size_read (VFS_I (ip ))) {
1376
- WARN_ON_ONCE (1 );
1377
- error = - EIO ;
1378
- goto out_unlock ;
1379
- }
1380
- WARN_ON_ONCE (mapping_mapped (VFS_I (ip )-> i_mapping ));
1381
- }
1382
-
1383
- /* for DAX, we convert unwritten extents directly */
1384
- if (create &&
1385
- (!nimaps ||
1386
- (imap .br_startblock == HOLESTARTBLOCK ||
1387
- imap .br_startblock == DELAYSTARTBLOCK ) ||
1388
- (IS_DAX (inode ) && ISUNWRITTEN (& imap )))) {
1389
- /*
1390
- * xfs_iomap_write_direct() expects the shared lock. It
1391
- * is unlocked on return.
1392
- */
1393
- if (lockmode == XFS_ILOCK_EXCL )
1394
- xfs_ilock_demote (ip , lockmode );
1395
-
1396
- error = xfs_iomap_write_direct (ip , offset , size ,
1397
- & imap , nimaps );
1398
- if (error )
1399
- return error ;
1400
- new = 1 ;
1401
-
1402
- trace_xfs_get_blocks_alloc (ip , offset , size ,
1403
- ISUNWRITTEN (& imap ) ? XFS_IO_UNWRITTEN
1404
- : XFS_IO_DELALLOC , & imap );
1405
- } else if (nimaps ) {
1259
+ if (nimaps ) {
1406
1260
trace_xfs_get_blocks_found (ip , offset , size ,
1407
1261
ISUNWRITTEN (& imap ) ? XFS_IO_UNWRITTEN
1408
1262
: XFS_IO_OVERWRITE , & imap );
@@ -1412,12 +1266,6 @@ __xfs_get_blocks(
1412
1266
goto out_unlock ;
1413
1267
}
1414
1268
1415
- if (IS_DAX (inode ) && create ) {
1416
- ASSERT (!ISUNWRITTEN (& imap ));
1417
- /* zeroing is not needed at a higher layer */
1418
- new = 0 ;
1419
- }
1420
-
1421
1269
/* trim mapping down to size requested */
1422
1270
xfs_map_trim_size (inode , iblock , bh_result , & imap , offset , size );
1423
1271
@@ -1427,144 +1275,21 @@ __xfs_get_blocks(
1427
1275
*/
1428
1276
if (imap .br_startblock != HOLESTARTBLOCK &&
1429
1277
imap .br_startblock != DELAYSTARTBLOCK &&
1430
- (create || !ISUNWRITTEN (& imap ))) {
1431
- if (create && direct && !is_cow ) {
1432
- error = xfs_bounce_unaligned_dio_write (ip , offset_fsb ,
1433
- & imap );
1434
- if (error )
1435
- return error ;
1436
- }
1437
-
1278
+ !ISUNWRITTEN (& imap ))
1438
1279
xfs_map_buffer (inode , bh_result , & imap , offset );
1439
- if (ISUNWRITTEN (& imap ))
1440
- set_buffer_unwritten (bh_result );
1441
- /* direct IO needs special help */
1442
- if (create )
1443
- xfs_map_direct (inode , bh_result , & imap , offset , is_cow );
1444
- }
1445
1280
1446
1281
/*
1447
1282
* If this is a realtime file, data may be on a different device
1448
1283
* to that pointed to from the buffer_head b_bdev currently.
1449
1284
*/
1450
1285
bh_result -> b_bdev = xfs_find_bdev_for_inode (inode );
1451
-
1452
- /*
1453
- * If we previously allocated a block out beyond eof and we are now
1454
- * coming back to use it then we will need to flag it as new even if it
1455
- * has a disk address.
1456
- *
1457
- * With sub-block writes into unwritten extents we also need to mark
1458
- * the buffer as new so that the unwritten parts of the buffer gets
1459
- * correctly zeroed.
1460
- */
1461
- if (create &&
1462
- ((!buffer_mapped (bh_result ) && !buffer_uptodate (bh_result )) ||
1463
- (offset >= i_size_read (inode )) ||
1464
- (new || ISUNWRITTEN (& imap ))))
1465
- set_buffer_new (bh_result );
1466
-
1467
1286
return 0 ;
1468
1287
1469
1288
out_unlock :
1470
1289
xfs_iunlock (ip , lockmode );
1471
1290
return error ;
1472
1291
}
1473
1292
1474
- int
1475
- xfs_get_blocks (
1476
- struct inode * inode ,
1477
- sector_t iblock ,
1478
- struct buffer_head * bh_result ,
1479
- int create )
1480
- {
1481
- return __xfs_get_blocks (inode , iblock , bh_result , create , false);
1482
- }
1483
-
1484
- int
1485
- xfs_get_blocks_direct (
1486
- struct inode * inode ,
1487
- sector_t iblock ,
1488
- struct buffer_head * bh_result ,
1489
- int create )
1490
- {
1491
- return __xfs_get_blocks (inode , iblock , bh_result , create , true);
1492
- }
1493
-
1494
- /*
1495
- * Complete a direct I/O write request.
1496
- *
1497
- * xfs_map_direct passes us some flags in the private data to tell us what to
1498
- * do. If no flags are set, then the write IO is an overwrite wholly within
1499
- * the existing allocated file size and so there is nothing for us to do.
1500
- *
1501
- * Note that in this case the completion can be called in interrupt context,
1502
- * whereas if we have flags set we will always be called in task context
1503
- * (i.e. from a workqueue).
1504
- */
1505
- int
1506
- xfs_end_io_direct_write (
1507
- struct kiocb * iocb ,
1508
- loff_t offset ,
1509
- ssize_t size ,
1510
- void * private )
1511
- {
1512
- struct inode * inode = file_inode (iocb -> ki_filp );
1513
- struct xfs_inode * ip = XFS_I (inode );
1514
- uintptr_t flags = (uintptr_t )private ;
1515
- int error = 0 ;
1516
-
1517
- trace_xfs_end_io_direct_write (ip , offset , size );
1518
-
1519
- if (XFS_FORCED_SHUTDOWN (ip -> i_mount ))
1520
- return - EIO ;
1521
-
1522
- if (size <= 0 )
1523
- return size ;
1524
-
1525
- /*
1526
- * The flags tell us whether we are doing unwritten extent conversions
1527
- * or an append transaction that updates the on-disk file size. These
1528
- * cases are the only cases where we should *potentially* be needing
1529
- * to update the VFS inode size.
1530
- */
1531
- if (flags == 0 ) {
1532
- ASSERT (offset + size <= i_size_read (inode ));
1533
- return 0 ;
1534
- }
1535
-
1536
- /*
1537
- * We need to update the in-core inode size here so that we don't end up
1538
- * with the on-disk inode size being outside the in-core inode size. We
1539
- * have no other method of updating EOF for AIO, so always do it here
1540
- * if necessary.
1541
- *
1542
- * We need to lock the test/set EOF update as we can be racing with
1543
- * other IO completions here to update the EOF. Failing to serialise
1544
- * here can result in EOF moving backwards and Bad Things Happen when
1545
- * that occurs.
1546
- */
1547
- spin_lock (& ip -> i_flags_lock );
1548
- if (offset + size > i_size_read (inode ))
1549
- i_size_write (inode , offset + size );
1550
- spin_unlock (& ip -> i_flags_lock );
1551
-
1552
- if (flags & XFS_DIO_FLAG_COW )
1553
- error = xfs_reflink_end_cow (ip , offset , size );
1554
- if (flags & XFS_DIO_FLAG_UNWRITTEN ) {
1555
- trace_xfs_end_io_direct_write_unwritten (ip , offset , size );
1556
-
1557
- error = xfs_iomap_write_unwritten (ip , offset , size );
1558
- }
1559
- if (flags & XFS_DIO_FLAG_APPEND ) {
1560
- trace_xfs_end_io_direct_write_append (ip , offset , size );
1561
-
1562
- error = xfs_setfilesize (ip , offset , size );
1563
- }
1564
-
1565
- return error ;
1566
- }
1567
-
1568
1293
STATIC ssize_t
1569
1294
xfs_vm_direct_IO (
1570
1295
struct kiocb * iocb ,
0 commit comments