Skip to content

Commit 0b80ae6

Browse files
cmaiolino authored and djwong committed
xfs: Add infrastructure needed for error propagation during buffer IO failure
With the current code, XFS never re-submits a failed buffer for IO, because the failed item in the buffer is kept in the flush locked state forever. To be able to resubmit a log item for IO, we need a way to mark an item as failed if, for any reason, the buffer to which the item belonged failed during writeback. Add a new log item callback to be used after an IO completion failure and make the needed cleanups. Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
1 parent 6f4a1ee commit 0b80ae6

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

fs/xfs/xfs_buf_item.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "xfs_error.h"
3030
#include "xfs_trace.h"
3131
#include "xfs_log.h"
32+
#include "xfs_inode.h"
3233

3334

3435
kmem_zone_t *xfs_buf_item_zone;
@@ -1054,6 +1055,31 @@ xfs_buf_do_callbacks(
10541055
}
10551056
}
10561057

1058+
/*
1059+
* Invoke the error state callback for each log item affected by the failed I/O.
1060+
*
1061+
* If a metadata buffer write fails with a non-permanent error, the buffer is
1062+
* eventually resubmitted and so the completion callbacks are not run. The error
1063+
* state may need to be propagated to the log items attached to the buffer,
1064+
 * however, so the next AIL push of the item knows how to handle it correctly.
1065+
*/
1066+
STATIC void
1067+
xfs_buf_do_callbacks_fail(
1068+
struct xfs_buf *bp)
1069+
{
1070+
struct xfs_log_item *next;
1071+
struct xfs_log_item *lip = bp->b_fspriv;
1072+
struct xfs_ail *ailp = lip->li_ailp;
1073+
1074+
spin_lock(&ailp->xa_lock);
1075+
for (; lip; lip = next) {
1076+
next = lip->li_bio_list;
1077+
if (lip->li_ops->iop_error)
1078+
lip->li_ops->iop_error(lip, bp);
1079+
}
1080+
spin_unlock(&ailp->xa_lock);
1081+
}
1082+
10571083
static bool
10581084
xfs_buf_iodone_callback_error(
10591085
struct xfs_buf *bp)
@@ -1123,7 +1149,11 @@ xfs_buf_iodone_callback_error(
11231149
if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
11241150
goto permanent_error;
11251151

1126-
/* still a transient error, higher layers will retry */
1152+
/*
1153+
* Still a transient error, run IO completion failure callbacks and let
1154+
* the higher layers retry the buffer.
1155+
*/
1156+
xfs_buf_do_callbacks_fail(bp);
11271157
xfs_buf_ioerror(bp, 0);
11281158
xfs_buf_relse(bp);
11291159
return true;

fs/xfs/xfs_trans.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,13 @@ typedef struct xfs_log_item {
6464
} xfs_log_item_t;
6565

6666
#define XFS_LI_IN_AIL 0x1
67-
#define XFS_LI_ABORTED 0x2
67+
#define XFS_LI_ABORTED 0x2
68+
#define XFS_LI_FAILED 0x4
6869

6970
#define XFS_LI_FLAGS \
7071
{ XFS_LI_IN_AIL, "IN_AIL" }, \
71-
{ XFS_LI_ABORTED, "ABORTED" }
72+
{ XFS_LI_ABORTED, "ABORTED" }, \
73+
{ XFS_LI_FAILED, "FAILED" }
7274

7375
struct xfs_item_ops {
7476
void (*iop_size)(xfs_log_item_t *, int *, int *);
@@ -79,6 +81,7 @@ struct xfs_item_ops {
7981
void (*iop_unlock)(xfs_log_item_t *);
8082
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
8183
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
84+
void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
8285
};
8386

8487
void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,

0 commit comments

Comments
 (0)