Skip to content

Commit 3be0f80

Browse files
trondmypd and amschuma-ntap
authored and committed
NFSv4.1: Fix up replays of interrupted requests
If the previous request on a slot was interrupted before it was processed by the server, then our slot sequence number may be out of whack, and so we try the next operation using the old sequence number.

The problem with this is that not all servers check to see that the client is replaying the same operations as previously when they decide to go to the replay cache, and so instead of the expected error of NFS4ERR_SEQ_FALSE_RETRY, we get a replay of the old reply, which could (if the operations match up) be mistaken by the client for a new reply.

To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op in order to resync our slot sequence number.

Cc: Olga Kornievskaia <olga.kornievskaia@gmail.com>
[olga.kornievskaia@gmail.com: fix an Oops]
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
1 parent 6f0afc2 commit 3be0f80

File tree

2 files changed

+103
-47
lines changed

2 files changed

+103
-47
lines changed

fs/nfs/nfs4_fs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
464464
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
465465
extern void nfs_release_seqid(struct nfs_seqid *seqid);
466466
extern void nfs_free_seqid(struct nfs_seqid *seqid);
467-
extern int nfs4_setup_sequence(const struct nfs_client *client,
467+
extern int nfs4_setup_sequence(struct nfs_client *client,
468468
struct nfs4_sequence_args *args,
469469
struct nfs4_sequence_res *res,
470470
struct rpc_task *task);

fs/nfs/nfs4proc.c

Lines changed: 102 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
9696
struct nfs_open_context *ctx, struct nfs4_label *ilabel,
9797
struct nfs4_label *olabel);
9898
#ifdef CONFIG_NFS_V4_1
99+
static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
100+
struct rpc_cred *cred,
101+
struct nfs4_slot *slot,
102+
bool is_privileged);
99103
static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
100104
struct rpc_cred *);
101105
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
@@ -644,13 +648,14 @@ static int nfs40_sequence_done(struct rpc_task *task,
644648

645649
#if defined(CONFIG_NFS_V4_1)
646650

647-
static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
651+
static void nfs41_release_slot(struct nfs4_slot *slot)
648652
{
649653
struct nfs4_session *session;
650654
struct nfs4_slot_table *tbl;
651-
struct nfs4_slot *slot = res->sr_slot;
652655
bool send_new_highest_used_slotid = false;
653656

657+
if (!slot)
658+
return;
654659
tbl = slot->table;
655660
session = tbl->session;
656661

@@ -676,13 +681,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
676681
send_new_highest_used_slotid = false;
677682
out_unlock:
678683
spin_unlock(&tbl->slot_tbl_lock);
679-
res->sr_slot = NULL;
680684
if (send_new_highest_used_slotid)
681685
nfs41_notify_server(session->clp);
682686
if (waitqueue_active(&tbl->slot_waitq))
683687
wake_up_all(&tbl->slot_waitq);
684688
}
685689

690+
static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
691+
{
692+
nfs41_release_slot(res->sr_slot);
693+
res->sr_slot = NULL;
694+
}
695+
686696
static int nfs41_sequence_process(struct rpc_task *task,
687697
struct nfs4_sequence_res *res)
688698
{
@@ -710,13 +720,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
710720
/* Check the SEQUENCE operation status */
711721
switch (res->sr_status) {
712722
case 0:
713-
/* If previous op on slot was interrupted and we reused
714-
* the seq# and got a reply from the cache, then retry
715-
*/
716-
if (task->tk_status == -EREMOTEIO && interrupted) {
717-
++slot->seq_nr;
718-
goto retry_nowait;
719-
}
720723
/* Update the slot's sequence and clientid lease timer */
721724
slot->seq_done = 1;
722725
clp = session->clp;
@@ -750,16 +753,16 @@ static int nfs41_sequence_process(struct rpc_task *task,
750753
* The slot id we used was probably retired. Try again
751754
* using a different slot id.
752755
*/
756+
if (slot->seq_nr < slot->table->target_highest_slotid)
757+
goto session_recover;
753758
goto retry_nowait;
754759
case -NFS4ERR_SEQ_MISORDERED:
755760
/*
756761
* Was the last operation on this sequence interrupted?
757762
* If so, retry after bumping the sequence number.
758763
*/
759-
if (interrupted) {
760-
++slot->seq_nr;
761-
goto retry_nowait;
762-
}
764+
if (interrupted)
765+
goto retry_new_seq;
763766
/*
764767
* Could this slot have been previously retired?
765768
* If so, then the server may be expecting seq_nr = 1!
@@ -768,10 +771,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
768771
slot->seq_nr = 1;
769772
goto retry_nowait;
770773
}
771-
break;
774+
goto session_recover;
772775
case -NFS4ERR_SEQ_FALSE_RETRY:
773-
++slot->seq_nr;
774-
goto retry_nowait;
776+
if (interrupted)
777+
goto retry_new_seq;
778+
goto session_recover;
775779
default:
776780
/* Just update the slot sequence no. */
777781
slot->seq_done = 1;
@@ -781,6 +785,11 @@ static int nfs41_sequence_process(struct rpc_task *task,
781785
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
782786
out_noaction:
783787
return ret;
788+
session_recover:
789+
nfs4_schedule_session_recovery(session, res->sr_status);
790+
goto retry_nowait;
791+
retry_new_seq:
792+
++slot->seq_nr;
784793
retry_nowait:
785794
if (rpc_restart_call_prepare(task)) {
786795
nfs41_sequence_free_slot(res);
@@ -857,6 +866,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
857866
.rpc_call_done = nfs41_call_sync_done,
858867
};
859868

869+
static void
870+
nfs4_sequence_process_interrupted(struct nfs_client *client,
871+
struct nfs4_slot *slot, struct rpc_cred *cred)
872+
{
873+
struct rpc_task *task;
874+
875+
task = _nfs41_proc_sequence(client, cred, slot, true);
876+
if (!IS_ERR(task))
877+
rpc_put_task_async(task);
878+
}
879+
860880
#else /* !CONFIG_NFS_V4_1 */
861881

862882
static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@@ -877,9 +897,34 @@ int nfs4_sequence_done(struct rpc_task *task,
877897
}
878898
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
879899

900+
static void
901+
nfs4_sequence_process_interrupted(struct nfs_client *client,
902+
struct nfs4_slot *slot, struct rpc_cred *cred)
903+
{
904+
WARN_ON_ONCE(1);
905+
slot->interrupted = 0;
906+
}
907+
880908
#endif /* !CONFIG_NFS_V4_1 */
881909

882-
int nfs4_setup_sequence(const struct nfs_client *client,
910+
static
911+
void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
912+
struct nfs4_sequence_res *res,
913+
struct nfs4_slot *slot)
914+
{
915+
if (!slot)
916+
return;
917+
slot->privileged = args->sa_privileged ? 1 : 0;
918+
args->sa_slot = slot;
919+
920+
res->sr_slot = slot;
921+
res->sr_timestamp = jiffies;
922+
res->sr_status_flags = 0;
923+
res->sr_status = 1;
924+
925+
}
926+
927+
int nfs4_setup_sequence(struct nfs_client *client,
883928
struct nfs4_sequence_args *args,
884929
struct nfs4_sequence_res *res,
885930
struct rpc_task *task)
@@ -897,29 +942,28 @@ int nfs4_setup_sequence(const struct nfs_client *client,
897942
task->tk_timeout = 0;
898943
}
899944

900-
spin_lock(&tbl->slot_tbl_lock);
901-
/* The state manager will wait until the slot table is empty */
902-
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
903-
goto out_sleep;
945+
for (;;) {
946+
spin_lock(&tbl->slot_tbl_lock);
947+
/* The state manager will wait until the slot table is empty */
948+
if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
949+
goto out_sleep;
950+
951+
slot = nfs4_alloc_slot(tbl);
952+
if (IS_ERR(slot)) {
953+
/* Try again in 1/4 second */
954+
if (slot == ERR_PTR(-ENOMEM))
955+
task->tk_timeout = HZ >> 2;
956+
goto out_sleep;
957+
}
958+
spin_unlock(&tbl->slot_tbl_lock);
904959

905-
slot = nfs4_alloc_slot(tbl);
906-
if (IS_ERR(slot)) {
907-
/* Try again in 1/4 second */
908-
if (slot == ERR_PTR(-ENOMEM))
909-
task->tk_timeout = HZ >> 2;
910-
goto out_sleep;
960+
if (likely(!slot->interrupted))
961+
break;
962+
nfs4_sequence_process_interrupted(client,
963+
slot, task->tk_msg.rpc_cred);
911964
}
912-
spin_unlock(&tbl->slot_tbl_lock);
913-
914-
slot->privileged = args->sa_privileged ? 1 : 0;
915-
args->sa_slot = slot;
916965

917-
res->sr_slot = slot;
918-
if (session) {
919-
res->sr_timestamp = jiffies;
920-
res->sr_status_flags = 0;
921-
res->sr_status = 1;
922-
}
966+
nfs4_sequence_attach_slot(args, res, slot);
923967

924968
trace_nfs4_setup_sequence(session, args);
925969
out_start:
@@ -8118,6 +8162,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = {
81188162

81198163
static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
81208164
struct rpc_cred *cred,
8165+
struct nfs4_slot *slot,
81218166
bool is_privileged)
81228167
{
81238168
struct nfs4_sequence_data *calldata;
@@ -8131,23 +8176,34 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
81318176
.callback_ops = &nfs41_sequence_ops,
81328177
.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
81338178
};
8179+
struct rpc_task *ret;
81348180

8181+
ret = ERR_PTR(-EIO);
81358182
if (!atomic_inc_not_zero(&clp->cl_count))
8136-
return ERR_PTR(-EIO);
8183+
goto out_err;
8184+
8185+
ret = ERR_PTR(-ENOMEM);
81378186
calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
8138-
if (calldata == NULL) {
8139-
nfs_put_client(clp);
8140-
return ERR_PTR(-ENOMEM);
8141-
}
8187+
if (calldata == NULL)
8188+
goto out_put_clp;
81428189
nfs4_init_sequence(&calldata->args, &calldata->res, 0);
8190+
nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot);
81438191
if (is_privileged)
81448192
nfs4_set_sequence_privileged(&calldata->args);
81458193
msg.rpc_argp = &calldata->args;
81468194
msg.rpc_resp = &calldata->res;
81478195
calldata->clp = clp;
81488196
task_setup_data.callback_data = calldata;
81498197

8150-
return rpc_run_task(&task_setup_data);
8198+
ret = rpc_run_task(&task_setup_data);
8199+
if (IS_ERR(ret))
8200+
goto out_err;
8201+
return ret;
8202+
out_put_clp:
8203+
nfs_put_client(clp);
8204+
out_err:
8205+
nfs41_release_slot(slot);
8206+
return ret;
81518207
}
81528208

81538209
static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
@@ -8157,7 +8213,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
81578213

81588214
if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
81598215
return -EAGAIN;
8160-
task = _nfs41_proc_sequence(clp, cred, false);
8216+
task = _nfs41_proc_sequence(clp, cred, NULL, false);
81618217
if (IS_ERR(task))
81628218
ret = PTR_ERR(task);
81638219
else
@@ -8171,7 +8227,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
81718227
struct rpc_task *task;
81728228
int ret;
81738229

8174-
task = _nfs41_proc_sequence(clp, cred, true);
8230+
task = _nfs41_proc_sequence(clp, cred, NULL, true);
81758231
if (IS_ERR(task)) {
81768232
ret = PTR_ERR(task);
81778233
goto out;

0 commit comments

Comments
 (0)