Skip to content

Commit e7f680f

Browse files
committed
afs: Improve FS server rotation error handling
Improve the error handling in FS server rotation by: (1) Cache the latest useful error value for the fs operation as a whole in struct afs_fs_cursor separately from the error cached in the afs_addr_cursor struct. The one in the address cursor gets clobbered occasionally. Copy over the error to the fs operation only when it's something we'd be interested in passing to userspace. (2) Make it so that EDESTADDRREQ is the default that is seen only if no addresses are available to be accessed. (3) When calling utility functions, such as checking a volume status or probing a fileserver, don't let a successful result clobber the cached error in the cursor; instead, stash the result in a temporary variable until it has been assessed. (4) Don't return ETIMEDOUT or ETIME if a better error, such as ENETUNREACH, is already cached. (5) On leaving the rotation loop, turn any remote abort code into a more useful error than ECONNABORTED. Fixes: d2ddc77 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells <dhowells@redhat.com>
1 parent 12bdcf3 commit e7f680f

File tree

3 files changed

+55
-45
lines changed

3 files changed

+55
-45
lines changed

fs/afs/addr_list.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,8 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac)
318318
if (ac->index == ac->alist->nr_addrs)
319319
ac->index = 0;
320320

321-
if (ac->index == ac->start) {
322-
ac->error = -EDESTADDRREQ;
321+
if (ac->index == ac->start)
323322
return false;
324-
}
325323
}
326324

327325
ac->begun = true;

fs/afs/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ struct afs_fs_cursor {
629629
unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
630630
unsigned char start; /* Initial index in server list */
631631
unsigned char index; /* Number of servers tried beyond start */
632+
short error;
632633
unsigned short flags;
633634
#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
634635
#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */

fs/afs/rotate.c

Lines changed: 53 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,10 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
3939
fc->vnode = vnode;
4040
fc->key = key;
4141
fc->ac.error = SHRT_MAX;
42+
fc->error = -EDESTADDRREQ;
4243

4344
if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
44-
fc->ac.error = -EINTR;
45+
fc->error = -EINTR;
4546
fc->flags |= AFS_FS_CURSOR_STOP;
4647
return false;
4748
}
@@ -80,7 +81,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
8081
* and have to return an error.
8182
*/
8283
if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
83-
fc->ac.error = -ESTALE;
84+
fc->error = -ESTALE;
8485
return false;
8586
}
8687

@@ -127,7 +128,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
127128
{
128129
msleep_interruptible(1000);
129130
if (signal_pending(current)) {
130-
fc->ac.error = -ERESTARTSYS;
131+
fc->error = -ERESTARTSYS;
131132
return false;
132133
}
133134

@@ -143,27 +144,29 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
143144
struct afs_addr_list *alist;
144145
struct afs_server *server;
145146
struct afs_vnode *vnode = fc->vnode;
147+
int error = fc->ac.error;
146148

147149
_enter("%u/%u,%u/%u,%d,%d",
148150
fc->index, fc->start,
149151
fc->ac.index, fc->ac.start,
150-
fc->ac.error, fc->ac.abort_code);
152+
error, fc->ac.abort_code);
151153

152154
if (fc->flags & AFS_FS_CURSOR_STOP) {
153155
_leave(" = f [stopped]");
154156
return false;
155157
}
156158

157159
/* Evaluate the result of the previous operation, if there was one. */
158-
switch (fc->ac.error) {
160+
switch (error) {
159161
case SHRT_MAX:
160162
goto start;
161163

162164
case 0:
163165
default:
164166
/* Success or local failure. Stop. */
167+
fc->error = error;
165168
fc->flags |= AFS_FS_CURSOR_STOP;
166-
_leave(" = f [okay/local %d]", fc->ac.error);
169+
_leave(" = f [okay/local %d]", error);
167170
return false;
168171

169172
case -ECONNABORTED:
@@ -178,7 +181,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
178181
* - May indicate that the fileserver couldn't attach to the vol.
179182
*/
180183
if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
181-
fc->ac.error = -EREMOTEIO;
184+
fc->error = -EREMOTEIO;
182185
goto next_server;
183186
}
184187

@@ -187,20 +190,20 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
187190
write_unlock(&vnode->volume->servers_lock);
188191

189192
set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
190-
fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
191-
if (fc->ac.error < 0)
192-
goto failed;
193+
error = afs_check_volume_status(vnode->volume, fc->key);
194+
if (error < 0)
195+
goto failed_set_error;
193196

194197
if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
195-
fc->ac.error = -ENOMEDIUM;
198+
fc->error = -ENOMEDIUM;
196199
goto failed;
197200
}
198201

199202
/* If the server list didn't change, then assume that
200203
* it's the fileserver having trouble.
201204
*/
202205
if (vnode->volume->servers == fc->server_list) {
203-
fc->ac.error = -EREMOTEIO;
206+
fc->error = -EREMOTEIO;
204207
goto next_server;
205208
}
206209

@@ -215,7 +218,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
215218
case VONLINE:
216219
case VDISKFULL:
217220
case VOVERQUOTA:
218-
fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
221+
fc->error = afs_abort_to_error(fc->ac.abort_code);
219222
goto next_server;
220223

221224
case VOFFLINE:
@@ -224,11 +227,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
224227
clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
225228
}
226229
if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
227-
fc->ac.error = -EADV;
230+
fc->error = -EADV;
228231
goto failed;
229232
}
230233
if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
231-
fc->ac.error = -ESTALE;
234+
fc->error = -ESTALE;
232235
goto failed;
233236
}
234237
goto busy;
@@ -240,7 +243,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
240243
* have a file lock we need to maintain.
241244
*/
242245
if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
243-
fc->ac.error = -EBUSY;
246+
fc->error = -EBUSY;
244247
goto failed;
245248
}
246249
if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
@@ -269,16 +272,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
269272
* honour, just in case someone sets up a loop.
270273
*/
271274
if (fc->flags & AFS_FS_CURSOR_VMOVED) {
272-
fc->ac.error = -EREMOTEIO;
275+
fc->error = -EREMOTEIO;
273276
goto failed;
274277
}
275278
fc->flags |= AFS_FS_CURSOR_VMOVED;
276279

277280
set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
278281
set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
279-
fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
280-
if (fc->ac.error < 0)
281-
goto failed;
282+
error = afs_check_volume_status(vnode->volume, fc->key);
283+
if (error < 0)
284+
goto failed_set_error;
282285

283286
/* If the server list didn't change, then the VLDB is
284287
* out of sync with the fileservers. This is hopefully
@@ -290,7 +293,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
290293
* TODO: Retry a few times with sleeps.
291294
*/
292295
if (vnode->volume->servers == fc->server_list) {
293-
fc->ac.error = -ENOMEDIUM;
296+
fc->error = -ENOMEDIUM;
294297
goto failed;
295298
}
296299

@@ -299,20 +302,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
299302
default:
300303
clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
301304
clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
302-
fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
305+
fc->error = afs_abort_to_error(fc->ac.abort_code);
303306
goto failed;
304307
}
305308

309+
case -ETIMEDOUT:
310+
case -ETIME:
311+
if (fc->error != -EDESTADDRREQ)
312+
goto iterate_address;
313+
/* Fall through */
306314
case -ENETUNREACH:
307315
case -EHOSTUNREACH:
308316
case -ECONNREFUSED:
309-
case -ETIMEDOUT:
310-
case -ETIME:
311317
_debug("no conn");
318+
fc->error = error;
312319
goto iterate_address;
313320

314321
case -ECONNRESET:
315322
_debug("call reset");
323+
fc->error = error;
316324
goto failed;
317325
}
318326

@@ -328,9 +336,9 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
328336
/* See if we need to do an update of the volume record. Note that the
329337
* volume may have moved or even have been deleted.
330338
*/
331-
fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
332-
if (fc->ac.error < 0)
333-
goto failed;
339+
error = afs_check_volume_status(vnode->volume, fc->key);
340+
if (error < 0)
341+
goto failed_set_error;
334342

335343
if (!afs_start_fs_iteration(fc, vnode))
336344
goto failed;
@@ -354,10 +362,10 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
354362
* break request before we've finished decoding the reply and
355363
* installing the vnode.
356364
*/
357-
fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
358-
fc->index);
359-
if (fc->ac.error < 0)
360-
goto failed;
365+
error = afs_register_server_cb_interest(vnode, fc->server_list,
366+
fc->index);
367+
if (error < 0)
368+
goto failed_set_error;
361369

362370
fc->cbi = afs_get_cb_interest(vnode->cb_interest);
363371

@@ -422,13 +430,14 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
422430
if (fc->flags & AFS_FS_CURSOR_VBUSY)
423431
goto restart_from_beginning;
424432

425-
fc->ac.error = -EDESTADDRREQ;
426433
goto failed;
427434

435+
failed_set_error:
436+
fc->error = error;
428437
failed:
429438
fc->flags |= AFS_FS_CURSOR_STOP;
430439
afs_end_cursor(&fc->ac);
431-
_leave(" = f [failed %d]", fc->ac.error);
440+
_leave(" = f [failed %d]", fc->error);
432441
return false;
433442
}
434443

@@ -442,13 +451,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
442451
struct afs_vnode *vnode = fc->vnode;
443452
struct afs_cb_interest *cbi = vnode->cb_interest;
444453
struct afs_addr_list *alist;
454+
int error = fc->ac.error;
445455

446456
_enter("");
447457

448-
switch (fc->ac.error) {
458+
switch (error) {
449459
case SHRT_MAX:
450460
if (!cbi) {
451-
fc->ac.error = -ESTALE;
461+
fc->error = -ESTALE;
452462
fc->flags |= AFS_FS_CURSOR_STOP;
453463
return false;
454464
}
@@ -461,7 +471,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
461471
afs_get_addrlist(alist);
462472
read_unlock(&cbi->server->fs_lock);
463473
if (!alist) {
464-
fc->ac.error = -ESTALE;
474+
fc->error = -ESTALE;
465475
fc->flags |= AFS_FS_CURSOR_STOP;
466476
return false;
467477
}
@@ -475,11 +485,13 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
475485
case 0:
476486
default:
477487
/* Success or local failure. Stop. */
488+
fc->error = error;
478489
fc->flags |= AFS_FS_CURSOR_STOP;
479-
_leave(" = f [okay/local %d]", fc->ac.error);
490+
_leave(" = f [okay/local %d]", error);
480491
return false;
481492

482493
case -ECONNABORTED:
494+
fc->error = afs_abort_to_error(fc->ac.abort_code);
483495
fc->flags |= AFS_FS_CURSOR_STOP;
484496
_leave(" = f [abort]");
485497
return false;
@@ -490,6 +502,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
490502
case -ETIMEDOUT:
491503
case -ETIME:
492504
_debug("no conn");
505+
fc->error = error;
493506
goto iterate_address;
494507
}
495508

@@ -512,17 +525,15 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
512525
int afs_end_vnode_operation(struct afs_fs_cursor *fc)
513526
{
514527
struct afs_net *net = afs_v2net(fc->vnode);
515-
int ret;
516528

517529
mutex_unlock(&fc->vnode->io_lock);
518530

519531
afs_end_cursor(&fc->ac);
520532
afs_put_cb_interest(net, fc->cbi);
521533
afs_put_serverlist(net, fc->server_list);
522534

523-
ret = fc->ac.error;
524-
if (ret == -ECONNABORTED)
525-
afs_abort_to_error(fc->ac.abort_code);
535+
if (fc->error == -ECONNABORTED)
536+
fc->error = afs_abort_to_error(fc->ac.abort_code);
526537

527-
return fc->ac.error;
538+
return fc->error;
528539
}

0 commit comments

Comments
 (0)