Skip to content

Commit 6282cd5

Browse files
neilbrown authored and J. Bruce Fields committed
NFSD: Don't hand out delegations for 30 seconds after recalling them.
If nfsd needs to recall a delegation for some reason it implies that there is contention on the file, so further delegations should not be handed out. The current code fails to do so, and the result is effectively a live-lock under some workloads: a client attempting a conflicting operation on a read-delegated file receives NFS4ERR_DELAY and retries the operation, but by the time it retries the server may already have given out another delegation. We could simply avoid delegations for (say) 30 seconds after any recall, but this is probably too heavy handed. We could keep a list of inodes (or inode numbers or filehandles) for recalled delegations, but that requires memory allocation and searching. The approach taken here is to use a bloom filter to record the filehandles which are currently blocked from delegation, and to accept the cost of a few false positives. We have 2 bloom filters, each of which is valid for 30 seconds. When a delegation is recalled the filehandle is added to one filter and will remain disabled for between 30 and 60 seconds. We keep a count of the number of filehandles that have been added, so when that count is zero we can bypass all other tests. The bloom filters have 256 bits and 3 hash functions. This should allow a couple of dozen blocked filehandles with minimal false positives. If many more filehandles are all blocked at once, behaviour will degrade towards rejecting all delegations for between 30 and 60 seconds, then resetting and allowing new delegations. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
1 parent 7171511 commit 6282cd5

File tree

1 file changed

+78
-0
lines changed

1 file changed

+78
-0
lines changed

fs/nfsd/nfs4state.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <linux/ratelimit.h>
4242
#include <linux/sunrpc/svcauth_gss.h>
4343
#include <linux/sunrpc/addr.h>
44+
#include <linux/hash.h>
4445
#include "xdr4.h"
4546
#include "xdr4cb.h"
4647
#include "vfs.h"
@@ -364,6 +365,79 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
364365
return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
365366
}
366367

368+
/*
369+
* When we recall a delegation, we should be careful not to hand it
370+
* out again straight away.
371+
* To ensure this we keep a pair of bloom filters ('new' and 'old')
372+
* in which the filehandles of recalled delegations are "stored".
373+
* If a filehandle appears in either filter, a delegation is blocked.
374+
* When a delegation is recalled, the filehandle is stored in the "new"
375+
* filter.
376+
* Every 30 seconds we swap the filters and clear the "new" one,
377+
* unless both are empty of course.
378+
*
379+
* Each filter is 256 bits. We hash the filehandle to 32 bits and use the
380+
* low 3 bytes as hash-table indices.
381+
*
382+
* 'state_lock', which is always held when block_delegations() is called,
383+
* is used to manage concurrent access. Testing does not need the lock
384+
* except when swapping the two filters.
385+
*/
386+
static struct bloom_pair {
387+
int entries, old_entries;
388+
time_t swap_time;
389+
int new; /* index into 'set' */
390+
DECLARE_BITMAP(set[2], 256);
391+
} blocked_delegations;
392+
393+
static int delegation_blocked(struct knfsd_fh *fh)
394+
{
395+
u32 hash;
396+
struct bloom_pair *bd = &blocked_delegations;
397+
398+
if (bd->entries == 0)
399+
return 0;
400+
if (seconds_since_boot() - bd->swap_time > 30) {
401+
spin_lock(&state_lock);
402+
if (seconds_since_boot() - bd->swap_time > 30) {
403+
bd->entries -= bd->old_entries;
404+
bd->old_entries = bd->entries;
405+
memset(bd->set[bd->new], 0,
406+
sizeof(bd->set[0]));
407+
bd->new = 1-bd->new;
408+
bd->swap_time = seconds_since_boot();
409+
}
410+
spin_unlock(&state_lock);
411+
}
412+
hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
413+
if (test_bit(hash&255, bd->set[0]) &&
414+
test_bit((hash>>8)&255, bd->set[0]) &&
415+
test_bit((hash>>16)&255, bd->set[0]))
416+
return 1;
417+
418+
if (test_bit(hash&255, bd->set[1]) &&
419+
test_bit((hash>>8)&255, bd->set[1]) &&
420+
test_bit((hash>>16)&255, bd->set[1]))
421+
return 1;
422+
423+
return 0;
424+
}
425+
426+
static void block_delegations(struct knfsd_fh *fh)
427+
{
428+
u32 hash;
429+
struct bloom_pair *bd = &blocked_delegations;
430+
431+
hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
432+
433+
__set_bit(hash&255, bd->set[bd->new]);
434+
__set_bit((hash>>8)&255, bd->set[bd->new]);
435+
__set_bit((hash>>16)&255, bd->set[bd->new]);
436+
if (bd->entries == 0)
437+
bd->swap_time = seconds_since_boot();
438+
bd->entries += 1;
439+
}
440+
367441
static struct nfs4_delegation *
368442
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
369443
{
@@ -372,6 +446,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
372446
dprintk("NFSD alloc_init_deleg\n");
373447
if (num_delegations > max_delegations)
374448
return NULL;
449+
if (delegation_blocked(&current_fh->fh_handle))
450+
return NULL;
375451
dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
376452
if (dp == NULL)
377453
return dp;
@@ -2770,6 +2846,8 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
27702846
/* Only place dl_time is set; protected by i_lock: */
27712847
dp->dl_time = get_seconds();
27722848

2849+
block_delegations(&dp->dl_fh);
2850+
27732851
nfsd4_cb_recall(dp);
27742852
}
27752853

0 commit comments

Comments
 (0)