Commit ec30d78

Florian Westphal authored and davem330 committed
xfrm: add xdst pcpu cache
Retain the last used xfrm_dst in a pcpu cache. On the next request, reuse this dst if the policies are the same.

The cache will not help with strict round-robin workloads, as there is no hit.

The cache packet-path part is reasonably small. The notifier part is needed so we do not add long hangs when a device is dismantled while some pcpu xdst still holds a reference; there are also calls to the flush operation when userspace deletes SAs, so modules can be removed (the cached bundle would otherwise keep references that prevent module unload).

We need to run the dst_release on the correct cpu to avoid races with the packet path. This is done by adding a work_struct for each cpu and then doing the actual test/release on each affected cpu via schedule_work_on().

Test results using 4 network namespaces and null encryption:

ns1 -> ns2 -> ns3 -> ns4
netperf -> xfrm/null enc -> xfrm/null dec -> netserver

what             TCP_STREAM   UDP_STREAM   UDP_RR
Flow cache:      14644.61     294.35       327231.64
No flow cache:   14349.81     242.64       202301.72
Pcpu cache:      14629.70     292.21       205595.22

UDP tests used 64-byte packets; tests ran for one minute each, and each value is the average over ten iterations.

'Flow cache' is 'net-next', 'No flow cache' is net-next plus this series but without this patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 09c7570 commit ec30d78
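To make the mechanism described in the commit message easier to follow before reading the diff, here is a minimal, hedged sketch of the general pattern: a per-CPU cached object that the fast path may reuse, with releases pushed onto the owning CPU via schedule_work_on(). The names struct my_obj, my_cache_lookup(), my_cache_store(), my_cache_flush() and my_pcpu_work are made up for illustration and do not exist in the tree; the real implementation is the net/xfrm/xfrm_policy.c hunk below, which additionally verifies bundle validity (xfrm_bundle_ok()) and policy liveness before reuse.

/*
 * Illustrative sketch only, not code from this commit: a generic
 * "per-CPU cached object, released on the owning CPU" pattern.
 */
#include <linux/bottom_half.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_obj {
        int key;                /* what a lookup is matched against */
        refcount_t ref;
};

static DEFINE_PER_CPU(struct my_obj *, my_last_obj);   /* one cached entry per CPU */
static struct work_struct *my_pcpu_work;                /* one work item per CPU */

static void my_obj_put(struct my_obj *obj)
{
        if (obj && refcount_dec_and_test(&obj->ref))
                kfree(obj);
}

/* Fast path: reuse the cached object when it matches the request. */
static struct my_obj *my_cache_lookup(int key)
{
        struct my_obj *obj = this_cpu_read(my_last_obj);

        if (obj && obj->key == key) {
                refcount_inc(&obj->ref);        /* caller gets its own reference */
                return obj;
        }
        return NULL;
}

/* Install a freshly built object in this CPU's slot, dropping the old one. */
static void my_cache_store(struct my_obj *obj)
{
        struct my_obj *old = this_cpu_read(my_last_obj);

        refcount_inc(&obj->ref);                /* the cache keeps its own reference */
        this_cpu_write(my_last_obj, obj);
        my_obj_put(old);
}

/* Runs on the CPU that owns the slot, so it cannot race the fast path.
 * This sketch drops the entry unconditionally; the real code only drops
 * bundles that are no longer valid.
 */
static void my_pcpu_work_fn(struct work_struct *work)
{
        struct my_obj *old;

        local_bh_disable();
        old = this_cpu_read(my_last_obj);
        this_cpu_write(my_last_obj, NULL);
        local_bh_enable();

        my_obj_put(old);
}

/* Teardown path: push the release onto each CPU that still caches something. */
static void my_cache_flush(void)
{
        int cpu;

        get_online_cpus();
        for_each_possible_cpu(cpu) {
                if (!per_cpu(my_last_obj, cpu))
                        continue;
                if (cpu_online(cpu)) {
                        schedule_work_on(cpu, &my_pcpu_work[cpu]);
                } else {
                        /* an offline CPU cannot race, release directly */
                        struct my_obj *old = per_cpu(my_last_obj, cpu);

                        per_cpu(my_last_obj, cpu) = NULL;
                        my_obj_put(old);
                }
        }
        put_online_cpus();
}

static int __init my_cache_init(void)
{
        int i;

        my_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*my_pcpu_work), GFP_KERNEL);
        if (!my_pcpu_work)
                return -ENOMEM;
        for (i = 0; i < NR_CPUS; i++)
                INIT_WORK(&my_pcpu_work[i], my_pcpu_work_fn);
        return 0;
}

In the actual patch the equivalent of my_cache_store() is xfrm_last_dst_update(), and the cached bundle is created with its refcount set to 2 (the atomic_set(&xdst->u.dst.__refcnt, 2) in the xfrm_policy.c hunk): one reference for the per-CPU slot and one for the caller.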

File tree: 4 files changed, +132 -3 lines changed


include/net/xfrm.h

Lines changed: 1 addition & 0 deletions

@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
                       const struct km_event *c);
+void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;

net/xfrm/xfrm_device.c

Lines changed: 2 additions & 0 deletions

@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev)
 
 static int xfrm_dev_unregister(struct net_device *dev)
 {
+        xfrm_policy_cache_flush();
         return NOTIFY_DONE;
 }
 
@@ -175,6 +176,7 @@ static int xfrm_dev_down(struct net_device *dev)
         if (dev->features & NETIF_F_HW_ESP)
                 xfrm_dev_state_flush(dev_net(dev), dev, true);
 
+        xfrm_policy_cache_flush();
         return NOTIFY_DONE;
 }
 

net/xfrm/xfrm_policy.c

Lines changed: 126 additions & 1 deletion

@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/cpu.h>
 #include <linux/audit.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
         u8 flags;
 };
 
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                 __read_mostly;
@@ -972,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
         }
         if (!cnt)
                 err = -ESRCH;
+        else
+                xfrm_policy_cache_flush();
 out:
         spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
         return err;
@@ -1700,17 +1705,125 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+        this_cpu_write(xfrm_last_dst, xdst);
+        if (old)
+                dst_release(&old->u.dst);
+}
+
+static void __xfrm_pcpu_work_fn(void)
+{
+        struct xfrm_dst *old;
+
+        old = this_cpu_read(xfrm_last_dst);
+        if (old && !xfrm_bundle_ok(old))
+                xfrm_last_dst_update(NULL, old);
+}
+
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+        local_bh_disable();
+        rcu_read_lock();
+        __xfrm_pcpu_work_fn();
+        rcu_read_unlock();
+        local_bh_enable();
+}
+
+void xfrm_policy_cache_flush(void)
+{
+        struct xfrm_dst *old;
+        bool found = 0;
+        int cpu;
+
+        local_bh_disable();
+        rcu_read_lock();
+        for_each_possible_cpu(cpu) {
+                old = per_cpu(xfrm_last_dst, cpu);
+                if (old && !xfrm_bundle_ok(old)) {
+                        if (smp_processor_id() == cpu) {
+                                __xfrm_pcpu_work_fn();
+                                continue;
+                        }
+                        found = true;
+                        break;
+                }
+        }
+
+        rcu_read_unlock();
+        local_bh_enable();
+
+        if (!found)
+                return;
+
+        get_online_cpus();
+
+        for_each_possible_cpu(cpu) {
+                bool bundle_release;
+
+                rcu_read_lock();
+                old = per_cpu(xfrm_last_dst, cpu);
+                bundle_release = old && !xfrm_bundle_ok(old);
+                rcu_read_unlock();
+
+                if (!bundle_release)
+                        continue;
+
+                if (cpu_online(cpu)) {
+                        schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+                        continue;
+                }
+
+                rcu_read_lock();
+                old = per_cpu(xfrm_last_dst, cpu);
+                if (old && !xfrm_bundle_ok(old)) {
+                        per_cpu(xfrm_last_dst, cpu) = NULL;
+                        dst_release(&old->u.dst);
+                }
+                rcu_read_unlock();
+        }
+
+        put_online_cpus();
+}
+
+static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+{
+        unsigned int num_pols = xdst->num_pols;
+        unsigned int pol_dead = 0, i;
+
+        for (i = 0; i < num_pols; i++)
+                pol_dead |= xdst->pols[i]->walk.dead;
+
+        /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
+        if (pol_dead)
+                xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+
+        return pol_dead;
+}
+
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                                const struct flowi *fl, u16 family,
                                struct dst_entry *dst_orig)
 {
         struct net *net = xp_net(pols[0]);
         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+        struct xfrm_dst *xdst, *old;
         struct dst_entry *dst;
-        struct xfrm_dst *xdst;
         int err;
 
+        xdst = this_cpu_read(xfrm_last_dst);
+        if (xdst &&
+            xdst->u.dst.dev == dst_orig->dev &&
+            xdst->num_pols == num_pols &&
+            !xfrm_pol_dead(xdst) &&
+            memcmp(xdst->pols, pols,
+                   sizeof(struct xfrm_policy *) * num_pols) == 0) {
+                dst_hold(&xdst->u.dst);
+                return xdst;
+        }
+
+        old = xdst;
         /* Try to instantiate a bundle */
         err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
         if (err <= 0) {
@@ -1731,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
         memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
         xdst->policy_genid = atomic_read(&pols[0]->genid);
 
+        atomic_set(&xdst->u.dst.__refcnt, 2);
+        xfrm_last_dst_update(xdst, old);
+
         return xdst;
 }
 
@@ -2843,6 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
+        int i;
+
+        xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+                                       GFP_KERNEL);
+        BUG_ON(!xfrm_pcpu_work);
+
+        for (i = 0; i < NR_CPUS; i++)
+                INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
         register_pernet_subsys(&xfrm_net_ops);
         seqcount_init(&xfrm_policy_hash_generation);
         xfrm_input_init();

net/xfrm/xfrm_state.c

Lines changed: 3 additions & 2 deletions

@@ -724,9 +724,10 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
                         }
                 }
         }
-        if (cnt)
+        if (cnt) {
                 err = 0;
-
+                xfrm_policy_cache_flush();
+        }
 out:
         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
         return err;
