Skip to content

Commit c0c77d8

Browse files
Björn Töpel, magnus-karlsson
authored and committed
xsk: add user memory registration support sockopt
In this commit the base structure of the AF_XDP address family is set up. Further, we introduce the ability to register a window of user memory to the kernel via the XDP_UMEM_REG setsockopt syscall. The memory window is viewed by an AF_XDP socket as a set of equally large frames. After a user memory registration all frames are "owned" by the user application, and not the kernel. v2: More robust checks on umem creation and unaccount on error. Call set_page_dirty_lock on cleanup. Simplified xdp_umem_reg. Co-authored-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent 68e8b84 commit c0c77d8

File tree

8 files changed

+596
-0
lines changed

8 files changed

+596
-0
lines changed

include/net/xdp_sock.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* SPDX-License-Identifier: GPL-2.0
2+
* AF_XDP internal functions
3+
* Copyright(c) 2018 Intel Corporation.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef _LINUX_XDP_SOCK_H
16+
#define _LINUX_XDP_SOCK_H
17+
18+
#include <linux/mutex.h>
19+
#include <net/sock.h>
20+
21+
struct xdp_umem;
22+
23+
/* Per-socket state of an AF_XDP socket.
 *
 * The generic socket is embedded as the first member, as the comment on it
 * requires. NOTE(review): the code that populates @umem and takes @mutex is
 * not part of this header - confirm against the socket implementation.
 */
struct xdp_sock {
24+
/* struct sock must be the first member of struct xdp_sock */
25+
struct sock sk;
26+
/* User memory area associated with this socket */
struct xdp_umem *umem;
27+
/* Protects multiple processes in the control path */
28+
struct mutex mutex;
29+
};
30+
31+
#endif /* _LINUX_XDP_SOCK_H */

include/uapi/linux/if_xdp.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2+
*
3+
* if_xdp: XDP socket user-space interface
4+
* Copyright(c) 2018 Intel Corporation.
5+
*
6+
* This program is free software; you can redistribute it and/or modify it
7+
* under the terms and conditions of the GNU General Public License,
8+
* version 2, as published by the Free Software Foundation.
9+
*
10+
* This program is distributed in the hope it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13+
* more details.
14+
*
15+
* Author(s): Björn Töpel <bjorn.topel@intel.com>
16+
* Magnus Karlsson <magnus.karlsson@intel.com>
17+
*/
18+
19+
#ifndef _LINUX_IF_XDP_H
20+
#define _LINUX_IF_XDP_H
21+
22+
#include <linux/types.h>
23+
24+
/* XDP socket options */
25+
#define XDP_UMEM_REG 3
26+
27+
/* Describes a window of user memory to register with XDP_UMEM_REG.
 * The window is treated as a set of equally sized frames.
 */
struct xdp_umem_reg {
28+
__u64 addr; /* Start of packet data area */
29+
__u64 len; /* Length of packet data area, in bytes */
30+
__u32 frame_size; /* Size of each frame, in bytes */
31+
__u32 frame_headroom; /* Head room reserved in each frame, in bytes */
32+
};
33+
34+
#endif /* _LINUX_IF_XDP_H */

net/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,4 @@ obj-y += l3mdev/
8585
endif
8686
obj-$(CONFIG_QRTR) += qrtr/
8787
obj-$(CONFIG_NET_NCSI) += ncsi/
88+
obj-$(CONFIG_XDP_SOCKETS) += xdp/

net/xdp/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o
2+

net/xdp/xdp_umem.c

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* XDP user-space packet buffer
3+
* Copyright(c) 2018 Intel Corporation.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#include <linux/init.h>
16+
#include <linux/sched/mm.h>
17+
#include <linux/sched/signal.h>
18+
#include <linux/sched/task.h>
19+
#include <linux/uaccess.h>
20+
#include <linux/slab.h>
21+
#include <linux/bpf.h>
22+
#include <linux/mm.h>
23+
24+
#include "xdp_umem.h"
25+
26+
#define XDP_UMEM_MIN_FRAME_SIZE 2048
27+
28+
int xdp_umem_create(struct xdp_umem **umem)
29+
{
30+
*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
31+
32+
if (!(*umem))
33+
return -ENOMEM;
34+
35+
return 0;
36+
}
37+
38+
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
39+
{
40+
unsigned int i;
41+
42+
if (umem->pgs) {
43+
for (i = 0; i < umem->npgs; i++) {
44+
struct page *page = umem->pgs[i];
45+
46+
set_page_dirty_lock(page);
47+
put_page(page);
48+
}
49+
50+
kfree(umem->pgs);
51+
umem->pgs = NULL;
52+
}
53+
}
54+
55+
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
56+
{
57+
if (umem->user) {
58+
atomic_long_sub(umem->npgs, &umem->user->locked_vm);
59+
free_uid(umem->user);
60+
}
61+
}
62+
63+
static void xdp_umem_release(struct xdp_umem *umem)
64+
{
65+
struct task_struct *task;
66+
struct mm_struct *mm;
67+
68+
if (umem->pgs) {
69+
xdp_umem_unpin_pages(umem);
70+
71+
task = get_pid_task(umem->pid, PIDTYPE_PID);
72+
put_pid(umem->pid);
73+
if (!task)
74+
goto out;
75+
mm = get_task_mm(task);
76+
put_task_struct(task);
77+
if (!mm)
78+
goto out;
79+
80+
mmput(mm);
81+
umem->pgs = NULL;
82+
}
83+
84+
xdp_umem_unaccount_pages(umem);
85+
out:
86+
kfree(umem);
87+
}
88+
89+
static void xdp_umem_release_deferred(struct work_struct *work)
90+
{
91+
struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
92+
93+
xdp_umem_release(umem);
94+
}
95+
96+
/* Take an additional reference on @umem by incrementing umem->users.
 * The reference is dropped again with xdp_put_umem(). @umem must not be
 * NULL; it is dereferenced unconditionally.
 */
void xdp_get_umem(struct xdp_umem *umem)
97+
{
98+
atomic_inc(&umem->users);
99+
}
100+
101+
void xdp_put_umem(struct xdp_umem *umem)
102+
{
103+
if (!umem)
104+
return;
105+
106+
if (atomic_dec_and_test(&umem->users)) {
107+
INIT_WORK(&umem->work, xdp_umem_release_deferred);
108+
schedule_work(&umem->work);
109+
}
110+
}
111+
112+
static int xdp_umem_pin_pages(struct xdp_umem *umem)
113+
{
114+
unsigned int gup_flags = FOLL_WRITE;
115+
long npgs;
116+
int err;
117+
118+
umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
119+
if (!umem->pgs)
120+
return -ENOMEM;
121+
122+
down_write(&current->mm->mmap_sem);
123+
npgs = get_user_pages(umem->address, umem->npgs,
124+
gup_flags, &umem->pgs[0], NULL);
125+
up_write(&current->mm->mmap_sem);
126+
127+
if (npgs != umem->npgs) {
128+
if (npgs >= 0) {
129+
umem->npgs = npgs;
130+
err = -ENOMEM;
131+
goto out_pin;
132+
}
133+
err = npgs;
134+
goto out_pgs;
135+
}
136+
return 0;
137+
138+
out_pin:
139+
xdp_umem_unpin_pages(umem);
140+
out_pgs:
141+
kfree(umem->pgs);
142+
umem->pgs = NULL;
143+
return err;
144+
}
145+
146+
static int xdp_umem_account_pages(struct xdp_umem *umem)
147+
{
148+
unsigned long lock_limit, new_npgs, old_npgs;
149+
150+
if (capable(CAP_IPC_LOCK))
151+
return 0;
152+
153+
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
154+
umem->user = get_uid(current_user());
155+
156+
do {
157+
old_npgs = atomic_long_read(&umem->user->locked_vm);
158+
new_npgs = old_npgs + umem->npgs;
159+
if (new_npgs > lock_limit) {
160+
free_uid(umem->user);
161+
umem->user = NULL;
162+
return -ENOBUFS;
163+
}
164+
} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
165+
new_npgs) != old_npgs);
166+
return 0;
167+
}
168+
169+
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
170+
{
171+
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
172+
u64 addr = mr->addr, size = mr->len;
173+
unsigned int nframes, nfpp;
174+
int size_chk, err;
175+
176+
if (!umem)
177+
return -EINVAL;
178+
179+
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
180+
/* Strictly speaking we could support this, if:
181+
* - huge pages, or*
182+
* - using an IOMMU, or
183+
* - making sure the memory area is consecutive
184+
* but for now, we simply say "computer says no".
185+
*/
186+
return -EINVAL;
187+
}
188+
189+
if (!is_power_of_2(frame_size))
190+
return -EINVAL;
191+
192+
if (!PAGE_ALIGNED(addr)) {
193+
/* Memory area has to be page size aligned. For
194+
* simplicity, this might change.
195+
*/
196+
return -EINVAL;
197+
}
198+
199+
if ((addr + size) < addr)
200+
return -EINVAL;
201+
202+
nframes = size / frame_size;
203+
if (nframes == 0 || nframes > UINT_MAX)
204+
return -EINVAL;
205+
206+
nfpp = PAGE_SIZE / frame_size;
207+
if (nframes < nfpp || nframes % nfpp)
208+
return -EINVAL;
209+
210+
frame_headroom = ALIGN(frame_headroom, 64);
211+
212+
size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
213+
if (size_chk < 0)
214+
return -EINVAL;
215+
216+
umem->pid = get_task_pid(current, PIDTYPE_PID);
217+
umem->size = (size_t)size;
218+
umem->address = (unsigned long)addr;
219+
umem->props.frame_size = frame_size;
220+
umem->props.nframes = nframes;
221+
umem->frame_headroom = frame_headroom;
222+
umem->npgs = size / PAGE_SIZE;
223+
umem->pgs = NULL;
224+
umem->user = NULL;
225+
226+
umem->frame_size_log2 = ilog2(frame_size);
227+
umem->nfpp_mask = nfpp - 1;
228+
umem->nfpplog2 = ilog2(nfpp);
229+
atomic_set(&umem->users, 1);
230+
231+
err = xdp_umem_account_pages(umem);
232+
if (err)
233+
goto out;
234+
235+
err = xdp_umem_pin_pages(umem);
236+
if (err)
237+
goto out_account;
238+
return 0;
239+
240+
out_account:
241+
xdp_umem_unaccount_pages(umem);
242+
out:
243+
put_pid(umem->pid);
244+
return err;
245+
}

net/xdp/xdp_umem.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/* SPDX-License-Identifier: GPL-2.0
2+
* XDP user-space packet buffer
3+
* Copyright(c) 2018 Intel Corporation.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef XDP_UMEM_H_
16+
#define XDP_UMEM_H_
17+
18+
#include <linux/mm.h>
19+
#include <linux/if_xdp.h>
20+
#include <linux/workqueue.h>
21+
22+
#include "xdp_umem_props.h"
23+
24+
/* Kernel-side description of a registered user memory area.
 *
 * Allocated zeroed by xdp_umem_create(), filled in by xdp_umem_reg() and
 * reference counted through xdp_get_umem()/xdp_put_umem().
 */
struct xdp_umem {
25+
struct page **pgs; /* Array of npgs pinned pages, NULL when nothing is pinned */
26+
struct xdp_umem_props props; /* Frame size and frame count */
27+
u32 npgs; /* Number of pages in the area (len / PAGE_SIZE) */
28+
u32 frame_headroom; /* Requested head room, rounded up to a multiple of 64 */
29+
u32 nfpp_mask; /* Frames per page, minus one */
30+
u32 nfpplog2; /* log2 of frames per page */
31+
u32 frame_size_log2; /* log2 of the frame size */
32+
struct user_struct *user; /* User charged for locked_vm, NULL if none was charged */
33+
struct pid *pid; /* Pid of the task that performed the registration */
34+
unsigned long address; /* Start address of the area in user space */
35+
size_t size; /* Length of the area in bytes */
36+
atomic_t users; /* Reference count; set to 1 at registration */
37+
struct work_struct work; /* Runs the release once the last reference is dropped */
38+
};
39+
40+
/* Validate @mr, then account and pin the described memory into @umem.
 * Returns 0 or a negative errno.
 */
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
41+
/* Take a reference on @umem. */
void xdp_get_umem(struct xdp_umem *umem);
42+
/* Drop a reference on @umem (NULL is ignored); the last put schedules
 * the release.
 */
void xdp_put_umem(struct xdp_umem *umem);
43+
/* Allocate a zeroed umem into *@umem. Returns 0 or -ENOMEM. */
int xdp_umem_create(struct xdp_umem **umem);
44+
45+
#endif /* XDP_UMEM_H_ */

net/xdp/xdp_umem_props.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/* SPDX-License-Identifier: GPL-2.0
2+
* XDP user-space packet buffer
3+
* Copyright(c) 2018 Intel Corporation.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef XDP_UMEM_PROPS_H_
16+
#define XDP_UMEM_PROPS_H_
17+
18+
/* Frame layout of a umem, as recorded by xdp_umem_reg(). */
struct xdp_umem_props {
19+
u32 frame_size; /* Size of one frame in bytes */
20+
u32 nframes; /* Number of frames in the area (len / frame_size) */
21+
};
22+
23+
#endif /* XDP_UMEM_PROPS_H_ */

0 commit comments

Comments
 (0)