Skip to content

Add a parameter NetfilterQueue(sockfd=N) for using an externally-allocated netlink socket #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ v1.0.0, unreleased
Raise an error if a packet verdict is set after its parent queue is closed
set_payload() now affects the result of later get_payload()
Handle signals received when run() is blocked in recv()
Accept packets in COPY_META mode, only failing on an attempt to access the payload
Add a parameter NetfilterQueue(sockfd=N) that uses an already-opened Netlink socket

v0.9.0, 12 Jan 2021
Improve usability when Packet objects are retained past the callback
Expand Down
25 changes: 25 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,31 @@ until they've been given a verdict (accept, drop, or repeat). Also, the
kernel stores the enqueued packets in a linked list, so keeping lots of packets
outstanding is likely to adversely impact performance.

Monitoring a different network namespace
----------------------------------------

If you are using Linux network namespaces (``man 7
network_namespaces``) in some kind of containerization system, all of
the Netfilter queue state is kept per-namespace; queue 1 in namespace
X is not the same as queue 1 in namespace Y. NetfilterQueue will
ordinarily pass you the traffic for the network namespace you're a
part of. If you want to monitor a different one, you can do so with a
bit of trickery and cooperation from a process in that
namespace; this section describes how.

You'll need to arrange for a process in the network namespace you want
to monitor to call ``socket(AF_NETLINK, SOCK_RAW, 12)`` and pass you
the resulting file descriptor using something like
``socket.send_fds()`` (Python 3.9+) over a Unix domain socket; your
process can receive it with ``socket.recv_fds()``. (12 is
``NETLINK_NETFILTER``, a constant which is not exposed by the Python
``socket`` module.) Once you've received that file descriptor in your
process, you can create a NetfilterQueue object by passing it as the
keyword argument ``NetfilterQueue(sockfd=N)``, where N is the file
descriptor you received. Because the socket was originally created
in the other network namespace, the kernel treats it as part of that
namespace, and you can use it to access that namespace even though it's
not the namespace you're in yourself.

Usage
=====

Expand Down
18 changes: 13 additions & 5 deletions netfilterqueue.pxd
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
cdef extern from "sys/types.h":
cdef extern from "<sys/types.h>":
ctypedef unsigned char u_int8_t
ctypedef unsigned short int u_int16_t
ctypedef unsigned int u_int32_t

cdef extern from "<unistd.h>":
int dup2(int oldfd, int newfd)

cdef extern from "<errno.h>":
int errno

Expand All @@ -13,7 +16,7 @@ cdef enum:
EWOULDBLOCK = EAGAIN
ENOBUFS = 105 # No buffer space available

cdef extern from "netinet/ip.h":
cdef extern from "<netinet/ip.h>":
struct iphdr:
u_int8_t tos
u_int16_t tot_len
Expand Down Expand Up @@ -60,15 +63,15 @@ cdef extern from "Python.h":
object PyBytes_FromStringAndSize(char *s, Py_ssize_t len)
object PyString_FromStringAndSize(char *s, Py_ssize_t len)

cdef extern from "sys/time.h":
cdef extern from "<sys/time.h>":
ctypedef long time_t
struct timeval:
time_t tv_sec
time_t tv_usec
struct timezone:
pass

cdef extern from "netinet/in.h":
cdef extern from "<netinet/in.h>":
u_int32_t ntohl (u_int32_t __netlong) nogil
u_int16_t ntohs (u_int16_t __netshort) nogil
u_int32_t htonl (u_int32_t __hostlong) nogil
Expand All @@ -83,6 +86,9 @@ cdef extern from "libnfnetlink/linux_nfnetlink.h":
cdef extern from "libnfnetlink/libnfnetlink.h":
struct nfnl_handle:
pass
nfnl_handle *nfnl_open()
void nfnl_close(nfnl_handle *h)
int nfnl_fd(nfnl_handle *h)
unsigned int nfnl_rcvbufsiz(nfnl_handle *h, unsigned int size)

cdef extern from "libnetfilter_queue/linux_nfnetlink_queue.h":
Expand All @@ -106,6 +112,7 @@ cdef extern from "libnetfilter_queue/libnetfilter_queue.h":
u_int8_t hw_addr[8]

nfq_handle *nfq_open()
nfq_handle *nfq_open_nfnl(nfnl_handle *h)
int nfq_close(nfq_handle *h)

int nfq_bind_pf(nfq_handle *h, u_int16_t pf)
Expand Down Expand Up @@ -153,8 +160,9 @@ cdef extern from "libnetfilter_queue/libnetfilter_queue.h":
cdef enum: # Protocol families, same as address families.
PF_INET = 2
PF_INET6 = 10
PF_NETLINK = 16

cdef extern from "sys/socket.h":
cdef extern from "<sys/socket.h>":
ssize_t recv(int __fd, void *__buf, size_t __n, int __flags) nogil
int MSG_DONTWAIT

Expand Down
46 changes: 40 additions & 6 deletions netfilterqueue.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,47 @@ cdef class Packet:

cdef class NetfilterQueue:
"""Handle a single numbered queue."""
def __cinit__(self, *args, **kwargs):
cdef u_int16_t af # Address family
af = kwargs.get("af", PF_INET)
def __cinit__(self, *, u_int16_t af = PF_INET, int sockfd = -1):
cdef nfnl_handle *nlh = NULL
try:
if sockfd >= 0:
# This is a hack to use the given Netlink socket instead
# of the one allocated by nfq_open(). Intended use case:
# the given socket was opened in a different network
# namespace, and you want to monitor traffic in that
# namespace from this process running outside of it.
# Call socket(AF_NETLINK, SOCK_RAW, /*NETLINK_NETFILTER*/ 12)
# in the other namespace and pass that fd here (via Unix
# domain socket or similar).
nlh = nfnl_open()
if nlh == NULL:
raise OSError(errno, "Failed to open nfnetlink handle")

# At this point nfnl_fd(nlh) is a new netlink socket
# and has been bound to an automatically chosen port id.
# This dup2 will close it, freeing up that address.
if dup2(sockfd, nfnl_fd(nlh)) < 0:
raise OSError(errno, "dup2 failed")

# Opening the netfilterqueue subsystem will rebind
# the socket, using the same portid from the old socket,
# which is hopefully now free. An alternative approach,
# theoretically more robust against concurrent binds,
# would be to autobind the new socket and write the chosen
# address to nlh->local. nlh is an opaque type so this
# would need to be done using memcpy (local starts
# 4 bytes into the structure); let's avoid that unless
# we really need it.
self.h = nfq_open_nfnl(nlh)
else:
self.h = nfq_open()
if self.h == NULL:
raise OSError(errno, "Failed to open NFQueue.")
except:
if nlh != NULL:
nfnl_close(nlh)
raise

self.h = nfq_open()
if self.h == NULL:
raise OSError("Failed to open NFQueue.")
nfq_unbind_pf(self.h, af) # This does NOT kick out previous queues
if nfq_bind_pf(self.h, af) < 0:
raise OSError("Failed to bind family %s. Are you root?" % af)
Expand Down
61 changes: 59 additions & 2 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import gc
import struct
import trio
import trio.testing
import os
import pytest
import signal
import socket
import sys
import time
import trio
import trio.testing
import weakref

from netfilterqueue import NetfilterQueue, COPY_META
Expand Down Expand Up @@ -261,5 +262,61 @@ def raise_alarm(sig, frame):
nfq.run()
assert any("NetfilterQueue.run" in line.name for line in exc_info.traceback)
finally:
nfq.unbind()
signal.setitimer(signal.ITIMER_REAL, *old_timer)
signal.signal(signal.SIGALRM, old_handler)


# Exercise NetfilterQueue(sockfd=N): a netlink socket is created in this
# process, handed to a child process via pass_fds, and the child (after
# unsharing into a new network namespace) uses it to receive a packet that
# the harness enqueues on queue 1. Also checks the two constructor error
# paths ("dup2 failed" and "Failed to open NFQueue").
async def test_external_fd(harness):
# Source of the child program. It reads the inherited fd number from
# sys.argv[1], binds queue 1, writes "ok\n" to fd 1 once bound, and then
# runs the queue. Its callback accepts the packet and calls sys.exit() with
# the payload from byte 28 onward, which Python prints to stderr.
# NOTE(review): the 28-byte offset presumably skips a 20-byte IPv4 header
# plus an 8-byte UDP header -- confirm against the harness.
child_prog = """
import os, sys, unshare
from netfilterqueue import NetfilterQueue
unshare.unshare(unshare.CLONE_NEWNET)
nfq = NetfilterQueue(sockfd=int(sys.argv[1]))
def cb(pkt):
pkt.accept()
sys.exit(pkt.get_payload()[28:].decode("ascii"))
nfq.bind(1, cb, sock_len=131072)
os.write(1, b"ok\\n")
try:
nfq.run()
finally:
nfq.unbind()
"""
async with trio.open_nursery() as nursery:

# Runs the child process and reports "started" (via task_status) only
# after the child has announced that its queue is bound.
async def monitor_in_child(task_status):
# Bound the wait with a 5-second deadline.
with trio.fail_after(5):
# The child's stdout is redirected to the write end of this pipe (see
# stdout=w below); the read end is watched by wait_started().
r, w = os.pipe()
# 12 is NETLINK_NETFILTER, which the socket module does not expose
# as a named constant.
nlsock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, 12)

# nursery.start_soon is used as a decorator, so this task is scheduled
# immediately. It waits for the child's "ok\n", then closes this
# process's copies of the socket and both pipe ends before signalling
# that the child is ready.
@nursery.start_soon
async def wait_started():
await trio.lowlevel.wait_readable(r)
assert b"ok\n" == os.read(r, 16)
nlsock.close()
os.close(w)
os.close(r)
task_status.started()

# check=False: the child is expected to terminate through
# sys.exit(<str>), so a nonzero exit status is not treated as an error.
result = await trio.run_process(
[sys.executable, "-c", child_prog, str(nlsock.fileno())],
stdout=w,
capture_stderr=True,
check=False,
pass_fds=(nlsock.fileno(),),
)
# The child's stderr must be exactly the payload it received.
assert result.stderr == b"this is a test\n"

# nursery.start() returns once monitor_in_child's task_status.started()
# has been called, i.e. once the child has bound its queue.
await nursery.start(monitor_in_child)
# NOTE(review): harness.enqueue_packets_to/send/expect are defined
# elsewhere; presumably this routes traffic for peer 2 into queue 1 and
# verifies delivery after the child accepts it -- confirm in the harness.
async with harness.enqueue_packets_to(2, queue_num=1):
await harness.send(2, b"this is a test")
await harness.expect(2, b"this is a test")

# Passing fd 1000 (presumably not open in the test process) must fail with
# "dup2 failed".
with pytest.raises(OSError, match="dup2 failed"):
NetfilterQueue(sockfd=1000)

# An open fd that is not a netlink socket must fail with
# "Failed to open NFQueue".
with pytest.raises(OSError, match="Failed to open NFQueue"):
with open("/dev/null") as fp:
NetfilterQueue(sockfd=fp.fileno())