
Commit bfe4037
Author: Christoph Hellwig
aio: implement IOCB_CMD_POLL

Simple one-shot poll through the io_submit() interface. To poll for a file descriptor the application should submit an iocb of type IOCB_CMD_POLL. It will poll the fd for the events specified in the first 32 bits of the aio_buf field of the iocb.

Unlike poll or epoll without EPOLLONESHOT, this interface always works in one-shot mode: once the iocb completes, it has to be resubmitted.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Avi Kivity <avi@scylladb.com>
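For readers who want to see the interface end to end, here is a minimal userspace sketch, not part of the commit, of how IOCB_CMD_POLL might be driven through raw syscalls (libaio of this era has no wrapper for the new opcode; the stdin fd and POLLIN mask are purely illustrative):

/*
 * Minimal sketch, assuming a kernel carrying this patch.
 * Submits a one-shot poll on stdin and reaps the completion.
 */
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static long sys_io_setup(unsigned nr, aio_context_t *ctx)
{
	return syscall(__NR_io_setup, nr, ctx);
}

static long sys_io_submit(aio_context_t ctx, long nr, struct iocb **iocbs)
{
	return syscall(__NR_io_submit, ctx, nr, iocbs);
}

static long sys_io_getevents(aio_context_t ctx, long min_nr, long nr,
			     struct io_event *events, struct timespec *ts)
{
	return syscall(__NR_io_getevents, ctx, min_nr, nr, events, ts);
}

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };
	struct io_event ev;

	if (sys_io_setup(1, &ctx) < 0)
		return 1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_POLL;
	cb.aio_fildes = 0;	/* poll stdin (illustrative) */
	cb.aio_buf = POLLIN;	/* requested events go in the low bits of aio_buf */

	if (sys_io_submit(ctx, 1, cbs) != 1)
		return 1;

	/* one-shot: after the event is reaped, the iocb must be resubmitted */
	if (sys_io_getevents(ctx, 1, 1, &ev, NULL) == 1)
		printf("returned events: 0x%llx\n", (unsigned long long)ev.res);
	return 0;
}

On completion, the returned poll mask (as produced by aio_poll_complete() below) is reported in the io_event's res field.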
Parent: 9018ccc

2 files changed: 180 additions, 4 deletions

fs/aio.c (178 additions, 0 deletions)
@@ -5,6 +5,7 @@
  *	Implements an efficient asynchronous io interface.
  *
  *	Copyright 2000, 2001, 2002	Red Hat, Inc.  All Rights Reserved.
+ *	Copyright 2018 Christoph Hellwig.
  *
  *	See ../COPYING for licensing terms.
  */
@@ -165,10 +166,21 @@ struct fsync_iocb {
 	bool			datasync;
 };
 
+struct poll_iocb {
+	struct file		*file;
+	struct wait_queue_head	*head;
+	__poll_t		events;
+	bool			woken;
+	bool			cancelled;
+	struct wait_queue_entry	wait;
+	struct work_struct	work;
+};
+
 struct aio_kiocb {
 	union {
 		struct kiocb		rw;
 		struct fsync_iocb	fsync;
+		struct poll_iocb	poll;
 	};
 
 	struct kioctx		*ki_ctx;
@@ -1601,6 +1613,169 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
 	return 0;
 }
 
+static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+	struct file *file = iocb->poll.file;
+
+	aio_complete(iocb, mangle_poll(mask), 0);
+	fput(file);
+}
+
+static void aio_poll_complete_work(struct work_struct *work)
+{
+	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct poll_table_struct pt = { ._key = req->events };
+	struct kioctx *ctx = iocb->ki_ctx;
+	__poll_t mask = 0;
+
+	if (!READ_ONCE(req->cancelled))
+		mask = vfs_poll(req->file, &pt) & req->events;
+
+	/*
+	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
+	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
+	 * synchronize with them.  In the cancellation case the list_del_init
+	 * itself is not actually needed, but harmless so we keep it in to
+	 * avoid further branches in the fast path.
+	 */
+	spin_lock_irq(&ctx->ctx_lock);
+	if (!mask && !READ_ONCE(req->cancelled)) {
+		add_wait_queue(req->head, &req->wait);
+		spin_unlock_irq(&ctx->ctx_lock);
+		return;
+	}
+	list_del_init(&iocb->ki_list);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+	aio_poll_complete(iocb, mask);
+}
+
+/* assumes we are called with irqs disabled */
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+	struct poll_iocb *req = &aiocb->poll;
+
+	spin_lock(&req->head->lock);
+	WRITE_ONCE(req->cancelled, true);
+	if (!list_empty(&req->wait.entry)) {
+		list_del_init(&req->wait.entry);
+		schedule_work(&aiocb->poll.work);
+	}
+	spin_unlock(&req->head->lock);
+
+	return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+		void *key)
+{
+	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+	__poll_t mask = key_to_poll(key);
+
+	req->woken = true;
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & req->events))
+		return 0;
+
+	list_del_init(&req->wait.entry);
+	schedule_work(&req->work);
+	return 1;
+}
+
+struct aio_poll_table {
+	struct poll_table_struct	pt;
+	struct aio_kiocb		*iocb;
+	int				error;
+};
+
+static void
+aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+		struct poll_table_struct *p)
+{
+	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+	/* multiple wait queues per file are not supported */
+	if (unlikely(pt->iocb->poll.head)) {
+		pt->error = -EINVAL;
+		return;
+	}
+
+	pt->error = 0;
+	pt->iocb->poll.head = head;
+	add_wait_queue(head, &pt->iocb->poll.wait);
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+	struct kioctx *ctx = aiocb->ki_ctx;
+	struct poll_iocb *req = &aiocb->poll;
+	struct aio_poll_table apt;
+	__poll_t mask;
+
+	/* reject any unknown events outside the normal event mask. */
+	if ((u16)iocb->aio_buf != iocb->aio_buf)
+		return -EINVAL;
+	/* reject fields that are not defined for poll */
+	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+		return -EINVAL;
+
+	INIT_WORK(&req->work, aio_poll_complete_work);
+	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+
+	apt.pt._qproc = aio_poll_queue_proc;
+	apt.pt._key = req->events;
+	apt.iocb = aiocb;
+	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+	/* initialized the list so that we can do list_empty checks */
+	INIT_LIST_HEAD(&req->wait.entry);
+	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+
+	/* one for removal from waitqueue, one for this function */
+	refcount_set(&aiocb->ki_refcnt, 2);
+
+	mask = vfs_poll(req->file, &apt.pt) & req->events;
+	if (unlikely(!req->head)) {
+		/* we did not manage to set up a waitqueue, done */
+		goto out;
+	}
+
+	spin_lock_irq(&ctx->ctx_lock);
+	spin_lock(&req->head->lock);
+	if (req->woken) {
+		/* wake_up context handles the rest */
+		mask = 0;
+		apt.error = 0;
+	} else if (mask || apt.error) {
+		/* if we get an error or a mask we are done */
+		WARN_ON_ONCE(list_empty(&req->wait.entry));
+		list_del_init(&req->wait.entry);
+	} else {
+		/* actually waiting for an event */
+		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+		aiocb->ki_cancel = aio_poll_cancel;
+	}
+	spin_unlock(&req->head->lock);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+out:
+	if (unlikely(apt.error)) {
+		fput(req->file);
+		return apt.error;
+	}
+
+	if (mask)
+		aio_poll_complete(aiocb, mask);
+	iocb_put(aiocb);
+	return 0;
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 bool compat)
 {
@@ -1674,6 +1849,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	case IOCB_CMD_FDSYNC:
 		ret = aio_fsync(&req->fsync, &iocb, true);
 		break;
+	case IOCB_CMD_POLL:
+		ret = aio_poll(req, &iocb);
+		break;
 	default:
 		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
 		ret = -EINVAL;

include/uapi/linux/aio_abi.h (2 additions, 4 deletions)
@@ -38,10 +38,8 @@ enum {
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FDSYNC = 3,
-	/* These two are experimental.
-	 * IOCB_CMD_PREADX = 4,
-	 * IOCB_CMD_POLL = 5,
-	 */
+	/* 4 was the experimental IOCB_CMD_PREADX */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PWRITEV = 8,
