Commit af5c72b

Author: Al Viro

Fix aio_poll() races
aio_poll() has to cope with several unpleasant problems:

* requests that might stay around indefinitely need to be made visible for io_cancel(2); that must not be done to a request already completed, though.
* in cases when ->poll() has placed us on a waitqueue, wakeup might have happened (and request completed) before ->poll() returns.
* worse, in some early wakeup cases request might end up re-added into the queue later - we can't treat "woken up and currently not in the queue" as "it's not going to stick around indefinitely"
* ... moreover, ->poll() might have decided not to put it on any queues to start with, and that needs to be distinguished from the previous case
* ->poll() might have tried to put us on more than one queue. Only the first will succeed for aio poll, so we might end up missing wakeups. OTOH, we might very well notice that only after the wakeup hits and request gets completed (all before ->poll() gets around to the second poll_wait()). In that case it's too late to decide that we have an error.

req->woken was an attempt to deal with that. Unfortunately, it was broken.

What we need to keep track of is not that wakeup has happened - the thing might come back after that. It's that async reference is already gone and won't come back, so we can't (and needn't) put the request on the list of cancellables.

The easiest case is "request hadn't been put on any waitqueues"; we can tell by seeing NULL apt.head, and in that case there won't be anything async. We should either complete the request ourselves (if vfs_poll() reports anything of interest) or return an error.

In all other cases we get exclusion with wakeups by grabbing the queue lock.

If request is currently on queue and we have something interesting from vfs_poll(), we can steal it and complete the request ourselves.

If it's on queue and vfs_poll() has not reported anything interesting, we either put it on the cancellable list, or, if we know that it hadn't been put on all queues ->poll() wanted it on, we steal it and return an error.

If it's _not_ on queue, it's either been already dealt with (in which case we do nothing), or there's aio_poll_complete_work() about to be executed. In that case we either put it on the cancellable list, or, if we know it hadn't been put on all queues ->poll() wanted it on, simulate what cancel would've done.

It's a lot more convoluted than I'd like it to be. Single-consumer APIs suck, and unfortunately aio is not an exception...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 2bb874c commit af5c72b
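
To make the decision table in the message above easier to follow, here is a minimal stand-alone sketch of that logic as a user-space model. It is not the kernel code (the real change is in the diff below); the names (poll_req, decide_submit, the action values) are invented for illustration, and the boolean fields stand in for the kernel's apt.head, apt.error, req->done and waitqueue-membership checks.

/*
 * Simplified user-space model of the submission-side decision described
 * in the commit message; all names here are illustrative, not the kernel's.
 */
#include <stdbool.h>
#include <stdio.h>

enum action {
	COMPLETE_INLINE,	/* no async reference left; complete the request ourselves */
	RETURN_ERROR,		/* no async reference left; report the setup error */
	MAKE_CANCELLABLE,	/* expose the request to io_cancel(2) */
	MARK_CANCELLED,		/* queueing was incomplete; simulate what cancel would have done */
	LEAVE_ALONE,		/* wakeup side already completed it; do nothing */
};

struct poll_req {
	bool on_queue;		/* wait entry still on the waitqueue */
	bool queued_at_all;	/* ->poll() put us on a waitqueue (apt.head != NULL) */
	bool queue_error;	/* ->poll() wanted more queues than we could join */
	bool done;		/* wakeup/work already published a result */
	unsigned mask;		/* interesting events reported by vfs_poll() */
};

static enum action decide_submit(const struct poll_req *r)
{
	if (!r->queued_at_all)		/* nothing async will ever happen */
		return r->mask ? COMPLETE_INLINE : RETURN_ERROR;
	/* from here on we would hold the queue lock, excluding wakeups */
	if (r->on_queue) {
		if (r->mask)		/* steal it and complete inline */
			return COMPLETE_INLINE;
		if (r->queue_error)	/* steal it and fail the submission */
			return RETURN_ERROR;
		return MAKE_CANCELLABLE;
	}
	/* not on queue: already completed, or aio_poll_complete_work() is about to run */
	if (r->done)
		return LEAVE_ALONE;
	return r->queue_error ? MARK_CANCELLED : MAKE_CANCELLABLE;
}

int main(void)
{
	struct poll_req r = { .on_queue = true, .queued_at_all = true, .mask = 1 };
	printf("action = %d\n", decide_submit(&r));	/* COMPLETE_INLINE */
	return 0;
}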

fs/aio.c

Lines changed: 40 additions & 50 deletions
@@ -181,7 +181,7 @@ struct poll_iocb {
 	struct file		*file;
 	struct wait_queue_head	*head;
 	__poll_t		events;
-	bool			woken;
+	bool			done;
 	bool			cancelled;
 	struct wait_queue_entry	wait;
 	struct work_struct	work;
@@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 	return 0;
 }
 
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
-	iocb->ki_res.res = mangle_poll(mask);
-	iocb_put(iocb);
-}
-
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&iocb->ki_list);
+	iocb->ki_res.res = mangle_poll(mask);
+	req->done = true;
 	spin_unlock_irq(&ctx->ctx_lock);
 
-	aio_poll_complete(iocb, mask);
+	iocb_put(iocb);
 }
 
 /* assumes we are called with irqs disabled */
@@ -1667,31 +1663,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	__poll_t mask = key_to_poll(key);
 	unsigned long flags;
 
-	req->woken = true;
-
 	/* for instances that support it check for an event match first: */
-	if (mask) {
-		if (!(mask & req->events))
-			return 0;
+	if (mask && !(mask & req->events))
+		return 0;
 
+	list_del_init(&req->wait.entry);
+
+	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 		/*
 		 * Try to complete the iocb inline if we can. Use
 		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
 		 * call this function with IRQs disabled and because IRQs
 		 * have to be disabled before ctx_lock is obtained.
 		 */
-		if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
-			list_del(&iocb->ki_list);
-			spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-
-			list_del_init(&req->wait.entry);
-			aio_poll_complete(iocb, mask);
-			return 1;
-		}
+		list_del(&iocb->ki_list);
+		iocb->ki_res.res = mangle_poll(mask);
+		req->done = true;
+		spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+		iocb_put(iocb);
+	} else {
+		schedule_work(&req->work);
 	}
-
-	list_del_init(&req->wait.entry);
-	schedule_work(&req->work);
 	return 1;
 }
 
@@ -1723,6 +1715,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	struct kioctx *ctx = aiocb->ki_ctx;
 	struct poll_iocb *req = &aiocb->poll;
 	struct aio_poll_table apt;
+	bool cancel = false;
 	__poll_t mask;
 
 	/* reject any unknown events outside the normal event mask. */
@@ -1736,7 +1729,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
 
 	req->head = NULL;
-	req->woken = false;
+	req->done = false;
 	req->cancelled = false;
 
 	apt.pt._qproc = aio_poll_queue_proc;
@@ -1749,36 +1742,33 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
 
 	mask = vfs_poll(req->file, &apt.pt) & req->events;
-	if (unlikely(!req->head)) {
-		/* we did not manage to set up a waitqueue, done */
-		goto out;
-	}
-
 	spin_lock_irq(&ctx->ctx_lock);
-	spin_lock(&req->head->lock);
-	if (req->woken) {
-		/* wake_up context handles the rest */
-		mask = 0;
+	if (likely(req->head)) {
+		spin_lock(&req->head->lock);
+		if (unlikely(list_empty(&req->wait.entry))) {
+			if (apt.error)
+				cancel = true;
+			apt.error = 0;
+			mask = 0;
+		}
+		if (mask || apt.error) {
+			list_del_init(&req->wait.entry);
+		} else if (cancel) {
+			WRITE_ONCE(req->cancelled, true);
+		} else if (!req->done) { /* actually waiting for an event */
+			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+			aiocb->ki_cancel = aio_poll_cancel;
+		}
+		spin_unlock(&req->head->lock);
+	}
+	if (mask) { /* no async, we'd stolen it */
+		aiocb->ki_res.res = mangle_poll(mask);
 		apt.error = 0;
-	} else if (mask || apt.error) {
-		/* if we get an error or a mask we are done */
-		WARN_ON_ONCE(list_empty(&req->wait.entry));
-		list_del_init(&req->wait.entry);
-	} else {
-		/* actually waiting for an event */
-		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
-		aiocb->ki_cancel = aio_poll_cancel;
 	}
-	spin_unlock(&req->head->lock);
 	spin_unlock_irq(&ctx->ctx_lock);
-
-out:
-	if (unlikely(apt.error))
-		return apt.error;
-
 	if (mask)
-		aio_poll_complete(aiocb, mask);
-	return 0;
+		iocb_put(aiocb);
+	return apt.error;
 }
 
 static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
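
For context, a hedged user-space sketch of the path this code serves: submitting an IOCB_CMD_POLL request through the aio syscalls and reaping its result. Only the IOCB_CMD_POLL opcode, the aio_buf events field (fed to demangle_poll() above) and the io_setup/io_submit/io_getevents syscalls come from the kernel ABI; the raw-syscall wrappers, the pipe-based trigger and the minimal error handling are illustrative.

#include <linux/aio_abi.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* thin wrappers around the raw aio syscalls (no libaio assumed) */
static long io_setup(unsigned nr, aio_context_t *ctx)
{
	return syscall(__NR_io_setup, nr, ctx);
}

static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	return syscall(__NR_io_submit, ctx, nr, iocbpp);
}

static long io_getevents(aio_context_t ctx, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	return syscall(__NR_io_getevents, ctx, min_nr, nr, events, timeout);
}

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };
	struct io_event ev;
	int pipefd[2];

	if (pipe(pipefd) || io_setup(1, &ctx))
		return 1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_POLL;	/* handled by aio_poll() in fs/aio.c */
	cb.aio_fildes = pipefd[0];
	cb.aio_buf = POLLIN;			/* events of interest, read via demangle_poll() */

	if (io_submit(ctx, 1, cbs) != 1)
		return 1;

	write(pipefd[1], "x", 1);		/* trigger the wakeup path */

	if (io_getevents(ctx, 1, 1, &ev, NULL) != 1)
		return 1;
	printf("poll result mask: 0x%llx\n", (unsigned long long)ev.res);
	return 0;
}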
