Skip to content

Commit f42f7c2

Browse files
author
Trond Myklebust
committed
SUNRPC: Fix priority queue fairness
Fix up the priority queue to not batch by owner, but by queue, so that we allow '1 << priority' elements to be dequeued before switching to the next priority queue. The owner field is still used to wake up requests in round robin order by owner to avoid single processes hogging the RPC layer by loading the queues. Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
1 parent 95f7691 commit f42f7c2

File tree

2 files changed

+54
-57
lines changed

2 files changed

+54
-57
lines changed

include/linux/sunrpc/sched.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@ struct rpc_timer {
189189
struct rpc_wait_queue {
190190
spinlock_t lock;
191191
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
192-
pid_t owner; /* process id of last task serviced */
193192
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
194193
unsigned char priority; /* current priority */
195194
unsigned char nr; /* # tasks remaining for cookie */
@@ -205,7 +204,6 @@ struct rpc_wait_queue {
205204
* from a single cookie. The aim is to improve
206205
* performance of NFS operations such as read/write.
207206
*/
208-
#define RPC_BATCH_COUNT 16
209207
#define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0)
210208

211209
/*

net/sunrpc/sched.c

Lines changed: 54 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
9999
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
100100
}
101101

102-
static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
103-
{
104-
struct list_head *q = &queue->tasks[queue->priority];
105-
struct rpc_task *task;
106-
107-
if (!list_empty(q)) {
108-
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
109-
if (task->tk_owner == queue->owner)
110-
list_move_tail(&task->u.tk_wait.list, q);
111-
}
112-
}
113-
114102
static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
115103
{
116104
if (queue->priority != priority) {
117-
/* Fairness: rotate the list when changing priority */
118-
rpc_rotate_queue_owner(queue);
119105
queue->priority = priority;
106+
queue->nr = 1U << priority;
120107
}
121108
}
122109

123-
static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
124-
{
125-
queue->owner = pid;
126-
queue->nr = RPC_BATCH_COUNT;
127-
}
128-
129110
static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
130111
{
131112
rpc_set_waitqueue_priority(queue, queue->maxpriority);
132-
rpc_set_waitqueue_owner(queue, 0);
133113
}
134114

135115
/*
136-
* Add new request to a priority queue.
116+
* Add a request to a queue list
137117
*/
138-
static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
139-
struct rpc_task *task,
140-
unsigned char queue_priority)
118+
static void
119+
__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
141120
{
142-
struct list_head *q;
143121
struct rpc_task *t;
144122

145-
INIT_LIST_HEAD(&task->u.tk_wait.links);
146-
if (unlikely(queue_priority > queue->maxpriority))
147-
queue_priority = queue->maxpriority;
148-
if (queue_priority > queue->priority)
149-
rpc_set_waitqueue_priority(queue, queue_priority);
150-
q = &queue->tasks[queue_priority];
151123
list_for_each_entry(t, q, u.tk_wait.list) {
152124
if (t->tk_owner == task->tk_owner) {
153-
list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
125+
list_add_tail(&task->u.tk_wait.links,
126+
&t->u.tk_wait.links);
127+
/* Cache the queue head in task->u.tk_wait.list */
128+
task->u.tk_wait.list.next = q;
129+
task->u.tk_wait.list.prev = NULL;
154130
return;
155131
}
156132
}
133+
INIT_LIST_HEAD(&task->u.tk_wait.links);
157134
list_add_tail(&task->u.tk_wait.list, q);
158135
}
159136

137+
/*
138+
* Remove request from a queue list
139+
*/
140+
static void
141+
__rpc_list_dequeue_task(struct rpc_task *task)
142+
{
143+
struct list_head *q;
144+
struct rpc_task *t;
145+
146+
if (task->u.tk_wait.list.prev == NULL) {
147+
list_del(&task->u.tk_wait.links);
148+
return;
149+
}
150+
if (!list_empty(&task->u.tk_wait.links)) {
151+
t = list_first_entry(&task->u.tk_wait.links,
152+
struct rpc_task,
153+
u.tk_wait.links);
154+
/* Assume __rpc_list_enqueue_task() cached the queue head */
155+
q = t->u.tk_wait.list.next;
156+
list_add_tail(&t->u.tk_wait.list, q);
157+
list_del(&task->u.tk_wait.links);
158+
}
159+
list_del(&task->u.tk_wait.list);
160+
}
161+
162+
/*
163+
* Add new request to a priority queue.
164+
*/
165+
static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
166+
struct rpc_task *task,
167+
unsigned char queue_priority)
168+
{
169+
if (unlikely(queue_priority > queue->maxpriority))
170+
queue_priority = queue->maxpriority;
171+
__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
172+
}
173+
160174
/*
161175
* Add new request to wait queue.
162176
*
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
194208
*/
195209
static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
196210
{
197-
struct rpc_task *t;
198-
199-
if (!list_empty(&task->u.tk_wait.links)) {
200-
t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
201-
list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
202-
list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
203-
}
211+
__rpc_list_dequeue_task(task);
204212
}
205213

206214
/*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
212220
__rpc_disable_timer(queue, task);
213221
if (RPC_IS_PRIORITY(queue))
214222
__rpc_remove_wait_queue_priority(task);
215-
list_del(&task->u.tk_wait.list);
223+
else
224+
list_del(&task->u.tk_wait.list);
216225
queue->qlen--;
217226
dprintk("RPC: %5u removed from queue %p \"%s\"\n",
218227
task->tk_pid, queue, rpc_qname(queue));
@@ -545,17 +554,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
545554
* Service a batch of tasks from a single owner.
546555
*/
547556
q = &queue->tasks[queue->priority];
548-
if (!list_empty(q)) {
549-
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
550-
if (queue->owner == task->tk_owner) {
551-
if (--queue->nr)
552-
goto out;
553-
list_move_tail(&task->u.tk_wait.list, q);
554-
}
555-
/*
556-
* Check if we need to switch queues.
557-
*/
558-
goto new_owner;
557+
if (!list_empty(q) && --queue->nr) {
558+
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
559+
goto out;
559560
}
560561

561562
/*
@@ -567,7 +568,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
567568
else
568569
q = q - 1;
569570
if (!list_empty(q)) {
570-
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
571+
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
571572
goto new_queue;
572573
}
573574
} while (q != &queue->tasks[queue->priority]);
@@ -577,8 +578,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
577578

578579
new_queue:
579580
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
580-
new_owner:
581-
rpc_set_waitqueue_owner(queue, task->tk_owner);
582581
out:
583582
return task;
584583
}

0 commit comments

Comments
 (0)