Skip to content

Commit f333444

Browse files
author
Christoph Hellwig
committed
nvme: take node locality into account when selecting a path
Make current_path an array with an entry for every possible node, and cache the best path on a per-node basis. Take the node distance into account when selecting it.

This is primarily useful for dual-ported PCIe devices which are connected to PCIe root ports on different sockets.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
1 parent 73383ad commit f333444

File tree

3 files changed

+54
-28
lines changed

3 files changed

+54
-28
lines changed

drivers/nvme/host/core.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
29082908
unsigned nsid, struct nvme_id_ns *id)
29092909
{
29102910
struct nvme_ns_head *head;
2911+
size_t size = sizeof(*head);
29112912
int ret = -ENOMEM;
29122913

2913-
head = kzalloc(sizeof(*head), GFP_KERNEL);
2914+
#ifdef CONFIG_NVME_MULTIPATH
2915+
size += num_possible_nodes() * sizeof(struct nvme_ns *);
2916+
#endif
2917+
2918+
head = kzalloc(size, GFP_KERNEL);
29142919
if (!head)
29152920
goto out;
29162921
ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);

drivers/nvme/host/multipath.c

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,29 +117,55 @@ static const char *nvme_ana_state_names[] = {
117117
[NVME_ANA_CHANGE] = "change",
118118
};
119119

120-
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
120+
void nvme_mpath_clear_current_path(struct nvme_ns *ns)
121121
{
122-
struct nvme_ns *ns, *fallback = NULL;
122+
struct nvme_ns_head *head = ns->head;
123+
int node;
124+
125+
if (!head)
126+
return;
127+
128+
for_each_node(node) {
129+
if (ns == rcu_access_pointer(head->current_path[node]))
130+
rcu_assign_pointer(head->current_path[node], NULL);
131+
}
132+
}
133+
134+
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
135+
{
136+
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
137+
struct nvme_ns *found = NULL, *fallback = NULL, *ns;
123138

124139
list_for_each_entry_rcu(ns, &head->list, siblings) {
125140
if (ns->ctrl->state != NVME_CTRL_LIVE ||
126141
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
127142
continue;
143+
144+
distance = node_distance(node, dev_to_node(ns->ctrl->dev));
145+
128146
switch (ns->ana_state) {
129147
case NVME_ANA_OPTIMIZED:
130-
rcu_assign_pointer(head->current_path, ns);
131-
return ns;
148+
if (distance < found_distance) {
149+
found_distance = distance;
150+
found = ns;
151+
}
152+
break;
132153
case NVME_ANA_NONOPTIMIZED:
133-
fallback = ns;
154+
if (distance < fallback_distance) {
155+
fallback_distance = distance;
156+
fallback = ns;
157+
}
134158
break;
135159
default:
136160
break;
137161
}
138162
}
139163

140-
if (fallback)
141-
rcu_assign_pointer(head->current_path, fallback);
142-
return fallback;
164+
if (!found)
165+
found = fallback;
166+
if (found)
167+
rcu_assign_pointer(head->current_path[node], found);
168+
return found;
143169
}
144170

145171
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
@@ -150,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
150176

151177
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
152178
{
153-
struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
179+
int node = numa_node_id();
180+
struct nvme_ns *ns;
154181

182+
ns = srcu_dereference(head->current_path[node], &head->srcu);
155183
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
156-
ns = __nvme_find_path(head);
184+
ns = __nvme_find_path(head, node);
157185
return ns;
158186
}
159187

@@ -200,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
200228
int srcu_idx;
201229

202230
srcu_idx = srcu_read_lock(&head->srcu);
203-
ns = srcu_dereference(head->current_path, &head->srcu);
231+
ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
204232
if (likely(ns && nvme_path_is_optimized(ns)))
205233
found = ns->queue->poll_fn(q, qc);
206234
srcu_read_unlock(&head->srcu, srcu_idx);

drivers/nvme/host/nvme.h

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -277,14 +277,6 @@ struct nvme_ns_ids {
277277
* only ever has a single entry for private namespaces.
278278
*/
279279
struct nvme_ns_head {
280-
#ifdef CONFIG_NVME_MULTIPATH
281-
struct gendisk *disk;
282-
struct nvme_ns __rcu *current_path;
283-
struct bio_list requeue_list;
284-
spinlock_t requeue_lock;
285-
struct work_struct requeue_work;
286-
struct mutex lock;
287-
#endif
288280
struct list_head list;
289281
struct srcu_struct srcu;
290282
struct nvme_subsystem *subsys;
@@ -293,6 +285,14 @@ struct nvme_ns_head {
293285
struct list_head entry;
294286
struct kref ref;
295287
int instance;
288+
#ifdef CONFIG_NVME_MULTIPATH
289+
struct gendisk *disk;
290+
struct bio_list requeue_list;
291+
spinlock_t requeue_lock;
292+
struct work_struct requeue_work;
293+
struct mutex lock;
294+
struct nvme_ns __rcu *current_path[];
295+
#endif
296296
};
297297

298298
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
474474
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
475475
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
476476
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
477-
478-
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
479-
{
480-
struct nvme_ns_head *head = ns->head;
481-
482-
if (head && ns == rcu_access_pointer(head->current_path))
483-
rcu_assign_pointer(head->current_path, NULL);
484-
}
477+
void nvme_mpath_clear_current_path(struct nvme_ns *ns);
485478
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
486479

487480
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)

0 commit comments

Comments
 (0)