Skip to content

Commit 62239fc

Browse files
mjruhldledford
authored andcommitted
IB/hfi1: Clean up on context initialization failure
The error path for context initialization is not consistent. Cleanup all resources on failure. Removed unused variable user_event_mask. Add the _BASE_FAILED bit to the event flags so that a base context can notify waiting sub contexts that they cannot continue. Running out of sub contexts is an EBUSY result, not EINVAL. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
1 parent 8737ce9 commit 62239fc

File tree

5 files changed

+95
-85
lines changed

5 files changed

+95
-85
lines changed

drivers/infiniband/hw/hfi1/file_ops.c

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
8282
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
8383
const struct hfi1_user_info *uinfo);
8484
static int init_user_ctxt(struct hfi1_filedata *fd);
85-
static int user_init(struct hfi1_ctxtdata *uctxt);
85+
static void user_init(struct hfi1_ctxtdata *uctxt);
8686
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
8787
__u32 len);
8888
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
@@ -847,7 +847,7 @@ static u64 kvirt_to_phys(void *addr)
847847

848848
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
849849
{
850-
int ret = 0;
850+
int ret;
851851
unsigned int swmajor, swminor;
852852

853853
swmajor = uinfo->userversion >> 16;
@@ -857,14 +857,16 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
857857
swminor = uinfo->userversion & 0xffff;
858858

859859
mutex_lock(&hfi1_mutex);
860-
/* First, lets check if we need to get a sub context? */
860+
/*
861+
* Get a sub context if necessary.
862+
* ret < 0 error, 0 no context, 1 sub-context found
863+
*/
864+
ret = 0;
861865
if (uinfo->subctxt_cnt) {
862-
/* < 0 error, 0 no context, 1 sub-context found */
863866
ret = find_sub_ctxt(fd, uinfo);
864-
if (ret > 0) {
867+
if (ret > 0)
865868
fd->rec_cpu_num =
866869
hfi1_get_proc_affinity(fd->uctxt->numa_id);
867-
}
868870
}
869871

870872
/*
@@ -885,17 +887,27 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
885887
ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
886888
HFI1_CTXT_BASE_UNINIT,
887889
&fd->uctxt->event_flags));
890+
if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
891+
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
892+
return -ENOMEM;
893+
}
888894
/* The only thing a sub context needs is the user_xxx stuff */
889895
if (!ret)
890-
init_user_ctxt(fd);
896+
ret = init_user_ctxt(fd);
897+
898+
if (ret)
899+
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
891900
} else if (!ret) {
892901
ret = setup_base_ctxt(fd);
893-
894-
/*
895-
* Base context is done, notify anybody using a sub-context
896-
* that is waiting for this completion
897-
*/
898-
if (!ret && fd->uctxt->subctxt_cnt) {
902+
if (fd->uctxt->subctxt_cnt) {
903+
/* If there is an error, set the failed bit. */
904+
if (ret)
905+
set_bit(HFI1_CTXT_BASE_FAILED,
906+
&fd->uctxt->event_flags);
907+
/*
908+
* Base context is done, notify anybody using a
909+
* sub-context that is waiting for this completion
910+
*/
899911
clear_bit(HFI1_CTXT_BASE_UNINIT,
900912
&fd->uctxt->event_flags);
901913
wake_up(&fd->uctxt->wait);
@@ -945,7 +957,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
945957
subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
946958
HFI1_MAX_SHARED_CTXTS);
947959
if (subctxt >= uctxt->subctxt_cnt)
948-
return -EINVAL;
960+
return -EBUSY;
949961

950962
fd->uctxt = uctxt;
951963
fd->subctxt = subctxt;
@@ -1118,7 +1130,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
11181130
return ret;
11191131
}
11201132

1121-
static int user_init(struct hfi1_ctxtdata *uctxt)
1133+
static void user_init(struct hfi1_ctxtdata *uctxt)
11221134
{
11231135
unsigned int rcvctrl_ops = 0;
11241136

@@ -1168,8 +1180,6 @@ static int user_init(struct hfi1_ctxtdata *uctxt)
11681180
else
11691181
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
11701182
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
1171-
1172-
return 0;
11731183
}
11741184

11751185
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
@@ -1238,28 +1248,32 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
12381248
/* Now allocate the RcvHdr queue and eager buffers. */
12391249
ret = hfi1_create_rcvhdrq(dd, uctxt);
12401250
if (ret)
1241-
goto done;
1251+
return ret;
12421252

12431253
ret = hfi1_setup_eagerbufs(uctxt);
12441254
if (ret)
1245-
goto done;
1255+
goto setup_failed;
12461256

12471257
/* If sub-contexts are enabled, do the appropriate setup */
12481258
if (uctxt->subctxt_cnt)
12491259
ret = setup_subctxt(uctxt);
12501260
if (ret)
1251-
goto done;
1261+
goto setup_failed;
12521262

12531263
ret = hfi1_user_exp_rcv_grp_init(fd);
12541264
if (ret)
1255-
goto done;
1265+
goto setup_failed;
12561266

12571267
ret = init_user_ctxt(fd);
12581268
if (ret)
1259-
goto done;
1269+
goto setup_failed;
12601270

1261-
ret = user_init(uctxt);
1262-
done:
1271+
user_init(uctxt);
1272+
1273+
return 0;
1274+
1275+
setup_failed:
1276+
hfi1_free_ctxtdata(dd, uctxt);
12631277
return ret;
12641278
}
12651279

drivers/infiniband/hw/hfi1/hfi.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,6 @@ struct hfi1_ctxtdata {
196196
void *rcvhdrq;
197197
/* kernel virtual address where hdrqtail is updated */
198198
volatile __le64 *rcvhdrtail_kvaddr;
199-
/*
200-
* Shared page for kernel to signal user processes that send buffers
201-
* need disarming. The process should call HFI1_CMD_DISARM_BUFS
202-
* or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
203-
*/
204-
unsigned long *user_event_mask;
205199
/* when waiting for rcv or pioavail */
206200
wait_queue_head_t wait;
207201
/* rcvhdrq size (for freeing) */
@@ -1724,12 +1718,14 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
17241718
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
17251719

17261720
/* ctxt_flag bit offsets */
1721+
/* base context has not finished initializing */
1722+
#define HFI1_CTXT_BASE_UNINIT 1
1723+
/* base context initaliation failed */
1724+
#define HFI1_CTXT_BASE_FAILED 2
17271725
/* waiting for a packet to arrive */
1728-
#define HFI1_CTXT_WAITING_RCV 2
1729-
/* master has not finished initializing */
1730-
#define HFI1_CTXT_BASE_UNINIT 4
1726+
#define HFI1_CTXT_WAITING_RCV 3
17311727
/* waiting for an urgent packet to arrive */
1732-
#define HFI1_CTXT_WAITING_URG 5
1728+
#define HFI1_CTXT_WAITING_URG 4
17331729

17341730
/* free up any allocated data at closes */
17351731
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,

drivers/infiniband/hw/hfi1/init.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -964,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
964964
kfree(rcd->egrbufs.buffers);
965965

966966
sc_free(rcd->sc);
967-
vfree(rcd->user_event_mask);
968967
vfree(rcd->subctxt_uregbase);
969968
vfree(rcd->subctxt_rcvegrbuf);
970969
vfree(rcd->subctxt_rcvhdr_base);
@@ -1683,8 +1682,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
16831682
dd_dev_err(dd,
16841683
"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
16851684
rcd->ctxt);
1686-
vfree(rcd->user_event_mask);
1687-
rcd->user_event_mask = NULL;
16881685
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
16891686
rcd->rcvhdrq_dma);
16901687
rcd->rcvhdrq = NULL;
@@ -1851,15 +1848,16 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
18511848
"ctxt%u: current Eager buffer size is invalid %u\n",
18521849
rcd->ctxt, rcd->egrbufs.rcvtid_size);
18531850
ret = -EINVAL;
1854-
goto bail;
1851+
goto bail_rcvegrbuf_phys;
18551852
}
18561853

18571854
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
18581855
hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
18591856
rcd->egrbufs.rcvtids[idx].dma, order);
18601857
cond_resched();
18611858
}
1862-
goto bail;
1859+
1860+
return 0;
18631861

18641862
bail_rcvegrbuf_phys:
18651863
for (idx = 0; idx < rcd->egrbufs.alloced &&
@@ -1873,6 +1871,6 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
18731871
rcd->egrbufs.buffers[idx].dma = 0;
18741872
rcd->egrbufs.buffers[idx].len = 0;
18751873
}
1876-
bail:
1874+
18771875
return ret;
18781876
}

drivers/infiniband/hw/hfi1/user_exp_rcv.c

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
160160
struct hfi1_devdata *dd = fd->dd;
161161
u32 tidbase;
162162
u32 i;
163+
struct tid_group *grp, *gptr;
163164

164165
exp_tid_group_init(&uctxt->tid_group_list);
165166
exp_tid_group_init(&uctxt->tid_used_list);
@@ -168,24 +169,26 @@ int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
168169
tidbase = uctxt->expected_base;
169170
for (i = 0; i < uctxt->expected_count /
170171
dd->rcv_entries.group_size; i++) {
171-
struct tid_group *grp;
172-
173172
grp = kzalloc(sizeof(*grp), GFP_KERNEL);
174-
if (!grp) {
175-
/*
176-
* If we fail here, the groups already
177-
* allocated will be freed by the close
178-
* call.
179-
*/
180-
return -ENOMEM;
181-
}
173+
if (!grp)
174+
goto grp_failed;
175+
182176
grp->size = dd->rcv_entries.group_size;
183177
grp->base = tidbase;
184178
tid_group_add_tail(grp, &uctxt->tid_group_list);
185179
tidbase += dd->rcv_entries.group_size;
186180
}
187181

188182
return 0;
183+
184+
grp_failed:
185+
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
186+
list) {
187+
list_del_init(&grp->list);
188+
kfree(grp);
189+
}
190+
191+
return -ENOMEM;
189192
}
190193

191194
/*
@@ -213,11 +216,11 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
213216
fd->invalid_tids = kcalloc(uctxt->expected_count,
214217
sizeof(*fd->invalid_tids),
215218
GFP_KERNEL);
216-
/*
217-
* NOTE: If this is an error, shouldn't we cleanup enry_to_rb?
218-
*/
219-
if (!fd->invalid_tids)
219+
if (!fd->invalid_tids) {
220+
kfree(fd->entry_to_rb);
221+
fd->entry_to_rb = NULL;
220222
return -ENOMEM;
223+
}
221224

222225
/*
223226
* Register MMU notifier callbacks. If the registration

drivers/infiniband/hw/hfi1/user_sdma.c

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -374,40 +374,24 @@ static void sdma_kmem_cache_ctor(void *obj)
374374
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
375375
struct hfi1_filedata *fd)
376376
{
377-
int ret = 0;
377+
int ret = -ENOMEM;
378378
char buf[64];
379379
struct hfi1_devdata *dd;
380380
struct hfi1_user_sdma_comp_q *cq;
381381
struct hfi1_user_sdma_pkt_q *pq;
382382
unsigned long flags;
383383

384-
if (!uctxt || !fd) {
385-
ret = -EBADF;
386-
goto done;
387-
}
384+
if (!uctxt || !fd)
385+
return -EBADF;
388386

389-
if (!hfi1_sdma_comp_ring_size) {
390-
ret = -EINVAL;
391-
goto done;
392-
}
387+
if (!hfi1_sdma_comp_ring_size)
388+
return -EINVAL;
393389

394390
dd = uctxt->dd;
395391

396392
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
397393
if (!pq)
398-
goto pq_nomem;
399-
400-
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
401-
sizeof(*pq->reqs),
402-
GFP_KERNEL);
403-
if (!pq->reqs)
404-
goto pq_reqs_nomem;
405-
406-
pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
407-
sizeof(*pq->req_in_use),
408-
GFP_KERNEL);
409-
if (!pq->req_in_use)
410-
goto pq_reqs_no_in_use;
394+
return -ENOMEM;
411395

412396
INIT_LIST_HEAD(&pq->list);
413397
pq->dd = dd;
@@ -423,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
423407
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
424408
activate_packet_queue, NULL);
425409
pq->reqidx = 0;
410+
411+
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
412+
sizeof(*pq->reqs),
413+
GFP_KERNEL);
414+
if (!pq->reqs)
415+
goto pq_reqs_nomem;
416+
417+
pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
418+
sizeof(*pq->req_in_use),
419+
GFP_KERNEL);
420+
if (!pq->req_in_use)
421+
goto pq_reqs_no_in_use;
422+
426423
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
427424
fd->subctxt);
428425
pq->txreq_cache = kmem_cache_create(buf,
429-
sizeof(struct user_sdma_txreq),
426+
sizeof(struct user_sdma_txreq),
430427
L1_CACHE_BYTES,
431428
SLAB_HWCACHE_ALIGN,
432429
sdma_kmem_cache_ctor);
@@ -435,7 +432,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
435432
uctxt->ctxt);
436433
goto pq_txreq_nomem;
437434
}
438-
fd->pq = pq;
435+
439436
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
440437
if (!cq)
441438
goto cq_nomem;
@@ -446,20 +443,25 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
446443
goto cq_comps_nomem;
447444

448445
cq->nentries = hfi1_sdma_comp_ring_size;
449-
fd->cq = cq;
450446

451447
ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
452448
&pq->handler);
453449
if (ret) {
454450
dd_dev_err(dd, "Failed to register with MMU %d", ret);
455-
goto done;
451+
goto pq_mmu_fail;
456452
}
457453

454+
fd->pq = pq;
455+
fd->cq = cq;
456+
458457
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
459458
list_add(&pq->list, &uctxt->sdma_queues);
460459
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
461-
goto done;
462460

461+
return 0;
462+
463+
pq_mmu_fail:
464+
vfree(cq->comps);
463465
cq_comps_nomem:
464466
kfree(cq);
465467
cq_nomem:
@@ -470,10 +472,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
470472
kfree(pq->reqs);
471473
pq_reqs_nomem:
472474
kfree(pq);
473-
fd->pq = NULL;
474-
pq_nomem:
475-
ret = -ENOMEM;
476-
done:
475+
477476
return ret;
478477
}
479478

0 commit comments

Comments
 (0)