Skip to content

Commit 1259c5d

Browse files
Sesidhar BeddelJames Bottomley
authored andcommitted
[SCSI] fnic: Hitting BUG_ON(io_req->abts_done) in fnic_rport_exch_reset
Hitting BUG_ON(io_req->abts_done) in fnic_rport_exch_reset in case of timing issue and also to some extent locking issue where abts and terminate is happening around same timing. The code changes are intended to update CMD_STATE(sc) and io_req->abts_done together. Signed-off-by: Sesidhar Beddel <sebaddel@cisco.com> Signed-off-by: Hiral Patel <hiralpat@cisco.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
1 parent 318c7c4 commit 1259c5d

File tree

1 file changed

+42
-28
lines changed

1 file changed

+42
-28
lines changed

drivers/scsi/fnic/fnic_scsi.c

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
111111
return &fnic->io_req_lock[hash];
112112
}
113113

114+
static inline spinlock_t *fnic_io_lock_tag(struct fnic *fnic,
115+
int tag)
116+
{
117+
return &fnic->io_req_lock[tag & (FNIC_IO_LOCKS - 1)];
118+
}
119+
114120
/*
115121
* Unmap the data buffer and sense buffer for an io_req,
116122
* also unmap and free the device-private scatter/gather list.
@@ -956,9 +962,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
956962
spin_unlock_irqrestore(io_lock, flags);
957963
return;
958964
}
959-
CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
960965
CMD_ABTS_STATUS(sc) = hdr_status;
961-
962966
CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
963967
FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
964968
"abts cmpl recd. id %d status %s\n",
@@ -1116,7 +1120,7 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
11161120

11171121
static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
11181122
{
1119-
unsigned int i;
1123+
int i;
11201124
struct fnic_io_req *io_req;
11211125
unsigned long flags = 0;
11221126
struct scsi_cmnd *sc;
@@ -1127,12 +1131,14 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
11271131
if (i == exclude_id)
11281132
continue;
11291133

1134+
io_lock = fnic_io_lock_tag(fnic, i);
1135+
spin_lock_irqsave(io_lock, flags);
11301136
sc = scsi_host_find_tag(fnic->lport->host, i);
1131-
if (!sc)
1137+
if (!sc) {
1138+
spin_unlock_irqrestore(io_lock, flags);
11321139
continue;
1140+
}
11331141

1134-
io_lock = fnic_io_lock_hash(fnic, sc);
1135-
spin_lock_irqsave(io_lock, flags);
11361142
io_req = (struct fnic_io_req *)CMD_SP(sc);
11371143
if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
11381144
!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
@@ -1310,12 +1316,13 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
13101316

13111317
for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
13121318
abt_tag = tag;
1319+
io_lock = fnic_io_lock_tag(fnic, tag);
1320+
spin_lock_irqsave(io_lock, flags);
13131321
sc = scsi_host_find_tag(fnic->lport->host, tag);
1314-
if (!sc)
1322+
if (!sc) {
1323+
spin_unlock_irqrestore(io_lock, flags);
13151324
continue;
1316-
1317-
io_lock = fnic_io_lock_hash(fnic, sc);
1318-
spin_lock_irqsave(io_lock, flags);
1325+
}
13191326

13201327
io_req = (struct fnic_io_req *)CMD_SP(sc);
13211328

@@ -1426,16 +1433,19 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
14261433

14271434
for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
14281435
abt_tag = tag;
1436+
io_lock = fnic_io_lock_tag(fnic, tag);
1437+
spin_lock_irqsave(io_lock, flags);
14291438
sc = scsi_host_find_tag(fnic->lport->host, tag);
1430-
if (!sc)
1439+
if (!sc) {
1440+
spin_unlock_irqrestore(io_lock, flags);
14311441
continue;
1442+
}
14321443

14331444
cmd_rport = starget_to_rport(scsi_target(sc->device));
1434-
if (rport != cmd_rport)
1445+
if (rport != cmd_rport) {
1446+
spin_unlock_irqrestore(io_lock, flags);
14351447
continue;
1436-
1437-
io_lock = fnic_io_lock_hash(fnic, sc);
1438-
spin_lock_irqsave(io_lock, flags);
1448+
}
14391449

14401450
io_req = (struct fnic_io_req *)CMD_SP(sc);
14411451

@@ -1648,13 +1658,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
16481658
io_req->abts_done = NULL;
16491659

16501660
/* fw did not complete abort, timed out */
1651-
if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1661+
if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
16521662
spin_unlock_irqrestore(io_lock, flags);
16531663
CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT;
16541664
ret = FAILED;
16551665
goto fnic_abort_cmd_end;
16561666
}
16571667

1668+
CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
1669+
16581670
/*
16591671
* firmware completed the abort, check the status,
16601672
* free the io_req irrespective of failure or success
@@ -1753,16 +1765,17 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
17531765
enum fnic_ioreq_state old_ioreq_state;
17541766

17551767
for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1768+
io_lock = fnic_io_lock_tag(fnic, tag);
1769+
spin_lock_irqsave(io_lock, flags);
17561770
sc = scsi_host_find_tag(fnic->lport->host, tag);
17571771
/*
17581772
* ignore this lun reset cmd or cmds that do not belong to
17591773
* this lun
17601774
*/
1761-
if (!sc || sc == lr_sc || sc->device != lun_dev)
1775+
if (!sc || sc == lr_sc || sc->device != lun_dev) {
1776+
spin_unlock_irqrestore(io_lock, flags);
17621777
continue;
1763-
1764-
io_lock = fnic_io_lock_hash(fnic, sc);
1765-
spin_lock_irqsave(io_lock, flags);
1778+
}
17661779

17671780
io_req = (struct fnic_io_req *)CMD_SP(sc);
17681781

@@ -1791,6 +1804,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
17911804
spin_unlock_irqrestore(io_lock, flags);
17921805
continue;
17931806
}
1807+
1808+
if (io_req->abts_done)
1809+
shost_printk(KERN_ERR, fnic->lport->host,
1810+
"%s: io_req->abts_done is set state is %s\n",
1811+
__func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
17941812
old_ioreq_state = CMD_STATE(sc);
17951813
/*
17961814
* Any pending IO issued prior to reset is expected to be
@@ -1801,11 +1819,6 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
18011819
*/
18021820
CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
18031821

1804-
if (io_req->abts_done)
1805-
shost_printk(KERN_ERR, fnic->lport->host,
1806-
"%s: io_req->abts_done is set state is %s\n",
1807-
__func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
1808-
18091822
BUG_ON(io_req->abts_done);
18101823

18111824
abt_tag = tag;
@@ -1858,12 +1871,13 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
18581871
io_req->abts_done = NULL;
18591872

18601873
/* if abort is still pending with fw, fail */
1861-
if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1874+
if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
18621875
spin_unlock_irqrestore(io_lock, flags);
18631876
CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
18641877
ret = 1;
18651878
goto clean_pending_aborts_end;
18661879
}
1880+
CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
18671881
CMD_SP(sc) = NULL;
18681882
spin_unlock_irqrestore(io_lock, flags);
18691883

@@ -2061,8 +2075,8 @@ int fnic_device_reset(struct scsi_cmnd *sc)
20612075
spin_unlock_irqrestore(io_lock, flags);
20622076
int_to_scsilun(sc->device->lun, &fc_lun);
20632077
/*
2064-
* Issue abort and terminate on the device reset request.
2065-
* If q'ing of the abort fails, retry issue it after a delay.
2078+
* Issue abort and terminate on device reset request.
2079+
* If q'ing of terminate fails, retry it after a delay.
20662080
*/
20672081
while (1) {
20682082
spin_lock_irqsave(io_lock, flags);

0 commit comments

Comments
 (0)