Commit fa9656b

Jack Morgenstein authored and Roland Dreier committed
IB/mad: include GID/class when matching receives
Received responses are currently matched against sent requests based on TID only. According to the spec, responses should match based on the combination of TID, management class, and requester LID/GID. Without the additional qualification, an agent that is responding to two requests, both of which have the same TID, can match RMPP ACKs with the incorrect transaction. This problem can occur on the SM node when responding to SA queries.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
1 parent: e1f7868 · commit: fa9656b
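For context, the sketch below illustrates the ambiguity in plain userspace C. It is not part of the patch: the struct and function names are hypothetical, and the requester address is reduced to a LID (the patch also handles the GID/GRH case). Two outstanding sends addressed to different requesters happen to carry the same TID; matching on the TID alone pairs an ACK from the second requester with the first transaction, while the combined key correctly rejects it.

/* Illustrative sketch only -- simplified, hypothetical types, not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct pending_send {          /* an outstanding send (e.g. an RMPP response) */
	uint64_t tid;
	uint8_t  mgmt_class;
	uint16_t peer_lid;     /* requester the send is addressed to */
};

struct received_mad {          /* an incoming MAD (e.g. an RMPP ACK) */
	uint64_t tid;
	uint8_t  mgmt_class;
	uint16_t src_lid;      /* LID of the sender */
};

/* Old behaviour: TID alone. */
static int match_tid(const struct pending_send *s, const struct received_mad *r)
{
	return s->tid == r->tid;
}

/* Patched behaviour: TID + management class + requester address. */
static int match_full(const struct pending_send *s, const struct received_mad *r)
{
	return s->tid == r->tid &&
	       s->mgmt_class == r->mgmt_class &&
	       s->peer_lid == r->src_lid;
}

int main(void)
{
	/* Response in flight to the requester at LID 1, using TID 7. */
	struct pending_send to_lid1 = { .tid = 7, .mgmt_class = 0x03, .peer_lid = 1 };
	/* ACK arriving from a different requester (LID 2) that reused TID 7. */
	struct received_mad ack_from_lid2 = { .tid = 7, .mgmt_class = 0x03, .src_lid = 2 };

	printf("tid-only match: %d, full-key match: %d\n",
	       match_tid(&to_lid1, &ack_from_lid2),    /* 1: wrong pairing */
	       match_full(&to_lid1, &ack_from_lid2));  /* 0: correctly rejected */
	return 0;
}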

3 files changed: 67 additions & 29 deletions


drivers/infiniband/core/mad.c

Lines changed: 52 additions & 6 deletions
@@ -1618,14 +1618,59 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
 }
 
+static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
+				     struct ib_mad_recv_wc *rwc)
+{
+	return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
+		rwc->recv_buf.mad->mad_hdr.mgmt_class;
+}
+
+static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
+				   struct ib_mad_recv_wc *rwc )
+{
+	struct ib_ah_attr attr;
+	u8 send_resp, rcv_resp;
+
+	send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
+		     mad_hdr.method & IB_MGMT_METHOD_RESP;
+	rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
+
+	if (!send_resp && rcv_resp)
+		/* is request/response. GID/LIDs are both local (same). */
+		return 1;
+
+	if (send_resp == rcv_resp)
+		/* both requests, or both responses. GIDs different */
+		return 0;
+
+	if (ib_query_ah(wr->send_buf.ah, &attr))
+		/* Assume not equal, to avoid false positives. */
+		return 0;
+
+	if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH))
+		return attr.dlid == rwc->wc->slid;
+	else if ((attr.ah_flags & IB_AH_GRH) &&
+		 (rwc->wc->wc_flags & IB_WC_GRH))
+		return memcmp(attr.grh.dgid.raw,
+			      rwc->recv_buf.grh->sgid.raw, 16) == 0;
+	else
+		/* one has GID, other does not. Assume different */
+		return 0;
+}
 struct ib_mad_send_wr_private*
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+		 struct ib_mad_recv_wc *mad_recv_wc)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
+	struct ib_mad *mad;
+
+	mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad;
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
 			    agent_list) {
-		if (mad_send_wr->tid == tid)
+		if ((mad_send_wr->tid == mad->mad_hdr.tid) &&
+		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
+		    rcv_has_same_gid(mad_send_wr, mad_recv_wc))
 			return mad_send_wr;
 	}
 
@@ -1636,7 +1681,10 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
 		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
-		    mad_send_wr->tid == tid && mad_send_wr->timeout) {
+		    mad_send_wr->tid == mad->mad_hdr.tid &&
+		    mad_send_wr->timeout &&
+		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
+		    rcv_has_same_gid(mad_send_wr, mad_recv_wc)) {
 			/* Verify request has not been canceled */
 			return (mad_send_wr->status == IB_WC_SUCCESS) ?
 				mad_send_wr : NULL;
@@ -1661,7 +1709,6 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc mad_send_wc;
 	unsigned long flags;
-	__be64 tid;
 
 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
@@ -1677,9 +1724,8 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	/* Complete corresponding request */
 	if (response_mad(mad_recv_wc->recv_buf.mad)) {
-		tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid;
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
-		mad_send_wr = ib_find_send_mad(mad_agent_priv, tid);
+		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 			ib_free_recv_mad(mad_recv_wc);
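As an aid to reading rcv_has_same_gid() above, here is a standalone restatement of its decision order using simplified, hypothetical types (the ib_query_ah() failure path is omitted): a response received for a request we sent is trusted to come from the right peer, two MADs of the same direction never match, and otherwise the destination of the sent MAD is compared with the source of the received one, by LID when neither side used a GRH and by GID when both did.

/* Illustrative restatement only -- not kernel code; types are hypothetical. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct sent_addr { bool is_resp; bool has_grh; uint16_t dlid; uint8_t dgid[16]; };
struct recv_addr { bool is_resp; bool has_grh; uint16_t slid; uint8_t sgid[16]; };

bool same_peer(const struct sent_addr *s, const struct recv_addr *r)
{
	if (!s->is_resp && r->is_resp)
		return true;                    /* our request, their response */
	if (s->is_resp == r->is_resp)
		return false;                   /* both requests or both responses */
	if (!s->has_grh && !r->has_grh)
		return s->dlid == r->slid;      /* LID-routed on both sides */
	if (s->has_grh && r->has_grh)
		return memcmp(s->dgid, r->sgid, 16) == 0;  /* GRH on both sides */
	return false;                           /* one side has a GRH, the other not */
}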

drivers/infiniband/core/mad_priv.h

Lines changed: 2 additions & 1 deletion
@@ -216,7 +216,8 @@ extern kmem_cache_t *ib_mad_cache;
 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
 
 struct ib_mad_send_wr_private *
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid);
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+		 struct ib_mad_recv_wc *mad_recv_wc);
 
 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 			     struct ib_mad_send_wc *mad_send_wc);

drivers/infiniband/core/mad_rmpp.c

Lines changed: 13 additions & 22 deletions
@@ -562,15 +562,15 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 	return ib_send_mad(mad_send_wr);
 }
 
-static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
-		       u8 rmpp_status)
+static void abort_send(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc wc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&agent->lock, flags);
-	mad_send_wr = ib_find_send_mad(agent, tid);
+	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
 	if (!mad_send_wr)
 		goto out;	/* Unmatched send */
 
@@ -612,23 +612,21 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 
 	rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
 	if (rmpp_mad->rmpp_hdr.rmpp_status) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		return;
 	}
 
 	seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
 	newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 	if (newwin < seg_num) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_W2S);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
 		return;
 	}
 
 	spin_lock_irqsave(&agent->lock, flags);
-	mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid);
+	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
 	if (!mad_send_wr)
 		goto out;	/* Unmatched ACK */
 
@@ -639,8 +637,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 	if (seg_num > mad_send_wr->send_buf.seg_count ||
 	    seg_num > mad_send_wr->newwin) {
 		spin_unlock_irqrestore(&agent->lock, flags);
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_S2B);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
 		return;
 	}
@@ -728,12 +725,10 @@ static void process_rmpp_stop(struct ib_mad_agent_private *agent,
 	rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
 
 	if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 	} else
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   rmpp_mad->rmpp_hdr.rmpp_status);
+		abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
 }
 
 static void process_rmpp_abort(struct ib_mad_agent_private *agent,
@@ -745,12 +740,10 @@ static void process_rmpp_abort(struct ib_mad_agent_private *agent,
 
 	if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN ||
 	    rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 	} else
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   rmpp_mad->rmpp_hdr.rmpp_status);
+		abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
 }
 
 struct ib_mad_recv_wc *
@@ -764,8 +757,7 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
 		return mad_recv_wc;
 
 	if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_UNV);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
 		goto out;
 	}
@@ -783,8 +775,7 @@
 		process_rmpp_abort(agent, mad_recv_wc);
 		break;
 	default:
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BADT);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
 		break;
 	}
