Skip to content

Commit 85743f1

Browse files
Huy Nguyen authored and davem330 committed
net/mlx4_core: Set UAR page size to 4KB regardless of system page size
Problem description: The current code sets the UAR page size equal to the system page size. The ConnectX-3 and ConnectX-3 Pro HWs require a minimum of 128 UAR pages. The mlx4 kernel drivers are not loaded if there are fewer than 128 UAR pages. Solution: Always set the UAR page size to 4KB. This allows more UAR pages if the OS has a PAGE_SIZE larger than 4KB. For example, the PowerPC kernel uses a 64KB system page size; with a 4MB UAR region, there are 4MB/2/64KB = 32 UARs (half for UAR, half for BlueFlame). This does not meet the minimum requirement of 128 UAR pages. With a 4KB UAR page, there are 4MB/2/4KB = 512 UARs, which meets the minimum requirement. Note that only the code in mlx4_core that deals with firmware knows that the UAR page size is 4KB. Code that deals with usr page in the CQ and QP contexts (mlx4_ib, mlx4_en and part of mlx4_core) still has the same assumption that the UAR page size equals the system page size. Note that with this implementation, on a kernel with a 64KB system page size, there are 16 UARs per system page but only one UAR is used. The other 15 UARs are ignored because of the above assumption. Regarding SR-IOV, mlx4_core in the hypervisor will set the UAR page size to 4KB and the mlx4_core code in the virtual OS will obtain the UAR page size from firmware. Regarding backward compatibility in SR-IOV, if the hypervisor has this new code, the virtual OS must be updated. If the hypervisor has the old code and the virtual OS has this new code, the new code will be backward compatible with the old code. If the UAR size is big enough, this new code in the VF continues to work with a 64KB UAR page size (on a PowerPC kernel). If the UAR size does not meet the 128-UAR requirement, this new code is not loaded in the VF and prints the same error message as the old code in the hypervisor. Signed-off-by: Huy Nguyen <huyn@mellanox.com> Reviewed-by: Yishai Hadas <yishaih@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 22e3817 commit 85743f1

File tree

8 files changed

+84
-22
lines changed

8 files changed

+84
-22
lines changed

drivers/infiniband/hw/mlx4/qp.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,9 +1681,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
16811681
}
16821682

16831683
if (qp->ibqp.uobject)
1684-
context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
1684+
context->usr_page = cpu_to_be32(
1685+
mlx4_to_hw_uar_index(dev->dev,
1686+
to_mucontext(ibqp->uobject->context)->uar.index));
16851687
else
1686-
context->usr_page = cpu_to_be32(dev->priv_uar.index);
1688+
context->usr_page = cpu_to_be32(
1689+
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
16871690

16881691
if (attr_mask & IB_QP_DEST_QPN)
16891692
context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

drivers/net/ethernet/mellanox/mlx4/cq.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
318318
if (timestamp_en)
319319
cq_context->flags |= cpu_to_be32(1 << 19);
320320

321-
cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
321+
cq_context->logsize_usrpage =
322+
cpu_to_be32((ilog2(nent) << 24) |
323+
mlx4_to_hw_uar_index(dev, uar->index));
322324
cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn;
323325
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
324326

drivers/net/ethernet/mellanox/mlx4/en_resources.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
5858
} else {
5959
context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
6060
}
61-
context->usr_page = cpu_to_be32(mdev->priv_uar.index);
61+
context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
62+
mdev->priv_uar.index));
6263
context->local_qpn = cpu_to_be32(qpn);
6364
context->pri_path.ackto = 1 & 0x07;
6465
context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;

drivers/net/ethernet/mellanox/mlx4/en_tx.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
213213
mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
214214
ring->cqn, user_prio, &ring->context);
215215
if (ring->bf_alloced)
216-
ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
216+
ring->context.usr_page =
217+
cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
218+
ring->bf.uar->index));
217219

218220
err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
219221
&ring->qp, &ring->qp_state);

drivers/net/ethernet/mellanox/mlx4/eq.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -940,9 +940,10 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
940940

941941
if (!priv->eq_table.uar_map[index]) {
942942
priv->eq_table.uar_map[index] =
943-
ioremap(pci_resource_start(dev->persist->pdev, 2) +
944-
((eq->eqn / 4) << PAGE_SHIFT),
945-
PAGE_SIZE);
943+
ioremap(
944+
pci_resource_start(dev->persist->pdev, 2) +
945+
((eq->eqn / 4) << (dev->uar_page_shift)),
946+
(1 << (dev->uar_page_shift)));
946947
if (!priv->eq_table.uar_map[index]) {
947948
mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
948949
eq->eqn);

drivers/net/ethernet/mellanox/mlx4/main.c

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,20 @@ struct mlx4_port_config {
168168

169169
static atomic_t pf_loading = ATOMIC_INIT(0);
170170

171+
/* Compute dev->caps.reserved_uars from the device capabilities.
 *
 * @dev:     device whose caps.reserved_uars is written; dev->uar_page_shift
 *           is read here, so it must already be set by the caller.
 * @dev_cap: queried capabilities; only dev_cap->reserved_uars is read.
 *
 * The result is the larger of mlx4_get_num_reserved_uar(dev) and
 * dev_cap->reserved_uars divided by the number of UAR pages that fit in
 * one system page, i.e. 1 << (PAGE_SHIFT - dev->uar_page_shift).
 * NOTE(review): assumes dev->uar_page_shift <= PAGE_SHIFT, otherwise the
 * shift count is negative - confirm every caller guarantees this.
 */
static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
172+
struct mlx4_dev_cap *dev_cap)
173+
{
174+
/* reserved_uars is kept in units of system pages, so the value
175+
* reported in dev_cap is scaled down when the UAR page size is
176+
* smaller than the system page size.
177+
*/
178+
dev->caps.reserved_uars =
179+
max_t(int,
180+
mlx4_get_num_reserved_uar(dev),
181+
dev_cap->reserved_uars /
182+
(1 << (PAGE_SHIFT - dev->uar_page_shift)));
183+
}
184+
171185
int mlx4_check_port_params(struct mlx4_dev *dev,
172186
enum mlx4_port_type *port_type)
173187
{
@@ -386,8 +400,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
386400
dev->caps.reserved_mtts = dev_cap->reserved_mtts;
387401
dev->caps.reserved_mrws = dev_cap->reserved_mrws;
388402

389-
/* The first 128 UARs are used for EQ doorbells */
390-
dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars);
391403
dev->caps.reserved_pds = dev_cap->reserved_pds;
392404
dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
393405
dev_cap->reserved_xrcds : 0;
@@ -405,6 +417,15 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
405417
dev->caps.max_gso_sz = dev_cap->max_gso_sz;
406418
dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
407419

420+
/* Save uar page shift */
421+
if (!mlx4_is_slave(dev)) {
422+
/* Virtual PCI function needs to determine UAR page size from
423+
* firmware. Only master PCI function can set the uar page size
424+
*/
425+
dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
426+
mlx4_set_num_reserved_uars(dev, dev_cap);
427+
}
428+
408429
if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
409430
struct mlx4_init_hca_param hca_param;
410431

@@ -815,16 +836,25 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
815836
return -ENODEV;
816837
}
817838

818-
/* slave gets uar page size from QUERY_HCA fw command */
819-
dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
839+
/* Set uar_page_shift for VF */
840+
dev->uar_page_shift = hca_param.uar_page_sz + 12;
820841

821-
/* TODO: relax this assumption */
822-
if (dev->caps.uar_page_size != PAGE_SIZE) {
823-
mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
824-
dev->caps.uar_page_size, PAGE_SIZE);
825-
return -ENODEV;
842+
/* Make sure the master uar page size is valid */
843+
if (dev->uar_page_shift > PAGE_SHIFT) {
844+
mlx4_err(dev,
845+
"Invalid configuration: uar page size is larger than system page size\n");
846+
return -ENODEV;
826847
}
827848

849+
/* Set reserved_uars based on the uar_page_shift */
850+
mlx4_set_num_reserved_uars(dev, &dev_cap);
851+
852+
/* Although uar page size in FW differs from system page size,
853+
* upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
854+
* still works with assumption that uar page size == system page size
855+
*/
856+
dev->caps.uar_page_size = PAGE_SIZE;
857+
828858
memset(&func_cap, 0, sizeof(func_cap));
829859
err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
830860
if (err) {
@@ -2179,8 +2209,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
21792209

21802210
dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
21812211

2182-
init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2183-
init_hca.uar_page_sz = PAGE_SHIFT - 12;
2212+
/* Always set UAR page size 4KB, set log_uar_sz accordingly */
2213+
init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
2214+
PAGE_SHIFT -
2215+
DEFAULT_UAR_PAGE_SHIFT;
2216+
init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
2217+
21842218
init_hca.mw_enabled = 0;
21852219
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
21862220
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)

drivers/net/ethernet/mellanox/mlx4/pd.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,15 @@ EXPORT_SYMBOL_GPL(mlx4_bf_free);
269269

270270
int mlx4_init_uar_table(struct mlx4_dev *dev)
271271
{
272-
if (dev->caps.num_uars <= 128) {
273-
mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
274-
dev->caps.num_uars);
272+
int num_reserved_uar = mlx4_get_num_reserved_uar(dev);
273+
274+
mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift);
275+
mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars);
276+
277+
if (dev->caps.num_uars <= num_reserved_uar) {
278+
mlx4_err(
279+
dev, "Only %d UAR pages (need more than %d)\n",
280+
dev->caps.num_uars, num_reserved_uar);
275281
mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
276282
return -ENODEV;
277283
}

include/linux/mlx4/device.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444

4545
#include <linux/timecounter.h>
4646

47+
#define DEFAULT_UAR_PAGE_SHIFT 12
48+
4749
#define MAX_MSIX_P_PORT 17
4850
#define MAX_MSIX 64
4951
#define MIN_MSIX_P_PORT 5
@@ -856,6 +858,7 @@ struct mlx4_dev {
856858
u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
857859
u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
858860
struct mlx4_vf_dev *dev_vfs;
861+
u8 uar_page_shift;
859862
};
860863

861864
struct mlx4_clock_params {
@@ -1528,4 +1531,14 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev,
15281531
int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
15291532
struct mlx4_clock_params *params);
15301533

1534+
/* Convert a UAR index counted in system-page units into the index used
 * when the UAR page size is 1 << dev->uar_page_shift.
 *
 * @dev:   device; only dev->uar_page_shift is read.
 * @index: UAR index in system-page units.
 *
 * Returns index multiplied by the number of UAR pages per system page
 * (index << (PAGE_SHIFT - dev->uar_page_shift)). When the two page sizes
 * are equal the index is returned unchanged.
 * NOTE(review): assumes dev->uar_page_shift <= PAGE_SHIFT and that the
 * shifted value still fits in an int - confirm against callers.
 */
static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index)
1535+
{
1536+
return (index << (PAGE_SHIFT - dev->uar_page_shift));
1537+
}
1538+
1539+
/* Return the number of reserved UARs expressed in system-page units.
 *
 * @dev: device; only dev->uar_page_shift is read.
 *
 * The constant 128 is divided by the number of UAR pages per system page
 * (128 >> (PAGE_SHIFT - dev->uar_page_shift)), so the result is 128 when
 * the UAR page size equals the system page size and smaller otherwise.
 * NOTE(review): assumes dev->uar_page_shift <= PAGE_SHIFT - confirm.
 */
static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev)
1540+
{
1541+
/* The first 128 UARs are used for EQ doorbells */
1542+
return (128 >> (PAGE_SHIFT - dev->uar_page_shift));
1543+
}
15311544
#endif /* MLX4_DEVICE_H */

0 commit comments

Comments
 (0)