Skip to content

Commit ec25bc0

Browse files
Eugenia Emantayev authored and davem330 committed
net/mlx4_en: Add resilience in low memory systems
This patch fixes the loss of the Ethernet port on low-memory systems, which occurs when the driver frees its resources and then fails to allocate new ones. The issue could happen while changing the number of channels, the ring sizes, or the timestamp configuration. This fix is necessary because the use of vmap was removed from the code. When vmap was in use, the driver could allocate non-contiguous memory and make it contiguous with vmap. Now it could fail to allocate a large chunk of contiguous memory and lose the port. With this patch, the code first tries to allocate the new resources and frees the old resources only upon success. Fixes: 73898db ('net/mlx4: Avoid wrong virtual mappings') Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 30f56e3 commit ec25bc0

File tree

3 files changed

+132
-37
lines changed

3 files changed

+132
-37
lines changed

drivers/net/ethernet/mellanox/mlx4/en_ethtool.c

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
10421042
{
10431043
struct mlx4_en_priv *priv = netdev_priv(dev);
10441044
struct mlx4_en_dev *mdev = priv->mdev;
1045+
struct mlx4_en_port_profile new_prof;
1046+
struct mlx4_en_priv *tmp;
10451047
u32 rx_size, tx_size;
10461048
int port_up = 0;
10471049
int err = 0;
@@ -1061,31 +1063,34 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
10611063
tx_size == priv->tx_ring[0]->size)
10621064
return 0;
10631065

1066+
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1067+
if (!tmp)
1068+
return -ENOMEM;
1069+
10641070
mutex_lock(&mdev->state_lock);
1071+
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
1072+
new_prof.tx_ring_size = tx_size;
1073+
new_prof.rx_ring_size = rx_size;
1074+
err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
1075+
if (err)
1076+
goto out;
1077+
10651078
if (priv->port_up) {
10661079
port_up = 1;
10671080
mlx4_en_stop_port(dev, 1);
10681081
}
10691082

1070-
mlx4_en_free_resources(priv);
1071-
1072-
priv->prof->tx_ring_size = tx_size;
1073-
priv->prof->rx_ring_size = rx_size;
1083+
mlx4_en_safe_replace_resources(priv, tmp);
10741084

1075-
err = mlx4_en_alloc_resources(priv);
1076-
if (err) {
1077-
en_err(priv, "Failed reallocating port resources\n");
1078-
goto out;
1079-
}
10801085
if (port_up) {
10811086
err = mlx4_en_start_port(dev);
10821087
if (err)
10831088
en_err(priv, "Failed starting port\n");
10841089
}
10851090

10861091
err = mlx4_en_moderation_update(priv);
1087-
10881092
out:
1093+
kfree(tmp);
10891094
mutex_unlock(&mdev->state_lock);
10901095
return err;
10911096
}
@@ -1714,6 +1719,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
17141719
{
17151720
struct mlx4_en_priv *priv = netdev_priv(dev);
17161721
struct mlx4_en_dev *mdev = priv->mdev;
1722+
struct mlx4_en_port_profile new_prof;
1723+
struct mlx4_en_priv *tmp;
17171724
int port_up = 0;
17181725
int err = 0;
17191726

@@ -1723,23 +1730,26 @@ static int mlx4_en_set_channels(struct net_device *dev,
17231730
!channel->tx_count || !channel->rx_count)
17241731
return -EINVAL;
17251732

1733+
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1734+
if (!tmp)
1735+
return -ENOMEM;
1736+
17261737
mutex_lock(&mdev->state_lock);
1738+
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
1739+
new_prof.num_tx_rings_p_up = channel->tx_count;
1740+
new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
1741+
new_prof.rx_ring_num = channel->rx_count;
1742+
1743+
err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
1744+
if (err)
1745+
goto out;
1746+
17271747
if (priv->port_up) {
17281748
port_up = 1;
17291749
mlx4_en_stop_port(dev, 1);
17301750
}
17311751

1732-
mlx4_en_free_resources(priv);
1733-
1734-
priv->num_tx_rings_p_up = channel->tx_count;
1735-
priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
1736-
priv->rx_ring_num = channel->rx_count;
1737-
1738-
err = mlx4_en_alloc_resources(priv);
1739-
if (err) {
1740-
en_err(priv, "Failed reallocating port resources\n");
1741-
goto out;
1742-
}
1752+
mlx4_en_safe_replace_resources(priv, tmp);
17431753

17441754
netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
17451755
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
@@ -1757,8 +1767,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
17571767
}
17581768

17591769
err = mlx4_en_moderation_update(priv);
1760-
17611770
out:
1771+
kfree(tmp);
17621772
mutex_unlock(&mdev->state_lock);
17631773
return err;
17641774
}

drivers/net/ethernet/mellanox/mlx4/en_netdev.c

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,7 +1954,7 @@ static int mlx4_en_close(struct net_device *dev)
19541954
return 0;
19551955
}
19561956

1957-
void mlx4_en_free_resources(struct mlx4_en_priv *priv)
1957+
static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
19581958
{
19591959
int i;
19601960

@@ -1979,7 +1979,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
19791979

19801980
}
19811981

1982-
int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
1982+
static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
19831983
{
19841984
struct mlx4_en_port_profile *prof = priv->prof;
19851985
int i;
@@ -2044,6 +2044,77 @@ static void mlx4_en_shutdown(struct net_device *dev)
20442044
rtnl_unlock();
20452045
}
20462046

2047+
static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
2048+
struct mlx4_en_priv *src,
2049+
struct mlx4_en_port_profile *prof)
2050+
{
2051+
memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
2052+
sizeof(dst->hwtstamp_config));
2053+
dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
2054+
dst->tx_ring_num = prof->tx_ring_num;
2055+
dst->rx_ring_num = prof->rx_ring_num;
2056+
dst->flags = prof->flags;
2057+
dst->mdev = src->mdev;
2058+
dst->port = src->port;
2059+
dst->dev = src->dev;
2060+
dst->prof = prof;
2061+
dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
2062+
DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
2063+
2064+
dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
2065+
GFP_KERNEL);
2066+
if (!dst->tx_ring)
2067+
return -ENOMEM;
2068+
2069+
dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
2070+
GFP_KERNEL);
2071+
if (!dst->tx_cq) {
2072+
kfree(dst->tx_ring);
2073+
return -ENOMEM;
2074+
}
2075+
return 0;
2076+
}
2077+
2078+
static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
2079+
struct mlx4_en_priv *src)
2080+
{
2081+
memcpy(dst->rx_ring, src->rx_ring,
2082+
sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num);
2083+
memcpy(dst->rx_cq, src->rx_cq,
2084+
sizeof(struct mlx4_en_cq *) * src->rx_ring_num);
2085+
memcpy(&dst->hwtstamp_config, &src->hwtstamp_config,
2086+
sizeof(dst->hwtstamp_config));
2087+
dst->tx_ring_num = src->tx_ring_num;
2088+
dst->rx_ring_num = src->rx_ring_num;
2089+
dst->tx_ring = src->tx_ring;
2090+
dst->tx_cq = src->tx_cq;
2091+
memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
2092+
}
2093+
2094+
int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
2095+
struct mlx4_en_priv *tmp,
2096+
struct mlx4_en_port_profile *prof)
2097+
{
2098+
mlx4_en_copy_priv(tmp, priv, prof);
2099+
2100+
if (mlx4_en_alloc_resources(tmp)) {
2101+
en_warn(priv,
2102+
"%s: Resource allocation failed, using previous configuration\n",
2103+
__func__);
2104+
kfree(tmp->tx_ring);
2105+
kfree(tmp->tx_cq);
2106+
return -ENOMEM;
2107+
}
2108+
return 0;
2109+
}
2110+
2111+
/*
 * Swap the resources of @priv for the ones already allocated in @tmp.
 *
 * The current resources of @priv are released first, then the state held by
 * @tmp is transferred into @priv.  @tmp itself is not freed here.
 */
void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
				    struct mlx4_en_priv *tmp)
{
	/* Drop the old rings before adopting the replacement set. */
	mlx4_en_free_resources(priv);

	mlx4_en_update_priv(priv, tmp);
}
2117+
20472118
void mlx4_en_destroy_netdev(struct net_device *dev)
20482119
{
20492120
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -3128,6 +3199,8 @@ int mlx4_en_reset_config(struct net_device *dev,
31283199
{
31293200
struct mlx4_en_priv *priv = netdev_priv(dev);
31303201
struct mlx4_en_dev *mdev = priv->mdev;
3202+
struct mlx4_en_port_profile new_prof;
3203+
struct mlx4_en_priv *tmp;
31313204
int port_up = 0;
31323205
int err = 0;
31333206

@@ -3144,19 +3217,29 @@ int mlx4_en_reset_config(struct net_device *dev,
31443217
return -EINVAL;
31453218
}
31463219

3220+
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
3221+
if (!tmp)
3222+
return -ENOMEM;
3223+
31473224
mutex_lock(&mdev->state_lock);
3225+
3226+
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
3227+
memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config));
3228+
3229+
err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
3230+
if (err)
3231+
goto out;
3232+
31483233
if (priv->port_up) {
31493234
port_up = 1;
31503235
mlx4_en_stop_port(dev, 1);
31513236
}
31523237

3153-
mlx4_en_free_resources(priv);
3154-
31553238
en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
3156-
ts_config.rx_filter, !!(features & NETIF_F_HW_VLAN_CTAG_RX));
3239+
ts_config.rx_filter,
3240+
!!(features & NETIF_F_HW_VLAN_CTAG_RX));
31573241

3158-
priv->hwtstamp_config.tx_type = ts_config.tx_type;
3159-
priv->hwtstamp_config.rx_filter = ts_config.rx_filter;
3242+
mlx4_en_safe_replace_resources(priv, tmp);
31603243

31613244
if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
31623245
if (features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -3190,11 +3273,6 @@ int mlx4_en_reset_config(struct net_device *dev,
31903273
dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
31913274
}
31923275

3193-
err = mlx4_en_alloc_resources(priv);
3194-
if (err) {
3195-
en_err(priv, "Failed reallocating port resources\n");
3196-
goto out;
3197-
}
31983276
if (port_up) {
31993277
err = mlx4_en_start_port(dev);
32003278
if (err)
@@ -3203,6 +3281,8 @@ int mlx4_en_reset_config(struct net_device *dev,
32033281

32043282
out:
32053283
mutex_unlock(&mdev->state_lock);
3206-
netdev_features_change(dev);
3284+
kfree(tmp);
3285+
if (!err)
3286+
netdev_features_change(dev);
32073287
return err;
32083288
}

drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,12 +353,14 @@ struct mlx4_en_port_profile {
353353
u32 rx_ring_num;
354354
u32 tx_ring_size;
355355
u32 rx_ring_size;
356+
u8 num_tx_rings_p_up;
356357
u8 rx_pause;
357358
u8 rx_ppp;
358359
u8 tx_pause;
359360
u8 tx_ppp;
360361
int rss_rings;
361362
int inline_thold;
363+
struct hwtstamp_config hwtstamp_config;
362364
};
363365

364366
struct mlx4_en_profile {
@@ -623,8 +625,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
623625
u8 rx_ppp, u8 rx_pause,
624626
u8 tx_ppp, u8 tx_pause);
625627

626-
void mlx4_en_free_resources(struct mlx4_en_priv *priv);
627-
int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
628+
int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
629+
struct mlx4_en_priv *tmp,
630+
struct mlx4_en_port_profile *prof);
631+
void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
632+
struct mlx4_en_priv *tmp);
628633

629634
int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
630635
int entries, int ring, enum cq_type mode, int node);

0 commit comments

Comments
 (0)