Skip to content

Commit ad9a0bf

Browse files
yishaih authored and davem330 committed
net/mlx4_core: Refactor the catas flow to work per device
Use a WQ per device instead of a single global WQ. This allows independent reset handling per device even when SRIOV is used. This comes as a pre-patch for supporting chip reset for both native and SRIOV.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent dd0eefe commit ad9a0bf

File tree

4 files changed

+48
-44
lines changed

4 files changed

+48
-44
lines changed

drivers/net/ethernet/mellanox/mlx4/catas.c

Lines changed: 34 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@ enum {
4040
MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
4141
};
4242

43-
static DEFINE_SPINLOCK(catas_lock);
4443

45-
static LIST_HEAD(catas_list);
46-
static struct work_struct catas_work;
4744

4845
static int internal_err_reset = 1;
4946
module_param(internal_err_reset, int, 0644);
@@ -77,13 +74,9 @@ static void poll_catas(unsigned long dev_ptr)
7774
dump_err_buf(dev);
7875
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
7976

80-
if (internal_err_reset) {
81-
spin_lock(&catas_lock);
82-
list_add(&priv->catas_err.list, &catas_list);
83-
spin_unlock(&catas_lock);
84-
85-
queue_work(mlx4_wq, &catas_work);
86-
}
77+
if (internal_err_reset)
78+
queue_work(dev->persist->catas_wq,
79+
&dev->persist->catas_work);
8780
}
8881
} else
8982
mod_timer(&priv->catas_err.timer,
@@ -92,34 +85,23 @@ static void poll_catas(unsigned long dev_ptr)
9285

9386
static void catas_reset(struct work_struct *work)
9487
{
95-
struct mlx4_priv *priv, *tmppriv;
96-
struct mlx4_dev *dev;
97-
struct mlx4_dev_persistent *persist;
98-
99-
LIST_HEAD(tlist);
88+
struct mlx4_dev_persistent *persist =
89+
container_of(work, struct mlx4_dev_persistent,
90+
catas_work);
91+
struct pci_dev *pdev = persist->pdev;
10092
int ret;
10193

102-
spin_lock_irq(&catas_lock);
103-
list_splice_init(&catas_list, &tlist);
104-
spin_unlock_irq(&catas_lock);
105-
106-
list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
107-
struct pci_dev *pdev = priv->dev.persist->pdev;
108-
109-
/* If the device is off-line, we cannot reset it */
110-
if (pci_channel_offline(pdev))
111-
continue;
94+
/* If the device is off-line, we cannot reset it */
95+
if (pci_channel_offline(pdev))
96+
return;
11297

113-
ret = mlx4_restart_one(priv->dev.persist->pdev);
114-
/* 'priv' now is not valid */
115-
if (ret)
116-
pr_err("mlx4 %s: Reset failed (%d)\n",
117-
pci_name(pdev), ret);
118-
else {
119-
persist = pci_get_drvdata(pdev);
120-
mlx4_dbg(persist->dev, "Reset succeeded\n");
121-
}
122-
}
98+
ret = mlx4_restart_one(pdev);
99+
/* 'priv' now is not valid */
100+
if (ret)
101+
pr_err("mlx4 %s: Reset failed (%d)\n",
102+
pci_name(pdev), ret);
103+
else
104+
mlx4_dbg(persist->dev, "Reset succeeded\n");
123105
}
124106

125107
void mlx4_start_catas_poll(struct mlx4_dev *dev)
@@ -158,15 +140,26 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
158140

159141
del_timer_sync(&priv->catas_err.timer);
160142

161-
if (priv->catas_err.map)
143+
if (priv->catas_err.map) {
162144
iounmap(priv->catas_err.map);
145+
priv->catas_err.map = NULL;
146+
}
147+
}
163148

164-
spin_lock_irq(&catas_lock);
165-
list_del(&priv->catas_err.list);
166-
spin_unlock_irq(&catas_lock);
149+
int mlx4_catas_init(struct mlx4_dev *dev)
150+
{
151+
INIT_WORK(&dev->persist->catas_work, catas_reset);
152+
dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health");
153+
if (!dev->persist->catas_wq)
154+
return -ENOMEM;
155+
156+
return 0;
167157
}
168158

169-
void __init mlx4_catas_init(void)
159+
void mlx4_catas_end(struct mlx4_dev *dev)
170160
{
171-
INIT_WORK(&catas_work, catas_reset);
161+
if (dev->persist->catas_wq) {
162+
destroy_workqueue(dev->persist->catas_wq);
163+
dev->persist->catas_wq = NULL;
164+
}
172165
}

drivers/net/ethernet/mellanox/mlx4/main.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3064,11 +3064,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
30643064
}
30653065
}
30663066

3067-
err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
3067+
err = mlx4_catas_init(&priv->dev);
30683068
if (err)
30693069
goto err_release_regions;
3070+
3071+
err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
3072+
if (err)
3073+
goto err_catas;
3074+
30703075
return 0;
30713076

3077+
err_catas:
3078+
mlx4_catas_end(&priv->dev);
3079+
30723080
err_release_regions:
30733081
pci_release_regions(pdev);
30743082

@@ -3219,6 +3227,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
32193227
struct mlx4_priv *priv = mlx4_priv(dev);
32203228

32213229
mlx4_unload_one(pdev);
3230+
mlx4_catas_end(dev);
32223231
pci_release_regions(pdev);
32233232
pci_disable_device(pdev);
32243233
kfree(dev->persist);
@@ -3403,7 +3412,6 @@ static int __init mlx4_init(void)
34033412
if (mlx4_verify_params())
34043413
return -EINVAL;
34053414

3406-
mlx4_catas_init();
34073415

34083416
mlx4_wq = create_singlethread_workqueue("mlx4");
34093417
if (!mlx4_wq)

drivers/net/ethernet/mellanox/mlx4/mlx4.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -995,7 +995,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
995995

996996
void mlx4_start_catas_poll(struct mlx4_dev *dev);
997997
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
998-
void mlx4_catas_init(void);
998+
int mlx4_catas_init(struct mlx4_dev *dev);
999+
void mlx4_catas_end(struct mlx4_dev *dev);
9991000
int mlx4_restart_one(struct pci_dev *pdev);
10001001
int mlx4_register_device(struct mlx4_dev *dev);
10011002
void mlx4_unregister_device(struct mlx4_dev *dev);

include/linux/mlx4/device.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,8 @@ struct mlx4_dev_persistent {
751751
int num_vfs;
752752
enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1];
753753
enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1];
754+
struct work_struct catas_work;
755+
struct workqueue_struct *catas_wq;
754756
};
755757

756758
struct mlx4_dev {

0 commit comments

Comments
 (0)