Skip to content

Commit cd2d5b5

Browse files
author
Ben Hutchings
committed
sfc: Add SR-IOV back-end support for SFC9000 family
On the SFC9000 family, each port has 1024 Virtual Interfaces (VIs), each with an RX queue, a TX queue, an event queue and a mailbox register. These may be assigned to up to 127 SR-IOV virtual functions per port, with up to 64 VIs per VF.

We allocate an extra channel (IRQ and event queue only) to receive requests from VF drivers.

There is a per-port limit of 4 concurrent RX queue flushes, and queue flushes may be initiated by the MC in response to a Function Level Reset (FLR) of a VF. Therefore, when SR-IOV is in use, we submit all flush requests via the MC.

The RSS indirection table is shared with VFs, so the number of RX queues used in the PF is limited to the number of VIs per VF.

This is almost entirely the work of Steve Hodgson, formerly shodgson@solarflare.com.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
1 parent 28e47c4 commit cd2d5b5

File tree

13 files changed

+2192
-26
lines changed

13 files changed

+2192
-26
lines changed

drivers/net/ethernet/sfc/Kconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,11 @@ config SFC_MCDI_MON
2626
----help---
2727
This exposes the on-board firmware-managed sensors as a
2828
hardware monitor device.
29+
config SFC_SRIOV
30+
bool "Solarflare SFC9000-family SR-IOV support"
31+
depends on SFC && PCI_IOV
32+
default y
33+
---help---
34+
This enables support for the SFC9000 I/O Virtualization
35+
features, allowing accelerated network performance in
36+
virtualized environments.

drivers/net/ethernet/sfc/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \
44
tenxpress.o txc43128_phy.o falcon_boards.o \
55
mcdi.o mcdi_phy.o mcdi_mon.o
66
sfc-$(CONFIG_SFC_MTD) += mtd.o
7+
sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o
78

89
obj-$(CONFIG_SFC) += sfc.o

drivers/net/ethernet/sfc/efx.c

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,25 +1175,40 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
11751175
unsigned int count;
11761176
int cpu;
11771177

1178-
if (rss_cpus)
1179-
return rss_cpus;
1178+
if (rss_cpus) {
1179+
count = rss_cpus;
1180+
} else {
1181+
if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
1182+
netif_warn(efx, probe, efx->net_dev,
1183+
"RSS disabled due to allocation failure\n");
1184+
return 1;
1185+
}
11801186

1181-
if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
1182-
netif_warn(efx, probe, efx->net_dev,
1183-
"RSS disabled due to allocation failure\n");
1184-
return 1;
1187+
count = 0;
1188+
for_each_online_cpu(cpu) {
1189+
if (!cpumask_test_cpu(cpu, thread_mask)) {
1190+
++count;
1191+
cpumask_or(thread_mask, thread_mask,
1192+
topology_thread_cpumask(cpu));
1193+
}
1194+
}
1195+
1196+
free_cpumask_var(thread_mask);
11851197
}
11861198

1187-
count = 0;
1188-
for_each_online_cpu(cpu) {
1189-
if (!cpumask_test_cpu(cpu, thread_mask)) {
1190-
++count;
1191-
cpumask_or(thread_mask, thread_mask,
1192-
topology_thread_cpumask(cpu));
1193-
}
1199+
/* If RSS is requested for the PF *and* VFs then we can't write RSS
1200+
* table entries that are inaccessible to VFs
1201+
*/
1202+
if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
1203+
count > efx_vf_size(efx)) {
1204+
netif_warn(efx, probe, efx->net_dev,
1205+
"Reducing number of RSS channels from %u to %u for "
1206+
"VF support. Increase vf-msix-limit to use more "
1207+
"channels on the PF.\n",
1208+
count, efx_vf_size(efx));
1209+
count = efx_vf_size(efx);
11941210
}
11951211

1196-
free_cpumask_var(thread_mask);
11971212
return count;
11981213
}
11991214

@@ -1327,6 +1342,10 @@ static int efx_probe_interrupts(struct efx_nic *efx)
13271342
}
13281343
}
13291344

1345+
/* RSS might be usable on VFs even if it is disabled on the PF */
1346+
efx->rss_spread = (efx->n_rx_channels > 1 ?
1347+
efx->n_rx_channels : efx_vf_size(efx));
1348+
13301349
return 0;
13311350
}
13321351

@@ -1426,7 +1445,7 @@ static int efx_probe_nic(struct efx_nic *efx)
14261445
get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
14271446
for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
14281447
efx->rx_indir_table[i] =
1429-
ethtool_rxfh_indir_default(i, efx->n_rx_channels);
1448+
ethtool_rxfh_indir_default(i, efx->rss_spread);
14301449

14311450
efx_set_channels(efx);
14321451
netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
@@ -1915,6 +1934,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
19151934
}
19161935

19171936
memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);
1937+
efx_sriov_mac_address_changed(efx);
19181938

19191939
/* Reconfigure the MAC */
19201940
mutex_lock(&efx->mac_lock);
@@ -1981,6 +2001,12 @@ static const struct net_device_ops efx_netdev_ops = {
19812001
.ndo_set_mac_address = efx_set_mac_address,
19822002
.ndo_set_rx_mode = efx_set_rx_mode,
19832003
.ndo_set_features = efx_set_features,
2004+
#ifdef CONFIG_SFC_SRIOV
2005+
.ndo_set_vf_mac = efx_sriov_set_vf_mac,
2006+
.ndo_set_vf_vlan = efx_sriov_set_vf_vlan,
2007+
.ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk,
2008+
.ndo_get_vf_config = efx_sriov_get_vf_config,
2009+
#endif
19842010
#ifdef CONFIG_NET_POLL_CONTROLLER
19852011
.ndo_poll_controller = efx_netpoll,
19862012
#endif
@@ -2150,6 +2176,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
21502176

21512177
efx_start_interrupts(efx, false);
21522178
efx_restore_filters(efx);
2179+
efx_sriov_reset(efx);
21532180

21542181
mutex_unlock(&efx->mac_lock);
21552182

@@ -2440,6 +2467,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
24402467
rtnl_unlock();
24412468

24422469
efx_stop_interrupts(efx, false);
2470+
efx_sriov_fini(efx);
24432471
efx_unregister_netdev(efx);
24442472

24452473
efx_mtd_remove(efx);
@@ -2581,6 +2609,11 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
25812609
if (rc)
25822610
goto fail4;
25832611

2612+
rc = efx_sriov_init(efx);
2613+
if (rc)
2614+
netif_err(efx, probe, efx->net_dev,
2615+
"SR-IOV can't be enabled rc %d\n", rc);
2616+
25842617
netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
25852618

25862619
/* Try to create MTDs, but allow this to fail */
@@ -2732,6 +2765,10 @@ static int __init efx_init_module(void)
27322765
if (rc)
27332766
goto err_notifier;
27342767

2768+
rc = efx_init_sriov();
2769+
if (rc)
2770+
goto err_sriov;
2771+
27352772
reset_workqueue = create_singlethread_workqueue("sfc_reset");
27362773
if (!reset_workqueue) {
27372774
rc = -ENOMEM;
@@ -2747,6 +2784,8 @@ static int __init efx_init_module(void)
27472784
err_pci:
27482785
destroy_workqueue(reset_workqueue);
27492786
err_reset:
2787+
efx_fini_sriov();
2788+
err_sriov:
27502789
unregister_netdevice_notifier(&efx_netdev_notifier);
27512790
err_notifier:
27522791
return rc;
@@ -2758,6 +2797,7 @@ static void __exit efx_exit_module(void)
27582797

27592798
pci_unregister_driver(&efx_pci_driver);
27602799
destroy_workqueue(reset_workqueue);
2800+
efx_fini_sriov();
27612801
unregister_netdevice_notifier(&efx_netdev_notifier);
27622802

27632803
}

drivers/net/ethernet/sfc/ethtool.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,8 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
10851085
{
10861086
struct efx_nic *efx = netdev_priv(net_dev);
10871087

1088-
return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
1088+
return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
1089+
efx->n_rx_channels == 1) ?
10891090
0 : ARRAY_SIZE(efx->rx_indir_table));
10901091
}
10911092

drivers/net/ethernet/sfc/mcdi.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
560560
case MCDI_EVENT_CODE_MAC_STATS_DMA:
561561
/* MAC stats are gathered lazily. We can ignore this. */
562562
break;
563+
case MCDI_EVENT_CODE_FLR:
564+
efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF));
565+
break;
563566

564567
default:
565568
netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
@@ -1154,6 +1157,37 @@ int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id)
11541157
return rc;
11551158
}
11561159

1160+
int efx_mcdi_flush_rxqs(struct efx_nic *efx)
1161+
{
1162+
struct efx_channel *channel;
1163+
struct efx_rx_queue *rx_queue;
1164+
__le32 *qid;
1165+
int rc, count;
1166+
1167+
qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL);
1168+
if (qid == NULL)
1169+
return -ENOMEM;
1170+
1171+
count = 0;
1172+
efx_for_each_channel(channel, efx) {
1173+
efx_for_each_channel_rx_queue(rx_queue, channel) {
1174+
if (rx_queue->flush_pending) {
1175+
rx_queue->flush_pending = false;
1176+
atomic_dec(&efx->rxq_flush_pending);
1177+
qid[count++] = cpu_to_le32(
1178+
efx_rx_queue_index(rx_queue));
1179+
}
1180+
}
1181+
}
1182+
1183+
rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)qid,
1184+
count * sizeof(*qid), NULL, 0, NULL);
1185+
WARN_ON(rc > 0);
1186+
1187+
kfree(qid);
1188+
1189+
return rc;
1190+
}
11571191

11581192
int efx_mcdi_wol_filter_reset(struct efx_nic *efx)
11591193
{

drivers/net/ethernet/sfc/mcdi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ extern int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx,
146146
extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
147147
extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id);
148148
extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx);
149+
extern int efx_mcdi_flush_rxqs(struct efx_nic *efx);
150+
extern int efx_mcdi_set_mac(struct efx_nic *efx);
149151
extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr,
150152
u32 dma_len, int enable, int clear);
151153
extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx);

drivers/net/ethernet/sfc/mcdi_mac.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include "mcdi.h"
1313
#include "mcdi_pcol.h"
1414

15-
static int efx_mcdi_set_mac(struct efx_nic *efx)
15+
int efx_mcdi_set_mac(struct efx_nic *efx)
1616
{
1717
u32 reject, fcntl;
1818
u8 cmdbytes[MC_CMD_SET_MAC_IN_LEN];

drivers/net/ethernet/sfc/net_driver.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/device.h>
2525
#include <linux/highmem.h>
2626
#include <linux/workqueue.h>
27+
#include <linux/mutex.h>
2728
#include <linux/vmalloc.h>
2829
#include <linux/i2c.h>
2930

@@ -54,7 +55,8 @@
5455

5556
#define EFX_MAX_CHANNELS 32U
5657
#define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
57-
#define EFX_MAX_EXTRA_CHANNELS 0U
58+
#define EFX_EXTRA_CHANNEL_IOV 0
59+
#define EFX_MAX_EXTRA_CHANNELS 1U
5860

5961
/* Checksum generation is a per-queue option in hardware, so each
6062
* queue visible to the networking core is backed by two hardware TX
@@ -629,6 +631,8 @@ union efx_multicast_hash {
629631
};
630632

631633
struct efx_filter_state;
634+
struct efx_vf;
635+
struct vfdi_status;
632636

633637
/**
634638
* struct efx_nic - an Efx NIC
@@ -712,6 +716,17 @@ struct efx_filter_state;
712716
* completed (either success or failure). Not used when MCDI is used to
713717
* flush receive queues.
714718
* @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions.
719+
* @vf: Array of &struct efx_vf objects.
720+
* @vf_count: Number of VFs intended to be enabled.
721+
* @vf_init_count: Number of VFs that have been fully initialised.
722+
* @vi_scale: log2 number of vnics per VF.
723+
* @vf_buftbl_base: The zeroth buffer table index used to back VF queues.
724+
* @vfdi_status: Common VFDI status page to be DMAed to VF address space.
725+
* @local_addr_list: List of local addresses. Protected by %local_lock.
726+
* @local_page_list: List of DMA addressable pages used to broadcast
727+
* %local_addr_list. Protected by %local_lock.
728+
* @local_lock: Mutex protecting %local_addr_list and %local_page_list.
729+
* @peer_work: Work item to broadcast peer addresses to VMs.
715730
* @monitor_work: Hardware monitor workitem
716731
* @biu_lock: BIU (bus interface unit) lock
717732
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This
@@ -762,6 +777,7 @@ struct efx_nic {
762777
unsigned next_buffer_table;
763778
unsigned n_channels;
764779
unsigned n_rx_channels;
780+
unsigned rss_spread;
765781
unsigned tx_channel_offset;
766782
unsigned n_tx_channels;
767783
unsigned int rx_buffer_len;
@@ -820,6 +836,20 @@ struct efx_nic {
820836
atomic_t rxq_flush_outstanding;
821837
wait_queue_head_t flush_wq;
822838

839+
#ifdef CONFIG_SFC_SRIOV
840+
struct efx_channel *vfdi_channel;
841+
struct efx_vf *vf;
842+
unsigned vf_count;
843+
unsigned vf_init_count;
844+
unsigned vi_scale;
845+
unsigned vf_buftbl_base;
846+
struct efx_buffer vfdi_status;
847+
struct list_head local_addr_list;
848+
struct list_head local_page_list;
849+
struct mutex local_lock;
850+
struct work_struct peer_work;
851+
#endif
852+
823853
/* The following fields may be written more often */
824854

825855
struct delayed_work monitor_work ____cacheline_aligned_in_smp;

0 commit comments

Comments
 (0)