Skip to content

Commit ee45fd9

Browse files
jco-xilinx, Ben Hutchings
authored and committed
sfc: Use TX PIO for sufficiently small packets
Sufficiently small linear packets can be copied into the PIO buffer with a single call to memcpy_toio(). Non-linear packets require an intermediate cache-line-sized buffer. [bwh: I wrote the first version of this, but Jon did the hard work to handle non-linear packets.] Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
1 parent 0fe5565 commit ee45fd9

File tree

4 files changed

+155
-0
lines changed

4 files changed

+155
-0
lines changed

drivers/net/ethernet/sfc/ef10_regs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@
315315
#define ESF_DZ_TX_PIO_TYPE_WIDTH 1
316316
#define ESF_DZ_TX_PIO_OPT_LBN 60
317317
#define ESF_DZ_TX_PIO_OPT_WIDTH 3
318+
#define ESE_DZ_TX_OPTION_DESC_PIO 1
318319
#define ESF_DZ_TX_PIO_CONT_LBN 59
319320
#define ESF_DZ_TX_PIO_CONT_WIDTH 1
320321
#define ESF_DZ_TX_PIO_BYTE_CNT_LBN 32

drivers/net/ethernet/sfc/ethtool.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
7070
EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
7171
EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
7272
EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
73+
EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
7374
EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
7475
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
7576
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),

drivers/net/ethernet/sfc/net_driver.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ struct efx_tx_buffer {
212212
* blocks
213213
* @tso_packets: Number of packets via the TSO xmit path
214214
* @pushes: Number of times the TX push feature has been used
215+
* @pio_packets: Number of times the TX PIO feature has been used
215216
* @empty_read_count: If the completion path has seen the queue as empty
216217
* and the transmission path has not yet checked this, the value of
217218
* @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
@@ -243,6 +244,7 @@ struct efx_tx_queue {
243244
unsigned int tso_long_headers;
244245
unsigned int tso_packets;
245246
unsigned int pushes;
247+
unsigned int pio_packets;
246248

247249
/* Members shared between paths and sometimes updated */
248250
unsigned int empty_read_count ____cacheline_aligned_in_smp;

drivers/net/ethernet/sfc/tx.c

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,145 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
183183
}
184184
}
185185

186+
#ifdef EFX_USE_PIO
187+
188+
/* Staging buffer used by the PIO copy helpers: holds bytes that have been
 * taken from the packet but not yet written to the PIO buffer.
 */
struct efx_short_copy_buffer {
189+
/* Number of bytes currently held at the start of @buf */
int used;
190+
/* Pending bytes; the helpers always write this array out in full */
u8 buf[L1_CACHE_BYTES];
191+
};
192+
193+
/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
194+
* Advances piobuf pointer. Leaves additional data in the copy buffer.
*
* @efx: not referenced by this function
* @piobuf: address of the destination cursor; advanced by the number of
*	bytes written directly to the PIO buffer
* @data: source bytes
* @len: number of source bytes
* @copy_buf: receives the trailing bytes that do not fill a whole
*	copy-buffer-sized block; must be empty on entry if there are any
*
* Note that the granularity actually used here is sizeof(copy_buf->buf),
* not a dword.
195+
*/
196+
static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
197+
u8 *data, int len,
198+
struct efx_short_copy_buffer *copy_buf)
199+
{
200+
/* Round len down to a whole number of copy-buffer-sized blocks; the
 * mask relies on the buffer size being a power of two.
 */
int block_len = len & ~(sizeof(copy_buf->buf) - 1);
201+
202+
/* Write the whole blocks straight to the PIO buffer */
memcpy_toio(*piobuf, data, block_len);
203+
*piobuf += block_len;
204+
len -= block_len;
205+
206+
/* Stash whatever is left over for a later write */
if (len) {
207+
data += block_len;
208+
/* Stashing would overwrite bytes that are still pending */
BUG_ON(copy_buf->used);
209+
/* Sanity check: the remainder after masking is below the buffer size */
BUG_ON(len > sizeof(copy_buf->buf));
210+
memcpy(copy_buf->buf, data, len);
211+
copy_buf->used = len;
212+
}
213+
}
214+
215+
/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
216+
* Advances piobuf pointer. Leaves additional data in the copy buffer.
*
* @efx: passed through to efx_memcpy_toio_aligned()
* @piobuf: address of the destination cursor; advanced by the number of
*	bytes written to the PIO buffer
* @data: source bytes
* @len: number of source bytes
* @copy_buf: may hold pending bytes on entry; holds any unwritten tail
*	on return
217+
*/
218+
static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
219+
u8 *data, int len,
220+
struct efx_short_copy_buffer *copy_buf)
221+
{
222+
if (copy_buf->used) {
223+
/* if the copy buffer is partially full, fill it up and write */
224+
/* Take only as much as fits in the free space, or all of len */
int copy_to_buf =
225+
min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
226+
227+
memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
228+
copy_buf->used += copy_to_buf;
229+
230+
/* if we didn't fill it up then we're done for now */
231+
if (copy_buf->used < sizeof(copy_buf->buf))
232+
return;
233+
234+
/* The copy buffer is now full: write it out and consume the
 * source bytes that went into it.
 */
memcpy_toio(*piobuf, copy_buf->buf, sizeof(copy_buf->buf));
235+
*piobuf += sizeof(copy_buf->buf);
236+
data += copy_to_buf;
237+
len -= copy_to_buf;
238+
copy_buf->used = 0;
239+
}
240+
241+
/* The copy buffer is empty here; handle the rest of the source */
efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
242+
}
243+
244+
/* Write out any bytes still pending in the copy buffer.
 *
 * @efx: not referenced by this function
 * @piobuf: destination for the write; passed by value, so the caller's
 *	cursor is not advanced
 * @copy_buf: copy buffer to flush; its @used count is not reset here
 */
static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
245+
struct efx_short_copy_buffer *copy_buf)
246+
{
247+
/* if there's anything in it, write the whole buffer, including junk */
248+
if (copy_buf->used)
249+
memcpy_toio(piobuf, copy_buf->buf, sizeof(copy_buf->buf));
250+
}
251+
252+
/* Traverse skb structure and copy fragments into PIO buffer.
253+
* Advances piobuf pointer.
*
* @efx: passed through to the copy helpers
* @skb: packet to copy; the linear head is copied first, then each
*	page fragment in order
* @piobuf: address of the destination cursor
* @copy_buf: expected to be empty on entry (the head copy goes through
*	efx_memcpy_toio_aligned()); may hold an unwritten tail on return,
*	which the caller is responsible for flushing
254+
*/
255+
static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
256+
u8 __iomem **piobuf,
257+
struct efx_short_copy_buffer *copy_buf)
258+
{
259+
int i;
260+
261+
/* Linear part of the skb */
efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
262+
copy_buf);
263+
264+
for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
265+
skb_frag_t *f = &skb_shinfo(skb)->frags[i];
266+
u8 *vaddr;
267+
268+
/* Map the fragment's page so its bytes can be read by the CPU */
vaddr = kmap_atomic(skb_frag_page(f));
269+
270+
/* The _cb variant tops up any tail left by the previous piece */
efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
271+
skb_frag_size(f), copy_buf);
272+
kunmap_atomic(vaddr);
273+
}
274+
275+
/* Only the head and frags[] are copied; a frag_list is not handled */
EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list);
276+
}
277+
278+
/* Queue a packet for transmission via the queue's PIO buffer.
 *
 * Copies the packet data to tx_queue->piobuf, fills in a PIO option
 * descriptor in the next insert buffer, and bumps the pio_packets and
 * insert_count counters.
 *
 * Returns the TX buffer holding the option descriptor. This function does
 * not set buffer->skb or buffer->flags.
 */
static struct efx_tx_buffer *
279+
efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
280+
{
281+
struct efx_tx_buffer *buffer =
282+
efx_tx_queue_get_insert_buffer(tx_queue);
283+
/* Local write cursor; advanced by the non-linear copy path only */
u8 __iomem *piobuf = tx_queue->piobuf;
284+
285+
/* Copy to PIO buffer. Ensure the writes are padded to the end
286+
* of a cache line, as this is required for write-combining to be
287+
* effective on at least x86.
*
* NOTE(review): the access width used by memcpy_toio() is
* architecture-dependent - confirm the PIO aperture accepts
* whatever widths it generates.
288+
*/
289+
290+
if (skb_shinfo(skb)->nr_frags) {
291+
/* The size of the copy buffer will ensure all writes
292+
* are the size of a cache line.
293+
*/
294+
struct efx_short_copy_buffer copy_buf;
295+
296+
/* Only the count is initialised; buf[] keeps whatever was on
 * the stack, which is the "junk" the final flush may write.
 */
copy_buf.used = 0;
297+
298+
efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
299+
&piobuf, &copy_buf);
300+
efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
301+
} else {
302+
/* Pad the write to the size of a cache line.
303+
* We can do this because we know the skb_shared_info struct is
304+
* after the source, and the destination buffer is big enough.
305+
*/
306+
BUILD_BUG_ON(L1_CACHE_BYTES >
307+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
308+
/* Reads up to L1_CACHE_BYTES - 1 bytes past skb->len */
memcpy_toio(tx_queue->piobuf, skb->data,
309+
ALIGN(skb->len, L1_CACHE_BYTES));
310+
}
311+
312+
/* The descriptor carries the real packet length (skb->len), not the
 * padded length that was written above.
 */
EFX_POPULATE_QWORD_5(buffer->option,
313+
ESF_DZ_TX_DESC_IS_OPT, 1,
314+
ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
315+
ESF_DZ_TX_PIO_CONT, 0,
316+
ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
317+
ESF_DZ_TX_PIO_BUF_ADDR,
318+
tx_queue->piobuf_offset);
319+
++tx_queue->pio_packets;
320+
++tx_queue->insert_count;
321+
return buffer;
322+
}
323+
#endif /* EFX_USE_PIO */
324+
186325
/*
187326
* Add a socket buffer to a TX queue
188327
*
@@ -227,6 +366,17 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
227366
return NETDEV_TX_OK;
228367
}
229368

369+
/* Consider using PIO for short packets */
370+
#ifdef EFX_USE_PIO
371+
if (skb->len <= efx_piobuf_size && tx_queue->piobuf &&
372+
efx_nic_tx_is_empty(tx_queue) &&
373+
efx_nic_tx_is_empty(efx_tx_queue_partner(tx_queue))) {
374+
buffer = efx_enqueue_skb_pio(tx_queue, skb);
375+
dma_flags = EFX_TX_BUF_OPTION;
376+
goto finish_packet;
377+
}
378+
#endif
379+
230380
/* Map for DMA. Use dma_map_single rather than dma_map_page
231381
* since this is more efficient on machines with sparse
232382
* memory.
@@ -279,6 +429,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
279429
}
280430

281431
/* Transfer ownership of the skb to the final buffer */
432+
finish_packet:
282433
buffer->skb = skb;
283434
buffer->flags = EFX_TX_BUF_SKB | dma_flags;
284435

0 commit comments

Comments
 (0)