@@ -183,6 +183,145 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
	}
}

+#ifdef EFX_USE_PIO
+
+struct efx_short_copy_buffer {
+	int used;
+	u8 buf[L1_CACHE_BYTES];
+};
+
+/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
+				    u8 *data, int len,
+				    struct efx_short_copy_buffer *copy_buf)
+{
+	int block_len = len & ~(sizeof(copy_buf->buf) - 1);
+
+	memcpy_toio(*piobuf, data, block_len);
+	*piobuf += block_len;
+	len -= block_len;
+
+	if (len) {
+		data += block_len;
+		BUG_ON(copy_buf->used);
+		BUG_ON(len > sizeof(copy_buf->buf));
+		memcpy(copy_buf->buf, data, len);
+		copy_buf->used = len;
+	}
+}
+
+/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
+				       u8 *data, int len,
+				       struct efx_short_copy_buffer *copy_buf)
+{
+	if (copy_buf->used) {
+		/* if the copy buffer is partially full, fill it up and write */
+		int copy_to_buf =
+			min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
+
+		memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
+		copy_buf->used += copy_to_buf;
+
+		/* if we didn't fill it up then we're done for now */
+		if (copy_buf->used < sizeof(copy_buf->buf))
+			return;
+
+		memcpy_toio(*piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+		*piobuf += sizeof(copy_buf->buf);
+		data += copy_to_buf;
+		len -= copy_to_buf;
+		copy_buf->used = 0;
+	}
+
+	efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
+}
+
+static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
+				  struct efx_short_copy_buffer *copy_buf)
+{
+	/* if there's anything in it, write the whole buffer, including junk */
+	if (copy_buf->used)
+		memcpy_toio(piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+}
+
+/* Traverse skb structure and copy fragments into the PIO buffer.
+ * Advances piobuf pointer.
+ */
+static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
+				     u8 __iomem **piobuf,
+				     struct efx_short_copy_buffer *copy_buf)
+{
+	int i;
+
+	efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
+				copy_buf);
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+		u8 *vaddr;
+
+		vaddr = kmap_atomic(skb_frag_page(f));
+
+		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
+					   skb_frag_size(f), copy_buf);
+		kunmap_atomic(vaddr);
+	}
+
+	EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list);
+}
+
+static struct efx_tx_buffer *
+efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+	struct efx_tx_buffer *buffer =
+		efx_tx_queue_get_insert_buffer(tx_queue);
+	u8 __iomem *piobuf = tx_queue->piobuf;
+
+	/* Copy to PIO buffer. Ensure the writes are padded to the end
+	 * of a cache line, as this is required for write-combining to be
+	 * effective on at least x86.
+	 */
+
+	if (skb_shinfo(skb)->nr_frags) {
+		/* The size of the copy buffer will ensure all writes
+		 * are the size of a cache line.
+		 */
+		struct efx_short_copy_buffer copy_buf;
+
+		copy_buf.used = 0;
+
+		efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
+					 &piobuf, &copy_buf);
+		efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
+	} else {
+		/* Pad the write to the size of a cache line.
+		 * We can do this because we know the skb_shared_info struct is
+		 * after the source, and the destination buffer is big enough.
+		 */
+		BUILD_BUG_ON(L1_CACHE_BYTES >
+			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+		memcpy_toio(tx_queue->piobuf, skb->data,
+			    ALIGN(skb->len, L1_CACHE_BYTES));
+	}
+
+	EFX_POPULATE_QWORD_5(buffer->option,
+			     ESF_DZ_TX_DESC_IS_OPT, 1,
+			     ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
+			     ESF_DZ_TX_PIO_CONT, 0,
+			     ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
+			     ESF_DZ_TX_PIO_BUF_ADDR,
+			     tx_queue->piobuf_offset);
+	++tx_queue->pio_packets;
+	++tx_queue->insert_count;
+	return buffer;
+}
+#endif /* EFX_USE_PIO */
+
/*
 * Add a socket buffer to a TX queue
 *
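
The helpers added in this hunk split every copy into two parts: whole cache-line-sized blocks that go straight to the write-combined PIO aperture with memcpy_toio(), and a short tail that is parked in an efx_short_copy_buffer until the next fragment tops it up or efx_flush_copy_buffer() pushes it out, junk bytes and all. The sketch below models that splitting in plain userspace C, with an ordinary memory region standing in for the PIO aperture and CHUNK standing in for L1_CACHE_BYTES; the names and the main() harness are illustrative only, not part of the driver.

/* Minimal userspace sketch of the aligned copy-buffer idea above.
 * A plain memory region stands in for the write-combined PIO aperture,
 * and CHUNK stands in for L1_CACHE_BYTES; none of these names are part
 * of the sfc driver.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define CHUNK 64	/* stand-in for L1_CACHE_BYTES */

struct short_copy_buffer {
	int used;
	unsigned char buf[CHUNK];
};

/* Write only whole CHUNK-sized blocks; park the remainder in copy_buf. */
static void copy_aligned(unsigned char **dest, const unsigned char *data,
			 int len, struct short_copy_buffer *copy_buf)
{
	int block_len = len & ~(CHUNK - 1);

	memcpy(*dest, data, block_len);	/* memcpy_toio() in the driver */
	*dest += block_len;
	len -= block_len;

	if (len) {
		assert(copy_buf->used == 0);
		memcpy(copy_buf->buf, data + block_len, len);
		copy_buf->used = len;
	}
}

/* Drain a partially filled copy buffer first, then fall back to copy_aligned(). */
static void copy_aligned_cb(unsigned char **dest, const unsigned char *data,
			    int len, struct short_copy_buffer *copy_buf)
{
	if (copy_buf->used) {
		int take = CHUNK - copy_buf->used;

		if (take > len)
			take = len;
		memcpy(copy_buf->buf + copy_buf->used, data, take);
		copy_buf->used += take;
		if (copy_buf->used < CHUNK)
			return;		/* still not a full block */
		memcpy(*dest, copy_buf->buf, CHUNK);
		*dest += CHUNK;
		data += take;
		len -= take;
		copy_buf->used = 0;
	}
	copy_aligned(dest, data, len, copy_buf);
}

int main(void)
{
	unsigned char pio[4 * CHUNK] = { 0 };	/* stand-in for the PIO buffer */
	unsigned char a[100], b[60];
	struct short_copy_buffer cb = { 0 };
	unsigned char *p = pio;

	memset(a, 'A', sizeof(a));
	memset(b, 'B', sizeof(b));

	copy_aligned(&p, a, sizeof(a), &cb);	/* 64 bytes written, 36 parked */
	copy_aligned_cb(&p, b, sizeof(b), &cb);	/* 36+28 flushed, 32 parked */
	if (cb.used)				/* final flush, junk included */
		memcpy(p, cb.buf, CHUNK);

	printf("first byte of second block: %c\n", pio[CHUNK]);	/* 'A' */
	return 0;
}

The invariant this preserves is that every write to the destination is a whole multiple of CHUNK, which in the driver is what keeps the write-combining buffers flushing as full cache lines.
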
@@ -227,6 +366,17 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
			return NETDEV_TX_OK;
	}

+	/* Consider using PIO for short packets */
+#ifdef EFX_USE_PIO
+	if (skb->len <= efx_piobuf_size && tx_queue->piobuf &&
+	    efx_nic_tx_is_empty(tx_queue) &&
+	    efx_nic_tx_is_empty(efx_tx_queue_partner(tx_queue))) {
+		buffer = efx_enqueue_skb_pio(tx_queue, skb);
+		dma_flags = EFX_TX_BUF_OPTION;
+		goto finish_packet;
+	}
+#endif
+
	/* Map for DMA. Use dma_map_single rather than dma_map_page
	 * since this is more efficient on machines with sparse
	 * memory.
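
The gate added here only tries PIO when the whole packet fits in the per-queue PIO buffer (skb->len <= efx_piobuf_size), the queue actually has a PIO buffer mapped, and both this queue and its partner look empty. A stand-alone sketch of that kind of gate follows; the struct, the counters used to decide "empty", and the threshold field are stand-ins invented for illustration, and the real efx_nic_tx_is_empty() check is implemented differently in the driver.

/* Stand-alone sketch of the PIO eligibility gate above. All names here
 * (fake_txq, pio_threshold, has_piobuf) are invented for illustration;
 * "empty" is approximated as "every inserted descriptor already written",
 * which is not literally how efx_nic_tx_is_empty() decides it.
 */
#include <stdbool.h>
#include <stddef.h>

struct fake_txq {
	size_t pio_threshold;		/* stand-in for efx_piobuf_size */
	bool has_piobuf;		/* stand-in for tx_queue->piobuf != NULL */
	unsigned int insert_count;	/* descriptors queued by the stack */
	unsigned int write_count;	/* descriptors pushed to the NIC */
};

static bool txq_is_empty(const struct fake_txq *q)
{
	return q->insert_count == q->write_count;
}

/* Only attempt PIO for a short packet and an idle queue pair. */
static bool should_use_pio(const struct fake_txq *q,
			   const struct fake_txq *partner, size_t skb_len)
{
	return skb_len <= q->pio_threshold && q->has_piobuf &&
	       txq_is_empty(q) && txq_is_empty(partner);
}

int main(void)
{
	struct fake_txq q = { .pio_threshold = 256, .has_piobuf = true,
			      .insert_count = 5, .write_count = 5 };
	struct fake_txq partner = q;

	return should_use_pio(&q, &partner, 128) ? 0 : 1;	/* 0: PIO path taken */
}
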
@@ -279,6 +429,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
	}

	/* Transfer ownership of the skb to the final buffer */
+finish_packet:
	buffer->skb = skb;
	buffer->flags = EFX_TX_BUF_SKB | dma_flags;
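
The one-line hunk above gives the PIO branch its landing point: after building the option descriptor, the fast path jumps straight to the common tail that attaches the skb to the final buffer, skipping the DMA-mapping loop. One detail from the first hunk worth spelling out is the linear-skb case, where the copy length is simply rounded up to a whole cache line instead of going through the copy buffer. The arithmetic below illustrates that rounding; ALIGN_UP and CACHE_LINE are local stand-ins for the kernel's ALIGN() and L1_CACHE_BYTES, and the lengths are arbitrary examples.

/* Quick illustration of rounding a PIO write up to a whole cache line.
 * ALIGN_UP and CACHE_LINE are local stand-ins for the kernel's ALIGN()
 * and L1_CACHE_BYTES; the lengths below are just example values.
 */
#include <stdio.h>

#define CACHE_LINE 64
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int lens[] = { 60, 64, 65, 127, 200 };
	unsigned int i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("skb->len=%3u -> PIO copy of %3u bytes (%u byte(s) of padding)\n",
		       lens[i], ALIGN_UP(lens[i], CACHE_LINE),
		       ALIGN_UP(lens[i], CACHE_LINE) - lens[i]);
	return 0;
}

In the driver the padded read past skb->len is safe only because, as the code comment says, skb_shared_info sits after the linear data and the destination PIO buffer is big enough; that is what the BUILD_BUG_ON in efx_enqueue_skb_pio() is guarding.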