Skip to content

Commit 2f45d19

Browse files
Sergei Shtylyov authored and davem330 (David S. Miller) committed
ravb: minimize TX data copying
Renesas Ethernet AVB controller requires that all data are aligned on a 4-byte boundary. While it's easily achievable for the RX data with the help of skb_reserve() (we even align on a 128-byte boundary as recommended by the manual), we can't do the same with the TX data, and it always comes unaligned from the networking core. Originally we solved it in an easy way, copying the whole packet to a preallocated aligned buffer; however, it's enough to copy only up to the first 3 bytes of each packet, doing the transfer using 2 TX descriptors instead of just 1. Here's an implementation of the new TX algorithm that significantly reduces the driver's memory requirements. Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent c22995c commit 2f45d19

File tree

2 files changed

+64
-45
lines changed

2 files changed

+64
-45
lines changed

drivers/net/ethernet/renesas/ravb.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,8 @@ struct ravb_desc {
658658
__le32 dptr; /* Descriptor pointer */
659659
};
660660

661+
#define DPTR_ALIGN 4 /* Required descriptor pointer alignment */
662+
661663
enum DIE_DT {
662664
/* Frame data */
663665
DT_FMID = 0x40,
@@ -739,6 +741,7 @@ enum RAVB_QUEUE {
739741
#define RX_QUEUE_OFFSET 4
740742
#define NUM_RX_QUEUE 2
741743
#define NUM_TX_QUEUE 2
744+
#define NUM_TX_DESC 2 /* TX descriptors per packet */
742745

743746
struct ravb_tstamp_skb {
744747
struct list_head list;
@@ -777,9 +780,9 @@ struct ravb_private {
777780
dma_addr_t tx_desc_dma[NUM_TX_QUEUE];
778781
struct ravb_ex_rx_desc *rx_ring[NUM_RX_QUEUE];
779782
struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE];
783+
void *tx_align[NUM_TX_QUEUE];
780784
struct sk_buff **rx_skb[NUM_RX_QUEUE];
781785
struct sk_buff **tx_skb[NUM_TX_QUEUE];
782-
void **tx_buffers[NUM_TX_QUEUE];
783786
u32 rx_over_errors;
784787
u32 rx_fifo_errors;
785788
struct net_device_stats stats[NUM_RX_QUEUE];

drivers/net/ethernet/renesas/ravb_main.c

Lines changed: 60 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,8 @@ static void ravb_ring_free(struct net_device *ndev, int q)
195195
priv->tx_skb[q] = NULL;
196196

197197
/* Free aligned TX buffers */
198-
if (priv->tx_buffers[q]) {
199-
for (i = 0; i < priv->num_tx_ring[q]; i++)
200-
kfree(priv->tx_buffers[q][i]);
201-
}
202-
kfree(priv->tx_buffers[q]);
203-
priv->tx_buffers[q] = NULL;
198+
kfree(priv->tx_align[q]);
199+
priv->tx_align[q] = NULL;
204200

205201
if (priv->rx_ring[q]) {
206202
ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -212,7 +208,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
212208

213209
if (priv->tx_ring[q]) {
214210
ring_size = sizeof(struct ravb_tx_desc) *
215-
(priv->num_tx_ring[q] + 1);
211+
(priv->num_tx_ring[q] * NUM_TX_DESC + 1);
216212
dma_free_coherent(NULL, ring_size, priv->tx_ring[q],
217213
priv->tx_desc_dma[q]);
218214
priv->tx_ring[q] = NULL;
@@ -227,7 +223,8 @@ static void ravb_ring_format(struct net_device *ndev, int q)
227223
struct ravb_tx_desc *tx_desc;
228224
struct ravb_desc *desc;
229225
int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
230-
int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q];
226+
int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
227+
NUM_TX_DESC;
231228
dma_addr_t dma_addr;
232229
int i;
233230

@@ -260,11 +257,12 @@ static void ravb_ring_format(struct net_device *ndev, int q)
260257

261258
memset(priv->tx_ring[q], 0, tx_ring_size);
262259
/* Build TX ring buffer */
263-
for (i = 0; i < priv->num_tx_ring[q]; i++) {
264-
tx_desc = &priv->tx_ring[q][i];
260+
for (i = 0, tx_desc = priv->tx_ring[q]; i < priv->num_tx_ring[q];
261+
i++, tx_desc++) {
262+
tx_desc->die_dt = DT_EEMPTY;
263+
tx_desc++;
265264
tx_desc->die_dt = DT_EEMPTY;
266265
}
267-
tx_desc = &priv->tx_ring[q][i];
268266
tx_desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]);
269267
tx_desc->die_dt = DT_LINKFIX; /* type */
270268

@@ -285,7 +283,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
285283
struct ravb_private *priv = netdev_priv(ndev);
286284
struct sk_buff *skb;
287285
int ring_size;
288-
void *buffer;
289286
int i;
290287

291288
/* Allocate RX and TX skb rings */
@@ -305,19 +302,11 @@ static int ravb_ring_init(struct net_device *ndev, int q)
305302
}
306303

307304
/* Allocate rings for the aligned buffers */
308-
priv->tx_buffers[q] = kcalloc(priv->num_tx_ring[q],
309-
sizeof(*priv->tx_buffers[q]), GFP_KERNEL);
310-
if (!priv->tx_buffers[q])
305+
priv->tx_align[q] = kmalloc(DPTR_ALIGN * priv->num_tx_ring[q] +
306+
DPTR_ALIGN - 1, GFP_KERNEL);
307+
if (!priv->tx_align[q])
311308
goto error;
312309

313-
for (i = 0; i < priv->num_tx_ring[q]; i++) {
314-
buffer = kmalloc(PKT_BUF_SZ + RAVB_ALIGN - 1, GFP_KERNEL);
315-
if (!buffer)
316-
goto error;
317-
/* Aligned TX buffer */
318-
priv->tx_buffers[q][i] = buffer;
319-
}
320-
321310
/* Allocate all RX descriptors. */
322311
ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
323312
priv->rx_ring[q] = dma_alloc_coherent(NULL, ring_size,
@@ -329,7 +318,8 @@ static int ravb_ring_init(struct net_device *ndev, int q)
329318
priv->dirty_rx[q] = 0;
330319

331320
/* Allocate all TX descriptors. */
332-
ring_size = sizeof(struct ravb_tx_desc) * (priv->num_tx_ring[q] + 1);
321+
ring_size = sizeof(struct ravb_tx_desc) *
322+
(priv->num_tx_ring[q] * NUM_TX_DESC + 1);
333323
priv->tx_ring[q] = dma_alloc_coherent(NULL, ring_size,
334324
&priv->tx_desc_dma[q],
335325
GFP_KERNEL);
@@ -443,22 +433,27 @@ static int ravb_tx_free(struct net_device *ndev, int q)
443433
u32 size;
444434

445435
for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) {
446-
entry = priv->dirty_tx[q] % priv->num_tx_ring[q];
436+
entry = priv->dirty_tx[q] % (priv->num_tx_ring[q] *
437+
NUM_TX_DESC);
447438
desc = &priv->tx_ring[q][entry];
448439
if (desc->die_dt != DT_FEMPTY)
449440
break;
450441
/* Descriptor type must be checked before all other reads */
451442
dma_rmb();
452443
size = le16_to_cpu(desc->ds_tagl) & TX_DS;
453444
/* Free the original skb. */
454-
if (priv->tx_skb[q][entry]) {
445+
if (priv->tx_skb[q][entry / NUM_TX_DESC]) {
455446
dma_unmap_single(&ndev->dev, le32_to_cpu(desc->dptr),
456447
size, DMA_TO_DEVICE);
457-
dev_kfree_skb_any(priv->tx_skb[q][entry]);
458-
priv->tx_skb[q][entry] = NULL;
448+
/* Last packet descriptor? */
449+
if (entry % NUM_TX_DESC == NUM_TX_DESC - 1) {
450+
entry /= NUM_TX_DESC;
451+
dev_kfree_skb_any(priv->tx_skb[q][entry]);
452+
priv->tx_skb[q][entry] = NULL;
453+
stats->tx_packets++;
454+
}
459455
free_num++;
460456
}
461-
stats->tx_packets++;
462457
stats->tx_bytes += size;
463458
desc->die_dt = DT_EEMPTY;
464459
}
@@ -1284,37 +1279,53 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
12841279
u32 dma_addr;
12851280
void *buffer;
12861281
u32 entry;
1282+
u32 len;
12871283

12881284
spin_lock_irqsave(&priv->lock, flags);
1289-
if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q]) {
1285+
if (priv->cur_tx[q] - priv->dirty_tx[q] > (priv->num_tx_ring[q] - 1) *
1286+
NUM_TX_DESC) {
12901287
netif_err(priv, tx_queued, ndev,
12911288
"still transmitting with the full ring!\n");
12921289
netif_stop_subqueue(ndev, q);
12931290
spin_unlock_irqrestore(&priv->lock, flags);
12941291
return NETDEV_TX_BUSY;
12951292
}
1296-
entry = priv->cur_tx[q] % priv->num_tx_ring[q];
1297-
priv->tx_skb[q][entry] = skb;
1293+
entry = priv->cur_tx[q] % (priv->num_tx_ring[q] * NUM_TX_DESC);
1294+
priv->tx_skb[q][entry / NUM_TX_DESC] = skb;
12981295

12991296
if (skb_put_padto(skb, ETH_ZLEN))
13001297
goto drop;
13011298

1302-
buffer = PTR_ALIGN(priv->tx_buffers[q][entry], RAVB_ALIGN);
1303-
memcpy(buffer, skb->data, skb->len);
1304-
desc = &priv->tx_ring[q][entry];
1305-
desc->ds_tagl = cpu_to_le16(skb->len);
1306-
dma_addr = dma_map_single(&ndev->dev, buffer, skb->len, DMA_TO_DEVICE);
1299+
buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) +
1300+
entry / NUM_TX_DESC * DPTR_ALIGN;
1301+
len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data;
1302+
memcpy(buffer, skb->data, len);
1303+
dma_addr = dma_map_single(&ndev->dev, buffer, len, DMA_TO_DEVICE);
13071304
if (dma_mapping_error(&ndev->dev, dma_addr))
13081305
goto drop;
1306+
1307+
desc = &priv->tx_ring[q][entry];
1308+
desc->ds_tagl = cpu_to_le16(len);
1309+
desc->dptr = cpu_to_le32(dma_addr);
1310+
1311+
buffer = skb->data + len;
1312+
len = skb->len - len;
1313+
dma_addr = dma_map_single(&ndev->dev, buffer, len, DMA_TO_DEVICE);
1314+
if (dma_mapping_error(&ndev->dev, dma_addr))
1315+
goto unmap;
1316+
1317+
desc++;
1318+
desc->ds_tagl = cpu_to_le16(len);
13091319
desc->dptr = cpu_to_le32(dma_addr);
13101320

13111321
/* TX timestamp required */
13121322
if (q == RAVB_NC) {
13131323
ts_skb = kmalloc(sizeof(*ts_skb), GFP_ATOMIC);
13141324
if (!ts_skb) {
1315-
dma_unmap_single(&ndev->dev, dma_addr, skb->len,
1325+
desc--;
1326+
dma_unmap_single(&ndev->dev, dma_addr, len,
13161327
DMA_TO_DEVICE);
1317-
goto drop;
1328+
goto unmap;
13181329
}
13191330
ts_skb->skb = skb;
13201331
ts_skb->tag = priv->ts_skb_tag++;
@@ -1330,23 +1341,28 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
13301341

13311342
/* Descriptor type must be set after all the above writes */
13321343
dma_wmb();
1333-
desc->die_dt = DT_FSINGLE;
1344+
desc->die_dt = DT_FEND;
1345+
desc--;
1346+
desc->die_dt = DT_FSTART;
13341347

13351348
ravb_write(ndev, ravb_read(ndev, TCCR) | (TCCR_TSRQ0 << q), TCCR);
13361349

1337-
priv->cur_tx[q]++;
1338-
if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q] &&
1339-
!ravb_tx_free(ndev, q))
1350+
priv->cur_tx[q] += NUM_TX_DESC;
1351+
if (priv->cur_tx[q] - priv->dirty_tx[q] >
1352+
(priv->num_tx_ring[q] - 1) * NUM_TX_DESC && !ravb_tx_free(ndev, q))
13401353
netif_stop_subqueue(ndev, q);
13411354

13421355
exit:
13431356
mmiowb();
13441357
spin_unlock_irqrestore(&priv->lock, flags);
13451358
return NETDEV_TX_OK;
13461359

1360+
unmap:
1361+
dma_unmap_single(&ndev->dev, le32_to_cpu(desc->dptr),
1362+
le16_to_cpu(desc->ds_tagl), DMA_TO_DEVICE);
13471363
drop:
13481364
dev_kfree_skb_any(skb);
1349-
priv->tx_skb[q][entry] = NULL;
1365+
priv->tx_skb[q][entry / NUM_TX_DESC] = NULL;
13501366
goto exit;
13511367
}
13521368

0 commit comments

Comments
 (0)