
Commit 7db4c0d

Merge branch 'qed-XDP-header-adjust'

Yuval Mintz says:

====================
qede: support XDP head adjustments

Daniel has brought to my attention the fact that qede is the only driver that currently supports XDP but still fails any program where xdp_adjust_head is set on the bpf_prog. This series is meant to remedy this and align qede with the rest of the drivers, making it possible to remove said field.

Patch #1 contains a minor cache-saving optimization for the later patches.

Patches #2 & #3 address existing issues with the qede implementation [#2 should have been a part of this series, as it addresses something that's affected by the additional headroom; #3 is simply here for the ride].

Patches #4 & #5 add the necessary driver logic for ingress headroom: the first adds the infrastructure needed to support the headroom [as qede currently doesn't support such], and the second removes the existing XDP limitation.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents: 1ca2212 + 059eeb0
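For context on the helper being enabled here: bpf_xdp_adjust_head() lets an XDP program move the packet start into headroom the driver has reserved (XDP_PACKET_HEADROOM), e.g. to prepend an encapsulation header. Below is a minimal sketch of the kind of program qede previously rejected; the program name and tag bytes are hypothetical, and libbpf-style section annotations are assumed:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp")
    int xdp_push_tag(struct xdp_md *ctx)
    {
            /* Grow the packet head-ward by 4 bytes; this fails if the
             * driver did not reserve enough headroom.
             */
            if (bpf_xdp_adjust_head(ctx, -4))
                    return XDP_ABORTED;

            void *data = (void *)(long)ctx->data;
            void *data_end = (void *)(long)ctx->data_end;

            /* Verifier-mandated bounds check before touching the bytes */
            if (data + 4 > data_end)
                    return XDP_ABORTED;

            __builtin_memcpy(data, "\xde\xad\xbe\xef", 4); /* hypothetical tag */
            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";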

File tree

4 files changed: +68 −48 lines changed


drivers/net/ethernet/qlogic/qede/qede.h

Lines changed: 15 additions & 7 deletions
@@ -313,21 +313,24 @@ struct qede_rx_queue {
 	u8 data_direction;
 	u8 rxq_id;
 
+	/* Used once per each NAPI run */
+	u16 num_rx_buffers;
+
+	u16 rx_headroom;
+
 	u32 rx_buf_size;
 	u32 rx_buf_seg_size;
 
-	u64 rcv_pkts;
-
 	struct sw_rx_data *sw_rx_ring;
 	struct qed_chain rx_bd_ring;
 	struct qed_chain rx_comp_ring ____cacheline_aligned;
 
-	/* Used once per each NAPI run */
-	u16 num_rx_buffers;
-
 	/* GRO */
 	struct qede_agg_info tpa_info[ETH_TPA_MAX_AGGS_NUM];
 
+	/* Used once per each NAPI run */
+	u64 rcv_pkts;
+
 	u64 rx_hw_errors;
 	u64 rx_alloc_errors;
 	u64 rx_ip_frags;
@@ -349,6 +352,11 @@ struct sw_tx_bd {
 #define QEDE_TSO_SPLIT_BD	BIT(0)
 };
 
+struct sw_tx_xdp {
+	struct page *page;
+	dma_addr_t mapping;
+};
+
 struct qede_tx_queue {
 	u8 is_xdp;
 	bool is_legacy;
@@ -372,11 +380,11 @@ struct qede_tx_queue {
 #define QEDE_TXQ_IDX_TO_XDP(edev, idx)	((idx) + QEDE_MAX_TSS_CNT(edev))
 
 	/* Regular Tx requires skb + metadata for release purpose,
-	 * while XDP requires only the pages themselves.
+	 * while XDP requires the pages and the mapped address.
	 */
 	union {
 		struct sw_tx_bd *skbs;
-		struct page **pages;
+		struct sw_tx_xdp *xdp;
 	} sw_tx_ring;
 
 	struct qed_chain tx_pbl;
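A note on the hunks above: the field shuffling in the first hunk is the "minor cache-saving optimization" from patch #1 of the series — num_rx_buffers and rcv_pkts are each consumed once per NAPI run (hence the comments), so they are regrouped away from the fields the fast path touches per packet. The new struct sw_tx_xdp pairs each XDP-transmitted page with its DMA mapping, which lets the completion path in qede_fp.c (below) unmap without reading the address back from a hardware BD.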

drivers/net/ethernet/qlogic/qede/qede_filter.c

Lines changed: 5 additions & 5 deletions
@@ -520,11 +520,6 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
 {
 	struct qede_reload_args args;
 
-	if (prog && prog->xdp_adjust_head) {
-		DP_ERR(edev, "Does not support bpf_xdp_adjust_head()\n");
-		return -EOPNOTSUPP;
-	}
-
 	/* If we're called, there was already a bpf reference increment */
 	args.func = &qede_xdp_reload_func;
 	args.u.new_prog = prog;
@@ -537,6 +532,11 @@ int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 
+	if (IS_VF(edev)) {
+		DP_NOTICE(edev, "VFs don't support XDP\n");
+		return -EOPNOTSUPP;
+	}
+
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return qede_xdp_set(edev, xdp->prog);
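With the per-feature xdp_adjust_head check removed, rejection now happens only for VFs, up front in qede_xdp(). Attaching a program (iproute2 syntax; device name hypothetical) would look like:

    # ip link set dev eth0 xdp obj xdp_prog.o

and on a qede VF the attach is now refused cleanly with -EOPNOTSUPP plus a log notice, rather than per-program.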

drivers/net/ethernet/qlogic/qede/qede_fp.c

Lines changed: 40 additions & 30 deletions
@@ -87,7 +87,8 @@ int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy)
 	rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
 	WARN_ON(!rx_bd);
 	rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
-	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping));
+	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping) +
+				     rxq->rx_headroom);
 
 	rxq->sw_rx_prod++;
 	rxq->filled_buffers++;
@@ -360,7 +361,8 @@ static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
 			   metadata->mapping + padding,
 			   length, PCI_DMA_TODEVICE);
 
-	txq->sw_tx_ring.pages[idx] = metadata->data;
+	txq->sw_tx_ring.xdp[idx].page = metadata->data;
+	txq->sw_tx_ring.xdp[idx].mapping = metadata->mapping;
 	txq->sw_tx_prod++;
 
 	/* Mark the fastpath for future XDP doorbell */
@@ -384,19 +386,19 @@ int qede_txq_has_work(struct qede_tx_queue *txq)
 
 static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
-	struct eth_tx_1st_bd *bd;
-	u16 hw_bd_cons;
+	u16 hw_bd_cons, idx;
 
 	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
 	barrier();
 
 	while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
-		bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl);
+		qed_chain_consume(&txq->tx_pbl);
+		idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
 
-		dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(bd),
-				 PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(txq->sw_tx_ring.pages[txq->sw_tx_cons &
-						  NUM_TX_BDS_MAX]);
+		dma_unmap_page(&edev->pdev->dev,
+			       txq->sw_tx_ring.xdp[idx].mapping,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(txq->sw_tx_ring.xdp[idx].page);
 
 		txq->sw_tx_cons++;
 		txq->xmit_pkts++;
@@ -508,7 +510,8 @@ static inline void qede_reuse_page(struct qede_rx_queue *rxq,
 	new_mapping = curr_prod->mapping + curr_prod->page_offset;
 
 	rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(new_mapping));
-	rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping));
+	rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping) +
+					  rxq->rx_headroom);
 
 	rxq->sw_rx_prod++;
 	curr_cons->data = NULL;
@@ -624,7 +627,6 @@ static inline void qede_skb_receive(struct qede_dev *edev,
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
 	napi_gro_receive(&fp->napi, skb);
-	rxq->rcv_pkts++;
 }
 
 static void qede_set_gro_params(struct qede_dev *edev,
@@ -884,9 +886,9 @@ static inline void qede_tpa_cont(struct qede_dev *edev,
 			  "Strange - TPA cont with more than a single len_list entry\n");
 }
 
-static void qede_tpa_end(struct qede_dev *edev,
-			 struct qede_fastpath *fp,
-			 struct eth_fast_path_rx_tpa_end_cqe *cqe)
+static int qede_tpa_end(struct qede_dev *edev,
+			struct qede_fastpath *fp,
+			struct eth_fast_path_rx_tpa_end_cqe *cqe)
 {
 	struct qede_rx_queue *rxq = fp->rxq;
 	struct qede_agg_info *tpa_info;
@@ -934,11 +936,12 @@ static void qede_tpa_end(struct qede_dev *edev,
 
 	tpa_info->state = QEDE_AGG_STATE_NONE;
 
-	return;
+	return 1;
 err:
 	tpa_info->state = QEDE_AGG_STATE_NONE;
 	dev_kfree_skb_any(tpa_info->skb);
 	tpa_info->skb = NULL;
+	return 0;
 }
 
 static u8 qede_check_notunn_csum(u16 flag)
@@ -990,14 +993,15 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 			struct qede_rx_queue *rxq,
 			struct bpf_prog *prog,
 			struct sw_rx_data *bd,
-			struct eth_fast_path_rx_reg_cqe *cqe)
+			struct eth_fast_path_rx_reg_cqe *cqe,
+			u16 *data_offset, u16 *len)
 {
-	u16 len = le16_to_cpu(cqe->len_on_first_bd);
 	struct xdp_buff xdp;
 	enum xdp_action act;
 
-	xdp.data = page_address(bd->data) + cqe->placement_offset;
-	xdp.data_end = xdp.data + len;
+	xdp.data_hard_start = page_address(bd->data);
+	xdp.data = xdp.data_hard_start + *data_offset;
+	xdp.data_end = xdp.data + *len;
 
 	/* Queues always have a full reset currently, so for the time
 	 * being until there's atomic program replace just mark read
@@ -1007,6 +1011,10 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 	act = bpf_prog_run_xdp(prog, &xdp);
 	rcu_read_unlock();
 
+	/* Recalculate, as XDP might have changed the headers */
+	*data_offset = xdp.data - xdp.data_hard_start;
+	*len = xdp.data_end - xdp.data;
+
 	if (act == XDP_PASS)
 		return true;
 
@@ -1025,7 +1033,7 @@
 		/* Now if there's a transmission problem, we'd still have to
 		 * throw current buffer, as replacement was already allocated.
 		 */
-		if (qede_xdp_xmit(edev, fp, bd, cqe->placement_offset, len)) {
+		if (qede_xdp_xmit(edev, fp, bd, *data_offset, *len)) {
 			dma_unmap_page(rxq->dev, bd->mapping,
 				       PAGE_SIZE, DMA_BIDIRECTIONAL);
 			__free_page(bd->data);
@@ -1052,7 +1060,7 @@ static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
 					    struct sw_rx_data *bd, u16 len,
 					    u16 pad)
 {
-	unsigned int offset = bd->page_offset;
+	unsigned int offset = bd->page_offset + pad;
 	struct skb_frag_struct *frag;
 	struct page *page = bd->data;
 	unsigned int pull_len;
@@ -1069,15 +1077,15 @@
	 */
 	if (len + pad <= edev->rx_copybreak) {
 		memcpy(skb_put(skb, len),
-		       page_address(page) + pad + offset, len);
+		       page_address(page) + offset, len);
 		qede_reuse_page(rxq, bd);
 		goto out;
 	}
 
 	frag = &skb_shinfo(skb)->frags[0];
 
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-			page, pad + offset, len, rxq->rx_buf_seg_size);
+			page, offset, len, rxq->rx_buf_seg_size);
 
 	va = skb_frag_address(frag);
 	pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
@@ -1178,8 +1186,7 @@ static int qede_rx_process_tpa_cqe(struct qede_dev *edev,
 		qede_tpa_cont(edev, rxq, &cqe->fast_path_tpa_cont);
 		return 0;
 	case ETH_RX_CQE_TYPE_TPA_END:
-		qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
-		return 1;
+		return qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
 	default:
 		return 0;
 	}
@@ -1224,12 +1231,13 @@ static int qede_rx_process_cqe(struct qede_dev *edev,
 
 	fp_cqe = &cqe->fast_path_regular;
 	len = le16_to_cpu(fp_cqe->len_on_first_bd);
-	pad = fp_cqe->placement_offset;
+	pad = fp_cqe->placement_offset + rxq->rx_headroom;
 
 	/* Run eBPF program if one is attached */
 	if (xdp_prog)
-		if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe))
-			return 1;
+		if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe,
+				 &pad, &len))
+			return 0;
 
 	/* If this is an error packet then drop it */
 	flags = cqe->fast_path_regular.pars_flags.flags;
@@ -1290,8 +1298,8 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 {
 	struct qede_rx_queue *rxq = fp->rxq;
 	struct qede_dev *edev = fp->edev;
+	int work_done = 0, rcv_pkts = 0;
 	u16 hw_comp_cons, sw_comp_cons;
-	int work_done = 0;
 
 	hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
 	sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
@@ -1305,12 +1313,14 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 
 	/* Loop to complete all indicated BDs */
 	while ((sw_comp_cons != hw_comp_cons) && (work_done < budget)) {
-		qede_rx_process_cqe(edev, fp, rxq);
+		rcv_pkts += qede_rx_process_cqe(edev, fp, rxq);
 		qed_chain_recycle_consumed(&rxq->rx_comp_ring);
 		sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
 		work_done++;
 	}
 
+	rxq->rcv_pkts += rcv_pkts;
+
 	/* Allocate replacement buffers */
 	while (rxq->num_rx_buffers - rxq->filled_buffers)
 		if (qede_alloc_rx_buffer(rxq, false))
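The qede_rx_xdp() changes above follow the general pattern a driver needs once it honors head adjustment: populate data_hard_start alongside data, run the program, then read the possibly-moved offset and length back out of the xdp_buff. A condensed, driver-agnostic sketch of that contract (hypothetical helper; kernel-internal API as used in this series):

    /* Sketch: run @prog over a page-backed RX buffer. On return, *offset
     * and *len describe the packet as (possibly) rewritten by the program,
     * since bpf_xdp_adjust_head() may have moved xdp.data.
     */
    static bool run_xdp_on_page(struct bpf_prog *prog, struct page *page,
                                u16 *offset, u16 *len)
    {
            struct xdp_buff xdp;
            enum xdp_action act;

            xdp.data_hard_start = page_address(page);
            xdp.data = xdp.data_hard_start + *offset;
            xdp.data_end = xdp.data + *len;

            act = bpf_prog_run_xdp(prog, &xdp);

            /* Recompute: the program may have grown or shrunk the headers */
            *offset = xdp.data - xdp.data_hard_start;
            *len = xdp.data_end - xdp.data;

            return act == XDP_PASS; /* caller drops or transmits otherwise */
    }

This is why qede_rx_process_cqe() now passes pad and len by reference: whatever the program did to the headers is reflected back into the offsets used to build the skb or forward the page.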

drivers/net/ethernet/qlogic/qede/qede_main.c

Lines changed: 8 additions & 6 deletions
@@ -1187,9 +1187,11 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 	rxq->num_rx_buffers = edev->q_num_rx_buffers;
 
 	rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
+	rxq->rx_headroom = edev->xdp_prog ? XDP_PACKET_HEADROOM : 0;
 
-	if (rxq->rx_buf_size > PAGE_SIZE)
-		rxq->rx_buf_size = PAGE_SIZE;
+	/* Make sure that the headroom and payload fit in a single page */
+	if (rxq->rx_buf_size + rxq->rx_headroom > PAGE_SIZE)
+		rxq->rx_buf_size = PAGE_SIZE - rxq->rx_headroom;
 
 	/* Segment size to spilt a page in multiple equal parts,
	 * unless XDP is used in which case we'd use the entire page.
@@ -1251,7 +1253,7 @@ static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	/* Free the parallel SW ring */
 	if (txq->is_xdp)
-		kfree(txq->sw_tx_ring.pages);
+		kfree(txq->sw_tx_ring.xdp);
 	else
 		kfree(txq->sw_tx_ring.skbs);
 
@@ -1269,9 +1271,9 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 
 	/* Allocate the parallel driver ring for Tx buffers */
 	if (txq->is_xdp) {
-		size = sizeof(*txq->sw_tx_ring.pages) * TX_RING_SIZE;
-		txq->sw_tx_ring.pages = kzalloc(size, GFP_KERNEL);
-		if (!txq->sw_tx_ring.pages)
+		size = sizeof(*txq->sw_tx_ring.xdp) * TX_RING_SIZE;
+		txq->sw_tx_ring.xdp = kzalloc(size, GFP_KERNEL);
+		if (!txq->sw_tx_ring.xdp)
 			goto err;
 	} else {
 		size = sizeof(*txq->sw_tx_ring.skbs) * TX_RING_SIZE;
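As a worked example of the sizing logic in the first hunk: with 4 KiB pages and XDP_PACKET_HEADROOM at its in-kernel value of 256 bytes, attaching a program caps rx_buf_size at 4096 − 256 = 3840 bytes, so the reserved headroom plus the payload are guaranteed to fit in a single page; without a program, rx_headroom is 0 and the buffer may use the full page as before.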
