From e4533eb158db5ad3325c41091f0880f095c2d696 Mon Sep 17 00:00:00 2001
From: Long Li
Date: Mon, 8 Sep 2025 18:52:14 -0700
Subject: [PATCH 1/5] net: mana: Move NAPI from CQ to EQ

With a dedicated EQ created on each vPort and no longer shared between
different net devices, move the NAPI code from the CQ to the EQ. This
allows the driver to arm the EQ when NAPI is done, avoiding unnecessary
arming and resulting in fewer interrupts from the MANA device.
---
 .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++++++++--
 drivers/net/ethernet/microsoft/mana/mana_en.c | 119 ++++++----------
 include/net/mana/gdma.h                       |  13 ++
 include/net/mana/mana.h                       |  14 +-
 4 files changed, 175 insertions(+), 102 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index e66b6314797610..5420010e456a9a 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -598,7 +598,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 	}
 }
 
-static void mana_gd_process_eq_events(void *arg)
+static int mana_gd_process_eq_events(void *arg)
 {
 	u32 owner_bits, new_bits, old_bits;
 	union gdma_eqe_info eqe_info;
@@ -606,7 +606,10 @@ static void mana_gd_process_eq_events(void *arg)
 	struct gdma_eqe *eq_eqe_ptr;
 	struct gdma_queue *eq = arg;
 	struct gdma_context *gc;
 	struct gdma_eqe *eqe;
-	u32 head, num_eqe;
+	bool ring = false;
+	int work_done = 0;
+	u8 arm_bit = 0;
+	u32 num_eqe;
 	int i;
 
 	gc = eq->gdma_dev->gdma_context;
@@ -614,8 +617,8 @@ static void mana_gd_process_eq_events(void *arg)
 	num_eqe = eq->queue_size / GDMA_EQE_SIZE;
 	eq_eqe_ptr = eq->queue_mem_ptr;
 
-	/* Process up to 5 EQEs at a time, and update the HW head. */
-	for (i = 0; i < 5; i++) {
+	/* Process up to 4 EQ wraparounds at a time, and update the HW head. */
+	for (i = 0; i < eq->queue_size / GDMA_EQE_SIZE * 4; i++) {
 		eqe = &eq_eqe_ptr[eq->head % num_eqe];
 		eqe_info.as_uint32 = eqe->eqe_info;
 		owner_bits = eqe_info.owner_bits;
@@ -623,9 +626,16 @@ static void mana_gd_process_eq_events(void *arg)
 		old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
 		/* No more entries */
 		if (owner_bits == old_bits) {
-			/* return here without ringing the doorbell */
-			if (i == 0)
-				return;
+			/*
+			 * For HWC and RDMA, return without ringing the doorbell.
+			 * For MANA, we are in NAPI context. Need to complete
+			 * NAPI before return.
+			 */
+			if (i == 0) {
+				if (mana_gd_is_mana(eq->gdma_dev))
+					napi_complete_done(&eq->eq.napi, 0);
+				return 0;
+			}
 			break;
 		}
 
@@ -642,13 +652,88 @@ static void mana_gd_process_eq_events(void *arg)
 
 		mana_gd_process_eqe(eq);
 
-		eq->head++;
+		if (mana_gd_is_mana(eq->gdma_dev)) {
+			if (eq->eq.work_done < eq->eq.budget)
+				eq->head++;
+			else
+				/*
+				 * Don't need to update EQ head as it will be
+				 * processed by the next NAPI poll
+				 */
+				break;
+		} else {
+			eq->head++;
+		}
 	}
 
-	head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
+	eq->eq.eqe_done_since_doorbell += i;
+
+	/* Always arm the EQ for non-MANA device (HWC and RDMA) */
+	if (!mana_gd_is_mana(eq->gdma_dev))
+		arm_bit = SET_ARM_BIT;
+
+	/*
+	 * For MANA, arm the EQ when we have not used all NAPI budget.
+	 * MANA hardware requires at least one doorbell ring every 8
+	 * wraparounds of EQ even if there is no need to arm the EQ.
+	 * This driver rings the doorbell as soon as we have exceeded
+	 * 4 wraparounds.
+	 */
+	if (mana_gd_is_mana(eq->gdma_dev)) {
+		ring = eq->eq.eqe_done_since_doorbell > eq->queue_size / GDMA_EQE_SIZE * 4;
+		if (ring)
+			eq->eq.eqe_done_since_doorbell = 0;
+
+		work_done = eq->eq.work_done;
+		if (work_done < eq->eq.budget)
+			arm_bit = SET_ARM_BIT;
+	}
+
+	if (arm_bit || ring)
+		mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
+				      eq->head % (num_eqe << GDMA_EQE_OWNER_BITS), arm_bit);
+
+	if (mana_gd_is_mana(eq->gdma_dev) && work_done < eq->eq.budget)
+		napi_complete_done(&eq->eq.napi, work_done);
 
-	mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
-			      head, SET_ARM_BIT);
+	return work_done;
+}
+
+int mana_poll(struct napi_struct *napi, int budget)
+{
+	struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi);
+	int work_done;
+
+	eq->eq.work_done = 0;
+	eq->eq.budget = budget;
+
+	work_done = mana_gd_process_eq_events(eq);
+
+	return min(work_done, budget);
+}
+
+static void mana_gd_schedule_napi(void *arg)
+{
+	struct gdma_queue *eq = arg;
+	struct napi_struct *napi;
+
+	napi = &eq->eq.napi;
+	napi_schedule_irqoff(napi);
+}
+
+static void gic_handler(void *arg)
+{
+	struct gdma_queue *eq = arg;
+	struct gdma_dev *dev = eq->gdma_dev;
+
+	/*
+	 * Process an interrupt on the EQ. Schedule NAPI if this is a MANA
+	 * device. For HWC and RDMA devices, process EQ directly from interrupt
+	 */
+	if (mana_gd_is_mana(dev))
+		mana_gd_schedule_napi(eq);
+	else
+		mana_gd_process_eq_events(eq);
 }
 
 static int mana_gd_register_irq(struct gdma_queue *queue,
@@ -679,10 +764,17 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
 
 	if (WARN_ON(!gic))
 		return -EINVAL;
 
+	if (mana_gd_is_mana(gd)) {
+		netif_napi_add_locked(spec->eq.ndev, &queue->eq.napi, mana_poll);
+		napi_enable_locked(&queue->eq.napi);
+	}
+
 	spin_lock_irqsave(&gic->lock, flags);
 	list_add_rcu(&queue->entry, &gic->eq_list);
 	spin_unlock_irqrestore(&gic->lock, flags);
 
+	synchronize_rcu();
+
 	return 0;
 }
@@ -706,6 +798,12 @@ static void mana_gd_deregister_irq(struct gdma_queue *queue)
 	if (WARN_ON(!gic))
 		return;
 
+	if (mana_gd_is_mana(gd)) {
+		napi_disable_locked(&queue->eq.napi);
+		netif_napi_del_locked(&queue->eq.napi);
+		page_pool_destroy(queue->eq.page_pool);
+	}
+
 	spin_lock_irqsave(&gic->lock, flags);
 	list_for_each_entry_rcu(eq, &gic->eq_list, entry) {
 		if (queue == eq) {
@@ -714,7 +812,6 @@ static void mana_gd_deregister_irq(struct gdma_queue *queue)
 		}
 	}
 	spin_unlock_irqrestore(&gic->lock, flags);
 
-	synchronize_rcu();
 }
 
@@ -804,6 +901,13 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
 		return -EINVAL;
 	}
 
+	/*
+	 * queue->head could be checked as soon as the IRQ is registered and
+	 * this queue is added to EQ list for the interrupt. Need to setup this
+	 * value before making the call to mana_gd_register_irq()
+	 */
+	queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries);
+	mb();
 	err = mana_gd_register_irq(queue, spec);
 	if (err) {
 		dev_err(dev, "Failed to register irq: %d\n", err);
@@ -812,7 +916,6 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
 
 	queue->eq.callback = spec->eq.callback;
 	queue->eq.context = spec->eq.context;
-	queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries);
 	queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1;
 
 	if (create_hwq) {
@@ -1563,7 +1666,7 @@ struct gdma_irq_context *mana_gd_get_gic(struct gdma_context *gc,
 	if (!gic)
 		goto out;
 
-	gic->handler = mana_gd_process_eq_events;
+	gic->handler = gic_handler;
 	gic->msi = msi;
 	gic->irq = irq;
 	INIT_LIST_HEAD(&gic->eq_list);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index b8ec23a7806078..1de5c00cca7f3b 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1466,6 +1466,14 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
 }
 EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA");
 
+static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf)
+{
+	int i;
+
+	for (i = 0; i < CQE_POLLING_BUFFER; i++)
+		memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp));
+}
+
 void mana_destroy_eq(struct mana_port_context *apc)
 {
 	struct mana_context *ac = apc->ac;
@@ -1503,6 +1511,7 @@ static void mana_create_eq_debugfs(struct mana_port_context *apc, int i)
 	debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head);
 	debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail);
 	debugfs_create_u32("irq", 0400, eq.mana_eq_debugfs, &eq.eq->eq.irq);
+	debugfs_create_u32("eq_budget", 0400, eq.mana_eq_debugfs, &eq.eq->eq.budget);
 	debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops);
 }
 
@@ -1527,10 +1536,13 @@ int mana_create_eq(struct mana_port_context *apc)
 	spec.eq.callback = NULL;
 	spec.eq.context = apc->eqs;
 	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+	spec.eq.ndev = apc->ndev;
 
 	apc->mana_eqs_debugfs = debugfs_create_dir("EQs", apc->mana_port_debugfs);
 
 	for (i = 0; i < apc->num_queues; i++) {
+		mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll);
+
 		if (gc->msi_sharing)
 			spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
 
@@ -1645,6 +1657,7 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 
 static void mana_poll_tx_cq(struct mana_cq *cq)
 {
+	struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent;
 	struct gdma_comp *completions = cq->gdma_comp_buf;
 	struct gdma_posted_wqe_info *wqe_info;
 	unsigned int pkt_transmitted = 0;
@@ -1666,9 +1679,11 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
 				    CQE_POLLING_BUFFER);
 
-	if (comp_read < 1)
+	if (!comp_read)
 		return;
 
+	cq->cqe_done_since_doorbell += comp_read;
+
 	for (i = 0; i < comp_read; i++) {
 		struct mana_tx_comp_oob *cqe_oob;
 
@@ -1724,9 +1739,10 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 
 		mana_unmap_skb(skb, apc);
 
-		napi_consume_skb(skb, cq->budget);
+		napi_consume_skb(skb, gdma_eq->eq.budget);
 
 		pkt_transmitted++;
+		gdma_eq->eq.work_done++;
 	}
 
 	if (WARN_ON_ONCE(wqe_unit_cnt == 0))
@@ -1753,8 +1769,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 
 	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
 		WARN_ON_ONCE(1);
-
-	cq->work_done = pkt_transmitted;
 }
 
 static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -1810,13 +1824,15 @@ static void mana_rx_skb(void *buf_va, bool from_pool,
 	uint pkt_len = cqe->ppi[i].pkt_len;
 	u16 rxq_idx = rxq->rxq_idx;
 	struct napi_struct *napi;
+	struct gdma_queue *eq;
 	struct xdp_buff xdp = {};
 	struct sk_buff *skb;
 	u32 hash_value;
 	u32 act;
 
-	rxq->rx_cq.work_done++;
-	napi = &rxq->rx_cq.napi;
+	eq = rxq->rx_cq.gdma_cq->cq.parent;
+	eq->eq.work_done++;
+	napi = &eq->eq.napi;
 
 	if (!buf_va) {
 		++ndev->stats.rx_dropped;
@@ -2052,6 +2068,7 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
 	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
+	cq->cqe_done_since_doorbell += comp_read;
 
 	rxq->xdp_flush = false;
 
@@ -2076,10 +2093,10 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 		xdp_do_flush();
 }
 
-static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 {
 	struct mana_cq *cq = context;
-	int w;
+	struct gdma_queue *eq;
 
 	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
 
@@ -2088,14 +2105,11 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 	else
 		mana_poll_tx_cq(cq);
 
-	w = cq->work_done;
-	cq->work_done_since_doorbell += w;
-
-	if (w < cq->budget) {
+	eq = gdma_queue->cq.parent;
+	if (eq->eq.work_done < eq->eq.budget) {
 		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
-		cq->work_done_since_doorbell = 0;
-		napi_complete_done(&cq->napi, w);
-	} else if (cq->work_done_since_doorbell >
+		cq->cqe_done_since_doorbell = 0;
+	} else if (cq->cqe_done_since_doorbell >
 		   cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
 		/* MANA hardware requires at least one doorbell ring every 8
 		 * wraparounds of CQ even if there is no need to arm the CQ.
 		 * This driver rings the doorbell as soon as we have exceeded
@@ -2103,30 +2117,8 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 		 * 4 wraparounds.
 		 */
 		mana_gd_ring_cq(gdma_queue, 0);
-		cq->work_done_since_doorbell = 0;
+		cq->cqe_done_since_doorbell = 0;
 	}
-
-	return w;
-}
-
-static int mana_poll(struct napi_struct *napi, int budget)
-{
-	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
-	int w;
-
-	cq->work_done = 0;
-	cq->budget = budget;
-
-	w = mana_cq_handler(cq, cq->gdma_cq);
-
-	return min(w, budget);
-}
-
-static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
-{
-	struct mana_cq *cq = context;
-
-	napi_schedule_irqoff(&cq->napi);
 }
 
 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
@@ -2151,7 +2143,6 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
 
 static void mana_destroy_txq(struct mana_port_context *apc)
 {
-	struct napi_struct *napi;
 	int i;
 
 	if (!apc->tx_qp)
@@ -2161,13 +2152,6 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 		debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs);
 		apc->tx_qp[i].mana_tx_debugfs = NULL;
 
-		napi = &apc->tx_qp[i].tx_cq.napi;
-		if (apc->tx_qp[i].txq.napi_initialized) {
-			napi_synchronize(napi);
-			napi_disable_locked(napi);
-			netif_napi_del_locked(napi);
-			apc->tx_qp[i].txq.napi_initialized = false;
-		}
 		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
 		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -2196,8 +2180,6 @@ static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx)
 			   &tx_qp->tx_cq.gdma_cq->head);
 	debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs,
 			   &tx_qp->tx_cq.gdma_cq->tail);
-	debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs,
-			   &tx_qp->tx_cq.budget);
 	debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs,
 			    tx_qp->txq.gdma_sq, &mana_dbg_q_fops);
 	debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs,
@@ -2250,7 +2232,6 @@ static int mana_create_txq(struct mana_port_context *apc,
 		txq->ndev = net;
 		txq->net_txq = netdev_get_tx_queue(net, i);
 		txq->vp_offset = apc->tx_vp_offset;
-		txq->napi_initialized = false;
 		skb_queue_head_init(&txq->pending_skbs);
 
 		memset(&spec, 0, sizeof(spec));
@@ -2263,6 +2244,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
 		/* Create SQ's CQ */
 		cq = &apc->tx_qp[i].tx_cq;
+		cq->gdma_comp_buf = apc->eqs[i].cqe_poll;
 		cq->type = MANA_CQ_TYPE_TX;
 
 		cq->txq = txq;
@@ -2271,7 +2253,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 		spec.type = GDMA_CQ;
 		spec.monitor_avl_buf = false;
 		spec.queue_size = cq_size;
-		spec.cq.callback = mana_schedule_napi;
+		spec.cq.callback = mana_cq_handler;
 		spec.cq.parent_eq = apc->eqs[i].eq;
 		spec.cq.context = cq;
 		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
@@ -2317,11 +2299,6 @@ static int mana_create_txq(struct mana_port_context *apc,
 
 		mana_create_txq_debugfs(apc, i);
 
-		set_bit(NAPI_STATE_NO_BUSY_POLL, &cq->napi.state);
-		netif_napi_add_locked(net, &cq->napi, mana_poll);
-		napi_enable_locked(&cq->napi);
-		txq->napi_initialized = true;
-
 		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 	}
 
@@ -2334,13 +2311,12 @@ static int mana_create_txq(struct mana_port_context *apc,
 }
 
 static void mana_destroy_rxq(struct mana_port_context *apc,
-			     struct mana_rxq *rxq, bool napi_initialized)
+			     struct mana_rxq *rxq)
 
 {
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
 	struct mana_recv_buf_oob *rx_oob;
 	struct device *dev = gc->dev;
-	struct napi_struct *napi;
 	struct page *page;
 	int i;
 
@@ -2350,14 +2326,6 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 	debugfs_remove_recursive(rxq->mana_rx_debugfs);
 	rxq->mana_rx_debugfs = NULL;
 
-	napi = &rxq->rx_cq.napi;
-
-	if (napi_initialized) {
-		napi_synchronize(napi);
-
-		napi_disable_locked(napi);
-		netif_napi_del_locked(napi);
-	}
 	xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
@@ -2386,8 +2354,6 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 		rx_oob->buf_va = NULL;
 	}
 
-	page_pool_destroy(rxq->page_pool);
-
 	if (rxq->gdma_rq)
 		mana_gd_destroy_queue(gc, rxq->gdma_rq);
 
@@ -2485,11 +2451,13 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
 {
 	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
 	struct page_pool_params pprm = {};
+	u16 rxq_idx = rxq->rxq_idx;
+	struct gdma_queue *eq = mpc->eqs[rxq_idx].eq;
 	int ret;
 
 	pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1;
 	pprm.nid = gc->numa_node;
-	pprm.napi = &rxq->rx_cq.napi;
+	pprm.napi = &eq->eq.napi;
 	pprm.netdev = rxq->ndev;
 	pprm.order = get_order(rxq->alloc_size);
 	pprm.queue_idx = rxq->rxq_idx;
@@ -2512,6 +2480,7 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
 		return ret;
 	}
 
+	eq->eq.page_pool = rxq->page_pool;
 	return 0;
 }
 
@@ -2568,6 +2537,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	/* Create RQ's CQ */
 	cq = &rxq->rx_cq;
+	cq->gdma_comp_buf = eq->cqe_poll;
 	cq->type = MANA_CQ_TYPE_RX;
 	cq->rxq = rxq;
 
@@ -2575,7 +2545,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	spec.type = GDMA_CQ;
 	spec.monitor_avl_buf = false;
 	spec.queue_size = cq_size;
-	spec.cq.callback = mana_schedule_napi;
+	spec.cq.callback = mana_cq_handler;
 	spec.cq.parent_eq = eq->eq;
 	spec.cq.context = cq;
 	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
@@ -2617,15 +2587,11 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-	netif_napi_add_weight_locked(ndev, &cq->napi, mana_poll, 1);
-
 	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
-				 cq->napi.napi_id));
+				 eq->eq->eq.napi.napi_id));
 	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
 					   rxq->page_pool));
 
-	napi_enable_locked(&cq->napi);
-
 	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
 	if (!err)
@@ -2633,7 +2599,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);
 
-	mana_destroy_rxq(apc, rxq, false);
+	mana_destroy_rxq(apc, rxq);
 
 	if (cq)
 		mana_deinit_cq(apc, cq);
@@ -2657,7 +2623,6 @@ static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx)
 			   &rxq->rx_cq.gdma_cq->head);
 	debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs,
 			   &rxq->rx_cq.gdma_cq->tail);
-	debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget);
 	debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops);
 	debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, &mana_dbg_q_fops);
 
@@ -2700,7 +2665,7 @@ static void mana_destroy_rxqs(struct mana_port_context *apc)
 		if (!rxq)
 			continue;
 
-		mana_destroy_rxq(apc, rxq, true);
+		mana_destroy_rxq(apc, rxq);
 		apc->rxqs[rxq_idx] = NULL;
 	}
 }
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 9f7b70f3f65863..bf723acb27611f 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -284,8 +284,10 @@ struct gdma_event {
 
 struct gdma_queue;
 
+#define CQE_POLLING_BUFFER 512
 struct mana_eq {
 	struct gdma_queue *eq;
+	struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];
 	struct dentry *mana_eq_debugfs;
 };
 
@@ -337,6 +339,14 @@ struct gdma_queue {
 			unsigned int irq;
 			u32 log2_throttle_limit;
+
+			/* NAPI data */
+			struct napi_struct napi;
+			int work_done;
+			int eqe_done_since_doorbell;
+			int budget;
+
+			struct page_pool *page_pool;
 		} eq;
 
 		struct {
@@ -361,6 +371,8 @@ struct gdma_queue_spec {
 
 			unsigned long log2_throttle_limit;
 			unsigned int msix_index;
+
+			struct net_device *ndev;
 		} eq;
 
 		struct {
@@ -953,4 +965,5 @@ void mana_gd_put_gic(struct gdma_context *gc, bool use_msi_bitmap, int msi);
 int mana_gd_query_device_cfg(struct gdma_context *gc, u32 proto_major_ver,
 			     u32 proto_minor_ver, u32 proto_micro_ver,
 			     u16 *max_num_vports, u8 *bm_hostmode);
+int mana_poll(struct napi_struct *napi, int budget);
 #endif /* _GDMA_H */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 860f5036b32c61..95ac909b51a269 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -113,8 +113,6 @@ struct mana_txq {
 
 	atomic_t pending_sends;
 
-	bool napi_initialized;
-
 	struct mana_stats_tx stats;
 };
 
@@ -265,8 +263,6 @@ struct mana_tx_comp_oob {
 
 struct mana_rxq;
 
-#define CQE_POLLING_BUFFER 512
-
 struct mana_cq {
 	struct gdma_queue *gdma_cq;
 
@@ -286,14 +282,10 @@ struct mana_cq {
 	 */
 	struct mana_txq *txq;
 
-	/* Buffer which the CQ handler can copy the CQE's into. */
-	struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER];
+	/* Pointer to a buffer which the CQ handler can copy the CQE's into. */
+	struct gdma_comp *gdma_comp_buf;
 
-	/* NAPI data */
-	struct napi_struct napi;
-	int work_done;
-	int work_done_since_doorbell;
-	int budget;
+	int cqe_done_since_doorbell;
 };
 
 struct mana_recv_buf_oob {

From e90fe3a4b84cfe2eafb3010d37ca5bc9682ddf6c Mon Sep 17 00:00:00 2001
From: Long Li
Date: Wed, 26 Nov 2025 00:00:42 +0000
Subject: [PATCH 2/5] net: mana: Always arm CQ after CQE_POLLING_BUFFER * 4 CQEs

---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 1de5c00cca7f3b..6665b6533273ff 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2118,6 +2118,11 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 		 */
 		mana_gd_ring_cq(gdma_queue, 0);
 		cq->cqe_done_since_doorbell = 0;
+	} else if (cq->cqe_done_since_doorbell > CQE_POLLING_BUFFER * 4 ) {
+		//HACK arm the CQ and move on to next EQE
+		eq->eq.work_done = 0;
+		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
+		cq->cqe_done_since_doorbell = 0;
 	}
 }
 

From f970814c7299c97bb35a45cc1298b0b5bda5ff40 Mon Sep 17 00:00:00 2001
From: Long Li
Date: Thu, 27 Nov 2025 01:35:33 +0000
Subject: [PATCH 3/5] changes

---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 26 ++++++++----------
 include/net/mana/gdma.h                       |  3 ++-
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 6665b6533273ff..b74859481357df 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2065,6 +2065,9 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 	struct gdma_comp *comp = cq->gdma_comp_buf;
 	struct mana_rxq *rxq = cq->rxq;
 	int comp_read, i;
+	struct gdma_context *gc;
+
+	gc = rxq->gdma_rq->gdma_dev->gdma_context;
 
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
 	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
@@ -2081,13 +2084,14 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 			return;
 
 		mana_process_rx_cqe(rxq, cq, &comp[i]);
-	}
 
-	if (comp_read > 0) {
-		struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
+		/* ring the wq every RING_WQ_MAX CQEs */
+		if (i % RING_WQ_MAX == 0)
+			mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
+	}
 
+	if (comp_read > 0)
 		mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
-	}
 
 	if (rxq->xdp_flush)
 		xdp_do_flush();
 }
@@ -2109,17 +2113,9 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
 		cq->cqe_done_since_doorbell = 0;
-	} else if (cq->cqe_done_since_doorbell >
-		   cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
-		/* MANA hardware requires at least one doorbell ring every 8
-		 * wraparounds of CQ even if there is no need to arm the CQ.
-		 * This driver rings the doorbell as soon as we have exceeded
-		 * 4 wraparounds.
-		 */
-		mana_gd_ring_cq(gdma_queue, 0);
-		cq->cqe_done_since_doorbell = 0;
-	} else if (cq->cqe_done_since_doorbell > CQE_POLLING_BUFFER * 4 ) {
-		//HACK arm the CQ and move on to next EQE
+	} else if (cq->cqe_done_since_doorbell > CQE_POLLING_BUFFER ||
+		   cq->cqe_done_since_doorbell > cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) {
+		// Arm the CQ and move on to next EQE
 		eq->eq.work_done = 0;
 		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
 		cq->cqe_done_since_doorbell = 0;
 	}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index bf723acb27611f..519b3d56f014fc 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -284,7 +284,8 @@ struct gdma_event {
 
 struct gdma_queue;
 
-#define CQE_POLLING_BUFFER 512
+#define CQE_POLLING_BUFFER 128
+#define RING_WQ_MAX 32
 struct mana_eq {
 	struct gdma_queue *eq;
 	struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];

From ab9f9811c382b7baaaf6d8f1e71b69836c9f1f16 Mon Sep 17 00:00:00 2001
From: Long Li
Date: Mon, 1 Dec 2025 22:32:29 +0000
Subject: [PATCH 4/5] debug_trace

---
 drivers/net/ethernet/microsoft/mana/gdma_main.c | 4 ++++
 drivers/net/ethernet/microsoft/mana/mana_en.c   | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 5420010e456a9a..a28f81897f947a 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -689,6 +689,8 @@ static int mana_gd_process_eq_events(void *arg)
 		arm_bit = SET_ARM_BIT;
 	}
 
+	trace_printk("eq id %d work_done %d arm_bit %d ring %d\n", eq->id, work_done, arm_bit, ring);
+
 	if (arm_bit || ring)
 		mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
 				      eq->head % (num_eqe << GDMA_EQE_OWNER_BITS), arm_bit);
@@ -1563,6 +1565,8 @@ static irqreturn_t mana_gd_intr(int irq, void *arg)
 	struct list_head *eq_list = &gic->eq_list;
 	struct gdma_queue *eq;
 
+	trace_printk("mana int\n");
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(eq, eq_list, entry) {
 		gic->handler(eq);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index b74859481357df..3697579b44e9c5 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2110,6 +2110,9 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 		mana_poll_tx_cq(cq);
 
 	eq = gdma_queue->cq.parent;
+
+	trace_printk("eq id %d cq %s work_done %d budget %d\n", eq->id, cq->type == MANA_CQ_TYPE_RX ? "rx" : "tx", eq->eq.work_done, eq->eq.budget);
+
 	if (eq->eq.work_done < eq->eq.budget) {
 		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
 		cq->cqe_done_since_doorbell = 0;

From 40b3356b70e519b5142c864c17b61592bc93c999 Mon Sep 17 00:00:00 2001
From: Long Li
Date: Tue, 2 Dec 2025 02:00:09 +0000
Subject: [PATCH 5/5] debug always ring CQ?

---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 3697579b44e9c5..811ffff0580d9f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2113,6 +2113,8 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 
 	trace_printk("eq id %d cq %s work_done %d budget %d\n", eq->id, cq->type == MANA_CQ_TYPE_RX ? "rx" : "tx", eq->eq.work_done, eq->eq.budget);
 
+// mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
+
 	if (eq->eq.work_done < eq->eq.budget) {
 		mana_gd_ring_cq(gdma_queue, SET_ARM_BIT);
 		cq->cqe_done_since_doorbell = 0;