[PATCH 2/4] eventdev: have ethernet Rx adapter appropriately report idle
Mattias Rönnblom
mattias.ronnblom at ericsson.com
Thu Oct 13 11:53:31 CEST 2022
On 2022-10-13 03:32, Naga Harish K, S V wrote:
>
>
>> -----Original Message-----
>> From: Jayatheerthan, Jay <jay.jayatheerthan at intel.com>
>> Sent: Tuesday, October 11, 2022 12:40 PM
>> To: mattias.ronnblom <mattias.ronnblom at ericsson.com>; Carrillo, Erik G
>> <erik.g.carrillo at intel.com>; Gujjar, Abhinandan S
>> <abhinandan.gujjar at intel.com>; Jerin Jacob <jerinj at marvell.com>; Naga
>> Harish K, S V <s.v.naga.harish.k at intel.com>
>> Cc: dev at dpdk.org; Van Haaren, Harry <harry.van.haaren at intel.com>;
>> hofors at lysator.liu.se; mattias.ronnblom <mattias.ronnblom at ericsson.com>
>> Subject: RE: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
>> report idle
>>
>> @Harish, Could you review the patch ?
>>
>> -Jay
>>
>>> -----Original Message-----
>>> From: Mattias Rönnblom <mattias.ronnblom at ericsson.com>
>>> Sent: Monday, October 10, 2022 8:24 PM
>>> To: Jayatheerthan, Jay <jay.jayatheerthan at intel.com>; Carrillo, Erik G
>>> <erik.g.carrillo at intel.com>; Gujjar, Abhinandan S
>>> <abhinandan.gujjar at intel.com>; Jerin Jacob <jerinj at marvell.com>
>>> Cc: dev at dpdk.org; Van Haaren, Harry <harry.van.haaren at intel.com>;
>>> hofors at lysator.liu.se; mattias.ronnblom
>>> <mattias.ronnblom at ericsson.com>
>>> Subject: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
>>> report idle
>>>
>>> Update the Event Ethernet Rx Adapter's service function to report as
>>> idle (i.e., return -EAGAIN) in case no Ethernet frames were received
>>> from the ethdev and no events were enqueued to the event device.
>>>
>>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom at ericsson.com>
>>> ---
>>> lib/eventdev/rte_event_eth_rx_adapter.c | 56
>>> ++++++++++++++++++-------
>>> 1 file changed, 41 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c
>>> b/lib/eventdev/rte_event_eth_rx_adapter.c
>>> index 5c3021a184..cf7bbd4d69 100644
>>> --- a/lib/eventdev/rte_event_eth_rx_adapter.c
>>> +++ b/lib/eventdev/rte_event_eth_rx_adapter.c
>>> @@ -1184,7 +1184,7 @@ rxa_intr_thread(void *arg)
>>> /* Dequeue <port, q> from interrupt ring and enqueue received
>>> * mbufs to eventdev
>>> */
>>> -static inline void
>>> +static inline bool
>>> rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter) {
>>> uint32_t n;
>>> @@ -1194,20 +1194,27 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>> struct rte_event_eth_rx_adapter_stats *stats;
>>> rte_spinlock_t *ring_lock;
>>> uint8_t max_done = 0;
>>> + bool work = false;
>>>
>>> if (rx_adapter->num_rx_intr == 0)
>>> - return;
>>> + return work;
>>>
>>> if (rte_ring_count(rx_adapter->intr_ring) == 0
>>> && !rx_adapter->qd_valid)
>>> - return;
>>> + return work;
>>>
>>> buf = &rx_adapter->event_enqueue_buffer;
>>> stats = &rx_adapter->stats;
>>> ring_lock = &rx_adapter->intr_ring_lock;
>>>
>>> - if (buf->count >= BATCH_SIZE)
>>> - rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> + if (buf->count >= BATCH_SIZE) {
>>> + uint16_t n;
>>> +
>>> + n = rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> +
>>> + if (likely(n > 0))
>>> + work = true;
>>> + }
>>>
>>> while (rxa_pkt_buf_available(buf)) {
>>> struct eth_device_info *dev_info;
>>> @@ -1289,7 +1296,12 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>> }
>>>
>>> done:
>>> - rx_adapter->stats.rx_intr_packets += nb_rx;
>>> + if (nb_rx > 0) {
>
> How are the performance numbers before and after this patch?
> I am trying to understand the performance impact, as a new condition is added to the service function datapath.
>
I haven't tested the RX and TX adapters separately, but when I run them
on the same core, I get the following results:
Without patches, with stats disabled: 16,0 Mpps
Without patches, with stats enabled: 16,1 Mpps
With patches, with stats disabled: 16,1 Mpps
With patches, with stats enabled: 16,2 Mpps
So with this particular hardware, compiler, and test application, these
patches, which add a tiny bit of additional logic, actually make the
RX+TX adapter perform better. This is contrary to what you might expect,
and I'm sure YMMV.
Enabling service core statistics (which boils down to 2x rdtsc and
some cheap arithmetic in rte_service.c) actually makes the RX+TX adapter
core perform better, both before and after this patchset. This is also
contrary to what you might expect.
The results are consistent across multiple runs.
GCC 11.2.0 and AMD Zen 3 @ 3,7 GHz. Event device is DSW and I/O is the
ring Ethdev.
>>> + rx_adapter->stats.rx_intr_packets += nb_rx;
>>> + work = true;
>>> + }
>>> +
>>> + return work;
>>> }
>>>
>>> /*
>>> @@ -1305,7 +1317,7 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>> * the hypervisor's switching layer where adjustments can be made to deal
>> with
>>> * it.
>>> */
>>> -static inline void
>>> +static inline bool
>>> rxa_poll(struct event_eth_rx_adapter *rx_adapter) {
>>> uint32_t num_queue;
>>> @@ -1314,6 +1326,7 @@ rxa_poll(struct event_eth_rx_adapter
>> *rx_adapter)
>>> struct rte_event_eth_rx_adapter_stats *stats = NULL;
>>> uint32_t wrr_pos;
>>> uint32_t max_nb_rx;
>>> + bool work = false;
>>>
>>> wrr_pos = rx_adapter->wrr_pos;
>>> max_nb_rx = rx_adapter->max_nb_rx;
>>> @@ -1329,14 +1342,20 @@ rxa_poll(struct event_eth_rx_adapter
>> *rx_adapter)
>>> /* Don't do a batch dequeue from the rx queue if there isn't
>>> * enough space in the enqueue buffer.
>>> */
>>> - if (buf->count >= BATCH_SIZE)
>>> - rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> + if (buf->count >= BATCH_SIZE) {
>>> + uint16_t n;
>>> +
>>> + n = rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> +
>>> + if (likely(n > 0))
>>> + work = true;
>
> Same as above
>
>>> + }
>>> if (!rxa_pkt_buf_available(buf)) {
>>> if (rx_adapter->use_queue_event_buf)
>>> goto poll_next_entry;
>>> else {
>>> rx_adapter->wrr_pos = wrr_pos;
>>> - return;
>>> + break;
>>> }
>>> }
>>>
>>> @@ -1352,6 +1371,11 @@ rxa_poll(struct event_eth_rx_adapter
>> *rx_adapter)
>>> if (++wrr_pos == rx_adapter->wrr_len)
>>> wrr_pos = 0;
>>> }
>>> +
>>> + if (nb_rx > 0)
>>> + work = true;
>>> +
>>> + return work;
>
> Same as above
>
>>> }
>>>
>>> static void
>>> @@ -1384,12 +1408,14 @@ static int
>>> rxa_service_func(void *args)
>>> {
>>> struct event_eth_rx_adapter *rx_adapter = args;
>>> + bool intr_work;
>>> + bool poll_work;
>>>
>>> if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
>>> - return 0;
>>> + return -EAGAIN;
>>> if (!rx_adapter->rxa_started) {
>>> rte_spinlock_unlock(&rx_adapter->rx_lock);
>>> - return 0;
>>> + return -EAGAIN;
>>> }
>>>
>>> if (rx_adapter->ena_vector) {
>>> @@ -1410,12 +1436,12 @@ rxa_service_func(void *args)
>>> }
>>> }
>>>
>>> - rxa_intr_ring_dequeue(rx_adapter);
>>> - rxa_poll(rx_adapter);
>>> + intr_work = rxa_intr_ring_dequeue(rx_adapter);
>>> + poll_work = rxa_poll(rx_adapter);
>>>
>>> rte_spinlock_unlock(&rx_adapter->rx_lock);
>>>
>>> - return 0;
>>> + return intr_work || poll_work ? 0 : -EAGAIN;
>>> }
>>>
>>> static void *
>>> --
>>> 2.34.1
>
More information about the dev
mailing list