<div dir="ltr"><div class="gmail_default" style="font-family:verdana,sans-serif">Hi All,</div><div class="gmail_default" style="font-family:verdana,sans-serif"><br></div><div class="gmail_default" style="font-family:verdana,sans-serif">I would need some confirmation on this patch. </div><div class="gmail_default" style="font-family:verdana,sans-serif">Because of some earlier issues encountered on mlx5, we have disabled cqe_comp in the mlx5 driver. In that case, do we still need this fix, or will disabling cqe_comp take care of it as well?</div><div class="gmail_default" style="font-family:verdana,sans-serif"><br></div><div class="gmail_default" style="font-family:verdana,sans-serif">Regards</div><div class="gmail_default" style="font-family:verdana,sans-serif">Amiya</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, Aug 29, 2022 at 8:45 PM Thomas Monjalon <<a href="mailto:thomas@monjalon.net">thomas@monjalon.net</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Matan Azrad <<a href="mailto:matan@nvidia.com" target="_blank">matan@nvidia.com</a>><br>
<br>
The local variables become inconsistent in the data receiving routines<br>
after queue error recovery.<br>
The receive queue consumer index becomes incorrect and needs to be reset<br>
to the size of the queue (as the RQ was fully replenished in the recovery procedure).<br>
<br>
In the MPRQ case, the local consumed strd variable should also be reset.<br>
<br>
CVE-2022-28199<br>
Fixes: 88c0733535d6 ("net/mlx5: extend Rx completion with error handling")<br>
Cc: <a href="mailto:stable@dpdk.org" target="_blank">stable@dpdk.org</a><br>
<br>
Signed-off-by: Alexander Kozyrev <<a href="mailto:akozyrev@nvidia.com" target="_blank">akozyrev@nvidia.com</a>><br>
Signed-off-by: Matan Azrad <<a href="mailto:matan@nvidia.com" target="_blank">matan@nvidia.com</a>><br>
---<br>
<br>
Already applied in main branch as part of the public disclosure process.<br>
<br>
---<br>
drivers/net/mlx5/mlx5_rx.c | 34 ++++++++++++++++++++++++----------<br>
1 file changed, 24 insertions(+), 10 deletions(-)<br>
<br>
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c<br>
index bb3ccc36e5..917c517b83 100644<br>
--- a/drivers/net/mlx5/mlx5_rx.c<br>
+++ b/drivers/net/mlx5/mlx5_rx.c<br>
@@ -408,6 +408,11 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)<br>
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);<br>
}<br>
<br>
+/* Must be negative. */<br>
+#define MLX5_ERROR_CQE_RET (-1)<br>
+/* Must not be negative. */<br>
+#define MLX5_RECOVERY_ERROR_RET 0<br>
+<br>
/**<br>
* Handle a Rx error.<br>
* The function inserts the RQ state to reset when the first error CQE is<br>
@@ -422,7 +427,7 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)<br>
* 0 when called from non-vectorized Rx burst.<br>
*<br>
* @return<br>
- * -1 in case of recovery error, otherwise the CQE status.<br>
+ * MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.<br>
*/<br>
int<br>
mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)<br>
@@ -451,7 +456,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)<br>
sm.queue_id = rxq->idx;<br>
sm.state = IBV_WQS_RESET;<br>
if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))<br>
- return -1;<br>
+ return MLX5_RECOVERY_ERROR_RET;<br>
if (rxq_ctrl->dump_file_n <<br>
RXQ_PORT(rxq_ctrl)->config.max_dump_files_num) {<br>
MKSTR(err_str, "Unexpected CQE error syndrome "<br>
@@ -491,7 +496,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)<br>
sm.queue_id = rxq->idx;<br>
sm.state = IBV_WQS_RDY;<br>
if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))<br>
- return -1;<br>
+ return MLX5_RECOVERY_ERROR_RET;<br>
if (vec) {<br>
const uint32_t elts_n =<br>
mlx5_rxq_mprq_enabled(rxq) ?<br>
@@ -519,7 +524,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)<br>
rte_pktmbuf_free_seg<br>
(*elt);<br>
}<br>
- return -1;<br>
+ return MLX5_RECOVERY_ERROR_RET;<br>
}<br>
}<br>
for (i = 0; i < (int)elts_n; ++i) {<br>
@@ -538,7 +543,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)<br>
}<br>
return ret;<br>
default:<br>
- return -1;<br>
+ return MLX5_RECOVERY_ERROR_RET;<br>
}<br>
}<br>
<br>
@@ -556,7 +561,9 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)<br>
* written.<br>
*<br>
* @return<br>
- * 0 in case of empty CQE, otherwise the packet size in bytes.<br>
+ * 0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,<br>
+ * otherwise the packet size in regular RxQ, and striding byte<br>
+ * count format in mprq case.<br>
*/<br>
static inline int<br>
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,<br>
@@ -623,8 +630,8 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,<br>
rxq->err_state)) {<br>
ret = mlx5_rx_err_handle(rxq, 0);<br>
if (ret == MLX5_CQE_STATUS_HW_OWN ||<br>
- ret == -1)<br>
- return 0;<br>
+ ret == MLX5_RECOVERY_ERROR_RET)<br>
+ return MLX5_ERROR_CQE_RET;<br>
} else {<br>
return 0;<br>
}<br>
@@ -869,8 +876,10 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)<br>
if (!pkt) {<br>
cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];<br>
len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);<br>
- if (!len) {<br>
+ if (len <= 0) {<br>
rte_mbuf_raw_free(rep);<br>
+ if (unlikely(len == MLX5_ERROR_CQE_RET))<br>
+ rq_ci = rxq->rq_ci << sges_n;<br>
break;<br>
}<br>
pkt = seg;<br>
@@ -1093,8 +1102,13 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)<br>
}<br>
cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];<br>
ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);<br>
- if (!ret)<br>
+ if (ret == 0)<br>
break;<br>
+ if (unlikely(ret == MLX5_ERROR_CQE_RET)) {<br>
+ rq_ci = rxq->rq_ci;<br>
+ consumed_strd = rxq->consumed_strd;<br>
+ break;<br>
+ }<br>
byte_cnt = ret;<br>
len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;<br>
MLX5_ASSERT((int)len >= (rxq->crc_present << 2));<br>
-- <br>
2.36.1<br>
<br>
</blockquote></div>