[PATCH 2/2] net/crc: reduce usage of static arrays in net_crc_sse.c

Shreesh Adiga 16567adigashreesh at gmail.com
Sat Nov 15 11:09:59 CET 2025


This patch combines the crc_xmm_shift_tab and shf_table arrays
into a single 32-byte array that serves both the left-shift and the
partial_bytes shuffling use cases.
The first 16 bytes are now 0xff..0xf0, which behave identically to
0xff..0xff when used as PSHUFB control bytes: PSHUFB zeroes the
destination byte whenever the control byte has its MSB set,
regardless of the low bits.
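
As a standalone illustration (not part of the patch; compile with
e.g. gcc -mssse3), any PSHUFB control byte with its MSB set zeroes
the destination byte, so 0xf0 and 0xff act the same:

#include <stdio.h>
#include <tmmintrin.h>

int main(void)
{
	__m128i src = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
				    9, 10, 11, 12, 13, 14, 15, 16);
	/* Both controls have the MSB set in every byte. */
	__m128i r1 = _mm_shuffle_epi8(src, _mm_set1_epi8((char)0xff));
	__m128i r2 = _mm_shuffle_epi8(src, _mm_set1_epi8((char)0xf0));
	/* Both results are all zeroes, hence equal in every lane. */
	printf("equal: %d\n",
		_mm_movemask_epi8(_mm_cmpeq_epi8(r1, r2)) == 0xffff);
	return 0;
}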

Regarding the usage of the table in the partial_bytes section:
instead of XORing each byte with 0x80, we now XOR with 0xff, and the
entries of crc_xmm_shift_tab are sufficient for this purpose.
This allows us to remove the mask3 array of 0x80 bytes and replace
the load with _mm_set1_epi8(0xff), which compiles to the single
instruction "pcmpeqd XMM, XMM".
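
For reference, a minimal sketch of the all-ones idiom mentioned
above (illustration only, not part of the patch): compilers
typically materialize _mm_set1_epi8(0xff) by comparing a register
with itself, since every lane compares equal to itself:

#include <immintrin.h>

static inline __m128i all_ones(void)
{
	__m128i x = _mm_undefined_si128();
	return _mm_cmpeq_epi32(x, x);	/* pcmpeqd xmm, xmm */
}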

As an example, assuming the remainder length is 5, before this
commit we would have:
temp = {0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
        0x8d, 0x8e, 0x8f, 0x00, 0x01, 0x02, 0x03, 0x04}
After xor with 0x80:
temp = {0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
        0x0d, 0x0e, 0x0f, 0x80, 0x81, 0x82, 0x83, 0x84}

With the changes in this commit:
temp = {0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3,
        0xf2, 0xf1, 0xf0, 0x00, 0x01, 0x02, 0x03, 0x04}
After xor with 0xff:
temp = {0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
        0x0d, 0x0e, 0x0f, 0xff, 0xfe, 0xfd, 0xfc, 0xfb}

Both of the above vector pairs behave the same when used as PSHUFB
control input: the MSB-set bytes differ in value, but PSHUFB zeroes
the destination byte for all of them alike. Likewise, PBLENDVB
inspects only the MSB of each mask byte, so the subsequent blend
selects the same lanes, thus achieving the intended functionality.
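
The equivalence can be checked exhaustively for all remainder
lengths with a small standalone program (assumption: illustration
only, not part of the patch; old_tab/new_tab copy the two tables,
and fold/last16 are arbitrary stand-ins for the values used in
net_crc_sse.c). Offset 0 is never used by the partial_bytes path,
since a zero remainder means n == data_len:

/* compile with e.g. gcc -mssse3 -msse4.1 */
#include <assert.h>
#include <stdint.h>
#include <immintrin.h>

static const uint8_t old_tab[32] = {	/* previous shf_table */
	0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};
static const uint8_t new_tab[32] = {	/* combined crc_xmm_shift_tab */
	0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8,
	0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};

int main(void)
{
	/* Arbitrary stand-ins for the fold register and last 16 bytes. */
	__m128i fold = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
				     9, 10, 11, 12, 13, 14, 15, 16);
	__m128i last16 = _mm_set1_epi8(0x55);

	for (int len = 1; len < 16; len++) {
		__m128i t_old = _mm_loadu_si128((const __m128i *)&old_tab[len]);
		__m128i t_new = _mm_loadu_si128((const __m128i *)&new_tab[len]);

		/* "a" path: both controls shuffle fold identically. */
		__m128i a_old = _mm_shuffle_epi8(fold, t_old);
		__m128i a_new = _mm_shuffle_epi8(fold, t_new);
		assert(_mm_movemask_epi8(_mm_cmpeq_epi8(a_old, a_new)) == 0xffff);

		/* "b" path: XOR with the respective mask, shuffle, blend. */
		t_old = _mm_xor_si128(t_old, _mm_set1_epi8((char)0x80));
		t_new = _mm_xor_si128(t_new, _mm_set1_epi8((char)0xff));
		__m128i b_old = _mm_blendv_epi8(_mm_shuffle_epi8(fold, t_old),
						last16, t_old);
		__m128i b_new = _mm_blendv_epi8(_mm_shuffle_epi8(fold, t_new),
						last16, t_new);
		assert(_mm_movemask_epi8(_mm_cmpeq_epi8(b_old, b_new)) == 0xffff);
	}
	return 0;
}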

Signed-off-by: Shreesh Adiga <16567adigashreesh at gmail.com>
---
Changes since v2:
Removed usage of the mask3 array and changed the table entries
to be XORed with 0xff instead of 0x80.

 lib/net/net_crc_sse.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/lib/net/net_crc_sse.c b/lib/net/net_crc_sse.c
index 94d847b301..3b6fbfecac 100644
--- a/lib/net/net_crc_sse.c
+++ b/lib/net/net_crc_sse.c
@@ -109,13 +109,11 @@ crcr32_reduce_64_to_32(__m128i data64, __m128i precomp)
 	return _mm_extract_epi32(tmp2, 2);
 }

-static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
-	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+static const alignas(16) uint8_t crc_xmm_shift_tab[32] = {
+	0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8,
+	0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0,
 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
 };

 /**
@@ -203,27 +201,15 @@ crc32_eth_calc_pclmulqdq(
 partial_bytes:
 	if (likely(n < data_len)) {

-		const alignas(16) uint32_t mask3[4] = {
-			0x80808080, 0x80808080, 0x80808080, 0x80808080
-		};
-
-		const alignas(16) uint8_t shf_table[32] = {
-			0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-			0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-		};
-
 		__m128i last16, a, b;

 		last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]);

 		temp = _mm_loadu_si128((const __m128i *)
-			&shf_table[data_len & 15]);
+			&crc_xmm_shift_tab[data_len & 15]);
 		a = _mm_shuffle_epi8(fold, temp);

-		temp = _mm_xor_si128(temp,
-			_mm_load_si128((const __m128i *)mask3));
+		temp = _mm_xor_si128(temp, _mm_set1_epi8(0xff));
 		b = _mm_shuffle_epi8(fold, temp);
 		b = _mm_blendv_epi8(b, last16, temp);

--
2.51.0


