<div dir="ltr"><div>Thanks for your response. It's my fault; I got mixed up:</div><div>this problem can only be reproduced after applying <a href="https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/" rel="noreferrer" target="_blank">https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/</a>.</div><div>I'll reorganize this into the previous patch.</div><div>So sorry for wasting your time. </div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">Stephen Hemminger <<a href="mailto:stephen@networkplumber.org">stephen@networkplumber.org</a>> 于2023年10月26日周四 00:04写道:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">On Mon, 23 Oct 2023 17:07:21 +0800<br>
Fengnan Chang <<a href="mailto:changfengnan@bytedance.com" target="_blank">changfengnan@bytedance.com</a>> wrote:<br>
<br>
> Dmitry Kozlyuk <<a href="mailto:dmitry.kozliuk@gmail.com" target="_blank">dmitry.kozliuk@gmail.com</a>> 于2023年10月23日周一 04:22写道:<br>
> ><br>
> > 2023-09-22 16:12 (UTC+0800), Fengnan Chang: <br>
> > > ping<br>
> > ><br>
> > > Fengnan Chang <<a href="mailto:changfengnan@bytedance.com" target="_blank">changfengnan@bytedance.com</a>> 于2023年9月12日周二 17:05写道: <br>
> > > ><br>
> > > > Let's look at this path:<br>
> > > > malloc_elem_free <br>
> > > > ->malloc_elem_join_adjacent_free<br>
> > > > ->join_elem(elem, elem->next) <br>
> > > ><br>
> > > > 0. cur elem's pad > 0<br>
> > > > 1. data area memset in malloc_elem_free first.<br>
> > > > 2. next elem is free, try to join cur elem and next.<br>
> > > > 3. in join_elem, try to modify inner->size; this address was already<br>
> > > > memset in step 1, so it causes the content of that address to become non-zero.<br>
> > > ><br>
> > > > If user call rte_zmalloc, and pick this elem, it can't get all<br>
> > > > zero'd memory. <br>
> ><br>
> > malloc_elem_join_adjacent_free() always calls memset() after join_elem(),<br>
> > for the next and the previous element respectively. <br>
> When join_elem() is called for the next element in<br>
> malloc_elem_join_adjacent_free(),<br>
> the memset clears the *next* element, but join_elem() updates the<br>
> *current* element's<br>
> content, which shouldn't happen; they are two different elements.<br>
> <br>
> > How to reproduce this bug? <br>
> when I test this patch,<br>
> <a href="https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/" rel="noreferrer" target="_blank">https://patches.dpdk.org/project/dpdk/patch/20230831111937.60975-1-changfengnan@bytedance.com/</a><br>
> I have a test case that allocates and frees 64/128/192-byte objects with 16 threads;<br>
> after every<br>
> alloc I check whether all the content is 0 or not.<br>
> It's not easy to reproduce; you can give it a try, but it's easier to find<br>
> this problem at the code level.<br>
<br>
I tried to make a test that would reproduce the problem but it did not.<br>
<br>
diff --git a/app/test/test_malloc.c b/app/test/test_malloc.c<br>
index cd579c503cf5..cfd45d6a28eb 100644<br>
--- a/app/test/test_malloc.c<br>
+++ b/app/test/test_malloc.c<br>
@@ -28,6 +28,7 @@<br>
#include <rte_string_fns.h><br>
<br>
#define N 10000<br>
+#define BINS 100<br>
<br>
static int<br>
is_mem_on_socket(int32_t socket);<br>
@@ -69,13 +70,24 @@ is_aligned(void *p, int align)<br>
return 1;<br>
}<br>
<br>
+static bool is_all_zero(uint8_t *mem, size_t sz)<br>
+{<br>
+ size_t i;<br>
+<br>
+ for (i = 0; i < sz; i++)<br>
+ if (mem[i] != 0)<br>
+ return false;<br>
+<br>
+ return true;<br>
+}<br>
+<br>
static int<br>
test_align_overlap_per_lcore(__rte_unused void *arg)<br>
{<br>
const unsigned align1 = 8,<br>
align2 = 64,<br>
align3 = 2048;<br>
- unsigned i,j;<br>
+ unsigned int i;<br>
void *p1 = NULL, *p2 = NULL, *p3 = NULL;<br>
int ret = 0;<br>
<br>
@@ -86,11 +98,12 @@ test_align_overlap_per_lcore(__rte_unused void *arg)<br>
ret = -1;<br>
break;<br>
}<br>
- for(j = 0; j < 1000 ; j++) {<br>
- if( *(char *)p1 != 0) {<br>
- printf("rte_zmalloc didn't zero the allocated memory\n");<br>
- ret = -1;<br>
- }<br>
+<br>
+ if (!is_all_zero(p1, 1000)) {<br>
+ printf("rte_zmalloc didn't zero the allocated memory\n");<br>
+ ret = -1;<br>
+ rte_free(p1);<br>
+ break;<br>
}<br>
p2 = rte_malloc("dummy", 1000, align2);<br>
if (!p2){<br>
@@ -140,6 +153,66 @@ test_align_overlap_per_lcore(__rte_unused void *arg)<br>
return ret;<br>
}<br>
<br>
+/*<br>
+ * Allocate random size chunks and make sure that they are<br>
+ * always zero.<br>
+ */<br>
+static int<br>
+test_zmalloc(__rte_unused void *arg)<br>
+{<br>
+ unsigned int i, n;<br>
+ void *slots[BINS] = { };<br>
+ void *p1;<br>
+ size_t sz;<br>
+<br>
+ /* Allocate many variable size chunks */<br>
+ for (i = 0; i < BINS; i++) {<br>
+ sz = rte_rand_max(1024) + 1;<br>
+ p1 = rte_zmalloc("slots", sz, 0);<br>
+ if (p1 == NULL) {<br>
+ printf("rte_zmalloc(%zu) returned NULL (i=%u)\n", sz, i);<br>
+ goto fail;<br>
+ }<br>
+ slots[i] = p1;<br>
+ if (!is_all_zero(p1, sz))<br>
+ goto fail;<br>
+ }<br>
+<br>
+ /* Drop one chunk per iteration */<br>
+ for (n = BINS; n > 0; n--) {<br>
+ /* Swap in a new block into a slot */<br>
+ for (i = 0; i < N; i++) {<br>
+ unsigned int bin = rte_rand_max(n);<br>
+<br>
+ sz = rte_rand_max(1024) + 1;<br>
+ p1 = rte_zmalloc("swap", sz, 0);<br>
+ if (!p1){<br>
+ printf("rte_zmalloc(%zu) returned NULL (i=%u)\n", sz, i);<br>
+ goto fail;<br>
+ }<br>
+<br>
+ if (!is_all_zero(p1, sz)) {<br>
+ printf("rte_zmalloc didn't zero the allocated memory\n");<br>
+ goto fail;<br>
+ }<br>
+<br>
+ rte_free(slots[bin]);<br>
+ slots[bin] = p1;<br>
+ }<br>
+<br>
+ /* Drop last bin */<br>
+ rte_free(slots[n]);<br>
+ slots[n] = NULL;<br>
+ }<br>
+<br>
+ return 0;<br>
+fail:<br>
+ for (i = 0; i < BINS; i++)<br>
+ rte_free(slots[i]);<br>
+<br>
+ return -1;<br>
+}<br>
+<br>
static int<br>
test_reordered_free_per_lcore(__rte_unused void *arg)<br>
{<br>
@@ -1020,6 +1091,21 @@ test_malloc(void)<br>
}<br>
else printf("test_realloc() passed\n");<br>
<br>
+ /*----------------------------*/<br>
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {<br>
+ rte_eal_remote_launch(test_zmalloc, NULL, lcore_id);<br>
+ }<br>
+<br>
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {<br>
+ if (rte_eal_wait_lcore(lcore_id) < 0)<br>
+ ret = -1;<br>
+ }<br>
+ if (ret < 0){<br>
+ printf("test_zmalloc() failed\n");<br>
+ return ret;<br>
+ }<br>
+ else printf("test_zmalloc() passed\n");<br>
+<br>
/*----------------------------*/<br>
RTE_LCORE_FOREACH_WORKER(lcore_id) {<br>
rte_eal_remote_launch(test_align_overlap_per_lcore, NULL, lcore_id);<br>
</blockquote></div></div>