[dpdk-dev] [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code

Neil Horman nhorman at tuxdriver.com
Fri Mar 21 15:49:00 CET 2014


From: "H. Peter Anvin" <hpa at linux.intel.com>

Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was also being used on x86-64,
where it saves and restores only the low 32 bits of the register.

However, the code is also far more complex than it needs to be.  For
one thing, the CPUID instruction only takes %eax (leaf) and %ecx
(subleaf) as inputs, and since we are testing individual bits, we might
as well list the bit numbers explicitly.  Furthermore, we can return
all four output registers in an array and index it, rather than
selecting one with a switch statement on a structure field.

Reported-by: Neil Horman <nhorman at tuxdriver.com>
Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
Tested-by: Neil Horman <nhorman at tuxdriver.com>

---
Change notes:
v2) Corrected build errors
    Fixed cpuid_register_t reference passing
    Fixed typedef name typo
---
 lib/librte_eal/common/eal_common_cpuflags.c | 274 +++++++++++++---------------
 1 file changed, 123 insertions(+), 151 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..438d9c5 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -54,21 +54,12 @@
  */
 enum cpu_register_t {
 	REG_EAX = 0,
-	REG_EBX,
 	REG_ECX,
 	REG_EDX,
+	REG_EBX,
 };
 
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
-	uint32_t eax;
-	uint32_t ebx;
-	uint32_t ecx;
-	uint32_t edx;
-	enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
 
 #define CPU_FLAG_NAME_MAX_LEN 64
 
@@ -78,8 +69,10 @@ struct cpuid_parameters_t {
 struct feature_entry {
 	enum rte_cpu_flag_t feature;            /**< feature name */
 	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
-	struct cpuid_parameters_t params;       /**< cpuid parameters */
-	uint32_t feature_mask;                  /**< bitmask for feature */
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
 };
 
 #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
@@ -88,97 +81,97 @@ struct feature_entry {
  * An array that holds feature entries
  */
 static const struct feature_entry cpu_feature_table[] = {
-	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
-	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
-	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
-	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
-	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
-	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
-	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
-	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
-	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
-	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
-	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
-	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
-	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
-	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
-	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
-	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
-	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
-	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
-	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
-	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
-	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
-	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
-	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
-	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
-	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
-	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
-	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
-
-	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
-	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
-	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
-	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
-	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
-	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
-	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
-	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
-	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
-	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
-	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
-	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
-	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
-	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
-	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
-	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
-	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
-	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
-	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
-	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
-	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
-	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
-	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
-	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
-	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
-	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
-
-	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
-	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
-	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
-	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
-	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
-	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
-
-	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
-
-	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
-	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
-	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
-	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
-	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
-	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
-	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
-	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
-	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
-
-	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
-
-	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
-	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
-
-	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
+	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
+	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
+	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
+	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
+	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
+	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
+	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
+	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
+	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
+	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
+	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
+	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
+	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
+	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
+	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
+	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
+	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
+	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
+	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
+	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
+	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
+	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
+	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
+	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
+	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
+	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
+	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
+	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
+
+	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
+	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
+	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
+	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
+	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
+	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
+	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
+	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
+	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
+	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
+	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
+	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
+	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
+	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
+	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
+	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
+	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
+	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
+	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
+	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
+	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
+	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
+	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
+	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
+	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
+	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
+	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
+	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
+	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
+
+	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
+	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
+	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
+	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
+	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
+	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
+
+	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
+	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
+	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
+
+	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
+	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
+	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
+	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
+	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
+	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
+	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
+	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
+	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
+
+	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
+	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
+
+	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
+	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
+	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
+	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
+	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
+
+	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
 };
 
 /*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
  * This function, when compiled with GCC, will generate architecture-neutral
  * code, as per GCC manual.
  */
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
 {
-	int eax, ebx, ecx, edx;            /* registers */
-
-#ifndef __PIC__
-   asm volatile ("cpuid"
-                 /* output */
-                 : "=a" (eax),
-                   "=b" (ebx),
-                   "=c" (ecx),
-                   "=d" (edx)
-                 /* input */
-                 : "a" (params.eax),
-                   "b" (params.ebx),
-                   "c" (params.ecx),
-                   "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[REG_EBX]),
+		   "=a" (out[REG_EAX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
 #else
-	asm volatile ( 
-            "mov %%ebx, %%edi\n"
-            "cpuid\n"
-            "xchgl %%ebx, %%edi;\n"
-            : "=a" (eax),
-              "=D" (ebx),
-              "=c" (ecx),
-              "=d" (edx)
-            /* input */
-            : "a" (params.eax),
-              "D" (params.ebx),
-              "c" (params.ecx),
-              "d" (params.edx));
-#endif
 
-	switch (params.return_register) {
-	case REG_EAX:
-		return eax;
-	case REG_EBX:
-		return ebx;
-	case REG_ECX:
-		return ecx;
-	case REG_EDX:
-		return edx;
-	default:
-		return 0;
-	}
+    asm volatile("cpuid"
+		 : "=a" (out[REG_EAX]),
+		   "=b" (out[REG_EBX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
+
+#endif
 }
 
 /*
@@ -240,17 +209,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
 {
-	int value;
+	const struct feature_entry *feat;
+	cpuid_registers_t regs;
 
 	if (feature >= RTE_CPUFLAG_NUMFLAGS)
 		/* Flag does not match anything in the feature tables */
 		return -ENOENT;
 
-	/* get value of the register containing the desired feature */
-	value = rte_cpu_get_features(cpu_feature_table[feature].params);
+	feat = &cpu_feature_table[feature];
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
 
 	/* check if the feature is enabled */
-	return (cpu_feature_table[feature].feature_mask & value) > 0;
+	return (regs[feat->reg] >> feat->bit) & 1;
 }
 
 /**
@@ -273,7 +245,7 @@ rte_cpu_check_supported(void)
 	unsigned i;
 
 	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
-		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 			        "Please check that RTE_MACHINE is set correctly.\n",
-- 
1.8.3.1



More information about the dev mailing list