[v2,1/3] x86: make AES/PCMULQDQ respectively prereqs of VAES/VPCMULQDQ

Message ID d17811b9-4b86-07db-534f-e0abc3ec2b1e@suse.com
State New
Headers
Series x86: AVX10.1 (alternative attempt) |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed

Commit Message

Jan Beulich Sept. 5, 2023, 7:51 a.m. UTC
  These probably should have been put in place already anyway, but they're
very much wanted in order to then put AVX10.1 support on top. Note that
to avoid reverse dependencies towards SSE (just like we already do for
AVX and XOP), add_isa_dependencies() needs some further tweaking.

While there also address a related anomaly: Disabling AES but neither
AVX nor VAES (similarly for {,V}PCLMULQDQ) would better keep the 128-bit
VEX-encoded forms available. Note that for this the VAES insns are moved
past the AVX+AES ones, to avoid the property-11 test suddenly failing.
The test really is wrong, but let's not also make things inconsistent:
Without the movement, YMM use would be correctly recorded for the
128-bit forms simply because the first template already matches, as long
as VAES wasn't disabled.  Yet it still wouldn't be if only AVX+AES were
enabled. Nor would behavior here then be the same as for VPCLMUL* insns.
---
v2: New.
  

Patch

--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -229,9 +229,9 @@  static const dependency isa_dependencies
   { "GFNI",
     "SSE2" },
   { "VAES",
-    "AVX2" },
+    "AVX2|AES" },
   { "VPCLMULQDQ",
-    "AVX2" },
+    "AVX2|PCLMULQDQ" },
   { "SEV_ES",
     "SVME" },
   { "SNP",
@@ -712,7 +712,8 @@  add_isa_dependencies (bitfield *flags, c
   unsigned int i;
   char *str = NULL;
   const char *isa = f;
-  bool is_isa = false, is_avx = false;
+  static bool is_avx;
+  bool is_isa = false, orig_is_avx = is_avx;
 
   /* Need to find base entry for references to auxiliary ones.  */
   if (strchr (f, ':'))
@@ -732,7 +733,7 @@  add_isa_dependencies (bitfield *flags, c
 	    && reverse > Cpu686)
 	  isa_reverse_deps[i][reverse] = 1;
 	is_isa = true;
-	if (i == CpuAVX || i == CpuXOP)
+	if (i == CpuAVX || i == CpuXOP || i == CpuVAES || i == CpuVPCLMULQDQ)
 	  is_avx = true;
 	break;
       }
@@ -740,7 +741,10 @@  add_isa_dependencies (bitfield *flags, c
 
   /* Do not turn off dependencies.  */
   if (is_isa && !value)
-    return;
+    {
+      is_avx = orig_is_avx;
+      return;
+    }
 
   for (i = 0; i < ARRAY_SIZE (isa_dependencies); ++i)
     if (strcasecmp (isa_dependencies[i].name, f) == 0)
@@ -765,11 +769,14 @@  add_isa_dependencies (bitfield *flags, c
 	if (reverse < ARRAY_SIZE (isa_reverse_deps[0]))
 	  isa_reverse_deps[reverse][reverse] = 1;
 
+	is_avx = orig_is_avx;
 	return;
       }
 
   if (!is_isa)
     fail ("unknown bitfield: %s\n", f);
+
+  is_avx = orig_is_avx;
 }
 
 static void
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1441,13 +1441,6 @@  aesenclast<aes>, 0x660f38dd, <aes:cpu>AE
 aesimc<aes>, 0x660f38db, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 aeskeygenassist<aes>, 0x660f3adf, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
 
-// VAES
-
-vaesdec, 0x66de, VAES, Modrm|Vex256|Space0F38|VexVVVV|VexWIG|NoSuf, { RegYMM|Unspecified|BaseIndex, RegYMM, RegYMM }
-vaesdeclast, 0x66df, VAES, Modrm|Vex256|Space0F38|VexVVVV|VexWIG|NoSuf, { RegYMM|Unspecified|BaseIndex, RegYMM, RegYMM }
-vaesenc, 0x66dc, VAES, Modrm|Vex256|Space0F38|VexVVVV|VexWIG|NoSuf, { RegYMM|Unspecified|BaseIndex, RegYMM, RegYMM }
-vaesenclast, 0x66dd, VAES, Modrm|Vex256|Space0F38|VexVVVV|VexWIG|NoSuf, { RegYMM|Unspecified|BaseIndex, RegYMM, RegYMM }
-
 // PCLMULQDQ
 
 <pclmul:cpu:attr, $avx:AVX|:Vex128|VexW0|SSE2AVX|VexVVVV, $sse::>
@@ -2065,13 +2058,22 @@  vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|
 
 // SM4 instructions end.
 
+// VAES
+
+vaesdec, 0x66de, VAES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vaesdeclast, 0x66df, VAES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vaesenc, 0x66dc, VAES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vaesenclast, 0x66dd, VAES, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+
+// VAES instructions end
+
 // VPCLMULQDQ instructions
 
-vpclmulqdq, 0x6644, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpclmullqlqdq, 0x6644/0x00, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf|ImmExt, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpclmulhqlqdq, 0x6644/0x01, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf|ImmExt, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpclmullqhqdq, 0x6644/0x10, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf|ImmExt, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf|ImmExt, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
+vpclmulqdq, 0x6644, VPCLMULQDQ, Modrm|Vex|Space0F3A|VexWIG|VexVVVV|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpclmullqlqdq, 0x6644/0x00, VPCLMULQDQ, Modrm|Vex|Space0F3A|VexWIG|VexVVVV|CheckOperandSize|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpclmulhqlqdq, 0x6644/0x01, VPCLMULQDQ, Modrm|Vex|Space0F3A|VexWIG|VexVVVV|CheckOperandSize|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpclmullqhqdq, 0x6644/0x10, VPCLMULQDQ, Modrm|Vex|Space0F3A|VexWIG|VexVVVV|CheckOperandSize|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ, Modrm|Vex|Space0F3A|VexWIG|VexVVVV|CheckOperandSize|NoSuf|ImmExt, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
 
 // VPCLMULQDQ instructions end