@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AVX10.1 instructions.
+
* Add support for Intel PBNDKB instructions.
* Add support for Intel SM4 instructions.
@@ -873,6 +873,13 @@ static enum
/* Value to encode in EVEX RC bits, for SAE-only instructions. */
static enum rc_type evexrcig = rne;
+/* Max vector length for AVX10 instructions. */
+static enum
+ {
+ vl512 = 0,
+ vl256
+ } avx10maxvl;
+
/* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
static symbolS *GOT_symbol;
@@ -1156,6 +1163,19 @@ static const arch_entry cpu_arch[] =
SUBARCH (sm3, SM3, ANY_SM3, false),
SUBARCH (sm4, SM4, ANY_SM4, false),
SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
+ SUBARCH (avx10.1, AVX512F, ANY_AVX512F, false),
+ SUBARCH (avx10.1, AVX512CD, ANY_AVX512CD, false),
+ SUBARCH (avx10.1, AVX512DQ, ANY_AVX512DQ, false),
+ SUBARCH (avx10.1, AVX512BW, ANY_AVX512BW, false),
+ SUBARCH (avx10.1, AVX512VL, ANY_AVX512VL, false),
+ SUBARCH (avx10.1, AVX512IFMA, ANY_AVX512IFMA, false),
+ SUBARCH (avx10.1, AVX512VBMI, ANY_AVX512VBMI, false),
+ SUBARCH (avx10.1, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
+ SUBARCH (avx10.1, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
+ SUBARCH (avx10.1, AVX512_VNNI, ANY_AVX512_VNNI, false),
+ SUBARCH (avx10.1, AVX512_BITALG, ANY_AVX512_BITALG, false),
+ SUBARCH (avx10.1, AVX512_BF16, ANY_AVX512_BF16, false),
+ SUBARCH (avx10.1, AVX512_FP16, ANY_AVX512_FP16, false),
};
#undef SUBARCH
@@ -1923,6 +1943,16 @@ cpu_flags_match (const insn_template *t)
&& (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
match |= CPU_FLAGS_ARCH_MATCH;
}
+ else if (x.bitfield.cpuavx512bw)
+ {
+ /* We need to eliminate 64 bit mask instructions when AVX10 max
+ vector length is not 512. */
+ if (avx10maxvl == vl512 || t->opcode_modifier.evex
+ || t->opcode_modifier.vexw != 2
+ || (t->opcode_modifier.opcodeprefix == 1
+ && t->opcode_space != 3))
+ match |= CPU_FLAGS_ARCH_MATCH;
+ }
else
match |= CPU_FLAGS_ARCH_MATCH;
}
@@ -2801,7 +2831,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
char *s;
int e = get_symbol_name (&s);
const char *string = s;
- unsigned int j = 0;
+ unsigned int j = 0, avx10_used = 0;
i386_cpu_flags flags;
if (strcmp (string, "default") == 0)
@@ -2941,7 +2971,12 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
if (!cpu_flags_equal (&flags, &cpu_arch_flags))
{
- extend_cpu_sub_arch_name (string + 1);
+ if (!avx10_used)
+ {
+ extend_cpu_sub_arch_name (string + 1);
+ if (strcmp (cpu_arch[j].name, "avx10.1") == 0)
+ avx10_used = 1;
+ }
cpu_arch_flags = flags;
cpu_arch_isa_flags = flags;
}
@@ -2949,12 +2984,22 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
cpu_arch_isa_flags
= cpu_flags_or (cpu_arch_isa_flags,
cpu_arch[j].enable);
- (void) restore_line_pointer (e);
- demand_empty_rest_of_line ();
- return;
+ if (!avx10_used)
+ {
+ (void) restore_line_pointer (e);
+ demand_empty_rest_of_line ();
+ return;
+ }
}
}
+ if (avx10_used)
+ {
+ (void) restore_line_pointer (e);
+ demand_empty_rest_of_line ();
+ return;
+ }
+
if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
{
/* Disable an ISA extension. */
@@ -13837,6 +13882,9 @@ static bool check_register (const reg_entry *r)
if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
return false;
+ if (avx10maxvl == vl256 && r->reg_type.bitfield.zmmword)
+ return false;
+
if (!cpu_arch_flags.bitfield.cpuavx512f)
{
if (r->reg_type.bitfield.zmmword
@@ -14159,6 +14207,7 @@ const char *md_shortopts = "qnO::";
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
+#define OPTION_MAVX10MAXVL (OPTION_MD_BASE + 35)
struct option md_longopts[] =
{
@@ -14195,6 +14244,7 @@ struct option md_longopts[] =
{"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
{"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
{"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
+ {"mavx10maxvl", required_argument, NULL, OPTION_MAVX10MAXVL},
{"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
{"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
{"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
@@ -14552,6 +14602,15 @@ md_parse_option (int c, const char *arg)
as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
break;
+ case OPTION_MAVX10MAXVL:
+ if (strcmp (arg, "256") == 0)
+ avx10maxvl = vl256;
+ else if (strcmp (arg, "512") == 0)
+ avx10maxvl = vl512;
+ else
+	as_fatal (_("invalid -mavx10maxvl= option: `%s'"), arg);
+ break;
+
case OPTION_MEVEXWIG:
if (strcmp (arg, "0") == 0)
evexwig = evexw0;
@@ -14940,6 +14999,9 @@ md_show_usage (FILE *stream)
encode EVEX instructions with specific EVEX.RC value\n\
for SAE-only ignored instructions\n"));
fprintf (stream, _("\
+ -mavx10maxvl=[256|512] (default: 512)\n\
+ max vector length AVX10 instructions can use\n"));
+ fprintf (stream, _("\
-mmnemonic=[att|intel] "));
if (SYSV386_COMPAT)
fprintf (stream, _("(default: att)\n"));
@@ -212,6 +212,7 @@ accept various extension mnemonics. For example,
@code{sm3},
@code{sm4},
@code{pbndkb},
+@code{avx10.1},
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
@@ -354,6 +355,16 @@ EVEX instructions with evex.w = 0, which is the default.
@option{-mevexwig=@var{1}} will encode WIG EVEX instructions with
evex.w = 1.
+@cindex @samp{-mavx10maxvl=} option, i386
+@cindex @samp{-mavx10maxvl=} option, x86-64
+@item -mavx10maxvl=@var{512}
+@itemx -mavx10maxvl=@var{256}
+These options control the max vector length the assembler should enable
+for AVX10 vector ISA set. @option{-mavx10maxvl=@var{512}} will enable up
+to 512 bit vector register and 64 bit mask register, which is the default.
+@option{-mavx10maxvl=@var{256}} will enable up to 256 bit vector
+register and 32 bit mask register.
+
@cindex @samp{-mmnemonic=} option, i386
@cindex @samp{-mmnemonic=} option, x86-64
@item -mmnemonic=@var{att}
@@ -1642,7 +1653,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.cmpccxadd} @tab @samp{.wrmsrns} @tab @samp{.msrlist}
@item @samp{.avx_ne_convert} @tab @samp{.rao_int} @tab @samp{.fred} @tab @samp{.lkgs}
@item @samp{.avx_vnni_int16} @tab @samp{.sha512} @tab @samp{.sm3} @tab @samp{.sm4}
-@item @samp{.pbndkb}
+@item @samp{.pbndkb} @tab @samp{.avx10.1}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
new file mode 100644
@@ -0,0 +1,6 @@
+.* Assembler messages:
+.*:7: Error: `vp2intersectq' is not supported on `x86_64.noavx512f.avx10.1'
+.*:8: Error: `vgatherpf0dpd' is not supported on `x86_64.noavx512f.avx10.1'
+.*:9: Error: `vrcp28ss' is not supported on `x86_64.noavx512f.avx10.1'
+.*:10: Error: `vp4dpwssd' is not supported on `x86_64.noavx512f.avx10.1'
+.*:11: Error: `v4fnmaddss' is not supported on `x86_64.noavx512f.avx10.1'
new file mode 100644
@@ -0,0 +1,11 @@
+# Check invalid AVX10.1 instructions
+
+ .text
+__start:
+ .arch .noavx512f
+ .arch .avx10.1
+ vp2intersectq %xmm1, %xmm2, %k3
+ vgatherpf0dpd 123(%ebp,%ymm7,8){%k1}
+ vrcp28ss %xmm4, %xmm5, %xmm6{%k7}
+ vp4dpwssd (%ecx), %zmm4, %zmm1
+ v4fnmaddss (%ecx), %xmm4, %xmm1
new file mode 100644
@@ -0,0 +1,54 @@
+#objdump: -dw
+#name: x86_64 AVX10.1 instructions
+#source: x86-64-avx10_1.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e1 ed 4a d9\s+kaddd %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c5 ed 4a d9\s+kaddb %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c5 ec 4a d9\s+kaddw %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c4 e1 ec 4a d9\s+kaddq %k1,%k2,%k3
+\s*[a-f0-9]+:\s*67 c5 f9 90 29\s+kmovb \(%ecx\),%k5
+\s*[a-f0-9]+:\s*67 c5 f9 91 ac f4 c0 1d fe ff\s+kmovb %k5,-0x1e240\(%esp,%esi,8\)
+\s*[a-f0-9]+:\s*67 c4 e1 f9 90 ac f4 c0 1d fe ff\s+kmovd -0x1e240\(%esp,%esi,8\),%k5
+\s*[a-f0-9]+:\s*c5 fb 92 ed\s+kmovd %ebp,%k5
+\s*[a-f0-9]+:\s*67 c5 f8 91 29\s+kmovw %k5,\(%ecx\)
+\s*[a-f0-9]+:\s*c5 f8 93 ed\s+kmovw %k5,%ebp
+\s*[a-f0-9]+:\s*62 f1 d5 0f 58 f4\s+vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 31\s+vaddpd \(%ecx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 30\s+vaddpd \(%eax\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 b2 00 08 00 00\s+vaddpd 0x800\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 b2 f0 f7 ff ff\s+vaddpd -0x810\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 b2 00 04 00 00\s+vaddpd 0x400\(%edx\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 b2 f8 fb ff ff\s+vaddpd -0x408\(%edx\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f1 d5 cf 58 f4\s+vaddpd %zmm4,%zmm5,%zmm6\{%k7\}\{z\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 b4 f4 c0 1d fe ff\s+vaddpd -0x1e240\(%esp,%esi,8\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 4f 58 b2 00 20 00 00\s+vaddpd 0x2000\(%edx\),%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 72 80\s+vaddpd -0x1000\(%edx\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 3f 58 72 7f\s+vaddpd 0x3f8\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 5f 58 b2 00 f8 ff ff\s+vaddpd -0x800\(%edx\)\{1to8\},%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f3 d5 0f ce f4 ab\s+vgf2p8affineqb \$0xab,%xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 2f ce b4 f4 c0 1d fe ff 7b\s+vgf2p8affineqb \$0x7b,-0x1e240\(%esp,%esi,8\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 3f ce 72 7f 7b\s+vgf2p8affineqb \$0x7b,0x3f8\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 0f cf 72 7f 7b\s+vgf2p8affineinvqb \$0x7b,0x7f0\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f3 d5 af cf f4 ab\s+vgf2p8affineinvqb \$0xab,%ymm4,%ymm5,%ymm6\{%k7\}\{z\}
+\s*[a-f0-9]+:\s*62 f2 55 4f cf f4\s+vgf2p8mulb %zmm4,%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f2 55 0f cf b4 f4 c0 1d fe ff\s+vgf2p8mulb -0x1e240\(%esp,%esi,8\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f2 55 4f cf b2 00 20 00 00\s+vgf2p8mulb 0x2000\(%edx\),%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 82 2d 20 dc f0\s+vaesenc %ymm24,%ymm26,%ymm22
+\s*[a-f0-9]+:\s*67 62 e2 05 08 de 84 f4 c0 1d fe ff\s+vaesdec -0x1e240\(%esp,%esi,8\),%xmm15,%xmm16
+\s*[a-f0-9]+:\s*62 02 2d 00 dd d8\s+vaesenclast %xmm24,%xmm26,%xmm27
+\s*[a-f0-9]+:\s*67 62 62 35 20 df 52 7f\s+vaesdeclast 0xfe0\(%edx\),%ymm25,%ymm26
+\s*[a-f0-9]+:\s*62 82 2d 40 de f0\s+vaesdec %zmm24,%zmm26,%zmm22
+\s*[a-f0-9]+:\s*67 62 62 2d 40 df 19\s+vaesdeclast \(%ecx\),%zmm26,%zmm27
+\s*[a-f0-9]+:\s*62 a3 4d 00 44 fe ab\s+vpclmulqdq \$0xab,%xmm22,%xmm22,%xmm23
+\s*[a-f0-9]+:\s*67 62 e3 4d 00 44 7a 7f 7b\s+vpclmulqdq \$0x7b,0x7f0\(%edx\),%xmm22,%xmm23
+\s*[a-f0-9]+:\s*67 62 73 7d 20 44 b4 f4 c0 1d fe ff 7b\s+vpclmulqdq \$0x7b,-0x1e240\(%esp,%esi,8\),%ymm16,%ymm14
+\s*[a-f0-9]+:\s*62 23 45 00 44 c6 11\s+vpclmulhqhqdq %xmm22,%xmm23,%xmm24
+\s*[a-f0-9]+:\s*62 c3 05 08 44 c6 10\s+vpclmullqhqdq %xmm14,%xmm15,%xmm16
+\s*[a-f0-9]+:\s*62 23 45 20 44 c6 01\s+vpclmulhqlqdq %ymm22,%ymm23,%ymm24
+\s*[a-f0-9]+:\s*62 c3 05 48 44 c6 00\s+vpclmullqlqdq %zmm14,%zmm15,%zmm16
+#pass
new file mode 100644
@@ -0,0 +1,51 @@
+# Check AVX10.1 instructions
+
+ .text
+_start:
+ .arch .noavx512f
+ .arch .avx10.1
+
+ kaddd %k1, %k2, %k3
+ kaddb %k1, %k2, %k3
+ kaddw %k1, %k2, %k3
+ kaddq %k1, %k2, %k3
+ kmovb (%ecx), %k5
+ kmovb %k5, -123456(%esp,%esi,8)
+ kmovd -123456(%esp,%esi,8), %k5
+ kmovd %ebp, %k5
+ kmovw %k5, (%ecx)
+ kmovw %k5, %ebp
+ vaddpd %xmm4, %xmm5, %xmm6{%k7}
+ vaddpd (%ecx), %xmm5, %xmm6{%k7}
+ vaddpd (%eax){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd 2048(%edx), %xmm5, %xmm6{%k7}
+ vaddpd -2064(%edx), %xmm5, %xmm6{%k7}
+ vaddpd 1024(%edx){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd -1032(%edx){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd %zmm4, %zmm5, %zmm6{%k7}{z}
+ vaddpd -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
+ vaddpd 8192(%edx), %zmm5, %zmm6{%k7}
+ vaddpd -4096(%edx), %ymm5, %ymm6{%k7}
+ vaddpd 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vaddpd -2048(%edx){1to8}, %zmm5, %zmm6{%k7}
+ vgf2p8affineqb $0xab, %xmm4, %xmm5, %xmm6{%k7}
+ vgf2p8affineqb $123, -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
+ vgf2p8affineqb $123, 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vgf2p8affineinvqb $123, 2032(%edx), %xmm5, %xmm6{%k7}
+ vgf2p8affineinvqb $0xab, %ymm4, %ymm5, %ymm6{%k7}{z}
+ vgf2p8mulb %zmm4, %zmm5, %zmm6{%k7}
+ vgf2p8mulb -123456(%esp,%esi,8), %xmm5, %xmm6{%k7}
+ vgf2p8mulb 8192(%edx), %zmm5, %zmm6{%k7}
+ vaesenc %ymm24, %ymm26, %ymm22
+ vaesdec -123456(%esp,%esi,8), %xmm15, %xmm16
+ vaesenclast %xmm24, %xmm26, %xmm27
+ vaesdeclast 4064(%edx), %ymm25, %ymm26
+ vaesdec %zmm24, %zmm26, %zmm22
+ vaesdeclast (%ecx), %zmm26, %zmm27
+ vpclmulqdq $0xab, %xmm22, %xmm22, %xmm23
+ vpclmulqdq $123, 2032(%edx), %xmm22, %xmm23
+ vpclmulqdq $123, -123456(%esp,%esi,8), %ymm16, %ymm14
+ vpclmulhqhqdq %xmm22, %xmm23, %xmm24
+ vpclmullqhqdq %xmm14, %xmm15, %xmm16
+ vpclmulhqlqdq %ymm22, %ymm23, %ymm24
+ vpclmullqlqdq %zmm14, %zmm15, %zmm16
new file mode 100644
@@ -0,0 +1,18 @@
+.* Assembler messages:
+.*:4: Error: `kaddq' is not supported on `x86_64'
+.*:5: Error: `kandq' is not supported on `x86_64'
+.*:6: Error: `kandnq' is not supported on `x86_64'
+.*:7: Error: `kmovq' is not supported on `x86_64'
+.*:8: Error: `kmovq' is not supported on `x86_64'
+.*:9: Error: `kmovq' is not supported on `x86_64'
+.*:10: Error: `kmovq' is not supported on `x86_64'
+.*:11: Error: `knotq' is not supported on `x86_64'
+.*:12: Error: `korq' is not supported on `x86_64'
+.*:13: Error: `kortestq' is not supported on `x86_64'
+.*:14: Error: `kshiftlq' is not supported on `x86_64'
+.*:15: Error: `kshiftrq' is not supported on `x86_64'
+.*:16: Error: `ktestq' is not supported on `x86_64'
+.*:17: Error: `kunpckdq' is not supported on `x86_64'
+.*:18: Error: `kxnorq' is not supported on `x86_64'
+.*:19: Error: `kxorq' is not supported on `x86_64'
+.*:20: Error: bad register name `%zmm4'
new file mode 100644
@@ -0,0 +1,20 @@
+# Check invalid AVX10.1 instructions
+ .text
+__start:
+ kaddq %k1, %k2, %k3
+ kandq %k1, %k2, %k3
+ kandnq %k1, %k2, %k3
+ kmovq (%rcx), %k1
+ kmovq %k1, (%rcx)
+ kmovq %rcx, %k1
+ kmovq %k1, %rcx
+ knotq %k1, %k2
+ korq %k1, %k2, %k3
+ kortestq %k1, %k2
+ kshiftlq $1, %k1, %k2
+ kshiftrq $1, %k1, %k2
+ ktestq %k1, %k2
+ kunpckdq %k1, %k2, %k3
+ kxnorq %k1, %k2, %k3
+ kxorq %k1, %k2, %k3
+ vaddpd %zmm4, %zmm5, %zmm6
@@ -450,6 +450,9 @@ run_dump_test "x86-64-sm4"
run_dump_test "x86-64-sm4-intel"
run_dump_test "x86-64-pbndkb"
run_dump_test "x86-64-pbndkb-intel"
+run_dump_test "x86-64-avx10_1"
+run_list_test "x86-64-avx10_1-inval"
+run_list_test "x86-64-mavx10maxvl256-inval" "-mavx10maxvl=256"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"