@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AVX10.1.
+
* Add support for Intel PBNDKB instructions.
* Add support for Intel SM4 instructions.
@@ -213,6 +213,9 @@ accept various extension mnemonics. For
@code{sm4},
@code{pbndkb},
@code{avx10.1},
+@code{avx10.1/512},
+@code{avx10.1/256},
+@code{avx10.1/128},
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
@@ -267,7 +270,11 @@ accept various extension mnemonics. For
@code{svme} and
@code{padlock}.
Note that these extension mnemonics can be prefixed with @code{no} to revoke
-the respective (and any dependent) functionality.
+the respective (and any dependent) functionality. Note further that the
+suffixes permitted on @code{-march=avx10.<N>} enforce a vector length
+restriction, i.e. despite these otherwise being ``enabling'' options, using
+these suffixes will disable all insns with wider vector or mask register
+operands.
When the @code{.arch} directive is used with @option{-march}, the
@code{.arch} directive will take precedent.
@@ -1673,6 +1680,12 @@ an unconditional jump to the target.
Note that the sub-architecture specifiers (starting with a dot) can be prefixed
with @code{no} to revoke the respective (and any dependent) functionality.
+Note further that @samp{.avx10.<N>} can be suffixed with a vector length
+restriction (@samp{/256} or @samp{/128}, with @samp{/512} simply restoring the
+default). Despite these otherwise being ``enabling'' specifiers, using these
+suffixes will disable all insns with wider vector or mask register operands.
+On SVR4-derived platforms, the separator character @samp{/} can be replaced by
+@samp{:}.
Following the CPU architecture (but not a sub-architecture, which are those
starting with a dot), you may specify @samp{jumps} or @samp{nojumps} to
@@ -131,6 +131,7 @@ typedef struct
unsigned int len:8; /* arch string length */
bool skip:1; /* show_arch should skip this. */
enum processor_type type; /* arch type */
+ enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
i386_cpu_flags enable; /* cpu feature enable flags */
i386_cpu_flags disable; /* cpu feature disable flags */
}
@@ -841,6 +842,10 @@ static unsigned int sse2avx;
/* Encode aligned vector move as unaligned vector move. */
static unsigned int use_unaligned_vector_move;
+/* Maximum permitted vector size. */
+#define VSZ_DEFAULT VSZ512
+static unsigned int vector_size = VSZ_DEFAULT;
+
/* Encode scalar AVX instructions with specific vector length. */
static enum
{
@@ -969,11 +974,14 @@ const relax_typeS md_relax_table[] =
};
#define ARCH(n, t, f, s) \
- { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
+ { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
- { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
+ { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
CPU_ ## d ## _FLAGS }
+#define VECARCH(n, e, d, v) \
+ { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
+ CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
static const arch_entry cpu_arch[] =
{
@@ -1035,15 +1043,15 @@ static const arch_entry cpu_arch[] =
SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
- SUBARCH (avx, AVX, ANY_AVX, false),
- SUBARCH (avx2, AVX2, ANY_AVX2, false),
- SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
- SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
- SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
- SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
- SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
- SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
- SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
+ VECARCH (avx, AVX, ANY_AVX, reset),
+ VECARCH (avx2, AVX2, ANY_AVX2, reset),
+ VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
+ VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
+ VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
+ VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
+ VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
+ VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
+ VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
SUBARCH (monitor, MONITOR, MONITOR, false),
SUBARCH (vmx, VMX, ANY_VMX, false),
SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
@@ -1095,15 +1103,15 @@ static const arch_entry cpu_arch[] =
SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
SUBARCH (se1, SE1, SE1, false),
SUBARCH (clwb, CLWB, CLWB, false),
- SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
- SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
- SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
- SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
- SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
- SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
- SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
- SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
- SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
+ VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
+ VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
+ VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
+ VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
+ VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
+ VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
+ VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
+ VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
+ VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
SUBARCH (clzero, CLZERO, CLZERO, false),
SUBARCH (mwaitx, MWAITX, MWAITX, false),
SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
@@ -1112,8 +1120,8 @@ static const arch_entry cpu_arch[] =
SUBARCH (ibt, IBT, IBT, false),
SUBARCH (shstk, SHSTK, SHSTK, false),
SUBARCH (gfni, GFNI, ANY_GFNI, false),
- SUBARCH (vaes, VAES, ANY_VAES, false),
- SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
+ VECARCH (vaes, VAES, ANY_VAES, reset),
+ VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
SUBARCH (pconfig, PCONFIG, PCONFIG, false),
SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
@@ -1125,9 +1133,9 @@ static const arch_entry cpu_arch[] =
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
- SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
- SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
- ANY_AVX512_VP2INTERSECT, false),
+ VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
+ VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
+ ANY_AVX512_VP2INTERSECT, reset),
SUBARCH (tdx, TDX, TDX, false),
SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
@@ -1139,24 +1147,24 @@ static const arch_entry cpu_arch[] =
SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
SUBARCH (uintr, UINTR, UINTR, false),
SUBARCH (hreset, HRESET, HRESET, false),
- SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
+ VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
- SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
- SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
+ VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
+ VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
SUBARCH (msrlist, MSRLIST, MSRLIST, false),
- SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
+ VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
SUBARCH (rao_int, RAO_INT, RAO_INT, false),
SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
SUBARCH (fred, FRED, ANY_FRED, false),
SUBARCH (lkgs, LKGS, ANY_LKGS, false),
- SUBARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, false),
- SUBARCH (sha512, SHA512, ANY_SHA512, false),
- SUBARCH (sm3, SM3, ANY_SM3, false),
- SUBARCH (sm4, SM4, ANY_SM4, false),
+ VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
+ VECARCH (sha512, SHA512, ANY_SHA512, reset),
+ VECARCH (sm3, SM3, ANY_SM3, reset),
+ VECARCH (sm4, SM4, ANY_SM4, reset),
SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
- SUBARCH (avx10.1, AVX10_1, ANY_AVX512F, false),
+ VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
};
#undef SUBARCH
@@ -2790,6 +2798,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
i386_cpu_flags isa_flags;
enum processor_type isa;
enum flag_code flag_code;
+ unsigned int vector_size;
char stackop_size;
bool no_cond_jump_promotion;
} arch_stack_entry;
@@ -2825,6 +2834,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
top->isa = cpu_arch_isa;
top->isa_flags = cpu_arch_isa_flags;
top->flag_code = flag_code;
+ top->vector_size = vector_size;
top->stackop_size = stackop_size;
top->no_cond_jump_promotion = no_cond_jump_promotion;
@@ -2865,6 +2875,7 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
cpu_arch_flags = top->flags;
cpu_arch_isa = top->isa;
cpu_arch_isa_flags = top->isa_flags;
+ vector_size = top->vector_size;
no_cond_jump_promotion = top->no_cond_jump_promotion;
XDELETE (top);
@@ -2905,6 +2916,8 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
cpu_arch_tune_flags = cpu_arch_isa_flags;
}
+ vector_size = VSZ_DEFAULT;
+
j = ARRAY_SIZE (cpu_arch) + 1;
}
}
@@ -2939,6 +2952,9 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
cpu_arch_tune = cpu_arch_isa;
cpu_arch_tune_flags = cpu_arch_isa_flags;
}
+
+ vector_size = VSZ_DEFAULT;
+
pre_386_16bit_warned = false;
break;
}
@@ -2959,6 +2975,38 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
= cpu_flags_or (cpu_arch_isa_flags, cpu_arch[j].enable);
(void) restore_line_pointer (e);
+
+ switch (cpu_arch[j].vsz)
+ {
+ default:
+ break;
+
+ case vsz_set:
+#ifdef SVR4_COMMENT_CHARS
+ if (*input_line_pointer == ':' || *input_line_pointer == '/')
+#else
+ if (*input_line_pointer == '/')
+#endif
+ {
+ ++input_line_pointer;
+ switch (get_absolute_expression ())
+ {
+ case 512: vector_size = VSZ512; break;
+ case 256: vector_size = VSZ256; break;
+ case 128: vector_size = VSZ128; break;
+ default:
+ as_bad (_("Unrecognized vector size specifier"));
+ ignore_rest_of_line ();
+ return;
+ }
+ break;
+ }
+ /* Fall through. */
+ case vsz_reset:
+ vector_size = VSZ_DEFAULT;
+ break;
+ }
+
demand_empty_rest_of_line ();
return;
}
@@ -2979,6 +3027,9 @@ set_cpu_arch (int dummy ATTRIBUTE_UNUSED
cpu_arch_isa_flags = flags;
}
+ if (cpu_arch[j].vsz == vsz_set)
+ vector_size = VSZ_DEFAULT;
+
(void) restore_line_pointer (e);
demand_empty_rest_of_line ();
return;
@@ -5431,9 +5482,11 @@ md_assemble (char *line)
case RegSIMD:
if (i.tm.operand_types[j].bitfield.tmmword)
i.xstate |= xstate_tmm;
- else if (i.tm.operand_types[j].bitfield.zmmword)
+ else if (i.tm.operand_types[j].bitfield.zmmword
+ && vector_size >= VSZ512)
i.xstate |= xstate_zmm;
- else if (i.tm.operand_types[j].bitfield.ymmword)
+ else if (i.tm.operand_types[j].bitfield.ymmword
+ && vector_size >= VSZ256)
i.xstate |= xstate_ymm;
else if (i.tm.operand_types[j].bitfield.xmmword)
i.xstate |= xstate_xmm;
@@ -6606,9 +6659,13 @@ check_VecOperands (const insn_template *
type.bitfield.xmmword = 1;
break;
case 32:
+ if (vector_size < VSZ256)
+ goto bad_broadcast;
type.bitfield.ymmword = 1;
break;
case 64:
+ if (vector_size < VSZ512)
+ goto bad_broadcast;
type.bitfield.zmmword = 1;
break;
default:
@@ -6820,6 +6877,19 @@ VEX_check_encoding (const insn_template
return 1;
}
+ /* Vector size restrictions. */
+ if ((vector_size < VSZ512
+ && (t->opcode_modifier.evex == EVEX512
+ || t->opcode_modifier.vsz >= VSZ512))
+ || (vector_size < VSZ256
+ && (t->opcode_modifier.evex == EVEX256
+ || t->opcode_modifier.vex == VEX256
+ || t->opcode_modifier.vsz >= VSZ256)))
+ {
+ i.error = unsupported;
+ return 1;
+ }
+
if (i.vec_encoding == vex_encoding_evex)
{
/* This instruction must be encoded with EVEX prefix. */
@@ -7608,8 +7678,27 @@ process_suffix (void)
for (op = 0; op < i.tm.operands; ++op)
{
- if (is_evex_encoding (&i.tm)
- && !cpu_arch_flags.bitfield.cpuavx512vl)
+ if (vector_size < VSZ512)
+ {
+ i.tm.operand_types[op].bitfield.zmmword = 0;
+ if (vector_size < VSZ256)
+ {
+ i.tm.operand_types[op].bitfield.ymmword = 0;
+ if (i.tm.operand_types[op].bitfield.xmmword
+ && (i.tm.opcode_modifier.evex == EVEXDYN
+ || (!i.tm.opcode_modifier.evex
+ && is_evex_encoding (&i.tm))))
+ i.tm.opcode_modifier.evex = EVEX128;
+ }
+ else if (i.tm.operand_types[op].bitfield.ymmword
+ && !i.tm.operand_types[op].bitfield.xmmword
+ && (i.tm.opcode_modifier.evex == EVEXDYN
+ || (!i.tm.opcode_modifier.evex
+ && is_evex_encoding (&i.tm))))
+ i.tm.opcode_modifier.evex = EVEX256;
+ }
+ else if (is_evex_encoding (&i.tm)
+ && !cpu_arch_flags.bitfield.cpuavx512vl)
{
if (i.tm.operand_types[op].bitfield.ymmword)
i.tm.operand_types[op].bitfield.xmmword = 0;
@@ -13857,6 +13946,12 @@ static bool check_register (const reg_en
}
}
+ if (vector_size < VSZ512 && r->reg_type.bitfield.zmmword)
+ return false;
+
+ if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
+ return false;
+
if (r->reg_type.bitfield.tmmword
&& (!cpu_arch_flags.bitfield.cpuamx_tile
|| flag_code != CODE_64BIT))
@@ -14355,13 +14450,21 @@ md_parse_option (int c, const char *arg)
arch++;
do
{
+ char *vsz;
+
if (*arch == '.')
as_fatal (_("invalid -march= option: `%s'"), arg);
next = strchr (arch, '+');
if (next)
*next++ = '\0';
+ vsz = strchr (arch, '/');
+ if (vsz)
+ *vsz++ = '\0';
for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
{
+ if (vsz && cpu_arch[j].vsz != vsz_set)
+ continue;
+
if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
&& strcmp (arch, cpu_arch[j].name) == 0)
{
@@ -14380,6 +14483,7 @@ md_parse_option (int c, const char *arg)
cpu_arch_tune = cpu_arch_isa;
cpu_arch_tune_flags = cpu_arch_isa_flags;
}
+ vector_size = VSZ_DEFAULT;
break;
}
else if (cpu_arch[j].type == PROCESSOR_NONE
@@ -14402,6 +14506,37 @@ md_parse_option (int c, const char *arg)
cpu_arch_isa_flags
= cpu_flags_or (cpu_arch_isa_flags,
cpu_arch[j].enable);
+
+ switch (cpu_arch[j].vsz)
+ {
+ default:
+ break;
+
+ case vsz_set:
+ if (vsz)
+ {
+ char *end;
+ unsigned long val = strtoul (vsz, &end, 0);
+
+ if (*end)
+ val = 0;
+ switch (val)
+ {
+ case 512: vector_size = VSZ512; break;
+ case 256: vector_size = VSZ256; break;
+ case 128: vector_size = VSZ128; break;
+ default:
+ as_warn (_("Unrecognized vector size specifier ignored"));
+ break;
+ }
+ break;
+ }
+ /* Fall through. */
+ case vsz_reset:
+ vector_size = VSZ_DEFAULT;
+ break;
+ }
+
break;
}
}
@@ -14423,6 +14558,8 @@ md_parse_option (int c, const char *arg)
cpu_arch_flags = flags;
cpu_arch_isa_flags = flags;
}
+ if (cpu_arch[j].vsz == vsz_set)
+ vector_size = VSZ_DEFAULT;
break;
}
}
@@ -768,10 +768,20 @@ i386_intel_operand (char *operand_string
break;
case O_ymmword_ptr:
+ if (vector_size < VSZ256)
+ {
+ as_bad (_("256-bit operands disabled"));
+ return 0;
+ }
i.types[this_operand].bitfield.ymmword = 1;
break;
case O_zmmword_ptr:
+ if (vector_size < VSZ512)
+ {
+ as_bad (_("512-bit operands disabled"));
+ return 0;
+ }
i.types[this_operand].bitfield.zmmword = 1;
break;
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 (part 1)
#dump: avx512f_vl.d
@@ -1,3 +1,3 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.include "avx512f_vl.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 (part 2)
#dump: avx512bw_vl.d
@@ -1,3 +1,3 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.include "avx512bw_vl.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 (part 3)
#dump: avx512dq_vl.d
@@ -1,3 +1,3 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.include "avx512dq_vl.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 (part 4)
#dump: avx512_fp16_vl.d
@@ -1,3 +1,3 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.include "avx512_fp16_vl.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 (part 5)
@@ -1,5 +1,5 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.include "avx512bitalg_vl.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 + GFNI
#dump: avx512vl_gfni.d
@@ -1,4 +1,4 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.arch .gfni
.include "avx512vl_gfni.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 + VAES
#dump: avx512vl_vaes.d
@@ -1,4 +1,4 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.arch .vaes
.include "avx512vl_vaes.s"
@@ -1,4 +1,4 @@
-#as: -I${srcdir}/$subdir
+#as: --divide -I${srcdir}/$subdir
#objdump: -dw
#name: AVX10.1/256 + VPLCMULQDQ
#dump: avx512vl_vpclmulqdq.d
@@ -1,4 +1,4 @@
.arch generic32
- .arch .avx10.1
+ .arch .avx10.1/256
.arch .vpclmulqdq
.include "avx512vl_vpclmulqdq.s"
@@ -0,0 +1,294 @@
+.*: Assembler messages:
+.*:7: Warning: default
+.*:46: Error: ambiguous operand size for `vcvtpd2ph'
+.*:47: Error: ambiguous operand size for `vcvtpd2ps'
+.*:48: Error: ambiguous operand size for `vfpclassps'
+.*:7: Warning: \.avx10\.1/256
+.*:12: Error: unsupported instruction `kunpckdq'
+.*:16: Error: unsupported instruction `kaddq'
+.*:17: Error: unsupported instruction `kandq'
+.*:18: Error: unsupported instruction `kandnq'
+.*:19: Error: unsupported instruction `kmovq'
+.*:20: Error: unsupported instruction `kmovq'
+.*:21: Error: unsupported instruction `kmovq'
+.*:22: Error: unsupported instruction `knotq'
+.*:23: Error: unsupported instruction `korq'
+.*:24: Error: unsupported instruction `kortestq'
+.*:25: Error: unsupported instruction `kshiftlq'
+.*:26: Error: unsupported instruction `kshiftrq'
+.*:27: Error: unsupported instruction `ktestq'
+.*:28: Error: unsupported instruction `kxorq'
+.*:29: Error: unsupported instruction `kxnorq'
+.*:32: Error: unsupported instruction `vcvtpd2phz'
+.*:34: Error: unsupported broadcast for `vcvtpd2ph'
+.*:37: Error: unsupported instruction `vcvtpd2ps'
+.*:43: Error: unsupported instruction `vfpclasspsz'
+.*:46: Error: ambiguous operand size for `vcvtpd2ph'
+.*:47: Error: ambiguous operand size for `vcvtpd2ps'
+.*:48: Error: ambiguous operand size for `vfpclassps'
+.*:7: Warning: \.avx10\.1/128
+.*:10: Error: unsupported instruction `kmovd'
+.*:11: Error: unsupported instruction `kmovd'
+.*:12: Error: unsupported instruction `kunpckdq'
+.*:13: Error: unsupported instruction `kunpckwd'
+.*:16: Error: unsupported instruction `kaddd'
+.*:17: Error: unsupported instruction `kandd'
+.*:18: Error: unsupported instruction `kandnd'
+.*:19: Error: unsupported instruction `kmovd'
+.*:20: Error: unsupported instruction `kmovd'
+.*:21: Error: unsupported instruction `kmovd'
+.*:22: Error: unsupported instruction `knotd'
+.*:23: Error: unsupported instruction `kord'
+.*:24: Error: unsupported instruction `kortestd'
+.*:25: Error: unsupported instruction `kshiftld'
+.*:26: Error: unsupported instruction `kshiftrd'
+.*:27: Error: unsupported instruction `ktestd'
+.*:28: Error: unsupported instruction `kxord'
+.*:29: Error: unsupported instruction `kxnord'
+.*:16: Error: unsupported instruction `kaddq'
+.*:17: Error: unsupported instruction `kandq'
+.*:18: Error: unsupported instruction `kandnq'
+.*:19: Error: unsupported instruction `kmovq'
+.*:20: Error: unsupported instruction `kmovq'
+.*:21: Error: unsupported instruction `kmovq'
+.*:22: Error: unsupported instruction `knotq'
+.*:23: Error: unsupported instruction `korq'
+.*:24: Error: unsupported instruction `kortestq'
+.*:25: Error: unsupported instruction `kshiftlq'
+.*:26: Error: unsupported instruction `kshiftrq'
+.*:27: Error: unsupported instruction `ktestq'
+.*:28: Error: unsupported instruction `kxorq'
+.*:29: Error: unsupported instruction `kxnorq'
+.*:32: Error: unsupported instruction `vcvtpd2phz'
+.*:33: Error: unsupported instruction `vcvtpd2phy'
+.*:34: Error: unsupported broadcast for `vcvtpd2ph'
+.*:35: Error: unsupported broadcast for `vcvtpd2ph'
+.*:37: Error: .*
+.*:38: Error: unsupported instruction `vcvtpd2psy'
+.*:39: Error: unsupported instruction `vcvtpd2psy'
+.*:40: Error: unsupported broadcast for `vcvtpd2ps'
+.*:42: Error: unsupported instruction `vfpclasspsy'
+.*:43: Error: unsupported instruction `vfpclasspsz'
+.*:7: Warning: \.avx10\.1
+.*:46: Error: ambiguous operand size for `vcvtpd2ph'
+.*:47: Error: ambiguous operand size for `vcvtpd2ps'
+.*:48: Error: ambiguous operand size for `vfpclassps'
+#...
+[ ]*[0-9]+[ ]+> \.arch generic32
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+\?+ C5FB93D1[ ]+> kmovd %k1,%edx
+[ ]*[0-9]+[ ]+\?+ C5FB92D1[ ]+> kmovd %ecx,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1EC4B D9[ ]+> kunpckdq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C5EC4BD9[ ]+> kunpckwd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.irp sz,d,q
+[ ]*[0-9]+[ ]+\?+ C4E1ED4A D9[ ]+>> kaddd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED41 D9[ ]+>> kandd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED42 D9[ ]+>> kandnd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F990 D1[ ]+>> kmovd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F991 0A[ ]+>> kmovd %k1,\(%edx\)
+[ ]*[0-9]+[ ]+\?+ C4E1F990 11[ ]+>> kmovd \(%ecx\),%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F944 D1[ ]+>> knotd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1ED45 D9[ ]+>> kord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F998 D1[ ]+>> kortestd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E37933 DA01[ ]+>> kshiftld \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E37931 DA01[ ]+>> kshiftrd \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F999 D1[ ]+>> ktestd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1ED47 D9[ ]+>> kxord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED46 D9[ ]+>> kxnord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC4A D9[ ]+>> kaddq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC41 D9[ ]+>> kandq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC42 D9[ ]+>> kandnq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F890 D1[ ]+>> kmovq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F891 0A[ ]+>> kmovq %k1,\(%edx\)
+[ ]*[0-9]+[ ]+\?+ C4E1F890 11[ ]+>> kmovq \(%ecx\),%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F844 D1[ ]+>> knotq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1EC45 D9[ ]+>> korq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F898 D1[ ]+>> kortestq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E3F933 DA01[ ]+>> kshiftlq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E3F931 DA01[ ]+>> kshiftrq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F899 D1[ ]+>> ktestq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1EC47 D9[ ]+>> kxorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC46 D9[ ]+>> kxnorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F5FD48 5A00[ ]+> vcvtpd2phz \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD28 5A00[ ]+> vcvtpd2phy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD58 5A00[ ]+> vcvtpd2ph \(%eax\)\{1to8\},%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD38 5A00[ ]+> vcvtpd2ph \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F1FD48 5A00[ ]+> vcvtpd2ps \(%eax\),%ymm0
+[ ]*[0-9]+[ ]+\?+ C5FD5A00[ ]+> vcvtpd2psy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F1FD29 5A00[ ]+> vcvtpd2psy \(%eax\),%xmm0\{%k1\}
+[ ]*[0-9]+[ ]+\?+ 62F1FD38 5A00[ ]+> vcvtpd2ps \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F37D28 660000[ ]+> vfpclasspsy \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+\?+ 62F37D48 660000[ ]+> vfpclasspsz \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+> vcvtpd2ph xmm0,\[eax\]
+[ ]*[0-9]+[ ]+> vcvtpd2ps xmm0,\[eax\]
+[ ]*[0-9]+[ ]+> vfpclassps k0,\[eax\],0
+#...
+[ ]*[0-9]+[ ]+> \.arch generic32
+[ ]*[0-9]+[ ]+> \.arch \.avx10\.1/256
+[ ]*[0-9]+[ ]+\?+ C5FB93D1[ ]+> kmovd %k1,%edx
+[ ]*[0-9]+[ ]+\?+ C5FB92D1[ ]+> kmovd %ecx,%k2
+[ ]*[0-9]+[ ]+> kunpckdq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C5EC4BD9[ ]+> kunpckwd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.irp sz,d,q
+[ ]*[0-9]+[ ]+\?+ C4E1ED4A D9[ ]+>> kaddd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED41 D9[ ]+>> kandd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED42 D9[ ]+>> kandnd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F990 D1[ ]+>> kmovd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F991 0A[ ]+>> kmovd %k1,\(%edx\)
+[ ]*[0-9]+[ ]+\?+ C4E1F990 11[ ]+>> kmovd \(%ecx\),%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F944 D1[ ]+>> knotd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1ED45 D9[ ]+>> kord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F998 D1[ ]+>> kortestd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E37933 DA01[ ]+>> kshiftld \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E37931 DA01[ ]+>> kshiftrd \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F999 D1[ ]+>> ktestd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1ED47 D9[ ]+>> kxord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED46 D9[ ]+>> kxnord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kaddq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kandq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kandnq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kmovq %k1,%k2
+[ ]*[0-9]+[ ]+>> kmovq %k1,\(%edx\)
+[ ]*[0-9]+[ ]+>> kmovq \(%ecx\),%k2
+[ ]*[0-9]+[ ]+>> knotq %k1,%k2
+[ ]*[0-9]+[ ]+>> korq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kortestq %k1,%k2
+[ ]*[0-9]+[ ]+>> kshiftlq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kshiftrq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+>> ktestq %k1,%k2
+[ ]*[0-9]+[ ]+>> kxorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kxnorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> vcvtpd2phz \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD28 5A00[ ]+> vcvtpd2phy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+> vcvtpd2ph \(%eax\)\{1to8\},%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD38 5A00[ ]+> vcvtpd2ph \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> vcvtpd2ps \(%eax\),%ymm0
+[ ]*[0-9]+[ ]+\?+ C5FD5A00[ ]+> vcvtpd2psy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F1FD29 5A00[ ]+> vcvtpd2psy \(%eax\),%xmm0\{%k1\}
+[ ]*[0-9]+[ ]+\?+ 62F1FD38 5A00[ ]+> vcvtpd2ps \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F37D28 660000[ ]+> vfpclasspsy \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+> vfpclasspsz \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+> vcvtpd2ph xmm0,\[eax\]
+[ ]*[0-9]+[ ]+> vcvtpd2ps xmm0,\[eax\]
+[ ]*[0-9]+[ ]+> vfpclassps k0,\[eax\],0
+#...
+[ ]*[0-9]+[ ]+> \.arch generic32
+[ ]*[0-9]+[ ]+> \.arch \.avx10\.1/128
+[ ]*[0-9]+[ ]+> kmovd %k1,%edx
+[ ]*[0-9]+[ ]+> kmovd %ecx,%k2
+[ ]*[0-9]+[ ]+> kunpckdq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> kunpckwd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.irp sz,d,q
+[ ]*[0-9]+[ ]+>> kaddd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kandd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kandnd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kmovd %k1,%k2
+[ ]*[0-9]+[ ]+>> kmovd %k1,\(%edx\)
+[ ]*[0-9]+[ ]+>> kmovd \(%ecx\),%k2
+[ ]*[0-9]+[ ]+>> knotd %k1,%k2
+[ ]*[0-9]+[ ]+>> kord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kortestd %k1,%k2
+[ ]*[0-9]+[ ]+>> kshiftld \$1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kshiftrd \$1,%k2,%k3
+[ ]*[0-9]+[ ]+>> ktestd %k1,%k2
+[ ]*[0-9]+[ ]+>> kxord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kxnord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kaddq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kandq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kandnq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kmovq %k1,%k2
+[ ]*[0-9]+[ ]+>> kmovq %k1,\(%edx\)
+[ ]*[0-9]+[ ]+>> kmovq \(%ecx\),%k2
+[ ]*[0-9]+[ ]+>> knotq %k1,%k2
+[ ]*[0-9]+[ ]+>> korq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kortestq %k1,%k2
+[ ]*[0-9]+[ ]+>> kshiftlq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kshiftrq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+>> ktestq %k1,%k2
+[ ]*[0-9]+[ ]+>> kxorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+>> kxnorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> vcvtpd2phz \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+> vcvtpd2phy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+> vcvtpd2ph \(%eax\)\{1to8\},%xmm0
+[ ]*[0-9]+[ ]+> vcvtpd2ph \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> vcvtpd2ps \(%eax\),%ymm0
+[ ]*[0-9]+[ ]+> vcvtpd2psy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+> vcvtpd2psy \(%eax\),%xmm0\{%k1\}
+[ ]*[0-9]+[ ]+> vcvtpd2ps \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> vfpclasspsy \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+> vfpclasspsz \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+\?+ 62F5FD08 5A00[ ]+> vcvtpd2ph xmm0,\[eax\]
+[ ]*[0-9]+[ ]+\?+ C5F95A00[ ]+> vcvtpd2ps xmm0,\[eax\]
+[ ]*[0-9]+[ ]+\?+ 62F37D08 660000[ ]+> vfpclassps k0,\[eax\],0
+#...
+[ ]*[0-9]+[ ]+\?+ C5FB93D1[ ]+> kmovd %k1,%edx
+[ ]*[0-9]+[ ]+\?+ C5FB92D1[ ]+> kmovd %ecx,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1EC4B D9[ ]+> kunpckdq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C5EC4BD9[ ]+> kunpckwd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.irp sz,d,q
+[ ]*[0-9]+[ ]+\?+ C4E1ED4A D9[ ]+>> kaddd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED41 D9[ ]+>> kandd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED42 D9[ ]+>> kandnd %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F990 D1[ ]+>> kmovd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F991 0A[ ]+>> kmovd %k1,\(%edx\)
+[ ]*[0-9]+[ ]+\?+ C4E1F990 11[ ]+>> kmovd \(%ecx\),%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F944 D1[ ]+>> knotd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1ED45 D9[ ]+>> kord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F998 D1[ ]+>> kortestd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E37933 DA01[ ]+>> kshiftld \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E37931 DA01[ ]+>> kshiftrd \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F999 D1[ ]+>> ktestd %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1ED47 D9[ ]+>> kxord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1ED46 D9[ ]+>> kxnord %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC4A D9[ ]+>> kaddq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC41 D9[ ]+>> kandq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC42 D9[ ]+>> kandnq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F890 D1[ ]+>> kmovq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F891 0A[ ]+>> kmovq %k1,\(%edx\)
+[ ]*[0-9]+[ ]+\?+ C4E1F890 11[ ]+>> kmovq \(%ecx\),%k2
+[ ]*[0-9]+[ ]+\?+ C4E1F844 D1[ ]+>> knotq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1EC45 D9[ ]+>> korq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F898 D1[ ]+>> kortestq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E3F933 DA01[ ]+>> kshiftlq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E3F931 DA01[ ]+>> kshiftrq \$1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1F899 D1[ ]+>> ktestq %k1,%k2
+[ ]*[0-9]+[ ]+\?+ C4E1EC47 D9[ ]+>> kxorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+\?+ C4E1EC46 D9[ ]+>> kxnorq %k1,%k2,%k3
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F5FD48 5A00[ ]+> vcvtpd2phz \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD28 5A00[ ]+> vcvtpd2phy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD58 5A00[ ]+> vcvtpd2ph \(%eax\)\{1to8\},%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F5FD38 5A00[ ]+> vcvtpd2ph \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F1FD48 5A00[ ]+> vcvtpd2ps \(%eax\),%ymm0
+[ ]*[0-9]+[ ]+\?+ C5FD5A00[ ]+> vcvtpd2psy \(%eax\),%xmm0
+[ ]*[0-9]+[ ]+\?+ 62F1FD29 5A00[ ]+> vcvtpd2psy \(%eax\),%xmm0\{%k1\}
+[ ]*[0-9]+[ ]+\?+ 62F1FD38 5A00[ ]+> vcvtpd2ps \(%eax\)\{1to4\},%xmm0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+\?+ 62F37D28 660000[ ]+> vfpclasspsy \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+\?+ 62F37D48 660000[ ]+> vfpclasspsz \$0,\(%eax\),%k0
+[ ]*[0-9]+[ ]+> *
+[ ]*[0-9]+[ ]+> \.intel_syntax noprefix
+[ ]*[0-9]+[ ]+> vcvtpd2ph xmm0,\[eax\]
+[ ]*[0-9]+[ ]+> vcvtpd2ps xmm0,\[eax\]
+[ ]*[0-9]+[ ]+> vfpclassps k0,\[eax\],0
+#pass
@@ -0,0 +1,52 @@
+# Test AVX10 vector size restriction
+ .text
+
+ .irp isa, default, .avx10.1/256, .avx10.1/128, .avx10.1
+
+ .att_syntax prefix
+ .warning "\isa"
+ .arch generic32
+ .arch \isa
+ kmovd %k1, %edx
+ kmovd %ecx, %k2
+ kunpckdq %k1, %k2, %k3
+ kunpckwd %k1, %k2, %k3
+
+ .irp sz, d, q
+ kadd\sz %k1, %k2, %k3
+ kand\sz %k1, %k2, %k3
+ kandn\sz %k1, %k2, %k3
+ kmov\sz %k1, %k2
+ kmov\sz %k1, (%edx)
+ kmov\sz (%ecx), %k2
+ knot\sz %k1, %k2
+ kor\sz %k1, %k2, %k3
+ kortest\sz %k1, %k2
+ kshiftl\sz $1, %k2, %k3
+ kshiftr\sz $1, %k2, %k3
+ ktest\sz %k1, %k2
+ kxor\sz %k1, %k2, %k3
+ kxnor\sz %k1, %k2, %k3
+ .endr
+
+ vcvtpd2phz (%eax), %xmm0
+ vcvtpd2phy (%eax), %xmm0
+ vcvtpd2ph (%eax){1to8}, %xmm0
+ vcvtpd2ph (%eax){1to4}, %xmm0
+
+ vcvtpd2ps (%eax), %ymm0
+ vcvtpd2psy (%eax), %xmm0
+ vcvtpd2psy (%eax), %xmm0{%k1}
+ vcvtpd2ps (%eax){1to4}, %xmm0
+
+ vfpclasspsy $0, (%eax), %k0
+ vfpclasspsz $0, (%eax), %k0
+
+ .intel_syntax noprefix
+ vcvtpd2ph xmm0, [eax]
+ vcvtpd2ps xmm0, [eax]
+ vfpclassps k0, [eax], 0
+
+ .endr
+
+ .p2align 4
@@ -238,7 +238,7 @@ if [gas_32_check] then {
run_list_test "noavx-3" "-al"
run_dump_test "noavx-4"
run_list_test "noavx512-1" "-almn"
- run_list_test "noavx512-2" "-al"
+ run_list_test "noavx512-2" "-almn --divide"
run_dump_test "noextreg"
run_dump_test "xmmhi32"
run_dump_test "xsave"
@@ -579,6 +579,7 @@ if [gas_32_check] then {
run_dump_test "avx10.1-256-gfni"
run_dump_test "avx10.1-256-vaes"
run_dump_test "avx10.1-256-vpclmulqdq"
+ run_list_test "avx10-vsz" "-almn --divide --listing-lhs-width=2"
run_dump_test "fpu-bad"
run_dump_test "cet"
run_dump_test "cet-intel"
@@ -1,106 +1,195 @@
.*: Assembler messages:
-.*:26: Error: .*unsupported masking.*
-.*:27: Error: .*unsupported masking.*
-.*:29: Error: .*unsupported instruction.*
-.*:30: Error: .*unsupported instruction.*
-.*:32: Error: .*unsupported instruction.*
-.*:33: Error: .*unsupported instruction.*
-.*:36: Error: .*unsupported masking.*
-.*:37: Error: .*unsupported masking.*
-.*:39: Error: .*unsupported instruction.*
-.*:40: Error: .*unsupported instruction.*
-.*:43: Error: .*unsupported instruction.*
-.*:44: Error: .*unsupported instruction.*
-GAS LISTING .*
-#...
-[ ]*1[ ]+\# Test \.arch \.noavx512vl
-[ ]*2[ ]+\.text
-[ ]*3[ ]+\?\?\?\? 62F27D4F vpabsb %zmm5, %zmm6\{%k7\} \# AVX512BW
-[ ]*3[ ]+1CF5
-[ ]*4[ ]+\?\?\?\? 62F27D0F vpabsb %xmm5, %xmm6\{%k7\} \# AVX512BW \+ AVX512VL
-[ ]*4[ ]+1CF5
-[ ]*5[ ]+\?\?\?\? 62F27D2F vpabsb %ymm5, %ymm6\{%k7\} \# AVX512BW \+ AVX512VL
-[ ]*5[ ]+1CF5
-[ ]*6[ ]+\?\?\?\? 62F27D48 vpconflictd %zmm5, %zmm6 \# AVX412CD
-[ ]*6[ ]+C4F5
-[ ]*7[ ]+\?\?\?\? 62F27D08 vpconflictd %xmm5, %xmm6 \# AVX412CD \+ AVX512VL
-[ ]*7[ ]+C4F5
-[ ]*8[ ]+\?\?\?\? 62F27D28 vpconflictd %ymm5, %ymm6 \# AVX412CD \+ AVX512VL
-[ ]*8[ ]+C4F5
-[ ]*9[ ]+\?\?\?\? 62F1FD4F vcvtpd2qq \(%ecx\), %zmm6\{%k7\} \# AVX512DQ
-[ ]*9[ ]+7B31
-[ ]*10[ ]+\?\?\?\? 62F1FD0F vcvtpd2qq \(%ecx\), %xmm6\{%k7\} \# AVX512DQ \+ AVX512VL
-[ ]*10[ ]+7B31
-[ ]*11[ ]+\?\?\?\? 62F1FD2F vcvtpd2qq \(%ecx\), %ymm6\{%k7\} \# AVX512DQ \+ AVX512VL
-[ ]*11[ ]+7B31
-[ ]*12[ ]+\?\?\?\? 62F27D4F vexp2ps %zmm5, %zmm6\{%k7\} \# AVX512ER
-[ ]*12[ ]+C8F5
-[ ]*13[ ]+\?\?\?\? 62F1D54F vaddpd %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512F
-[ ]*13[ ]+58F4
-[ ]*14[ ]+\?\?\?\? 62F1D50F vaddpd %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512F \+ AVX512VL
-[ ]*14[ ]+58F4
-[ ]*15[ ]+\?\?\?\? 62F1D52F vaddpd %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512F \+ AVX512VL
-[ ]*15[ ]+58F4
-[ ]*16[ ]+\?\?\?\? 62F2D54F vpmadd52luq %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512IFMA
-[ ]*16[ ]+B4F4
-[ ]*17[ ]+\?\?\?\? 62F2D50F vpmadd52luq %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512IFMA \+ AVX512VL
-[ ]*17[ ]+B4F4
-[ ]*18[ ]+\?\?\?\? 62F2D52F vpmadd52luq %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512IFMA \+ AVX512VL
-[ ]*18[ ]+B4F4
-[ ]*19[ ]+\?\?\?\? 62F2FD49 vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\} \# AVX512PF
-[ ]*19[ ]+C68CFD17
-[ ]*19[ ]+000000
-[ ]*20[ ]+\?\?\?\? 62F2554F vpermb %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512VBMI
-[ ]*20[ ]+8DF4
-[ ]*21[ ]+\?\?\?\? 62F2550F vpermb %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512VBMI \+ AVX512VL
-[ ]*21[ ]+8DF4
-[ ]*22[ ]+\?\?\?\? 62F2552F vpermb %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512VBMI \+ AVX512VL
-[ ]*22[ ]+8DF4
-[ ]*23[ ]+
-[ ]*24[ ]+\.arch \.noavx512vl
-[ ]*25[ ]+\?\?\?\? 62F27D4F vpabsb %zmm5, %zmm6\{%k7\} \# AVX512BW
-[ ]*25[ ]+1CF5
-[ ]*26[ ]+vpabsb %xmm5, %xmm6\{%k7\} \# AVX512BW \+ AVX512VL
-[ ]*27[ ]+vpabsb %ymm5, %ymm6\{%k7\} \# AVX512BW \+ AVX512VL
-[ ]*28[ ]+\?\?\?\? 62F27D48 vpconflictd %zmm5, %zmm6 \# AVX412CD
-[ ]*28[ ]+C4F5
-[ ]*29[ ]+vpconflictd %xmm5, %xmm6 \# AVX412CD \+ AVX512VL
-[ ]*30[ ]+vpconflictd %ymm5, %ymm6 \# AVX412CD \+ AVX512VL
-[ ]*31[ ]+\?\?\?\? 62F1FD4F vcvtpd2qq \(%ecx\), %zmm6\{%k7\} \# AVX512DQ
-[ ]*31[ ]+7B31
-[ ]*32[ ]+vcvtpd2qq \(%ecx\), %xmm6\{%k7\} \# AVX512DQ \+ AVX512VL
-[ ]*33[ ]+vcvtpd2qq \(%ecx\), %ymm6\{%k7\} \# AVX512DQ \+ AVX512VL
-GAS LISTING .*
-
-
-[ ]*34[ ]+\?\?\?\? 62F27D4F vexp2ps %zmm5, %zmm6\{%k7\} \# AVX512ER
-[ ]*34[ ]+C8F5
-[ ]*35[ ]+\?\?\?\? 62F1D54F vaddpd %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512F
-[ ]*35[ ]+58F4
-[ ]*36[ ]+vaddpd %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512F \+ AVX512VL
-[ ]*37[ ]+vaddpd %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512F \+ AVX512VL
-[ ]*38[ ]+\?\?\?\? 62F2D54F vpmadd52luq %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512IFMA
-[ ]*38[ ]+B4F4
-[ ]*39[ ]+vpmadd52luq %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512IFMA \+ AVX512VL
-[ ]*40[ ]+vpmadd52luq %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512IFMA \+ AVX512VL
-[ ]*41[ ]+\?\?\?\? 62F2FD49 vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\} \# AVX512PF
-[ ]*41[ ]+C68CFD17
-[ ]*41[ ]+000000
-[ ]*42[ ]+\?\?\?\? 62F2554F vpermb %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512VBMI
-[ ]*42[ ]+8DF4
-[ ]*43[ ]+vpermb %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512VBMI \+ AVX512VL
-[ ]*44[ ]+vpermb %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512VBMI \+ AVX512VL
-[ ]*45[ ]+
-[ ]*46[ ]+\?\?\?\? C4E2791C vpabsb %xmm5, %xmm6
-[ ]*46[ ]+F5
-[ ]*47[ ]+\?\?\?\? C4E27D1C vpabsb %ymm5, %ymm6
-[ ]*47[ ]+F5
-[ ]*48[ ]+\?\?\?\? C5D158F4 vaddpd %xmm4, %xmm5, %xmm6
-[ ]*49[ ]+\?\?\?\? C5D558F4 vaddpd %ymm4, %ymm5, %ymm6
-[ ]*50[ ]+\?\?\?\? 660F381C pabsb %xmm5, %xmm6
-[ ]*50[ ]+F5
-[ ]*51[ ]+\?\?\?\? 660F58F4 addpd %xmm4, %xmm6
-[ ]*52[ ]+
+.*:8: Error: .*bad register name `%zmm.*
+.*:11: Error: .*bad register name `%zmm.*
+.*:14: Error: .*bad register name `%zmm.*
+.*:17: Error: .*bad register name `%zmm.*
+.*:18: Error: .*bad register name `%zmm.*
+.*:21: Error: .*bad register name `%zmm.*
+.*:24: Error: .*unsupported instruction.*
+.*:25: Error: .*bad register name `%zmm.*
+.*:8: Error: .*bad register name `%zmm.*
+.*:10: Error: .*bad register name `%ymm.*
+.*:11: Error: .*bad register name `%zmm.*
+.*:13: Error: .*bad register name `%ymm.*
+.*:14: Error: .*bad register name `%zmm.*
+.*:16: Error: .*bad register name `%ymm.*
+.*:17: Error: .*bad register name `%zmm.*
+.*:18: Error: .*bad register name `%zmm.*
+.*:20: Error: .*bad register name `%ymm.*
+.*:21: Error: .*bad register name `%zmm.*
+.*:23: Error: .*bad register name `%ymm.*
+.*:24: Error: .*
+.*:25: Error: .*bad register name `%zmm.*
+.*:27: Error: .*bad register name `%ymm.*
+.*:9: Error: .*unsupported masking.*
+.*:10: Error: .*unsupported masking.*
+.*:12: Error: .*unsupported instruction.*
+.*:13: Error: .*unsupported instruction.*
+.*:15: Error: .*unsupported instruction.*
+.*:16: Error: .*unsupported instruction.*
+.*:19: Error: .*unsupported masking.*
+.*:20: Error: .*unsupported masking.*
+.*:22: Error: .*unsupported instruction.*
+.*:23: Error: .*unsupported instruction.*
+.*:26: Error: .*unsupported instruction.*
+.*:27: Error: .*unsupported instruction.*
+#...
+[ ]*[0-9]+[ ]+\# Test \.arch \.noavx512vl
+[ ]*[0-9]+[ ]+\.text
+[ ]*[0-9]+[ ]*
+[ ]*[0-9]+[ ]+\.irp isa, default, .*
+#...
+[ ]*[0-9]+[ ]+\.endr
+#...
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D2F > vpabsb %ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D48 > vpconflictd %zmm5,%zmm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D08 > vpconflictd %xmm5,%xmm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D28 > vpconflictd %ymm5,%ymm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD4F > vcvtpd2qq \(%ecx\),%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD0F > vcvtpd2qq \(%ecx\),%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD2F > vcvtpd2qq \(%ecx\),%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vexp2ps %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+C8F5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D54F > vaddpd %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D50F > vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D52F > vaddpd %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D54F > vpmadd52luq %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D50F > vpmadd52luq %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D52F > vpmadd52luq %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2FD49 > vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\}
+[ ]*[0-9]+[ ]+C68CFD17 *
+[ ]*[0-9]+[ ]+000000
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2554F > vpermb %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2550F > vpermb %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2552F > vpermb %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+#...
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+> \.arch \.avx10\.1/256
+[ ]*[0-9]+[ ]+> vpabsb %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D2F > vpabsb %ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+> vpconflictd %zmm5,%zmm6
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D08 > vpconflictd %xmm5,%xmm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D28 > vpconflictd %ymm5,%ymm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+> vcvtpd2qq \(%ecx\),%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD0F > vcvtpd2qq \(%ecx\),%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD2F > vcvtpd2qq \(%ecx\),%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+> vexp2ps %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vaddpd %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D50F > vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D52F > vaddpd %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+> vpmadd52luq %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D50F > vpmadd52luq %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D52F > vpmadd52luq %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+> vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\}
+[ ]*[0-9]+[ ]+> vpermb %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2550F > vpermb %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2552F > vpermb %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+#...
+[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+> \.arch \.avx10\.1/128
+[ ]*[0-9]+[ ]+> vpabsb %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+> vpabsb %ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+> vpconflictd %zmm5,%zmm6
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D08 > vpconflictd %xmm5,%xmm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+> vpconflictd %ymm5,%ymm6
+[ ]*[0-9]+[ ]+> vcvtpd2qq \(%ecx\),%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD0F > vcvtpd2qq \(%ecx\),%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+> vcvtpd2qq \(%ecx\),%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+> vexp2ps %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vaddpd %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D50F > vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+> vaddpd %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+> vpmadd52luq %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D50F > vpmadd52luq %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+> vpmadd52luq %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+> vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\}
+[ ]*[0-9]+[ ]+> vpermb %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2550F > vpermb %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+[ ]*[0-9]+[ ]+> vpermb %ymm4,%ymm5,%ymm6\{%k7\}
+#...
+[ ]*[0-9]+[ ]+> \.arch \.noavx512vl
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+1CF5
+[ ]*[0-9]+[ ]+> vpabsb %xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vpabsb %ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D48 > vpconflictd %zmm5,%zmm6
+[ ]*[0-9]+[ ]+C4F5
+[ ]*[0-9]+[ ]+> vpconflictd %xmm5,%xmm6
+[ ]*[0-9]+[ ]+> vpconflictd %ymm5,%ymm6
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1FD4F > vcvtpd2qq \(%ecx\),%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+7B31
+[ ]*[0-9]+[ ]+> vcvtpd2qq \(%ecx\),%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vcvtpd2qq \(%ecx\),%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vexp2ps %zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+C8F5
+[ ]*[0-9]+[ ]+\?\?\?\? 62F1D54F > vaddpd %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+58F4
+[ ]*[0-9]+[ ]+> vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vaddpd %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2D54F > vpmadd52luq %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+B4F4
+[ ]*[0-9]+[ ]+> vpmadd52luq %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vpmadd52luq %ymm4,%ymm5,%ymm6\{%k7\}
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2FD49 > vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\}
+[ ]*[0-9]+[ ]+C68CFD17 *
+[ ]*[0-9]+[ ]+000000
+[ ]*[0-9]+[ ]+\?\?\?\? 62F2554F > vpermb %zmm4,%zmm5,%zmm6\{%k7\}
+[ ]*[0-9]+[ ]+8DF4
+[ ]*[0-9]+[ ]+> vpermb %xmm4,%xmm5,%xmm6\{%k7\}
+[ ]*[0-9]+[ ]+> vpermb %ymm4,%ymm5,%ymm6\{%k7\}
+#...
+[ ]*[0-9]+[ ]+\?\?\?\? C4E2791C vpabsb %xmm5, %xmm6
+[ ]*[0-9]+[ ]+F5
+[ ]*[0-9]+[ ]+\?\?\?\? C4E27D1C vpabsb %ymm5, %ymm6
+[ ]*[0-9]+[ ]+F5
+[ ]*[0-9]+[ ]+\?\?\?\? C5D158F4 vaddpd %xmm4, %xmm5, %xmm6
+[ ]*[0-9]+[ ]+\?\?\?\? C5D558F4 vaddpd %ymm4, %ymm5, %ymm6
+[ ]*[0-9]+[ ]+\?\?\?\? 660F381C pabsb %xmm5, %xmm6
+[ ]*[0-9]+[ ]+F5
+[ ]*[0-9]+[ ]+\?\?\?\? 660F58F4 addpd %xmm4, %xmm6
+[ ]*[0-9]+[ ]+
[ ]*[1-9][0-9]*[ ]+\.intel_syntax noprefix
[ ]*[1-9][0-9]*[ ]+\?\?\?\? 62F3FD48 vfpclasspd k0, \[eax], 0
[ ]*[1-9][0-9]*[ ]+660000
@@ -1,27 +1,10 @@
# Test .arch .noavx512vl
.text
- vpabsb %zmm5, %zmm6{%k7} # AVX512BW
- vpabsb %xmm5, %xmm6{%k7} # AVX512BW + AVX512VL
- vpabsb %ymm5, %ymm6{%k7} # AVX512BW + AVX512VL
- vpconflictd %zmm5, %zmm6 # AVX412CD
- vpconflictd %xmm5, %xmm6 # AVX412CD + AVX512VL
- vpconflictd %ymm5, %ymm6 # AVX412CD + AVX512VL
- vcvtpd2qq (%ecx), %zmm6{%k7} # AVX512DQ
- vcvtpd2qq (%ecx), %xmm6{%k7} # AVX512DQ + AVX512VL
- vcvtpd2qq (%ecx), %ymm6{%k7} # AVX512DQ + AVX512VL
- vexp2ps %zmm5, %zmm6{%k7} # AVX512ER
- vaddpd %zmm4, %zmm5, %zmm6{%k7} # AVX512F
- vaddpd %xmm4, %xmm5, %xmm6{%k7} # AVX512F + AVX512VL
- vaddpd %ymm4, %ymm5, %ymm6{%k7} # AVX512F + AVX512VL
- vpmadd52luq %zmm4, %zmm5, %zmm6{%k7} # AVX512IFMA
- vpmadd52luq %xmm4, %xmm5, %xmm6{%k7} # AVX512IFMA + AVX512VL
- vpmadd52luq %ymm4, %ymm5, %ymm6{%k7} # AVX512IFMA + AVX512VL
- vgatherpf0dpd 23(%ebp,%ymm7,8){%k1} # AVX512PF
- vpermb %zmm4, %zmm5, %zmm6{%k7} # AVX512VBMI
- vpermb %xmm4, %xmm5, %xmm6{%k7} # AVX512VBMI + AVX512VL
- vpermb %ymm4, %ymm5, %ymm6{%k7} # AVX512VBMI + AVX512VL
- .arch .noavx512vl
+ .irp isa, default, .avx10.1/256, .avx10.1/128, .noavx512vl
+
+ .arch default
+ .arch \isa
vpabsb %zmm5, %zmm6{%k7} # AVX512BW
vpabsb %xmm5, %xmm6{%k7} # AVX512BW + AVX512VL
vpabsb %ymm5, %ymm6{%k7} # AVX512BW + AVX512VL
@@ -43,6 +26,8 @@
vpermb %xmm4, %xmm5, %xmm6{%k7} # AVX512VBMI + AVX512VL
vpermb %ymm4, %ymm5, %ymm6{%k7} # AVX512VBMI + AVX512VL
+ .endr
+
vpabsb %xmm5, %xmm6
vpabsb %ymm5, %ymm6
vaddpd %xmm4, %xmm5, %xmm6
@@ -464,6 +464,7 @@ static bitfield opcode_modifiers[] =
BITFIELD (StaticRounding),
BITFIELD (SAE),
BITFIELD (Disp8MemShift),
+ BITFIELD (Vsz),
BITFIELD (Optimize),
BITFIELD (ATTMnemonic),
BITFIELD (ATTSyntax),
@@ -716,6 +716,16 @@ enum
#define DISP8_SHIFT_VL 7
Disp8MemShift,
+ /* insn has vector size restrictions, requiring a minimum of:
+ 0: 128 bits.
+ 1: 256 bits.
+ 2: 512 bits.
+ */
+#define VSZ128 0 /* Not to be used in templates. */
+#define VSZ256 1
+#define VSZ512 2
+ Vsz,
+
/* Support encoding optimization. */
Optimize,
@@ -776,6 +786,7 @@ typedef struct i386_opcode_modifier
unsigned int staticrounding:1;
unsigned int sae:1;
unsigned int disp8memshift:3;
+ unsigned int vsz:3;
unsigned int optimize:1;
unsigned int attmnemonic:1;
unsigned int attsyntax:1;
@@ -131,6 +131,9 @@
#define EVexLIG EVex=EVEXLIG
#define EVexDYN EVex=EVEXDYN
+#define Vsz256 Vsz=VSZ256
+#define Vsz512 Vsz=VSZ512
+
// The EVEX purpose of StaticRounding appears only together with SAE. Re-use
// the bit to mark commutative VEX encodings where swapping the source
// operands may allow to switch from 3-byte to 2-byte VEX encoding.
@@ -987,9 +990,9 @@ pause, 0xf390, i186, NoSuf, {}
b:0:VexW0:Byte:AVX512DQ:66:AVX512VBMI, +
w:1:VexW1:Word:AVX512F::AVX512BW>
-<dq:opc:vexw:vexw64:elem:cpu64:gpr:kpfx, +
- d:0:VexW0::Dword::Reg32:66, +
- q:1:VexW1:VexW1:Qword:x64:Reg64:>
+<dq:opc:vexw:vexw64:elem:cpu64:gpr:kpfx:kvsz, +
+ d:0:VexW0::Dword::Reg32:66:Vsz256, +
+ q:1:VexW1:VexW1:Qword:x64:Reg64::Vsz512>
emms, 0xf77, MMX, NoSuf, {}
// These really shouldn't allow for Reg64 (movq is the right mnemonic for
@@ -2623,22 +2626,22 @@ vpmovzxwq, 0x6634, AVX512F|AVX512VL, Mod
// AVX512BW instructions.
-kadd<dq>, 0x<dq:kpfx>4a, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kand<dq>, 0x<dq:kpfx>41, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kandn<dq>, 0x<dq:kpfx>42, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf|Optimize, { RegMask, RegMask, RegMask }
-kmov<dq>, 0x<dq:kpfx>90, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask|<dq:elem>|Unspecified|BaseIndex, RegMask }
-kmov<dq>, 0x<dq:kpfx>91, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, <dq:elem>|Unspecified|BaseIndex }
-kmov<dq>, 0xf292, AVX512BW, D|Modrm|Vex128|Space0F|<dq:vexw64>|NoSuf, { <dq:gpr>, RegMask }
-knot<dq>, 0x<dq:kpfx>44, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, RegMask }
-kor<dq>, 0x<dq:kpfx>45, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kortest<dq>, 0x<dq:kpfx>98, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, RegMask }
-ktest<dq>, 0x<dq:kpfx>99, AVX512BW, Modrm|Vex128|Space0F|VexW1|NoSuf, { RegMask, RegMask }
-kxnor<dq>, 0x<dq:kpfx>46, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kxor<dq>, 0x<dq:kpfx>47, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf|Optimize, { RegMask, RegMask, RegMask }
-kunpckdq, 0x4B, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|NoSuf, { RegMask, RegMask, RegMask }
-kunpckwd, 0x4B, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
-kshiftl<dq>, 0x6633, AVX512BW, Modrm|Vex128|Space0F3A|<dq:vexw>|NoSuf, { Imm8, RegMask, RegMask }
-kshiftr<dq>, 0x6631, AVX512BW, Modrm|Vex128|Space0F3A|<dq:vexw>|NoSuf, { Imm8, RegMask, RegMask }
+kadd<dq>, 0x<dq:kpfx>4a, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask, RegMask }
+kand<dq>, 0x<dq:kpfx>41, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask, RegMask }
+kandn<dq>, 0x<dq:kpfx>42, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf|Optimize, { RegMask, RegMask, RegMask }
+kmov<dq>, 0x<dq:kpfx>90, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask|<dq:elem>|Unspecified|BaseIndex, RegMask }
+kmov<dq>, 0x<dq:kpfx>91, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, <dq:elem>|Unspecified|BaseIndex }
+kmov<dq>, 0xf292, AVX512BW, D|Modrm|Vex128|Space0F|<dq:vexw64>|<dq:kvsz>|NoSuf, { <dq:gpr>, RegMask }
+knot<dq>, 0x<dq:kpfx>44, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask }
+kor<dq>, 0x<dq:kpfx>45, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask, RegMask }
+kortest<dq>, 0x<dq:kpfx>98, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask }
+ktest<dq>, 0x<dq:kpfx>99, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask }
+kxnor<dq>, 0x<dq:kpfx>46, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask, RegMask }
+kxor<dq>, 0x<dq:kpfx>47, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf|Optimize, { RegMask, RegMask, RegMask }
+kunpckdq, 0x4B, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|Vsz512|NoSuf, { RegMask, RegMask, RegMask }
+kunpckwd, 0x4B, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW0|Vsz256|NoSuf, { RegMask, RegMask, RegMask }
+kshiftl<dq>, 0x6633, AVX512BW, Modrm|Vex128|Space0F3A|<dq:vexw>|<dq:kvsz>|NoSuf, { Imm8, RegMask, RegMask }
+kshiftr<dq>, 0x6631, AVX512BW, Modrm|Vex128|Space0F3A|<dq:vexw>|<dq:kvsz>|NoSuf, { Imm8, RegMask, RegMask }
vdbpsadbw, 0x6642, AVX512BW, Modrm|Masking|Space0F3A|VexVVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }