@@ -8859,7 +8859,7 @@
"@
cvtsd2ss\t{%2, %0|%0, %2}
cvtsd2ss\t{%2, %0|%0, %q2}
- vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
+ vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %q2<round_mask_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double,*")
@@ -8903,7 +8903,7 @@
"@
cvtss2sd\t{%2, %0|%0, %2}
cvtss2sd\t{%2, %0|%0, %k2}
- vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
+ vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %k2<round_saeonly_mask_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "vector,double,*")
@@ -14406,8 +14406,8 @@
"TARGET_AVX512VL"
{
if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
- return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
- return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
+ return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -14506,7 +14506,7 @@
(match_dup 0)
(match_operand:QI 2 "register_operand" "Yk")))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
+ "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
new file mode 100644
@@ -0,0 +1,47 @@
+/* PR gcc/106095 */
+/* { dg-do assemble } */
+/* { dg-options "-mavx512vl -masm=intel" } */
+/* { dg-require-effective-target masm_intel } */
+
+#include <immintrin.h>
+#include <stdlib.h>
+
+typedef int v4si __attribute__ ((vector_size (16))); /* 16-byte vector of int.  */
+typedef long long v2di __attribute__ ((vector_size (16))); /* 16-byte vector of long long.  */
+typedef long long v4di __attribute__ ((vector_size (32))); /* 32-byte vector of long long.  */
+typedef double v2df __attribute__ ((vector_size (16))); /* 16-byte vector of double.  */
+typedef float v4sf __attribute__ ((vector_size (16))); /* 16-byte vector of float.  */
+
+void bug1(void) { /* Calls __builtin_ia32_pmovdw128mem_mask on a v4si source.  */
+ v4si ints4 = {0, 1, 2, 3};
+ unsigned long long *addr = malloc(sizeof(*addr)); /* Never freed or NULL-checked; the test is only assembled (dg-do assemble).  */
+ __builtin_ia32_pmovdw128mem_mask(addr, ints4, 0); /* Arguments: destination pointer, source vector, 0 (presumably the write mask -- confirm against the builtin's prototype).  */
+}
+
+int bug2(void) { /* Calls __builtin_ia32_cvtss2sd_mask_round and returns element 0 of its result as int.  */
+ v2df a = {0.0, 0.0};
+ v4sf b = {0.0, 0.0, 0.0, 0.0};
+ v2df src = {0.0, 0.0};
+ v2df res = __builtin_ia32_cvtss2sd_mask_round(a, b, src, 0, _MM_FROUND_NO_EXC); /* Fourth argument 0 is presumably the write mask -- confirm against the builtin's prototype.  */
+ return (int)res[0]; /* The builtin's result is consumed here.  */
+}
+
+int bug3(void) { /* Calls __builtin_ia32_cvtsd2ss_mask_round and returns element 0 of its result as int.  */
+ v4sf a = {0.0, 0.0, 0.0, 0.0};
+ v2df b = {0.0, 0.0};
+ v4sf src = {0.0, 0.0, 0.0, 0.0};
+ v4sf res = __builtin_ia32_cvtsd2ss_mask_round(a, b, src, 0, _MM_FROUND_NO_EXC); /* Fourth argument 0 is presumably the write mask -- confirm against the builtin's prototype.  */
+ return (int)res[0]; /* The builtin's result is consumed here.  */
+}
+
+void bug4(void) { /* Calls __builtin_ia32_pmovqw256mem_mask on a v4di source; declared void, matching bug1, because there is no value to return.  */
+ v4di ints4 = {0, 1, 2, 3};
+ unsigned long long *addr = malloc(sizeof(*addr)); /* Never freed or NULL-checked; the test is only assembled (dg-do assemble).  */
+ __builtin_ia32_pmovqw256mem_mask(addr, ints4, 0); /* Arguments: destination pointer, source vector, 0 (presumably the write mask -- confirm against the builtin's prototype).  */
+}
+
+void bug5(void) { /* Calls __builtin_ia32_pmovqw128mem_mask on a v2di source; declared void, matching bug1, because there is no value to return.  */
+ v2di ints4 = {0, 1};
+ unsigned int *addr = malloc(sizeof(*addr)); /* unsigned int destination here (bug4 uses unsigned long long); never freed or NULL-checked, the test is only assembled (dg-do assemble).  */
+ __builtin_ia32_pmovqw128mem_mask(addr, ints4, 0); /* Arguments: destination pointer, source vector, 0 (presumably the write mask -- confirm against the builtin's prototype).  */
+}