[1/1] target: Fix asm generation for AVX builtins when using -masm=intel [PR106095]

Message ID 165637956824.13615.7247999336313533072-1@git.sr.ht
State Committed
Commit e484755aecd543b4c9e2adb4f348118c1e43cfd0
Headers
Series target: Fix asm generation for AVX builtins when using -masm=intel [PR106095] |

Commit Message

Antoni Boucher June 26, 2022, 10:49 p.m. UTC
  From: Antoni Boucher <bouanto@zoho.com>

gcc/ChangeLog:
	PR target/106095
	* config/i386/sse.md: Fix Intel-syntax asm templates for masked
	vcvtsd2ss, vcvtss2sd and vpmov truncating stores.

gcc/testsuite/ChangeLog:
	PR target/106095
	* gcc.target/i386/pr106095.c: New test exercising the affected AVX-512 builtins with -masm=intel.
---
 gcc/config/i386/sse.md                   | 10 ++---
 gcc/testsuite/gcc.target/i386/pr106095.c | 47 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106095.c
  

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8cd0f617bf3..f2f72e8fb05 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8859,7 +8859,7 @@ 
   "@
    cvtsd2ss\t{%2, %0|%0, %2}
    cvtsd2ss\t{%2, %0|%0, %q2}
-   vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
+   vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %q2<round_mask_op3>}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
    (set_attr "athlon_decode" "vector,double,*")
@@ -8903,7 +8903,7 @@ 
   "@
    cvtss2sd\t{%2, %0|%0, %2}
    cvtss2sd\t{%2, %0|%0, %k2}
-   vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
+   vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %k2<round_saeonly_mask_op3>}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
    (set_attr "amdfam10_decode" "vector,double,*")
@@ -14406,8 +14406,8 @@ 
   "TARGET_AVX512VL"
 {
   if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
-    return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
-  return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
+    return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+  return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "memory" "store")
@@ -14506,7 +14506,7 @@ 
 	  (match_dup 0)
 	  (match_operand:QI 2 "register_operand" "Yk")))]
   "TARGET_AVX512VL"
-  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
+  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "memory" "store")
    (set_attr "prefix" "evex")
diff --git a/gcc/testsuite/gcc.target/i386/pr106095.c b/gcc/testsuite/gcc.target/i386/pr106095.c
new file mode 100644
index 00000000000..dfa6136500f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106095.c
@@ -0,0 +1,47 @@ 
+/* PR target/106095 */
+/* { dg-do assemble } */
+/* { dg-options "-mavx512vl -masm=intel" } */
+/* { dg-require-effective-target masm_intel } */
+
+#include <immintrin.h>
+#include <stdlib.h>
+
+typedef int v4si __attribute__ ((vector_size (16)));        /* 16-byte vector of int.  */
+typedef long long v2di __attribute__ ((vector_size (16)));  /* 16-byte vector of long long.  */
+typedef long long v4di __attribute__ ((vector_size (32)));  /* 32-byte vector of long long.  */
+typedef double v2df __attribute__ ((vector_size (16)));     /* 16-byte vector of double.  */
+typedef float v4sf __attribute__ ((vector_size (16)));      /* 16-byte vector of float.  */
+
+void bug1(void) {  /* Calls the pmovdw128mem mask builtin with a zero mask.  */
+    unsigned long long *dst = malloc(sizeof *dst);
+    v4si dwords = {0, 1, 2, 3};
+    __builtin_ia32_pmovdw128mem_mask(dst, dwords, 0);
+}
+
+int bug2(void) {  /* Calls the cvtss2sd mask/round builtin with a zero mask.  */
+    v4sf singles = {0.0, 0.0, 0.0, 0.0};
+    v2df doubles = {0.0, 0.0};
+    v2df merge_src = {0.0, 0.0};
+    v2df out = __builtin_ia32_cvtss2sd_mask_round(doubles, singles, merge_src, 0, _MM_FROUND_NO_EXC);
+    return (int)out[0];  /* Use the result so the call is not dead.  */
+}
+
+int bug3(void) {  /* Calls the cvtsd2ss mask/round builtin with a zero mask.  */
+    v2df doubles = {0.0, 0.0};
+    v4sf singles = {0.0, 0.0, 0.0, 0.0};
+    v4sf merge_src = {0.0, 0.0, 0.0, 0.0};
+    v4sf out = __builtin_ia32_cvtsd2ss_mask_round(singles, doubles, merge_src, 0, _MM_FROUND_NO_EXC);
+    return (int)out[0];  /* Use the result so the call is not dead.  */
+}
+
+void bug4(void) {  /* Produces no value, so it is void like bug1.  */
+    v4di qwords = {0, 1, 2, 3};
+    unsigned long long *addr = malloc(sizeof(*addr));
+    __builtin_ia32_pmovqw256mem_mask(addr, qwords, 0);  /* Zero mask.  */
+}
+
+void bug5(void) {  /* Produces no value, so it is void like bug1.  */
+    v2di qwords = {0, 1};
+    unsigned int *addr = malloc(sizeof(*addr));
+    __builtin_ia32_pmovqw128mem_mask(addr, qwords, 0);  /* Zero mask.  */
+}