i386: Fix GLC tuning with -masm=intel [PR104104]

Message ID 20220119000016.GO2646553@tucnak
State New
Headers
Series i386: Fix GLC tuning with -masm=intel [PR104104] |

Commit Message

Jakub Jelinek Jan. 19, 2022, midnight UTC
  On Sun, Jan 16, 2022 at 12:22:18PM +0800, Hongtao Liu via Gcc-patches wrote:
> On Sun, Jan 16, 2022 at 12:44 AM Uros Bizjak via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
> > >
> > > Thanks for the suggestion, here is the updated patch that survived
> > > bootstrap/regtest.
> >
> > LGTM for me, but please get the final approval from Hongtao.
> >
> Ok, thanks.

Unfortunately the patch results in assembler failures with -masm=intel.

> > > > +  if (TARGET_DEST_FALSE_DEPENDENCY
> > > > +      && get_attr_dest_false_dep (insn) ==
> > > > +        DEST_FALSE_DEP_TRUE)
> > > > +    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);

All the vxorps insns were emitted like the above, which means for -masm=sysv
it looks like
	vxorps	%xmm3, %xmm3, %xmm3
but for -masm=intel like:
	vxorps	
We want obviously
	vxorps	xmm3, xmm3, xmm3
so the following patch just drops the erroneous {}s.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-01-19  Jakub Jelinek  <jakub@redhat.com>

	PR target/104104
	* config/i386/sse.md
	(<avx512>_<complexopname>_<mode><maskc_name><round_name>,
	avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>,
	avx512dq_mul<mode>3<mask_name>, <avx2_avx512>_permvar<mode><mask_name>,
	avx2_perm<mode>_1<mask_name>, avx512f_perm<mode>_1<mask_name>,
	avx512dq_rangep<mode><mask_name><round_saeonly_name>,
	avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>,
	<avx512>_getmant<mode><mask_name><round_saeonly_name>,
	avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>):
	Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}.

	* gcc.target/i386/pr104104.c: New test.



	Jakub
  

Comments

Li, Pan2 via Gcc-patches Jan. 19, 2022, 1:01 a.m. UTC | #1
Sorry for introducing this failure, and thanks for the patch. I suppose it could be treated as an obvious fix?
  
Hongtao Liu Jan. 19, 2022, 1:09 a.m. UTC | #2
On Wed, Jan 19, 2022 at 8:00 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Sun, Jan 16, 2022 at 12:22:18PM +0800, Hongtao Liu via Gcc-patches wrote:
> > On Sun, Jan 16, 2022 at 12:44 AM Uros Bizjak via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
> > > >
> > > > Thanks for the suggestion, here is the updated patch that survived
> > > > bootstrap/regtest.
> > >
> > > LGTM for me, but please get the final approval from Hongtao.
> > >
> > Ok, thanks.
>
> Unfortunately the patch results in assembler failures with -masm=intel.
>
> > > > > +  if (TARGET_DEST_FALSE_DEPENDENCY
> > > > > +      && get_attr_dest_false_dep (insn) ==
> > > > > +        DEST_FALSE_DEP_TRUE)
> > > > > +    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
>
> All the vxorps insns were emitted like the above, which means for -masm=sysv
> it looks like
>         vxorps  %xmm3, %xmm3, %xmm3
> but for -masm=intel like:
>         vxorps
> We want obviously
>         vxorps  xmm3, xmm3, xmm3
> so the following patch just drops the erroneous {}s.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Yes, thanks.
>
> 2022-01-19  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/104104
>         * config/i386/sse.md
>         (<avx512>_<complexopname>_<mode><maskc_name><round_name>,
>         avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>,
>         avx512dq_mul<mode>3<mask_name>, <avx2_avx512>_permvar<mode><mask_name>,
>         avx2_perm<mode>_1<mask_name>, avx512f_perm<mode>_1<mask_name>,
>         avx512dq_rangep<mode><mask_name><round_saeonly_name>,
>         avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>,
>         <avx512>_getmant<mode><mask_name><round_saeonly_name>,
>         avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>):
>         Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}.
>
>         * gcc.target/i386/pr104104.c: New test.
>
> --- gcc/config/i386/sse.md.jj   2022-01-18 11:58:59.156988142 +0100
> +++ gcc/config/i386/sse.md      2022-01-18 21:20:40.022477778 +0100
> @@ -6539,7 +6539,7 @@ (define_insn "<avx512>_<complexopname>_<
>  {
>    if (TARGET_DEST_FALSE_DEP_FOR_GLC
>        && <maskc_dest_false_dep_for_glc_cond>)
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}";
>  }
>    [(set_attr "type" "ssemul")
> @@ -6750,7 +6750,7 @@ (define_insn "avx512fp16_<complexopname>
>  {
>    if (TARGET_DEST_FALSE_DEP_FOR_GLC
>        && <mask_scalarc_dest_false_dep_for_glc_cond>)
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}";
>  }
>    [(set_attr "type" "ssemul")
> @@ -15222,7 +15222,7 @@ (define_insn "avx512dq_mul<mode>3<mask_n
>        && <mask3_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1])
>        && !reg_mentioned_p (operands[0], operands[2]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
>  }
>    [(set_attr "type" "sseimul")
> @@ -24658,7 +24658,7 @@ (define_insn "<avx2_avx512>_permvar<mode
>        && <mask3_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1])
>        && !reg_mentioned_p (operands[0], operands[2]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}";
>  }
>    [(set_attr "type" "sselog")
> @@ -24900,7 +24900,7 @@ (define_insn "avx2_perm<mode>_1<mask_nam
>    if (TARGET_DEST_FALSE_DEP_FOR_GLC
>        && <mask6_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
>  }
>    [(set_attr "type" "sselog")
> @@ -24975,7 +24975,7 @@ (define_insn "avx512f_perm<mode>_1<mask_
>    if (TARGET_DEST_FALSE_DEP_FOR_GLC
>        && <mask10_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
>  }
>    [(set_attr "type" "sselog")
> @@ -26880,7 +26880,7 @@ (define_insn "avx512dq_rangep<mode><mask
>        && <mask4_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1])
>        && !reg_mentioned_p (operands[0], operands[2]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}";
>  }
>    [(set_attr "type" "sse")
> @@ -26903,7 +26903,7 @@ (define_insn "avx512dq_ranges<mode><mask
>        && <mask_scalar4_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1])
>        && !reg_mentioned_p (operands[0], operands[2]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
>  }
>    [(set_attr "type" "sse")
> @@ -26949,7 +26949,7 @@ (define_insn "<avx512>_getmant<mode><mas
>    if (TARGET_DEST_FALSE_DEP_FOR_GLC
>        && <mask3_dest_false_dep_for_glc_cond>
>        && MEM_P (operands[1]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
>  }
>    [(set_attr "prefix" "evex")
> @@ -26971,7 +26971,7 @@ (define_insn "avx512f_vgetmant<mode><mas
>        && <mask_scalar4_dest_false_dep_for_glc_cond>
>        && !reg_mentioned_p (operands[0], operands[1])
>        && !reg_mentioned_p (operands[0], operands[2]))
> -    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
> +    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
>    return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
>  }
>     [(set_attr "prefix" "evex")
> --- gcc/testsuite/gcc.target/i386/pr104104.c.jj 2022-01-18 21:38:17.007906673 +0100
> +++ gcc/testsuite/gcc.target/i386/pr104104.c    2022-01-18 21:36:10.475623148 +0100
> @@ -0,0 +1,10 @@
> +/* PR target/104104 */
> +/* { dg-do assemble { target vect_simd_clones } } */
> +/* { dg-require-effective-target masm_intel } */
> +/* { dg-options "-march=alderlake -masm=intel -O1 -fallow-store-data-races -funroll-all-loops" } */
> +
> +__attribute__ ((simd)) short int
> +foo (void)
> +{
> +  return 0;
> +}
>
>
>         Jakub
>
  

Patch

--- gcc/config/i386/sse.md.jj	2022-01-18 11:58:59.156988142 +0100
+++ gcc/config/i386/sse.md	2022-01-18 21:20:40.022477778 +0100
@@ -6539,7 +6539,7 @@  (define_insn "<avx512>_<complexopname>_<
 {
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <maskc_dest_false_dep_for_glc_cond>)
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}";
 }
   [(set_attr "type" "ssemul")
@@ -6750,7 +6750,7 @@  (define_insn "avx512fp16_<complexopname>
 {
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask_scalarc_dest_false_dep_for_glc_cond>)
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}";
 }
   [(set_attr "type" "ssemul")
@@ -15222,7 +15222,7 @@  (define_insn "avx512dq_mul<mode>3<mask_n
       && <mask3_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
 }
   [(set_attr "type" "sseimul")
@@ -24658,7 +24658,7 @@  (define_insn "<avx2_avx512>_permvar<mode
       && <mask3_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}";
 }
   [(set_attr "type" "sselog")
@@ -24900,7 +24900,7 @@  (define_insn "avx2_perm<mode>_1<mask_nam
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask6_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
 }
   [(set_attr "type" "sselog")
@@ -24975,7 +24975,7 @@  (define_insn "avx512f_perm<mode>_1<mask_
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask10_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
 }
   [(set_attr "type" "sselog")
@@ -26880,7 +26880,7 @@  (define_insn "avx512dq_rangep<mode><mask
       && <mask4_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}";
 }
   [(set_attr "type" "sse")
@@ -26903,7 +26903,7 @@  (define_insn "avx512dq_ranges<mode><mask
       && <mask_scalar4_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
 }
   [(set_attr "type" "sse")
@@ -26949,7 +26949,7 @@  (define_insn "<avx512>_getmant<mode><mas
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
       && <mask3_dest_false_dep_for_glc_cond>
       && MEM_P (operands[1]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
 }
   [(set_attr "prefix" "evex")
@@ -26971,7 +26971,7 @@  (define_insn "avx512f_vgetmant<mode><mas
       && <mask_scalar4_dest_false_dep_for_glc_cond>
       && !reg_mentioned_p (operands[0], operands[1])
       && !reg_mentioned_p (operands[0], operands[2]))
-    output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+    output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
 }
    [(set_attr "prefix" "evex")
--- gcc/testsuite/gcc.target/i386/pr104104.c.jj	2022-01-18 21:38:17.007906673 +0100
+++ gcc/testsuite/gcc.target/i386/pr104104.c	2022-01-18 21:36:10.475623148 +0100
@@ -0,0 +1,10 @@ 
+/* PR target/104104 */
+/* { dg-do assemble { target vect_simd_clones } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-march=alderlake -masm=intel -O1 -fallow-store-data-races -funroll-all-loops" } */
+
+__attribute__ ((simd)) short int
+foo (void)
+{
+  return 0;
+}