x86: Adjust gcc.target/i386/pr22076.c

Message ID 20211019182345.4034456-1-hjl.tools@gmail.com
State New
Headers
Series x86: Adjust gcc.target/i386/pr22076.c |

Commit Message

H.J. Lu Oct. 19, 2021, 6:23 p.m. UTC
  commit 247c407c83f0015f4b92d5f71e45b63192f6757e
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Mon Oct 18 12:15:40 2021 +0100

    Try placing RTL folded constants in the constant pool.

    My recent attempts to come up with a testcase for my patch to evaluate
    ss_plus in simplify-rtx.c, identified a missed optimization opportunity
    (that's potentially a long-time regression): The RTL optimizers no longer
    place constants in the constant pool.

changed -m32 codegen from

	movq    .LC1, %mm0
	paddb   .LC0, %mm0
	movq    %mm0, x
	ret

to

	movl    $807671820, %eax
	movl    $1616136252, %edx
	movl    %eax, x
	movl    %edx, x+4
	ret

and -m64 codegen from

	movq    .LC1(%rip), %mm0
	paddb   .LC0(%rip), %mm0
	movq    %xmm0, x(%rip)
	ret

to

	movq    .LC2(%rip), %rax
        movq    %rax, x(%rip)
        ret

Adjust pr22076.c to check that MMX registers aren't used, since avoiding
MMX registers isn't a bad thing.

	PR testsuite/102840
	* gcc.target/i386/pr22076.c: Update to check that MMX registers
	aren't used.
---
 gcc/testsuite/gcc.target/i386/pr22076.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
  

Comments

Uros Bizjak Oct. 20, 2021, 6:42 a.m. UTC | #1
On Tue, Oct 19, 2021 at 8:23 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> commit 247c407c83f0015f4b92d5f71e45b63192f6757e
> Author: Roger Sayle <roger@nextmovesoftware.com>
> Date:   Mon Oct 18 12:15:40 2021 +0100
>
>     Try placing RTL folded constants in the constant pool.
>
>     My recent attempts to come up with a testcase for my patch to evaluate
>     ss_plus in simplify-rtx.c, identified a missed optimization opportunity
>     (that's potentially a long-time regression): The RTL optimizers no longer
>     place constants in the constant pool.
>
> changed -m32 codegen from
>
>         movq    .LC1, %mm0
>         paddb   .LC0, %mm0
>         movq    %mm0, x
>         ret
>
> to
>
>         movl    $807671820, %eax
>         movl    $1616136252, %edx
>         movl    %eax, x
>         movl    %edx, x+4
>         ret
>
> and -m64 codegen from
>
>         movq    .LC1(%rip), %mm0
>         paddb   .LC0(%rip), %mm0
>         movq    %xmm0, x(%rip)
>         ret
>
> to
>
>         movq    .LC2(%rip), %rax
>         movq    %rax, x(%rip)
>         ret
>
> Adjust pr22076.c to check that MMX register isn't used since avoiding
> MMX register isn't a bad thing.
>
>         PR testsuite/102840
>         * gcc.target/i386/pr22076.c: Updated to check that MMX register
>         isn't used.

The compiler is now able to evaluate the result at compile time
and optimizes the test accordingly. Let's use an MMX instruction
that is implemented with UNSPEC, so the compiler won't be
able to outsmart us.

Something like the attached patch.

Uros.
> ---
>  gcc/testsuite/gcc.target/i386/pr22076.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr22076.c b/gcc/testsuite/gcc.target/i386/pr22076.c
> index 427ffcd4920..aa06f057690 100644
> --- a/gcc/testsuite/gcc.target/i386/pr22076.c
> +++ b/gcc/testsuite/gcc.target/i386/pr22076.c
> @@ -15,5 +15,6 @@ void test ()
>    x = _mm_add_pi8 (mm0, mm1);
>  }
>
> -/* { dg-final { scan-assembler-times "movq" 2 } } */
> -/* { dg-final { scan-assembler-not "movl" { target nonpic } } } */
> +/* { dg-final { scan-assembler-times "movq" 2 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movl" 4 { target { nonpic && ia32 } } } } */
> +/* { dg-final { scan-assembler-not "%mm" } }  */
> --
> 2.32.0
>
diff --git a/gcc/testsuite/gcc.target/i386/pr22076.c b/gcc/testsuite/gcc.target/i386/pr22076.c
index 427ffcd4920..766b732c681 100644
--- a/gcc/testsuite/gcc.target/i386/pr22076.c
+++ b/gcc/testsuite/gcc.target/i386/pr22076.c
@@ -1,9 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fomit-frame-pointer -mmmx -mno-sse2" } */
+/* { dg-options "-O2 -fomit-frame-pointer -mmmx -msse -mno-sse2" } */
 /* { dg-additional-options "-fno-common" { target *-*-darwin* } } */
 /* { dg-additional-options "-mdynamic-no-pic" { target { ia32 && *-*-darwin* } } } */
 
-#include <mmintrin.h>
+#include <xmmintrin.h>
 
 __m64 x;
 
@@ -12,7 +12,7 @@ void test ()
   __m64 mm0 = (__m64)(__v8qi) {1,2,3,4,5,6,7,8};
   __m64 mm1 = (__m64)(__v8qi) {11,22,33,44,55,66,77,88};
 
-  x = _mm_add_pi8 (mm0, mm1);
+  x = _mm_sad_pu8 (mm0, mm1);
 }
 
 /* { dg-final { scan-assembler-times "movq" 2 } } */
  
H.J. Lu Oct. 21, 2021, 4:50 p.m. UTC | #2
On Tue, Oct 19, 2021 at 11:42 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Oct 19, 2021 at 8:23 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > commit 247c407c83f0015f4b92d5f71e45b63192f6757e
> > Author: Roger Sayle <roger@nextmovesoftware.com>
> > Date:   Mon Oct 18 12:15:40 2021 +0100
> >
> >     Try placing RTL folded constants in the constant pool.
> >
> >     My recent attempts to come up with a testcase for my patch to evaluate
> >     ss_plus in simplify-rtx.c, identified a missed optimization opportunity
> >     (that's potentially a long-time regression): The RTL optimizers no longer
> >     place constants in the constant pool.
> >
> > changed -m32 codegen from
> >
> >         movq    .LC1, %mm0
> >         paddb   .LC0, %mm0
> >         movq    %mm0, x
> >         ret
> >
> > to
> >
> >         movl    $807671820, %eax
> >         movl    $1616136252, %edx
> >         movl    %eax, x
> >         movl    %edx, x+4
> >         ret
> >
> > and -m64 codegen from
> >
> >         movq    .LC1(%rip), %mm0
> >         paddb   .LC0(%rip), %mm0
> >         movq    %xmm0, x(%rip)
> >         ret
> >
> > to
> >
> >         movq    .LC2(%rip), %rax
> >         movq    %rax, x(%rip)
> >         ret
> >
> > Adjust pr22076.c to check that MMX register isn't used since avoiding
> > MMX register isn't a bad thing.
> >
> >         PR testsuite/102840
> >         * gcc.target/i386/pr22076.c: Updated to check that MMX register
> >         isn't used.
>
> The compiler is now able to evaluate the result at the compile time
> and it optimizes the test accordingly. Let's provide some MMX
> instruction that is implemented with UNSPEC, so the compiler won't be
> able to outsmart us.
>
> Something like the attached patch.
>
> Uros.

Works for me.

Thanks.
  
Uros Bizjak Oct. 21, 2021, 7 p.m. UTC | #3
On Thu, Oct 21, 2021 at 6:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Oct 19, 2021 at 11:42 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Oct 19, 2021 at 8:23 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > commit 247c407c83f0015f4b92d5f71e45b63192f6757e
> > > Author: Roger Sayle <roger@nextmovesoftware.com>
> > > Date:   Mon Oct 18 12:15:40 2021 +0100
> > >
> > >     Try placing RTL folded constants in the constant pool.
> > >
> > >     My recent attempts to come up with a testcase for my patch to evaluate
> > >     ss_plus in simplify-rtx.c, identified a missed optimization opportunity
> > >     (that's potentially a long-time regression): The RTL optimizers no longer
> > >     place constants in the constant pool.
> > >
> > > changed -m32 codegen from
> > >
> > >         movq    .LC1, %mm0
> > >         paddb   .LC0, %mm0
> > >         movq    %mm0, x
> > >         ret
> > >
> > > to
> > >
> > >         movl    $807671820, %eax
> > >         movl    $1616136252, %edx
> > >         movl    %eax, x
> > >         movl    %edx, x+4
> > >         ret
> > >
> > > and -m64 codegen from
> > >
> > >         movq    .LC1(%rip), %mm0
> > >         paddb   .LC0(%rip), %mm0
> > >         movq    %xmm0, x(%rip)
> > >         ret
> > >
> > > to
> > >
> > >         movq    .LC2(%rip), %rax
> > >         movq    %rax, x(%rip)
> > >         ret
> > >
> > > Adjust pr22076.c to check that MMX register isn't used since avoiding
> > > MMX register isn't a bad thing.
> > >
> > >         PR testsuite/102840
> > >         * gcc.target/i386/pr22076.c: Updated to check that MMX register
> > >         isn't used.
> >
> > The compiler is now able to evaluate the result at the compile time
> > and it optimizes the test accordingly. Let's provide some MMX
> > instruction that is implemented with UNSPEC, so the compiler won't be
> > able to outsmart us.
> >
> > Something like the attached patch.
> >
> > Uros.
>
> Works for me.

Committed with the following ChangeLog:

testsuite: Adjust pr22076.c to avoid compile-time optimization [PR102840]

2021-10-21  Uroš Bizjak  <ubizjak@gmail.com>

    PR testsuite/102840

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr22076.c: Adjust to avoid compile-time optimization.

Uros.
  

Patch

diff --git a/gcc/testsuite/gcc.target/i386/pr22076.c b/gcc/testsuite/gcc.target/i386/pr22076.c
index 427ffcd4920..aa06f057690 100644
--- a/gcc/testsuite/gcc.target/i386/pr22076.c
+++ b/gcc/testsuite/gcc.target/i386/pr22076.c
@@ -15,5 +15,6 @@  void test ()
   x = _mm_add_pi8 (mm0, mm1);
 }
 
-/* { dg-final { scan-assembler-times "movq" 2 } } */
-/* { dg-final { scan-assembler-not "movl" { target nonpic } } } */
+/* { dg-final { scan-assembler-times "movq" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl" 4 { target { nonpic && ia32 } } } } */
+/* { dg-final { scan-assembler-not "%mm" } }  */