Tweak tree-ssa-math-opts.c to solve PR target/102117

Message ID 027401d7de10$8245e260$86d1a720$@nextmovesoftware.com
State New
Headers
Series Tweak tree-ssa-math-opts.c to solve PR target/102117 |

Commit Message

Roger Sayle Nov. 20, 2021, 1:14 p.m. UTC
  This patch resolves PR target/102117 on s390.  The problem is that
some of the functionality of GCC's RTL expanders is no longer triggered
following the transition to tree SSA form.  On s390, unsigned widening
multiplications are converted into WIDEN_MULT_EXPR (aka w* in tree dumps),
but signed widening multiplies are left in their original form, which
alas doesn't benefit from the clever logic in expand_widening_mult.

The fix is to teach convert_mult_to_widen that RTL expansion can
synthesize a signed widening multiplication if the target provides
a suitable umul_widen_optab.

On s390-linux-gnu with -O2 -m64, the code in the bugzilla PR currently
generates:

imul128:
        stmg    %r12,%r13,96(%r15)
        srag    %r0,%r4,63
        srag    %r1,%r3,63
        lgr     %r13,%r3
        mlgr    %r12,%r4
        msgr    %r1,%r4
        msgr    %r0,%r3
        lgr     %r4,%r12
        agr     %r1,%r0
        lgr     %r5,%r13
        agr     %r4,%r1
        stmg    %r4,%r5,0(%r2)
        lmg     %r12,%r13,96(%r15)
        br      %r14

but with this patch should now generate the more efficient:

imul128:
        lgr     %r1,%r3
        mlgr    %r0,%r4
        srag    %r5,%r3,63
        ngr     %r5,%r4
        srag    %r4,%r4,63
        sgr     %r0,%r5
        ngr     %r4,%r3
        sgr     %r0,%r4
        stmg    %r0,%r1,0(%r2)
        br      %r14


This patch has been tested on s390/z13 with a bootstrap and a regression
test (thanks to Robin Dapp), and on x86_64-pc-linux-gnu with a bootstrap
and regression test just to confirm that there are no unanticipated
side-effects.

Ok for mainline?


2021-11-20  Roger Sayle  <roger@nextmovesoftware.com>
            Robin Dapp  <rdapp@linux.ibm.com>

gcc/ChangeLog
	PR target/102117
	* tree-ssa-math-opts.c (convert_mult_to_widen): Recognize
	signed WIDEN_MULT_EXPR if the target supports umul_widen_optab.

gcc/testsuite/ChangeLog
	PR target/102117
	* gcc.target/s390/mul-wide.c: New test case.
	* gcc.target/s390/umul-wide.c: New test case.

Thanks in advance,
Roger
--
  

Comments

Jeff Law Nov. 20, 2021, 4:34 p.m. UTC | #1
On 11/20/2021 6:14 AM, Roger Sayle wrote:
> This patch resolves PR target/102117 on s390.  The problem is that
> some of the functionality of GCC's RTL expanders is no longer triggered
> following the transition to tree SSA form.  On s390, unsigned widening
> multiplications are converted into WIDEN_MULT_EXPR (aka w* in tree dumps),
> but signed widening multiplies are left in their original form, which
> alas doesn't benefit from the clever logic in expand_widening_mult.
>
> The fix is to teach convert_mult_to_widen, that RTL expansion can
> synthesize a signed widening multiplication if the target provides
> a suitable umul_widen_optab.
>
> On s390-linux-gnu with -O2 -m64, the code in the bugzilla PR currently
> generates:
>
> imul128:
>          stmg    %r12,%r13,96(%r15)
>          srag    %r0,%r4,63
>          srag    %r1,%r3,63
>          lgr     %r13,%r3
>          mlgr    %r12,%r4
>          msgr    %r1,%r4
>          msgr    %r0,%r3
>          lgr     %r4,%r12
>          agr     %r1,%r0
>          lgr     %r5,%r13
>          agr     %r4,%r1
>          stmg    %r4,%r5,0(%r2)
>          lmg     %r12,%r13,96(%r15)
>          br      %r14
>
> but with this patch should now generate the more efficient:
>
> imul128:
>          lgr     %r1,%r3
>          mlgr    %r0,%r4
>          srag    %r5,%r3,63
>          ngr     %r5,%r4
>          srag    %r4,%r4,63
>          sgr     %r0,%r5
>          ngr     %r4,%r3
>          sgr     %r0,%r4
>          stmg    %r0,%r1,0(%r2)
>          br      %r14
>
>
> This patch has been tested s390/z13 with a bootstrap and a regression
> test (thanks to Robin Dapp), and on x86_64-pc-linux-gnu with a bootstrap
> and regression test just to confirm that there are no unanticipated
> side-effects.
>
> Ok for mainline?
>
>
> 2021-11-20  Roger Sayle  <roger@nextmovesoftware.com>
>              Robin Dapp  <rdapp@linux.ibm.com>
>
> gcc/ChangeLog
> 	PR target/102117
> 	* tree-ssa-math-opts.c (convert_mult_to_widen): Recognize
> 	signed WIDEN_MULT_EXPR if the target supports umul_widen_optab.
>
> gcc/testsuite/ChangeLog
> 	PR target/102117
> 	* gcc.target/s390/mul-wide.c: New test case.
> 	* gcc.target/s390/umul-wide.c: New test case.
OK
jeff
  

Patch

diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index c4a6492..a944903 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2723,7 +2723,16 @@  convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
 	  from_unsigned1 = from_unsigned2 = false;
 	}
       else
-	return false;
+	{
+	  /* Expand can synthesize smul_widen_optab if the target
+	     supports umul_widen_optab.  */
+	  op = umul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+							  from_mode,
+							  &actual_mode);
+	  if (handler == CODE_FOR_nothing)
+	    return false;
+	}
     }
 
   /* Ensure that the inputs to the handler are in the correct precison
diff --git a/gcc/testsuite/gcc.target/s390/mul-wide.c b/gcc/testsuite/gcc.target/s390/mul-wide.c
new file mode 100644
index 0000000..8a2092e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/mul-wide.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m64 -fdump-tree-optimized" } */
+
+__int128 foo(long long a, long long b)
+{
+   return (__int128)a * (__int128)b;
+}
+
+/* { dg-final { scan-tree-dump " w\\* " "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/umul-wide.c b/gcc/testsuite/gcc.target/s390/umul-wide.c
new file mode 100644
index 0000000..33a74e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/umul-wide.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m64 -fdump-tree-optimized" } */
+
+unsigned __int128 foo(unsigned long long a, unsigned long long b)
+{
+   return (unsigned __int128)a * (unsigned __int128)b;
+}
+
+/* { dg-final { scan-tree-dump " w\\* " "optimized" } } */