Optimize multiply/add of DImode extended to TImode, PR target/103109.
Commit Message
Optimize multiply/add of DImode extended to TImode, PR target/103109.
On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results. This patch adds support to generate these instructions.
Previously GCC had define_expands to handle conversion of the 64-bit
extend to 128-bit and multiply. This patch changes these define_expands
to define_insn_and_split and then it provides combiner patterns to
generate thes multiply/add instructions.
To support using this optimization on power9, this patch extends the sign
extend DImode to TImode to also run on power9 (added for PR
target/104698).
This patch needs the previous patch to add unsigned DImode to TImode
conversion so that the combiner can combine the extend, multiply, and add
instructions.
I have built this patch on little endian power10, little endian power9, and big
endian power8 systems. There were no regressions when I ran it. Can I install
this patch into the GCC 13 master branch?
2022-05-13 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/103109
* config/rs6000/rs6000.md (su_int32): New code attribute.
(<u>mul<mode><dmode>3): Convert from define_expand to
define_insn_and_split.
(maddld<mode>4): Add generator function.
(<u>mulditi3_<u>adddi3): New insn.
(<u>mulditi3_add_const): New insn.
(<u>mulditi3_<u>adddi3_upper): New insn.
gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New test.
---
gcc/config/rs6000/rs6000.md | 128 +++++++++++++++++++-
gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++
2 files changed, 184 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103109.c
Comments
On Fri, 2022-05-13 at 12:17 -0400, Michael Meissner wrote:
> Optimize multiply/add of DImode extended to TImode, PR target/103109.
>
> On power9 and power10 systems, we have instructions that support doing
> 64-bit integers converted to 128-bit integers and producing 128-bit
> results. This patch adds support to generate these instructions.
>
> Previously GCC had define_expands to handle conversion of the 64-bit
> extend to 128-bit and multiply. This patch changes these define_expands
> to define_insn_and_split and then it provides combiner patterns to
> generate thes multiply/add instructions.
>
> To support using this optimization on power9, this patch extends the sign
> extend DImode to TImode to also run on power9 (added for PR
> target/104698).
>
> This patch needs the previous patch to add unsigned DImode to TImode
> conversion so that the combiner can combine the extend, multiply, and add
> instructions.
>
> I have built this patch on little endian power10, little endian power9, and big
> endian power8 systems. There were no regressions when I ran it. Can I install
> this patch into the GCC 13 master branch?
>
> 2022-05-13 Michael Meissner <meissner@linux.ibm.com>
>
> gcc/
> PR target/103109
> * config/rs6000/rs6000.md (su_int32): New code attribute.
> (<u>mul<mode><dmode>3): Convert from define_expand to
> define_insn_and_split.
> (maddld<mode>4): Add generator function.
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
Is the removal of the "*" considering adding generator? (Thats
terminology that I'm not immediately familiar with).
> (<u>mulditi3_<u>adddi3): New insn.
> (<u>mulditi3_add_const): New insn.
> (<u>mulditi3_<u>adddi3_upper): New insn.
>
> gcc/testsuite/
> PR target/103109
> * gcc.target/powerpc/pr103109.c: New test.
ok
> ---
> gcc/config/rs6000/rs6000.md | 128 +++++++++++++++++++-
> gcc/testsuite/gcc.target/powerpc/pr103109.c | 62 ++++++++++
> 2 files changed, 184 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103109.c
>
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 2aba70393d8..83eacec57ba 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -667,6 +667,9 @@ (define_code_attr uns [(fix "")
> (float "")
> (unsigned_float "uns")])
>
> +(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
> + (zero_extend "c32bit_cint_operand")])
> +
> ; Various instructions that come in SI and DI forms.
> ; A generic w/d attribute, for things like cmpw/cmpd.
> (define_mode_attr wd [(QI "b")
> @@ -3190,13 +3193,16 @@ (define_insn "<su>mulsi3_highpart_64"
> "mulhw<u> %0,%1,%2"
> [(set_attr "type" "mul")])
>
> -(define_expand "<u>mul<mode><dmode>3"
> - [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
> +(define_insn_and_split "<u>mul<mode><dmode>3"
> + [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
> (mult:<DMODE> (any_extend:<DMODE>
> - (match_operand:GPR 1 "gpc_reg_operand"))
> + (match_operand:GPR 1 "gpc_reg_operand" "r"))
> (any_extend:<DMODE>
> - (match_operand:GPR 2 "gpc_reg_operand"))))]
> + (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
> "!(<MODE>mode == SImode && TARGET_POWERPC64)"
> + "#"
> + "&& 1"
> + [(pc)]
> {
> rtx l = gen_reg_rtx (<MODE>mode);
> rtx h = gen_reg_rtx (<MODE>mode);
> @@ -3205,9 +3211,10 @@ (define_expand "<u>mul<mode><dmode>3"
> emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
> emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
> DONE;
> -})
> +}
> + [(set_attr "length" "8")])
>
ok
> -(define_insn "*maddld<mode>4"
> +(define_insn "maddld<mode>4"
> [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
> (plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
> (match_operand:GPR 2 "gpc_reg_operand" "r"))
ok
> @@ -3216,6 +3223,115 @@ (define_insn "*maddld<mode>4"
> "maddld %0,%1,%2,%3"
> [(set_attr "type" "mul")])
>
> +(define_insn_and_split "*<u>mulditi3_<u>adddi3"
> + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
> + (plus:TI
> + (mult:TI
> + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
> + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
> + "TARGET_MADDLD && TARGET_POWERPC64"
> + "#"
> + "&& 1"
> + [(pc)]
> +{
> + rtx dest = operands[0];
> + rtx dest_hi = gen_highpart (DImode, dest);
> + rtx dest_lo = gen_lowpart (DImode, dest);
> + rtx op1 = operands[1];
> + rtx op2 = operands[2];
> + rtx op3 = operands[3];
> + rtx tmp_hi, tmp_lo;
> +
> + if (can_create_pseudo_p ())
> + {
> + tmp_hi = gen_reg_rtx (DImode);
> + tmp_lo = gen_reg_rtx (DImode);
> + }
> + else
> + {
> + tmp_hi = dest_hi;
> + tmp_lo = dest_lo;
> + }
> +
> + emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
> + emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
> +
> + if (can_create_pseudo_p ())
> + {
> + emit_move_insn (dest_hi, tmp_hi);
> + emit_move_insn (dest_lo, tmp_lo);
> + }
> + DONE;
> +}
> + [(set_attr "length" "8")])
Seems OK, I did not closely scrutinize.
> +
> +;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
> +;; force the constant into a register to generate li, maddhd, and maddld,
> +;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
> +;; with the pattern that handles registers, since constants don't have a sign
> +;; or zero extend around them.
ok.
> +(define_insn_and_split "*<u>mulditi3_add_const"
> + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
> + (plus:TI
> + (mult:TI
> + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
> + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
> + (match_operand 3 "<su_int32>" "r")))]
> + "TARGET_MADDLD && TARGET_POWERPC64
> +"
> + "#"
> + "&& 1"
> + [(pc)]
> +{
> + rtx dest = operands[0];
> + rtx dest_hi = gen_highpart (DImode, dest);
> + rtx dest_lo = gen_lowpart (DImode, dest);
> + rtx op1 = operands[1];
> + rtx op2 = operands[2];
> + rtx op3 = force_reg (DImode, operands[3]);
> + rtx tmp_hi, tmp_lo;
> +
> + if (can_create_pseudo_p ())
> + {
> + tmp_hi = gen_reg_rtx (DImode);
> + tmp_lo = gen_reg_rtx (DImode);
> + }
> + else
> + {
> + tmp_hi = dest_hi;
> + tmp_lo = dest_lo;
> + }
> +
> + emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
> + emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
> +
> + if (can_create_pseudo_p ())
> + {
> + emit_move_insn (dest_hi, tmp_hi);
> + emit_move_insn (dest_lo, tmp_lo);
> + }
> + DONE;
> +}
> + [(set_attr "length" "8")
> + (set_attr "type" "mul")
> + (set_attr "size" "64")])
> +
> +(define_insn "<u>mulditi3_<u>adddi3_upper"
> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (truncate:DI
> + (lshiftrt:TI
> + (plus:TI
> + (mult:TI
> + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
> + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
> + (const_int 64))))]
> + "TARGET_MADDLD && TARGET_POWERPC64"
> + "maddhd<u> %0,%1,%2,%3"
> + [(set_attr "type" "mul")
> + (set_attr "size" "64")])
> +
> (define_insn "udiv<mode>3"
> [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
> (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
appears OK. I defer to others for scrutiny :-)
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
> new file mode 100644
> index 00000000000..ae2cfb9eda7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
> @@ -0,0 +1,62 @@
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
> + power9 instructions when doing some forms of 64-bit integers converted to
> + 128-bit integers and used with multiply/add operations. */
> +
> +__int128_t
> +s_mult_add (long long a,
> + long long b,
> + long long c)
> +{
> + /* maddhd, maddld. */
> + return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
> +}
> +
> +/* Test 32-bit constants that are loaded into GPRs instead of doing the
> + mulld/mulhd and then addic/addime or addc/addze. */
addme ?
Thanks,
-Will
> +__int128_t
> +s_mult_add_m10 (long long a,
> + long long b)
> +{
> + /* maddhd, maddld. */
> + return ((__int128_t)a * (__int128_t)b) - 10;
> +}
> +
> +__int128_t
> +s_mult_add_70000 (long long a,
> + long long b)
> +{
> + /* maddhd, maddld. */
> + return ((__int128_t)a * (__int128_t)b) + 70000;
> +}
> +
> +__uint128_t
> +u_mult_add (unsigned long long a,
> + unsigned long long b,
> + unsigned long long c)
> +{
> + /* maddhd, maddld. */
> + return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
> +}
> +
> +__uint128_t
> +u_mult_add_0x80000000 (unsigned long long a,
> + unsigned long long b)
> +{
> + /* maddhd, maddld. */
> + return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
> +}
> +
> +/* { dg-final { scan-assembler-not {\maddc\M} } } */
> +/* { dg-final { scan-assembler-not {\madde\M} } } */
> +/* { dg-final { scan-assembler-not {\maddid\M} } } */
> +/* { dg-final { scan-assembler-not {\maddme\M} } } */
> +/* { dg-final { scan-assembler-not {\maddze\M} } } */
> +/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
> +/* { dg-final { scan-assembler-not {\mmulld\M} } } */
> +/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
> +/* { dg-final { scan-assembler-times {\mmaddhdu\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mmaddld\M} 5 } } */
> --
> 2.35.3
>
>
On Fri, May 13, 2022 at 01:20:30PM -0500, will schmidt wrote:
> On Fri, 2022-05-13 at 12:17 -0400, Michael Meissner wrote:
> > Optimize multiply/add of DImode extended to TImode, PR target/103109.
> >
> > On power9 and power10 systems, we have instructions that support doing
> > 64-bit integers converted to 128-bit integers and producing 128-bit
> > results. This patch adds support to generate these instructions.
> >
> > Previously GCC had define_expands to handle conversion of the 64-bit
> > extend to 128-bit and multiply. This patch changes these define_expands
> > to define_insn_and_split and then it provides combiner patterns to
> > generate thes multiply/add instructions.
> >
> > To support using this optimization on power9, this patch extends the sign
> > extend DImode to TImode to also run on power9 (added for PR
> > target/104698).
> >
> > This patch needs the previous patch to add unsigned DImode to TImode
> > conversion so that the combiner can combine the extend, multiply, and add
> > instructions.
> >
> > I have built this patch on little endian power10, little endian power9, and big
> > endian power8 systems. There were no regressions when I ran it. Can I install
> > this patch into the GCC 13 master branch?
> >
> > 2022-05-13 Michael Meissner <meissner@linux.ibm.com>
> >
> > gcc/
> > PR target/103109
> > * config/rs6000/rs6000.md (su_int32): New code attribute.
> > (<u>mul<mode><dmode>3): Convert from define_expand to
> > define_insn_and_split.
> > (maddld<mode>4): Add generator function.
>
> -(define_insn "*maddld<mode>4"
> +(define_insn "maddld<mode>4"
>
> Is the removal of the "*" considering adding generator? (Thats
> terminology that I'm not immediately familiar with).
Yes. If you have a pattern:
(define_insn "foosi2"
[(set (match_operand:SI 0 "register_operand" "=r")
(foo:SI (match_operand:SI 1 "register_operand" "r")))]
""
"foo %0,%1")
It creates a 'gen_foosi2' function that has 2 arguments, and it makes the insn
listed.
It then has support for insn recognition and output.
If the pattern starts with a '*', there is no 'gen_foosi2' function created,
but the insn recognitiion and output are still done.
In practice, you typically use the '*' names for patterns that are used as the
targets of combination, or separate insns for different machines.
Here is the verbage from rtl.texi:
These names serve one of two purposes. The first is to indicate that the
instruction performs a certain standard job for the RTL-generation
pass of the compiler, such as a move, an addition, or a conditional
jump. The second is to help the target generate certain target-specific
operations, such as when implementing target-specific intrinsic functions.
It is better to prefix target-specific names with the name of the
target, to avoid any clash with current or future standard names.
The absence of a name is indicated by writing an empty string
where the name should go. Nameless instruction patterns are never
used for generating RTL code, but they may permit several simpler insns
to be combined later on.
For the purpose of debugging the compiler, you may also specify a
name beginning with the @samp{*} character. Such a name is used only
for identifying the instruction in RTL dumps; it is equivalent to having
a nameless pattern for all other purposes. Names beginning with the
@samp{*} character are not required to be unique.
> > diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
> > new file mode 100644
> > index 00000000000..ae2cfb9eda7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
> > @@ -0,0 +1,62 @@
> > +/* { dg-require-effective-target int128 } */
> > +/* { dg-require-effective-target power10_ok } */
> > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> > +
> > +/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
> > + power9 instructions when doing some forms of 64-bit integers converted to
> > + 128-bit integers and used with multiply/add operations. */
> > +
> > +__int128_t
> > +s_mult_add (long long a,
> > + long long b,
> > + long long c)
> > +{
> > + /* maddhd, maddld. */
> > + return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
> > +}
> > +
> > +/* Test 32-bit constants that are loaded into GPRs instead of doing the
> > + mulld/mulhd and then addic/addime or addc/addze. */
>
> addme ?
Yes, I meant addme. Good catch.
On Tue, 2022-05-17 at 23:15 -0400, Michael Meissner wrote:
> On Fri, May 13, 2022 at 01:20:30PM -0500, will schmidt wrote:
> > On Fri, 2022-05-13 at 12:17 -0400, Michael Meissner wrote:
> > >
> > >
<snip>
> > > gcc/
> > > PR target/103109
> > > * config/rs6000/rs6000.md (su_int32): New code attribute.
> > > (<u>mul<mode><dmode>3): Convert from define_expand to
> > > define_insn_and_split.
> > > (maddld<mode>4): Add generator function.
> >
> > -(define_insn "*maddld<mode>4"
> > +(define_insn "maddld<mode>4"
> >
> > Is the removal of the "*" considering adding generator? (Thats
> > terminology that I'm not immediately familiar with).
>
> Yes. If you have a pattern:
>
> (define_insn "foosi2"
> [(set (match_operand:SI 0 "register_operand" "=r")
> (foo:SI (match_operand:SI 1 "register_operand" "r")))]
> ""
> "foo %0,%1")
>
> It creates a 'gen_foosi2' function that has 2 arguments, and it makes
> the insn
> listed.
>
> It then has support for insn recognition and output.
>
> If the pattern starts with a '*', there is no 'gen_foosi2' function
> created,
> but the insn recognitiion and output are still done.
>
> In practice, you typically use the '*' names for patterns that are
> used as the
> targets of combination, or separate insns for different machines.
>
> Here is the verbage from rtl.texi:
>
> These names serve one of two purposes. The first is to indicate that
> the
> instruction performs a certain standard job for the RTL-generation
> pass of the compiler, such as a move, an addition, or a conditional
> jump. The second is to help the target generate certain target-
> specific
> operations, such as when implementing target-specific intrinsic
> functions.
>
> It is better to prefix target-specific names with the name of the
> target, to avoid any clash with current or future standard names.
>
> The absence of a name is indicated by writing an empty string
> where the name should go. Nameless instruction patterns are never
> used for generating RTL code, but they may permit several simpler
> insns
> to be combined later on.
>
> For the purpose of debugging the compiler, you may also specify a
> name beginning with the @samp{*} character. Such a name is used only
> for identifying the instruction in RTL dumps; it is equivalent to
> having
> a nameless pattern for all other purposes. Names beginning with the
> @samp{*} character are not required to be unique.
Thanks for the explanation. :-)
-Will
@@ -667,6 +667,9 @@ (define_code_attr uns [(fix "")
(float "")
(unsigned_float "uns")])
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+ (zero_extend "c32bit_cint_operand")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b")
@@ -3190,13 +3193,16 @@ (define_insn "<su>mulsi3_highpart_64"
"mulhw<u> %0,%1,%2"
[(set_attr "type" "mul")])
-(define_expand "<u>mul<mode><dmode>3"
- [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+ [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
(mult:<DMODE> (any_extend:<DMODE>
- (match_operand:GPR 1 "gpc_reg_operand"))
+ (match_operand:GPR 1 "gpc_reg_operand" "r"))
(any_extend:<DMODE>
- (match_operand:GPR 2 "gpc_reg_operand"))))]
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
"!(<MODE>mode == SImode && TARGET_POWERPC64)"
+ "#"
+ "&& 1"
+ [(pc)]
{
rtx l = gen_reg_rtx (<MODE>mode);
rtx h = gen_reg_rtx (<MODE>mode);
@@ -3205,9 +3211,10 @@ (define_expand "<u>mul<mode><dmode>3"
emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
DONE;
-})
+}
+ [(set_attr "length" "8")])
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3216,6 +3223,115 @@ (define_insn "*maddld<mode>4"
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (match_operand 3 "<su_int32>" "r")))]
+ "TARGET_MADDLD && TARGET_POWERPC64
+"
+ "#"
+ "&& 1"
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx dest_hi = gen_highpart (DImode, dest);
+ rtx dest_lo = gen_lowpart (DImode, dest);
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = force_reg (DImode, operands[3]);
+ rtx tmp_hi, tmp_lo;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp_hi = gen_reg_rtx (DImode);
+ tmp_lo = gen_reg_rtx (DImode);
+ }
+ else
+ {
+ tmp_hi = dest_hi;
+ tmp_lo = dest_lo;
+ }
+
+ emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+ emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+ if (can_create_pseudo_p ())
+ {
+ emit_move_insn (dest_hi, tmp_hi);
+ emit_move_insn (dest_lo, tmp_lo);
+ }
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "mul")
+ (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (plus:TI
+ (mult:TI
+ (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_MADDLD && TARGET_POWERPC64"
+ "maddhd<u> %0,%1,%2,%3"
+ [(set_attr "type" "mul")
+ (set_attr "size" "64")])
+
(define_insn "udiv<mode>3"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
new file mode 100644
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+ power9 instructions when doing some forms of 64-bit integers converted to
+ 128-bit integers and used with multiply/add operations. */
+
+__int128_t
+s_mult_add (long long a,
+ long long b,
+ long long c)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+ mulld/mulhd and then addic/addime or addc/addze. */
+__int128_t
+s_mult_add_m10 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+ long long b)
+{
+ /* maddhd, maddld. */
+ return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+ unsigned long long b,
+ unsigned long long c)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+ unsigned long long b)
+{
+ /* maddhd, maddld. */
+ return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not {\maddc\M} } } */
+/* { dg-final { scan-assembler-not {\madde\M} } } */
+/* { dg-final { scan-assembler-not {\maddid\M} } } */
+/* { dg-final { scan-assembler-not {\maddme\M} } } */
+/* { dg-final { scan-assembler-not {\maddze\M} } } */
+/* { dg-final { scan-assembler-not {\mmulhd\M} } } */
+/* { dg-final { scan-assembler-not {\mmulld\M} } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 5 } } */