diff mbox series

Generate vadduqm and vsubuqm for TImode add/subtract

Message ID	Yn6FKAeRKy0GPD6M@toto.the-meissners.org
State	New
Headers	DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 5F4533857368 Date: Fri, 13 May 2022 12:19:52 -0400 To: gcc-patches@gcc.gnu.org, Michael Meissner <meissner@linux.ibm.com>, Segher Boessenkool <segher@kernel.crashing.org>, "Kewen.Lin" <linkw@linux.ibm.com>, David Edelsohn <dje.gcc@gmail.com>, Peter Bergner <bergner@linux.ibm.com>, Will Schmidt <will_schmidt@vnet.ibm.com> Subject: [PATCH] Generate vadduqm and vsubuqm for TImode add/subtract Message-ID: <Yn6FKAeRKy0GPD6M@toto.the-meissners.org> Mail-Followup-To: Michael Meissner <meissner@linux.ibm.com>, gcc-patches@gcc.gnu.org, Segher Boessenkool <segher@kernel.crashing.org>, "Kewen.Lin" <linkw@linux.ibm.com>, David Edelsohn <dje.gcc@gmail.com>, Peter Bergner <bergner@linux.ibm.com>, Will Schmidt <will_schmidt@vnet.ibm.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Precedence: list From: Michael Meissner via Gcc-patches <gcc-patches@gcc.gnu.org> Reply-To: Michael Meissner <meissner@linux.ibm.com> Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org>
Series	Generate vadduqm and vsubuqm for TImode add/subtract \| Generate vadduqm and vsubuqm for TImode add/subtract

Commit Message

Michael Meissner May 13, 2022, 4:19 p.m. UTC

  Generate vadduqm and vsubuqm for TImode add/subtract

If the TImode variable is in an Altivec register instead of a GPR, then
generate vadduqm and vsubuqm instead of moving the value to the GPRs and
doing the add or subtract with carry instructions.  To do this, we have to
delay the splitting of the addition and subtraction until after register
allocation.

I have built this patch on little endian power10, little endian power9, and big
endian power8 systems.  There were no regressions.  Can I install this patch to
the GCC 13 master branch?

2022-05-13   Michael Meissner  <meissner@linux.ibm.com>

gcc/
	* config/rs6000/rs6000.md (addti3): Generate vadduqm if we are
	using the Altivec registers.
	(subti3): Generate vsubuqm if we are using the Altivec registers.
	(negti3): New insn.

gcc/testsuite/
	* gcc.target/powerpc/vadduqm-vsubuqm.c: New test.
---
 gcc/config/rs6000/rs6000.md                   | 82 ++++++++++++++-----
 .../gcc.target/powerpc/vadduqm-vsubuqm.c      | 22 +++++
 2 files changed, 83 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c

Comments

will schmidt May 13, 2022, 6:47 p.m. UTC | #1

On Fri, 2022-05-13 at 12:19 -0400, Michael Meissner wrote:
> Generate vadduqm and vsubuqm for TImode add/subtract
> 
> If the TImode variable is in an Altivec register instead of a GPR
> register, then generate vadduqm and vsubuqm instead of having to move the
> value to the GPR registers and doing the add and subtract with carry
> instructions.  To do this, we have to delay the splitting of the addition
> and subtraction until after register allocation.

Ok.


> 
> I have built this patch on little endian power10, little endian power9, and big
> endian power8 systems.  There were no regressions.  Can I install this patch to
> the GCC 13 master branch?
> 
> 2022-05-13   Michael Meissner  <meissner@linux.ibm.com>
> 
> gcc/
> 	* config/rs6000/rs6000.md (addti3): Generate vadduqm if we are
> 	using the Altivec registers.
> 	(subti3): Generate vsubuqm if we using the Altivec registers.
> 	(negti3): New insn.
> 
> gcc/testsuite/
> 	* gcc.target/powerpc/vadduqm-vsubuqm.c: New test.
> ---
>  gcc/config/rs6000/rs6000.md                   | 82 ++++++++++++++-----
>  .../gcc.target/powerpc/vadduqm-vsubuqm.c      | 22 +++++
>  2 files changed, 83 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 83eacec57ba..f120ca0b48d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -7139,15 +7139,22 @@ (define_expand "feraiseexceptsi"
>  ;;
>  ;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to allow
>  ;; for combine to make things like multiply and add with extend operations.
> +;;
> +;; Also add support in case the 128-bit integer happens to be an Altivec
> +;; register.
> 
>  (define_insn_and_split "addti3"
> -  [(set (match_operand:TI 0 "gpc_reg_operand"               "=&r,r,r")
> -	(plus:TI (match_operand:TI 1 "gpc_reg_operand"       "r, 0,r")
> -		 (match_operand:TI 2 "reg_or_short_operand"  "rI,r,0")))
> +  [(set (match_operand:TI 0 "gpc_reg_operand"              "=&r, r,r,v")
> +	(plus:TI (match_operand:TI 1 "gpc_reg_operand"       "r, 0,r,v")
> +		 (match_operand:TI 2 "reg_or_short_operand"  "rI,r,0,v")))

Nit: I still can't tell whether the "r, 0,r,v" should be comma-space or
just comma delimited.

Remainder looks OK.  
thanks
-Will



>     (clobber (reg:DI CA_REGNO))]
>    "TARGET_64BIT"
> -  "#"
> -  "&& 1"
> +  "@
> +   #
> +   #
> +   #
> +   vadduqm %0,%1,%2"
> +  "&& reload_completed && int_reg_operand (operands[0], TImode)"
>    [(pc)]
>  {
>    rtx lo0 = gen_lowpart (DImode, operands[0]);
> @@ -7157,27 +7164,27 @@ (define_insn_and_split "addti3"
>    rtx hi1 = gen_highpart (DImode, operands[1]);
>    rtx hi2 = gen_highpart_mode (DImode, TImode, operands[2]);
> 
> -  if (!reg_or_short_operand (lo2, DImode))
> -    lo2 = force_reg (DImode, lo2);
> -  if (!adde_operand (hi2, DImode))
> -    hi2 = force_reg (DImode, hi2);
> -
>    emit_insn (gen_adddi3_carry (lo0, lo1, lo2));
>    emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2));
>    DONE;
>  }
> -  [(set_attr "length" "8")
> +  [(set_attr "length" "8,8,8,*")
> +   (set_attr "isa"    "*,*,*,p8v")
>     (set_attr "type"   "add")
>     (set_attr "size"   "128")])
> 
>  (define_insn_and_split "subti3"
> -  [(set (match_operand:TI 0 "gpc_reg_operand"                "=&r,r,r")
> -	(minus:TI (match_operand:TI 1 "reg_or_short_operand" "rI,0,r")
> -		  (match_operand:TI 2 "gpc_reg_operand"      "r, r,0")))
> +  [(set (match_operand:TI 0 "gpc_reg_operand"                "=&r, r,r,v")
> +	(minus:TI (match_operand:TI 1 "reg_or_short_operand"  "rI,0,r,v")
> +		  (match_operand:TI 2 "gpc_reg_operand"       "r, r,0,v")))
>     (clobber (reg:DI CA_REGNO))]
>    "TARGET_64BIT"
> -  "#"
> -  "&& 1"
> +  "@
> +   #
> +   #
> +   #
> +   vsubuqm %0,%1,%2"
> +  "&& reload_completed && int_reg_operand (operands[0], TImode)"
>    [(pc)]
>  {
>    rtx lo0 = gen_lowpart (DImode, operands[0]);
> @@ -7187,16 +7194,49 @@ (define_insn_and_split "subti3"
>    rtx hi1 = gen_highpart_mode (DImode, TImode, operands[1]);
>    rtx hi2 = gen_highpart (DImode, operands[2]);
> 
> -  if (!reg_or_short_operand (lo1, DImode))
> -    lo1 = force_reg (DImode, lo1);
> -  if (!adde_operand (hi1, DImode))
> -    hi1 = force_reg (DImode, hi1);
> -
>    emit_insn (gen_subfdi3_carry (lo0, lo2, lo1));
>    emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1));
>    DONE;
> +}
> +  [(set_attr "length" "8,8,8,*")
> +   (set_attr "isa"    "*,*,*,p8v")
> +   (set_attr "type"   "add")
> +   (set_attr "size"   "128")])
> +
> +;; 128-bit integer negation, normally use GPRs.  If we are using Altivec
> +;; registers, create a 0 and do a vsubuqm.
> +(define_insn_and_split "negti3"
> +  [(set (match_operand:TI 0 "gpc_reg_operand"         "=&r,&v")
> +	(neg:TI (match_operand:TI 1 "gpc_reg_operand"   "r,v")))
> +   (clobber (reg:DI CA_REGNO))]
> +  "TARGET_64BIT"
> +  "#"
> +  "&& reload_completed"
> +  [(pc)]
> +{
> +  rtx dest = operands[0];
> +  rtx src = operands[1];
> +
> +  if (altivec_register_operand (dest, TImode))
> +    {
> +      emit_move_insn (dest, const0_rtx);
> +      emit_insn (gen_subti3 (dest, dest, src));
> +      DONE;
> +    }
> +  else
> +    {
> +      rtx dest_lo = gen_lowpart (DImode, dest);
> +      rtx dest_hi = gen_highpart (DImode, dest);
> +      rtx src_lo = gen_lowpart (DImode, src);
> +      rtx src_hi = gen_highpart (DImode, src);
> +
> +      emit_insn (gen_subfdi3_carry (dest_lo, src_lo, const0_rtx));
> +      emit_insn (gen_subfdi3_carry_in (dest_hi, src_hi, const0_rtx));
> +      DONE;
> +    }
>  }
>    [(set_attr "length" "8")
> +   (set_attr "isa"    "*,p8v")
>     (set_attr "type"   "add")
>     (set_attr "size"   "128")])
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
> new file mode 100644
> index 00000000000..5cb2fe37e9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +/* { dg-require-effective-target power10_ok } */
> +
> +/* Test that we generate vadduqm and vsubuqm for 128-bit integer add and
> +   subtracts if the value is in an Altivec register.  We use 128-bit divide to
> +   force the register selection to be in an altivec register.  */
> +
> +void
> +test (__int128_t *p,
> +      __int128_t *q,
> +      __int128_t *r,
> +      __int128_t *s,
> +      __int128_t *t)
> +{
> +  *p = (*q + *r) / (*s - *t);	/* vadduqm, vsubuqm, vdivsq.  */
> +}
> +
> +/* { dg-final { scan-assembler {\mvadduqm\M} } } */
> +/* { dg-final { scan-assembler {\mvdivsq\M}  } } */
> +/* { dg-final { scan-assembler {\mvsubuqm\M} } } */
> -- 
> 2.35.3
> 
>

diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 83eacec57ba..f120ca0b48d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7139,15 +7139,22 @@  (define_expand "feraiseexceptsi"
 ;;
 ;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to allow
 ;; for combine to make things like multiply and add with extend operations.
+;;
+;; Also add support in case the 128-bit integer happens to be an Altivec
+;; register.
 
 (define_insn_and_split "addti3"
-  [(set (match_operand:TI 0 "gpc_reg_operand"               "=&r,r,r")
-	(plus:TI (match_operand:TI 1 "gpc_reg_operand"       "r, 0,r")
-		 (match_operand:TI 2 "reg_or_short_operand"  "rI,r,0")))
+  [(set (match_operand:TI 0 "gpc_reg_operand"              "=&r, r,r,v")
+	(plus:TI (match_operand:TI 1 "gpc_reg_operand"       "r, 0,r,v")
+		 (match_operand:TI 2 "reg_or_short_operand"  "rI,r,0,v")))
    (clobber (reg:DI CA_REGNO))]
   "TARGET_64BIT"
-  "#"
-  "&& 1"
+  "@
+   #
+   #
+   #
+   vadduqm %0,%1,%2"
+  "&& reload_completed && int_reg_operand (operands[0], TImode)"
   [(pc)]
 {
   rtx lo0 = gen_lowpart (DImode, operands[0]);
@@ -7157,27 +7164,27 @@  (define_insn_and_split "addti3"
   rtx hi1 = gen_highpart (DImode, operands[1]);
   rtx hi2 = gen_highpart_mode (DImode, TImode, operands[2]);
 
-  if (!reg_or_short_operand (lo2, DImode))
-    lo2 = force_reg (DImode, lo2);
-  if (!adde_operand (hi2, DImode))
-    hi2 = force_reg (DImode, hi2);
-
   emit_insn (gen_adddi3_carry (lo0, lo1, lo2));
   emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2));
   DONE;
 }
-  [(set_attr "length" "8")
+  [(set_attr "length" "8,8,8,*")
+   (set_attr "isa"    "*,*,*,p8v")
    (set_attr "type"   "add")
    (set_attr "size"   "128")])
 
 (define_insn_and_split "subti3"
-  [(set (match_operand:TI 0 "gpc_reg_operand"                "=&r,r,r")
-	(minus:TI (match_operand:TI 1 "reg_or_short_operand" "rI,0,r")
-		  (match_operand:TI 2 "gpc_reg_operand"      "r, r,0")))
+  [(set (match_operand:TI 0 "gpc_reg_operand"                "=&r, r,r,v")
+	(minus:TI (match_operand:TI 1 "reg_or_short_operand"  "rI,0,r,v")
+		  (match_operand:TI 2 "gpc_reg_operand"       "r, r,0,v")))
    (clobber (reg:DI CA_REGNO))]
   "TARGET_64BIT"
-  "#"
-  "&& 1"
+  "@
+   #
+   #
+   #
+   vsubuqm %0,%1,%2"
+  "&& reload_completed && int_reg_operand (operands[0], TImode)"
   [(pc)]
 {
   rtx lo0 = gen_lowpart (DImode, operands[0]);
@@ -7187,16 +7194,49 @@  (define_insn_and_split "subti3"
   rtx hi1 = gen_highpart_mode (DImode, TImode, operands[1]);
   rtx hi2 = gen_highpart (DImode, operands[2]);
 
-  if (!reg_or_short_operand (lo1, DImode))
-    lo1 = force_reg (DImode, lo1);
-  if (!adde_operand (hi1, DImode))
-    hi1 = force_reg (DImode, hi1);
-
   emit_insn (gen_subfdi3_carry (lo0, lo2, lo1));
   emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1));
   DONE;
+}
+  [(set_attr "length" "8,8,8,*")
+   (set_attr "isa"    "*,*,*,p8v")
+   (set_attr "type"   "add")
+   (set_attr "size"   "128")])
+
+;; 128-bit integer negation, normally use GPRs.  If we are using Altivec
+;; registers, create a 0 and do a vsubuqm.
+(define_insn_and_split "negti3"
+  [(set (match_operand:TI 0 "gpc_reg_operand"         "=&r,&v")
+	(neg:TI (match_operand:TI 1 "gpc_reg_operand"   "r,v")))
+   (clobber (reg:DI CA_REGNO))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(pc)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+
+  if (altivec_register_operand (dest, TImode))
+    {
+      emit_move_insn (dest, const0_rtx);
+      emit_insn (gen_subti3 (dest, dest, src));
+      DONE;
+    }
+  else
+    {
+      rtx dest_lo = gen_lowpart (DImode, dest);
+      rtx dest_hi = gen_highpart (DImode, dest);
+      rtx src_lo = gen_lowpart (DImode, src);
+      rtx src_hi = gen_highpart (DImode, src);
+
+      emit_insn (gen_subfdi3_carry (dest_lo, src_lo, const0_rtx));
+      emit_insn (gen_subfdi3_carry_in (dest_hi, src_hi, const0_rtx));
+      DONE;
+    }
 }
   [(set_attr "length" "8")
+   (set_attr "isa"    "*,p8v")
    (set_attr "type"   "add")
    (set_attr "size"   "128")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
new file mode 100644
index 00000000000..5cb2fe37e9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vadduqm-vsubuqm.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+/* { dg-require-effective-target power10_ok } */
+
+/* Test that we generate vadduqm and vsubuqm for 128-bit integer add and
+   subtracts if the value is in an Altivec register.  We use 128-bit divide to
+   force the register selection to be in an altivec register.  */
+
+void
+test (__int128_t *p,
+      __int128_t *q,
+      __int128_t *r,
+      __int128_t *s,
+      __int128_t *t)
+{
+  *p = (*q + *r) / (*s - *t);	/* vadduqm, vsubuqm, vdivsq.  */
+}
+
+/* { dg-final { scan-assembler {\mvadduqm\M} } } */
+/* { dg-final { scan-assembler {\mvdivsq\M}  } } */
+/* { dg-final { scan-assembler {\mvsubuqm\M} } } */