[x86,take,#3] PR target/91681: zero_extendditi2 pattern for more optimizations.

Message ID 06bb01d89e71$5d6283a0$18278ae0$@nextmovesoftware.com
State New
Headers
Series [x86,take,#3] PR target/91681: zero_extendditi2 pattern for more optimizations. |

Commit Message

Roger Sayle July 23, 2022, 8:51 a.m. UTC
  Hi Uros,

This is the next iteration of the zero_extendditi2 patch last reviewed here:

https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596204.html

 

[1] The sse.md changes were split out, reviewed, approved and committed.

[2] The *concat splitters have been moved post-reload matching what we

now do for many/most of the double word functionality.

[3] As you recommend, these *concat splitters now use split_double_mode

to "subreg" operand[0] into parts, via a new helper function that can also

handle overlapping registers, and even use xchg for the rare case that a

double word is constructed from its high and low parts, but the wrong

way around.

 

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap

and make -k check, both with and without -target_board=unix{-m32},

with no new failures.  Ok for mainline?

 

2022-07-23  Roger Sayle  <roger@nextmovesoftware.com>

            Uroš Bizjak  <ubizjak@gmail.com>

 

gcc/ChangeLog

        PR target/91681

        * config/i386/i386-expand.cc (split_double_concat): A new helper

        function for setting a double word value from two word values.

        * config/i386/i386-protos.h (split_double_concat): Prototype here.

        * config/i386/i386.md (zero_extendditi2): New define_insn_and_split.

        (*add<dwi>3_doubleword_zext): New define_insn_and_split.

        (*sub<dwi>3_doubleword_zext): New define_insn_and_split.

        (*concat<mode><dwi>3_1): New define_insn_and_split replacing

        previous define_split for implementing DST = (HI<<32)|LO as

        pair of move instructions, setting lopart and hipart.

        (*concat<mode><dwi>3_2): Likewise.

        (*concat<mode><dwi>3_3): Likewise, where HI is zero_extended.

        (*concat<mode><dwi>3_4): Likewise, where HI is zero_extended.

 

gcc/testsuite/ChangeLog

        PR target/91681

        * g++.target/i386/pr91681.C: New test case (from the PR).

        * gcc.target/i386/pr91681-1.c: New int128 test case.

        * gcc.target/i386/pr91681-2.c: Likewise.

        * gcc.target/i386/pr91681-3.c: Likewise, but for ia32.

 

 

Thanks in advance,

Roger

--
  

Comments

Uros Bizjak July 24, 2022, 11:03 a.m. UTC | #1
On Sat, Jul 23, 2022 at 10:51 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
>
> Hi Uros,
>
> This is the next iteration of the zero_extendditi2 patch last reviewed here:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596204.html
>
>
>
> [1] The sse.md changes were split out, reviewed, approved and committed.
>
> [2] The *concat splitters have been moved post-reload matching what we
>
> now do for many/most of the double word functionality.
>
> [3] As you recommend, these *concat splitters now use split_double_mode
>
> to “subreg” operand[0] into parts, via a new helper function that can also
>
> handle overlapping registers, and even use xchg for the rare case that a
>
> double word is constructed from its high and low parts, but the wrong
>
> way around.
>
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
>
> and make -k check, both with and without –target_board=unix{-m32},
>
> with no new failures.  Ok for mainline?
>
>
>
> 2022-07-23  Roger Sayle  <roger@nextmovesoftware.com>
>
>             Uroš Bizjak  <ubizjak@gmail.com>
>
>
>
> gcc/ChangeLog
>
>         PR target/91681
>
>         * config/i386/i386-expand.cc (split_double_concat): A new helper
>
>         function for setting a double word value from two word values.
>
>         * config/i386/i386-protos.h (split_double_concat): Prototype here.
>
>         * config/i386/i386.md (zero_extendditi2): New define_insn_and_split.
>
>         (*add<dwi>3_doubleword_zext): New define_insn_and_split.
>
>         (*sub<dwi>3_doubleword_zext): New define_insn_and_split.
>
>         (*concat<mode><dwi>3_1): New define_insn_and_split replacing
>
>         previous define_split for implementing DST = (HI<<32)|LO as
>
>         pair of move instructions, setting lopart and hipart.
>
>         (*concat<mode><dwi>3_2): Likewise.
>
>         (*concat<mode><dwi>3_3): Likewise, where HI is zero_extended.
>
>         (*concat<mode><dwi>3_4): Likewise, where HI is zero_extended.
>
>
>
> gcc/testsuite/ChangeLog
>
>         PR target/91681
>
>         * g++.target/i386/pr91681.C: New test case (from the PR).
>
>         * gcc.target/i386/pr91681-1.c: New int128 test case.
>
>         * gcc.target/i386/pr91681-2.c: Likewise.
>
>         * gcc.target/i386/pr91681-3.c: Likewise, but for ia32.

OK with two small adjustments:

+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+ (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]

Please put the new pattern above zero_extendsidi2.

+(define_insn_and_split "*add<dwi>3_doubleword_zext"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")

And this one after *add<dwi>3_doubleword, to keep all _doubleword
patterns together.

Thanks,
Uros.
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 40f821e..66d8f28 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -165,6 +165,46 @@  split_double_mode (machine_mode mode, rtx operands[],
     }
 }
 
+/* Emit the double word assignment DST = { LO, HI }.  */
+
+void
+split_double_concat (machine_mode mode, rtx dst, rtx lo, rtx hi)
+{
+  rtx dlo, dhi;
+  int deleted_move_count = 0;
+  split_double_mode (mode, &dst, 1, &dlo, &dhi);
+  if (!rtx_equal_p (dlo, hi))
+    {
+      if (!rtx_equal_p (dlo, lo))
+	emit_move_insn (dlo, lo);
+      else
+	deleted_move_count++;
+      if (!rtx_equal_p (dhi, hi))
+	emit_move_insn (dhi, hi);
+      else
+	deleted_move_count++;
+    }
+  else if (!rtx_equal_p (lo, dhi))
+    {
+      if (!rtx_equal_p (dhi, hi))
+	emit_move_insn (dhi, hi);
+      else
+	deleted_move_count++;
+      if (!rtx_equal_p (dlo, lo))
+	emit_move_insn (dlo, lo);
+      else
+	deleted_move_count++;
+    }
+  else if (mode == TImode)
+    emit_insn (gen_swapdi (dlo, dhi));
+  else
+    emit_insn (gen_swapsi (dlo, dhi));
+
+  if (deleted_move_count == 2)
+    emit_note (NOTE_INSN_DELETED);
+}
+
+
 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
    for the target.  */
 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index cf84775..e27c14f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -85,6 +85,7 @@  extern void print_reg (rtx, int, FILE*);
 extern void ix86_print_operand (FILE *, rtx, int);
 
 extern void split_double_mode (machine_mode, rtx[], int, rtx[], rtx[]);
+extern void split_double_concat (machine_mode, rtx, rtx lo, rtx);
 
 extern const char *output_set_got (rtx, rtx);
 extern const char *output_387_binary_op (rtx_insn *, rtx*);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9aaeb69..4560681 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4379,6 +4379,16 @@ 
    (set_attr "type" "imovx,mskmov,mskmov")
    (set_attr "mode" "SI,QI,QI")])
 
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+	(zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
+  "TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3) (match_dup 1))
+   (set (match_dup 4) (const_int 0))]
+  "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
+
 ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
 (define_peephole2
   [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
@@ -6512,6 +6522,31 @@ 
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
+(define_insn_and_split "*add<dwi>3_doubleword_zext"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+	(plus:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")) 
+	  (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (plus:DWIH (match_dup 1) (match_dup 2))
+		     (match_dup 1)))
+	      (set (match_dup 0)
+		   (plus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+		   (plus:DWIH
+		     (plus:DWIH
+		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+		       (match_dup 4))
+		     (const_int 0)))
+	      (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+
 ;; Like DWI, but use POImode instead of OImode.
 (define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])
 
@@ -6962,6 +6997,29 @@ 
     }
 })
 
+(define_insn_and_split "*sub<dwi>3_doubleword_zext"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+	(minus:<DWI>
+	  (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 1) (match_dup 2)))
+	      (set (match_dup 0)
+		   (minus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+		   (minus:DWIH
+		     (minus:DWIH
+		       (match_dup 4)
+		       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+		     (const_int 0)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+
 (define_insn "*sub<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
 	(minus:SWI
@@ -11111,34 +11169,68 @@ 
 
 ;; Split DST = (HI<<32)|LO early to minimize register usage.
 (define_code_iterator any_or_plus [plus ior xor])
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(any_or_plus:DI
-	  (ashift:DI (match_operand:DI 1 "register_operand")
-		     (const_int 32))
-	  (zero_extend:DI (match_operand:SI 2 "register_operand"))))]
-  "!TARGET_64BIT"
-  [(set (match_dup 3) (match_dup 4))
-   (set (match_dup 5) (match_dup 2))]
+(define_insn_and_split "*concat<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+	(any_or_plus:<DWI>
+	  (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+			(match_operand:<DWI> 2 "const_int_operand"))
+	  (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
+  "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
 {
-  operands[3] = gen_highpart (SImode, operands[0]);
-  operands[4] = gen_lowpart (SImode, operands[1]);
-  operands[5] = gen_lowpart (SImode, operands[0]);
+  split_double_concat (<DWI>mode, operands[0], operands[3],
+		       gen_lowpart (<MODE>mode, operands[1]));
+  DONE;
 })
 
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(any_or_plus:DI
-	  (zero_extend:DI (match_operand:SI 1 "register_operand"))
-	  (ashift:DI (match_operand:DI 2 "register_operand")
-		     (const_int 32))))]
-  "!TARGET_64BIT"
-  [(set (match_dup 3) (match_dup 4))
-   (set (match_dup 5) (match_dup 1))]
+(define_insn_and_split "*concat<mode><dwi>3_2"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+	(any_or_plus:<DWI>
+	  (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+	  (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r")
+			(match_operand:<DWI> 3 "const_int_operand"))))]
+  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
+{
+  split_double_concat (<DWI>mode, operands[0], operands[1],
+		       gen_lowpart (<MODE>mode, operands[2]));
+  DONE;
+})
+
+(define_insn_and_split "*concat<mode><dwi>3_3"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+	(any_or_plus:<DWI>
+	  (ashift:<DWI>
+	    (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+	    (match_operand:<DWI> 2 "const_int_operand"))
+	  (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
+  "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
 {
-  operands[3] = gen_highpart (SImode, operands[0]);
-  operands[4] = gen_lowpart (SImode, operands[2]);
-  operands[5] = gen_lowpart (SImode, operands[0]);
+  split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
+  DONE;
+})
+
+(define_insn_and_split "*concat<mode><dwi>3_4"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+	(any_or_plus:<DWI>
+	  (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+	  (ashift:<DWI>
+	    (zero_extend:<DWI> (match_operand:DWIH 2 "register_operand" "r"))
+	    (match_operand:<DWI> 3 "const_int_operand"))))]
+  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(clobber (const_int 0))]
+{
+  split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
+  DONE;
 })
 
 ;; Negation instructions
diff --git a/gcc/testsuite/g++.target/i386/pr91681.C b/gcc/testsuite/g++.target/i386/pr91681.C
new file mode 100644
index 0000000..0271e43
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr91681.C
@@ -0,0 +1,20 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+void multiply128x64x2_3 ( 
+    const unsigned long a, 
+    const unsigned long b, 
+    const unsigned long c, 
+    const unsigned long d, 
+    __uint128_t o[2])
+{
+    __uint128_t B0 = (__uint128_t) b * c;
+    __uint128_t B2 = (__uint128_t) a * c;
+    __uint128_t B1 = (__uint128_t) b * d;
+    __uint128_t B3 = (__uint128_t) a * d;
+
+    o[0] = B2 + (B0 >> 64);
+    o[1] = B3 + (B1 >> 64);
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91681-1.c b/gcc/testsuite/gcc.target/i386/pr91681-1.c
new file mode 100644
index 0000000..ab83cc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91681-1.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+unsigned __int128 m;
+
+unsigned __int128 foo(unsigned __int128 x, unsigned long long y)
+{
+    return x + y;
+}
+
+void bar(unsigned __int128 x, unsigned long long y)
+{
+    m = x + y;
+}
+
+void baz(unsigned long long y)
+{
+    m += y;
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91681-2.c b/gcc/testsuite/gcc.target/i386/pr91681-2.c
new file mode 100644
index 0000000..ea52c72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91681-2.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+unsigned __int128 m;
+
+unsigned __int128 foo(unsigned __int128 x, unsigned long long y)
+{
+    return x - y;
+}
+
+void bar(unsigned __int128 x, unsigned long long y)
+{
+    m = x - y;
+}
+
+void baz(unsigned long long y)
+{
+    m -= y;
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91681-3.c b/gcc/testsuite/gcc.target/i386/pr91681-3.c
new file mode 100644
index 0000000..22a03c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91681-3.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2" } */
+
+unsigned long long m;
+
+unsigned long long foo(unsigned long long x, unsigned int y)
+{
+    return x - y;
+}
+
+void bar(unsigned long long x, unsigned int y)
+{
+    m = x - y;
+}
+
+/* { dg-final { scan-assembler-not "xor" } } */