[v2,1/2] LoongArch: Redundant sign extension elimination optimization.

Message ID 20240111113619.2063055-1-liwei@loongson.cn
State New
Headers
Series [v2,1/2] LoongArch: Redundant sign extension elimination optimization. |

Commit Message

Li Wei Jan. 11, 2024, 11:36 a.m. UTC
  We found that the combine pass in GCC currently cannot eliminate the
redundant sign extension in insn sequences such as the following:

(insn 77 76 78 5 (set (reg:SI 143)
        (plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0)
            (const_int 1 [0x1]))) {addsi3}
    (expr_list:REG_DEAD (reg/v:DI 104 [ len ])
        (nil)))
(insn 78 77 82 5 (set (reg/v:DI 104 [ len ])
        (sign_extend:DI (reg:SI 143))) {extendsidi2}
        (nil))

Because reg:SI 143 neither dies nor is set in insn 78, combine does not
merge the two insns. We adjusted the add template to eliminate such
redundant sign extensions during the expand pass.
Adjusted based on upstream comments:
https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html

gcc/ChangeLog:

	* config/loongarch/loongarch.md (add<mode>3): Removed.
	(*addsi3): New.
	(addsi3): Ditto.
	(adddi3): Ditto.
	(*addsi3_extended): Removed.
	(addsi3_extended): New.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/sign-extend.c: Moved to...
	* gcc.target/loongarch/sign-extend-1.c: ...here.
	* gcc.target/loongarch/sign-extend-2.c: New test.
---
 gcc/config/loongarch/loongarch.md             | 93 ++++++++++++++-----
 .../{sign-extend.c => sign-extend-1.c}        |  0
 .../gcc.target/loongarch/sign-extend-2.c      | 59 ++++++++++++
 3 files changed, 128 insertions(+), 24 deletions(-)
 rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
  

Comments

Lulu Cheng Jan. 12, 2024, 1:49 a.m. UTC | #1
Pushed to r14-7160 and r14-7161.

在 2024/1/11 下午7:36, Li Wei 写道:
> We found that the current combine optimization pass in gcc cannot handle
> the following redundant sign extension situations:
>
> (insn 77 76 78 5 (set (reg:SI 143)
>          (plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0)
>              (const_int 1 [0x1]))) {addsi3}
>      (expr_list:REG_DEAD (reg/v:DI 104 [ len ])
>          (nil)))
> (insn 78 77 82 5 (set (reg/v:DI 104 [ len ])
>          (sign_extend:DI (reg:SI 143))) {extendsidi2}
>          (nil))
>
> Because reg:SI 143 is not died or set in insn 78, no replacement merge will
> be performed for the insn sequence. We adjusted the add template to eliminate
> redundant sign extensions during the expand pass.
> Adjusted based on upstream comments:
> https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641988.html
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.md (add<mode>3): Removed.
> 	(*addsi3): New.
> 	(addsi3): Ditto.
> 	(adddi3): Ditto.
> 	(*addsi3_extended): Removed.
> 	(addsi3_extended): New.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/loongarch/sign-extend.c: Moved to...
> 	* gcc.target/loongarch/sign-extend-1.c: ...here.
> 	* gcc.target/loongarch/sign-extend-2.c: New test.
> ---
>   gcc/config/loongarch/loongarch.md             | 93 ++++++++++++++-----
>   .../{sign-extend.c => sign-extend-1.c}        |  0
>   .../gcc.target/loongarch/sign-extend-2.c      | 59 ++++++++++++
>   3 files changed, 128 insertions(+), 24 deletions(-)
>   rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} (100%)
>   create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
>
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 497a72e165c..ebc0476ea6f 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -657,42 +657,87 @@ (define_insn "add<mode>3"
>     [(set_attr "type" "fadd")
>      (set_attr "mode" "<UNITMODE>")])
>   
> -(define_insn_and_split "add<mode>3"
> -  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
> -	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
> -		  (match_operand:GPR 2 "plus_<mode>_operand"
> -				       "r,I,La,Lb,Lc,Ld,Le")))]
> +(define_insn_and_split "*addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
> +	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
> +		  (match_operand:SI 2 "plus_si_operand"
> +				       "r,I,La,Lb,Le")))]
>     ""
>     "@
> -   add.<d>\t%0,%1,%2
> -   addi.<d>\t%0,%1,%2
> +   add.w\t%0,%1,%2
> +   addi.w\t%0,%1,%2
>      #
>      * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
>        return \"addu16i.d\t%0,%1,%2\";
> +   #"
> +  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
> +   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
> +  [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
> +   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
> +  {
> +    loongarch_split_plus_constant (&operands[2], SImode);
> +  }
> +  [(set_attr "alu_type" "add")
> +   (set_attr "mode" "SI")
> +   (set_attr "insn_count" "1,1,2,1,2")])
> +
> +(define_expand "addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
> +	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
> +		 (match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
> +  "TARGET_64BIT"
> +{
> +  if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
> +      && ADDU16I_OPERAND (INTVAL (operands[2])))
> +    {
> +      rtx t1 = gen_reg_rtx (DImode);
> +      rtx t2 = gen_reg_rtx (DImode);
> +      rtx t3 = gen_reg_rtx (DImode);
> +      emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
> +      t2 = operands[2];
> +      emit_insn (gen_adddi3 (t3, t1, t2));
> +      t3 = gen_lowpart (SImode, t3);
> +      emit_move_insn (operands[0], t3);
> +      DONE;
> +    }
> +  else
> +    {
> +      rtx t = gen_reg_rtx (DImode);
> +      emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
> +      t = gen_lowpart (SImode, t);
> +      SUBREG_PROMOTED_VAR_P (t) = 1;
> +      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
> +      emit_move_insn (operands[0], t);
> +      DONE;
> +    }
> +})
> +
> +(define_insn_and_split "adddi3"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
> +	(plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r")
> +		  (match_operand:DI 2 "plus_di_operand"
> +				       "r,I,La,Lb,Lc,Ld")))]
> +  "TARGET_64BIT"
> +  "@
> +   add.d\t%0,%1,%2
> +   addi.d\t%0,%1,%2
>      #
> +   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
> +     return \"addu16i.d\t%0,%1,%2\";
>      #
>      #"
> -  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
> +  "&& CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
>      && !ADDU16I_OPERAND (INTVAL (operands[2]))"
> -  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
> -   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
> +  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3)))
> +   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
>     {
> -    loongarch_split_plus_constant (&operands[2], <MODE>mode);
> +    loongarch_split_plus_constant (&operands[2], DImode);
>     }
>     [(set_attr "alu_type" "add")
> -   (set_attr "mode" "<MODE>")
> -   (set_attr "insn_count" "1,1,2,1,2,2,2")
> -   (set (attr "enabled")
> -      (cond
> -	[(match_test "<MODE>mode != DImode && which_alternative == 4")
> -	 (const_string "no")
> -	 (match_test "<MODE>mode != DImode && which_alternative == 5")
> -	 (const_string "no")
> -	 (match_test "<MODE>mode != SImode && which_alternative == 6")
> -	 (const_string "no")]
> -	(const_string "yes")))])
> -
> -(define_insn_and_split "*addsi3_extended"
> +   (set_attr "mode" "DI")
> +   (set_attr "insn_count" "1,1,2,1,2,2")])
> +
> +(define_insn_and_split "addsi3_extended"
>     [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
>   	(sign_extend:DI
>   	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
> diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
> similarity index 100%
> rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c
> rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
> diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
> new file mode 100644
> index 00000000000..a45dde4f73f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
> @@ -0,0 +1,59 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mabi=lp64d -O2" } */
> +/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */
> +
> +#include <stdint.h>
> +#define my_min(x, y) ((x) < (y) ? (x) : (y))
> +
> +void
> +bt_skip_func (const uint32_t len_limit, const uint32_t pos,
> +              const uint8_t *const cur, uint32_t cur_match,
> +              uint32_t *const son, const uint32_t cyclic_pos,
> +              const uint32_t cyclic_size)
> +{
> +  uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
> +  uint32_t *ptr1 = son + (cyclic_pos << 1);
> +
> +  uint32_t len0 = 0;
> +  uint32_t len1 = 0;
> +
> +  while (1)
> +    {
> +      const uint32_t delta = pos - cur_match;
> +      uint32_t *pair
> +          = son
> +            + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0))
> +               << 1);
> +      const uint8_t *pb = cur - delta;
> +      uint32_t len = my_min (len0, len1);
> +
> +      if (pb[len] == cur[len])
> +        {
> +          while (++len != len_limit)
> +            if (pb[len] != cur[len])
> +              break;
> +
> +          if (len == len_limit)
> +            {
> +              *ptr1 = pair[0];
> +              *ptr0 = pair[1];
> +              return;
> +            }
> +        }
> +
> +      if (pb[len] < cur[len])
> +        {
> +          *ptr1 = cur_match;
> +          ptr1 = pair + 1;
> +          cur_match = *ptr1;
> +          len1 = len;
> +        }
> +      else
> +        {
> +          *ptr0 = cur_match;
> +          ptr0 = pair;
> +          cur_match = *ptr0;
> +          len0 = len;
> +        }
> +    }
> +}
  

Patch

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 497a72e165c..ebc0476ea6f 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -657,42 +657,87 @@  (define_insn "add<mode>3"
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn_and_split "add<mode>3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
-	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
-		  (match_operand:GPR 2 "plus_<mode>_operand"
-				       "r,I,La,Lb,Lc,Ld,Le")))]
+(define_insn_and_split "*addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
+		  (match_operand:SI 2 "plus_si_operand"
+				       "r,I,La,Lb,Le")))]
   ""
   "@
-   add.<d>\t%0,%1,%2
-   addi.<d>\t%0,%1,%2
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
    #
    * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
      return \"addu16i.d\t%0,%1,%2\";
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+  [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
+  {
+    loongarch_split_plus_constant (&operands[2], SImode);
+  }
+  [(set_attr "alu_type" "add")
+   (set_attr "mode" "SI")
+   (set_attr "insn_count" "1,1,2,1,2")])
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+	(plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
+		 (match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
+  "TARGET_64BIT"
+{
+  if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
+      && ADDU16I_OPERAND (INTVAL (operands[2])))
+    {
+      rtx t1 = gen_reg_rtx (DImode);
+      rtx t2 = gen_reg_rtx (DImode);
+      rtx t3 = gen_reg_rtx (DImode);
+      emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
+      t2 = operands[2];
+      emit_insn (gen_adddi3 (t3, t1, t2));
+      t3 = gen_lowpart (SImode, t3);
+      emit_move_insn (operands[0], t3);
+      DONE;
+    }
+  else
+    {
+      rtx t = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
+      t = gen_lowpart (SImode, t);
+      SUBREG_PROMOTED_VAR_P (t) = 1;
+      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
+})
+
+(define_insn_and_split "adddi3"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r")
+		  (match_operand:DI 2 "plus_di_operand"
+				       "r,I,La,Lb,Lc,Ld")))]
+  "TARGET_64BIT"
+  "@
+   add.d\t%0,%1,%2
+   addi.d\t%0,%1,%2
    #
+   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+     return \"addu16i.d\t%0,%1,%2\";
    #
    #"
-  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+  "&& CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
    && !ADDU16I_OPERAND (INTVAL (operands[2]))"
-  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
-   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
   {
-    loongarch_split_plus_constant (&operands[2], <MODE>mode);
+    loongarch_split_plus_constant (&operands[2], DImode);
   }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "<MODE>")
-   (set_attr "insn_count" "1,1,2,1,2,2,2")
-   (set (attr "enabled")
-      (cond
-	[(match_test "<MODE>mode != DImode && which_alternative == 4")
-	 (const_string "no")
-	 (match_test "<MODE>mode != DImode && which_alternative == 5")
-	 (const_string "no")
-	 (match_test "<MODE>mode != SImode && which_alternative == 6")
-	 (const_string "no")]
-	(const_string "yes")))])
-
-(define_insn_and_split "*addsi3_extended"
+   (set_attr "mode" "DI")
+   (set_attr "insn_count" "1,1,2,1,2,2")])
+
+(define_insn_and_split "addsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
 	(sign_extend:DI
 	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
similarity index 100%
rename from gcc/testsuite/gcc.target/loongarch/sign-extend.c
rename to gcc/testsuite/gcc.target/loongarch/sign-extend-1.c
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
new file mode 100644
index 00000000000..a45dde4f73f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
@@ -0,0 +1,59 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 } } */
+
+#include <stdint.h>
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+void
+bt_skip_func (const uint32_t len_limit, const uint32_t pos,
+              const uint8_t *const cur, uint32_t cur_match,
+              uint32_t *const son, const uint32_t cyclic_pos,
+              const uint32_t cyclic_size)
+{
+  uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+  uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+  uint32_t len0 = 0;
+  uint32_t len1 = 0;
+
+  while (1)
+    {
+      const uint32_t delta = pos - cur_match;
+      uint32_t *pair
+          = son
+            + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0))
+               << 1);
+      const uint8_t *pb = cur - delta;
+      uint32_t len = my_min (len0, len1);
+
+      if (pb[len] == cur[len])
+        {
+          while (++len != len_limit)
+            if (pb[len] != cur[len])
+              break;
+
+          if (len == len_limit)
+            {
+              *ptr1 = pair[0];
+              *ptr0 = pair[1];
+              return;
+            }
+        }
+
+      if (pb[len] < cur[len])
+        {
+          *ptr1 = cur_match;
+          ptr1 = pair + 1;
+          cur_match = *ptr1;
+          len1 = len;
+        }
+      else
+        {
+          *ptr0 = cur_match;
+          ptr0 = pair;
+          cur_match = *ptr0;
+          len0 = len;
+        }
+    }
+}