[v2] LoongArch: Replace -mexplicit-relocs=auto simple-used address peephole2 with combine

Message ID 20231225161723.3197-1-xry111@xry111.site
State New
Headers
Series [v2] LoongArch: Replace -mexplicit-relocs=auto simple-used address peephole2 with combine |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed

Commit Message

Xi Ruoyao Dec. 25, 2023, 4:14 p.m. UTC
  The problem with peephole2 is that it uses a naive sliding-window
algorithm and misses many cases.  For example:

    float a[10000];
    float t() { return a[0] + a[8000]; }

is compiled to:

    la.local    $r13,a
    la.local    $r12,a+32768
    fld.s       $f1,$r13,0
    fld.s       $f0,$r12,-768
    fadd.s      $f0,$f1,$f0

by trunk.  But as we've explained in r14-4851, the following would be
better with -mexplicit-relocs=auto:

    pcalau12i   $r13,%pc_hi20(a)
    pcalau12i   $r12,%pc_hi20(a+32000)
    fld.s       $f1,$r13,%pc_lo12(a)
    fld.s       $f0,$r12,%pc_lo12(a+32000)
    fadd.s      $f0,$f1,$f0

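However, the sliding-window algorithm only looks at adjacent
instructions, so it won't detect the la.local/fld pairs here (both
la.local instructions come before the loads) and cannot turn them into
pcalau12i/fld pairs.  Using a define_insn_and_split matched by the
combine pass works around the issue.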

gcc/ChangeLog:

	* config/loongarch/loongarch.md:
	(simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>): New
	define_insn_and_split.
	(simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>): Likewise.
	(simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>): Likewise.
	(simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>):
	Likewise.
	(simple_store<ST_ANY:mode><P:mode>): Likewise.
	(simple_store_off<ST_ANY:mode><P:mode>): Likewise.
	(define_peephole2): Remove la.local/[f]ld peepholes.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c:
	New test.
---

Change from [v1]:
- Add "&& true" as the split condition [as suggested][1].

[v1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640280.html
[1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641407.html

Bootstrapped and regtested on loongarch64-linux-gnu (on top of
r14-6829).  Ok for trunk?

 gcc/config/loongarch/loongarch.md             | 165 +++++++++---------
 ...explicit-relocs-auto-single-load-store-2.c |  11 ++
 2 files changed, 98 insertions(+), 78 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
  

Patch

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 7021105b241..18a2d05325b 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4123,101 +4123,110 @@  (define_insn "loongarch_crcc_w_<size>_w"
 ;;
 ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
 ;; 3 instructions).
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-	(match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
-	(mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0]) \
-       || REGNO (operands[0]) == REGNO (operands[2]))"
-  [(set (match_dup 2)
-	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+(define_insn_and_split "simple_load<P:mode><LD_AT_LEAST_32_BIT:mode>"
+  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
+	(mem:LD_AT_LEAST_32_BIT
+	  (match_operand:P 1 "symbolic_pcrel_operand" "")))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))]
   {
-    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+    operands[2] = gen_reg_rtx (Pmode);
+    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-	(match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
-	(mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
-				(match_operand 3 "const_int_operand"))))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0]) \
-       || REGNO (operands[0]) == REGNO (operands[2]))"
-  [(set (match_dup 2)
-	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
+(define_insn_and_split "simple_load_off<P:mode><LD_AT_LEAST_32_BIT:mode>"
+  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
+	(mem:LD_AT_LEAST_32_BIT
+	  (plus (match_operand:P 1 "symbolic_pcrel_operand" "")
+		(match_operand 2 "const_int_operand" ""))))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1))))]
   {
-    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
-    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+    HOST_WIDE_INT offset = INTVAL (operands[2]);
+    operands[2] = gen_reg_rtx (Pmode);
+    operands[1] = plus_constant (Pmode, operands[1], offset);
+    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-	(match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (match_operand:GPR 2 "register_operand")
-	(any_extend:GPR (mem:SUBDI (match_dup 0))))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0]) \
-       || REGNO (operands[0]) == REGNO (operands[2]))"
-  [(set (match_dup 2)
-	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
-					     (match_dup 1)))))]
+(define_insn_and_split "simple_load_<su>ext<P:mode><SUBDI:mode><GPR:mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(any_extend:GPR
+	  (mem:SUBDI (match_operand:P 1 "symbolic_pcrel_operand" ""))))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+	(any_extend:GPR
+	  (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))]
   {
-    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+    operands[2] = gen_reg_rtx (Pmode);
+    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-	(match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (match_operand:GPR 2 "register_operand")
+(define_insn_and_split
+  "simple_load_off_<su>ext<P:mode><SUBDI:mode><GPR:mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(any_extend:GPR
+	  (mem:SUBDI
+	    (plus (match_operand:P 1 "symbolic_pcrel_operand" "")
+		  (match_operand 2 "const_int_operand" "")))))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
 	(any_extend:GPR
-	  (mem:SUBDI (plus (match_dup 0)
-			   (match_operand 3 "const_int_operand")))))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0]) \
-       || REGNO (operands[0]) == REGNO (operands[2]))"
-  [(set (match_dup 2)
-	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
-					     (match_dup 1)))))]
+	  (mem:SUBDI (lo_sum:P (match_dup 2) (match_dup 1)))))]
   {
-    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
-    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+    HOST_WIDE_INT offset = INTVAL (operands[2]);
+    operands[2] = gen_reg_rtx (Pmode);
+    operands[1] = plus_constant (Pmode, operands[1], offset);
+    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[1]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-	(match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (mem:ST_ANY (match_dup 0))
-	(match_operand:ST_ANY 2 "register_operand"))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0])) \
-   && REGNO (operands[0]) != REGNO (operands[2])"
-  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+(define_insn_and_split "simple_store<ST_ANY:mode><P:mode>"
+  [(set (mem:ST_ANY (match_operand:P 0 "symbolic_pcrel_operand"))
+	(match_operand:ST_ANY 1 "register_operand" "r,f"))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (mem:ST_ANY (lo_sum:P (match_dup 2) (match_dup 0))) (match_dup 1))]
   {
-    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+    operands[2] = gen_reg_rtx (Pmode);
+    emit_insn (gen_pcalau12i_gr<P:mode> (operands[2], operands[0]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-	(match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (mem:ST_ANY (plus (match_dup 0)
-			  (match_operand 3 "const_int_operand")))
-	(match_operand:ST_ANY 2 "register_operand"))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0])) \
-   && REGNO (operands[0]) != REGNO (operands[2])"
-  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
+(define_insn_and_split "simple_store_off<ST_ANY:mode><P:mode>"
+  [(set (mem:ST_ANY
+	  (plus (match_operand:P 0 "symbolic_pcrel_operand" "")
+		(match_operand 1 "const_int_operand" "")))
+	(match_operand:ST_ANY 2 "register_operand" "r,f"))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (mem:ST_ANY (lo_sum:P (match_dup 1) (match_dup 0))) (match_dup 2))]
   {
-    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
-    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
+    HOST_WIDE_INT offset = INTVAL (operands[1]);
+    operands[1] = gen_reg_rtx (Pmode);
+    operands[0] = plus_constant (Pmode, operands[0], offset);
+    emit_insn (gen_pcalau12i_gr<P:mode> (operands[1], operands[0]));
   })
 
 ;; Synchronization instructions.
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
new file mode 100644
index 00000000000..42cb966d1e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */
+
+float a[8001];
+float
+t (void)
+{
+  return a[0] + a[8000];
+}
+
+/* { dg-final { scan-assembler-not "la.local" } } */