diff mbox series

[GCC14] LoongArch: Optimize additions with immediates

Message ID	20230402140044.23073-1-xry111@xry111.site
State	New
Headers	DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org AE4163858C50 sender: xry111@xry111.site) by xry111.site (Postfix) with ESMTPSA id 0046465DC9; Sun, 2 Apr 2023 10:01:00 -0400 (EDT) To: gcc-patches@gcc.gnu.org Cc: Lulu Cheng <chenglulu@loongson.cn>, WANG Xuerui <i@xen0n.name>, Chenghua Xu <xuchenghua@loongson.cn>, Xi Ruoyao <xry111@xry111.site> Subject: [GCC14 PATCH] LoongArch: Optimize additions with immediates Date: Sun, 2 Apr 2023 22:00:44 +0800 Message-Id: <20230402140044.23073-1-xry111@xry111.site> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list From: Xi Ruoyao via Gcc-patches <gcc-patches@gcc.gnu.org> Reply-To: Xi Ruoyao <xry111@xry111.site> Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org>
Series	[GCC14] LoongArch: Optimize additions with immediates \| [GCC14] LoongArch: Optimize additions with immediates

Commit Message

Xi Ruoyao April 2, 2023, 2 p.m. UTC

  1. Use addu16i.d for TARGET_64BIT and suitable immediates.
2. Split one addition with immediate into two addu16i.d or addi.{d/w}
   instructions if possible.  This can avoid using a temp register w/o
   increase the count of instructions.

Inspired by https://reviews.llvm.org/D143710 and
https://reviews.llvm.org/D147222.

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for GCC 14?

gcc/ChangeLog:

	* config/loongarch/loongarch-protos.h
	(loongarch_addu16i_imm12_operand_p): New function prototype.
	(loongarch_split_plus_constant): Likewise.
	* config/loongarch/loongarch.cc
	(loongarch_addu16i_imm12_operand_p): New function.
	(loongarch_split_plus_constant): Likewise.
	* config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro.
	(DUAL_IMM12_OPERAND): Likewise.
	(DUAL_ADDU16I_OPERAND): Likewise.
	* config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New
	constraint.
	* config/loongarch/predicates.md (const_dual_imm12_operand): New
	predicate.
	(const_addu16i_operand): Likewise.
	(const_addu16i_imm12_di_operand): Likewise.
	(const_addu16i_imm12_si_operand): Likewise.
	(plus_di_operand): Likewise.
	(plus_si_operand): Likewise.
	(plus_si_extend_operand): Likewise.
	* config/loongarch/loongarch.md (add<mode>3): Convert to
	define_insn_and_split.  Use plus_<mode>_operand predicate
	instead of arith_operand.  Add alternatives for La, Lb, Lc, Ld,
	and Le constraints.
	(*addsi3_extended): Convert to define_insn_and_split.  Use
	plus_si_extend_operand instead of arith_operand.  Add
	alternatives for La and Le alternatives.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/add-const.c: New test.
---
 gcc/config/loongarch/constraints.md           | 46 ++++++++++++-
 gcc/config/loongarch/loongarch-protos.h       |  2 +
 gcc/config/loongarch/loongarch.cc             | 44 +++++++++++++
 gcc/config/loongarch/loongarch.h              | 19 ++++++
 gcc/config/loongarch/loongarch.md             | 66 +++++++++++++++----
 gcc/config/loongarch/predicates.md            | 36 ++++++++++
 .../gcc.target/loongarch/add-const.c          | 47 +++++++++++++
 7 files changed, 246 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/add-const.c

Comments

Lulu Cheng April 4, 2023, 3:01 a.m. UTC | #1

/* snip */
>
> diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c
> new file mode 100644
> index 00000000000..3a9f72fe83d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/add-const.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -mabi=lp64d" } */
> +
> +/* None of these functions should load the const operand into a temp
> +   register.  */
> +
> +/* { dg-final { scan-assembler-not "add\\.[dw]" } } */
> +
> +unsigned long f01 (unsigned long x) { return x + 1; }
> +unsigned long f02 (unsigned long x) { return x - 1; }
> +unsigned long f03 (unsigned long x) { return x + 2047; }
> +unsigned long f04 (unsigned long x) { return x + 4094; }
> +unsigned long f05 (unsigned long x) { return x - 2048; }
> +unsigned long f06 (unsigned long x) { return x - 4096; }
> +unsigned long f07 (unsigned long x) { return x + 0x7fff0000; }
> +unsigned long f08 (unsigned long x) { return x - 0x80000000l; }
> +unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; }
> +unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
> +unsigned long f11 (unsigned long x) { return x - 0x80000000l * 2; }
These two test cases are duplicates.
> +unsigned long f12 (unsigned long x) { return x + 0x7fff0000 + 0x1; }
> +unsigned long f13 (unsigned long x) { return x + 0x7fff0000 - 0x1; }
> +unsigned long f14 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; }
> +unsigned long f15 (unsigned long x) { return x + 0x7fff0000 - 0x800; }
> +unsigned long f16 (unsigned long x) { return x - 0x80000000l - 1; }
> +unsigned long f17 (unsigned long x) { return x - 0x80000000l + 1; }
> +unsigned long f18 (unsigned long x) { return x - 0x80000000l - 0x800; }
> +unsigned long f19 (unsigned long x) { return x - 0x80000000l + 0x7ff; }
> +
> +unsigned int g01 (unsigned int x) { return x + 1; }
> +unsigned int g02 (unsigned int x) { return x - 1; }
> +unsigned int g03 (unsigned int x) { return x + 2047; }
> +unsigned int g04 (unsigned int x) { return x + 4094; }
> +unsigned int g05 (unsigned int x) { return x - 2048; }
> +unsigned int g06 (unsigned int x) { return x - 4096; }
> +unsigned int g07 (unsigned int x) { return x + 0x7fff0000; }
> +unsigned int g08 (unsigned int x) { return x - 0x80000000l; }
> +unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; }
> +unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
> +unsigned int g11 (unsigned int x) { return x - 0x80000000l * 2; }

Ditto.

I found that adding this log test case 
gcc.target/loongarch/stack-check-cfa-1.c and 
gcc.target/loongarch/stack-check-cfa-2.c test failed.

Although the test fails, the generated assembly code is better, and 
there is no problem with the logic of the assembly code. I haven't 
checked the reason for this yet.

Otherwise LGTM, thanks!

Xi Ruoyao April 4, 2023, 8 a.m. UTC | #2

On Tue, 2023-04-04 at 11:01 +0800, Lulu Cheng wrote:

/* snip */

> > +unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
> > +unsigned long f11 (unsigned long x) { return x - 0x80000000l * 2; }
>  These two test cases are duplicates.

/* snip */

> 
> > +unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
> > +unsigned int g11 (unsigned int x) { return x - 0x80000000l * 2; }
> Ditto.

I'll fix them in V2.

> I found that adding this log test case gcc.target/loongarch/stack-check-cfa-1.c and gcc.target/loongarch/stack-check-cfa-2.c test failed.
> Although the test fails, the generated assembly code is better, and there is no problem with the logic of the assembly code. I haven't checked the reason for this yet.

Looks like the change hides PR109035 (like -fpie) for some reason. (But
I still don't understand the root cause of PR109035 anyway.)

f_test:
.LFB0 = .
	.cfi_startproc
	lu12i.w	$r14,65536>>12			# 0x10000
	sub.d	$r3,$r3,$r14
	st.d	$r0,$r3,0
	sub.d	$r3,$r3,$r14
	st.d	$r0,$r3,0
	.cfi_def_cfa_offset 131072
	lu12i.w	$r13,131072>>12			#
0x20000
	or	$r12,$r3,$r0
	ldx.b	$r4,$r12,$r4
	add.d	$r3,$r3,$r13
	.cfi_def_cfa_offset 0
	jr	$r1
	.cfi_endproc

Technically there should be "addu16i.d $r3,$r3,-1" in the prologue and
"addu16i.d $r3,$r3,2" in the epilogue, so we can avoid using r14/r13.
I'll try modifying loongarch_expand_{pro,epi}logue for this.

Xi Ruoyao April 4, 2023, 8:40 a.m. UTC | #3

On Tue, 2023-04-04 at 16:00 +0800, Xi Ruoyao via Gcc-patches wrote:
> On Tue, 2023-04-04 at 11:01 +0800, Lulu Cheng wrote:
> 
> /* snip */
> 
> > > +unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
> > > +unsigned long f11 (unsigned long x) { return x - 0x80000000l * 2; }
> >  These two test cases are duplicates.
> 
> /* snip */
> 
> > 
> > > +unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
> > > +unsigned int g11 (unsigned int x) { return x - 0x80000000l * 2; }
> > Ditto.
> 
> I'll fix them in V2.
> 
> > I found that adding this log test case gcc.target/loongarch/stack-check-cfa-1.c and gcc.target/loongarch/stack-check-cfa-2.c test failed.
> > Although the test fails, the generated assembly code is better, and there is no problem with the logic of the assembly code. I haven't checked the reason for this yet.
> 
> Looks like the change hides PR109035 (like -fpie) for some reason. (But
> I still don't understand the root cause of PR109035 anyway.)

V2 sent with test cases fixed.

/* snip */

> Technically there should be "addu16i.d $r3,$r3,-1" in the prologue and
> "addu16i.d $r3,$r3,2" in the epilogue, so we can avoid using r14/r13.
> I'll try modifying loongarch_expand_{pro,epi}logue for this.

Will do this later because I'm too stupid to understand
loongarch_first_stack_step function quickly :).

Lulu Cheng April 4, 2023, 9:15 a.m. UTC | #4

在 2023/4/4 下午4:40, Xi Ruoyao 写道:
> On Tue, 2023-04-04 at 16:00 +0800, Xi Ruoyao via Gcc-patches wrote:
>> On Tue, 2023-04-04 at 11:01 +0800, Lulu Cheng wrote:
>>
>> /* snip */
>>
>>>> +unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
>>>> +unsigned long f11 (unsigned long x) { return x - 0x80000000l * 2; }
>>>   These two test cases are duplicates.
>> /* snip */
>>
>>>> +unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
>>>> +unsigned int g11 (unsigned int x) { return x - 0x80000000l * 2; }
>>> Ditto.
>> I'll fix them in V2.
>>
>>> I found that adding this log test case gcc.target/loongarch/stack-check-cfa-1.c and gcc.target/loongarch/stack-check-cfa-2.c test failed.
>>> Although the test fails, the generated assembly code is better, and there is no problem with the logic of the assembly code. I haven't checked the reason for this yet.
>> Looks like the change hides PR109035 (like -fpie) for some reason. (But
>> I still don't understand the root cause of PR109035 anyway.)
> V2 sent with test cases fixed.
>
> /* snip */
>
>> Technically there should be "addu16i.d $r3,$r3,-1" in the prologue and
>> "addu16i.d $r3,$r3,2" in the epilogue, so we can avoid using r14/r13.
>> I'll try modifying loongarch_expand_{pro,epi}logue for this.
> Will do this later because I'm too stupid to understand
> loongarch_first_stack_step function quickly :).
>
Thank you very much!

diff mbox series

Patch

diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index cb7fa688ceb..7a38cd07ae9 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -60,7 +60,22 @@ 
 ;; "I" "A signed 12-bit constant (for arithmetic instructions)."
 ;; "J" "Integer zero."
 ;; "K" "An unsigned 12-bit constant (for logic instructions)."
-;; "L" <-----unused
+;; "L" -
+;;     "La"
+;;	 "A signed constant in [-4096, 2048) or (2047, 4094]."
+;;     "Lb"
+;;	 "A signed 32-bit constant and low 16-bit is zero, which can be
+;;	  added onto a register with addu16i.d.  It matches nothing if
+;;	  the addu16i.d instruction is not available."
+;;     "Lc"
+;;	 "A signed 64-bit constant can be expressed as Lb + I, but not a
+;;	  single Lb or I."
+;;     "Ld"
+;;	 "A signed 64-bit constant can be expressed as Lb + Lb, but not a
+;;	  single Lb."
+;;     "Le"
+;;	 "A signed 32-bit constant can be expressed as Lb + I, but not a
+;;	  single Lb or I."
 ;; "M" <-----unused
 ;; "N" <-----unused
 ;; "O" <-----unused
@@ -170,6 +185,35 @@  (define_constraint "K"
   (and (match_code "const_int")
        (match_test "IMM12_OPERAND_UNSIGNED (ival)")))
 
+(define_constraint "La"
+  "A signed constant in [-4096, 2048) or (2047, 4094]."
+  (and (match_code "const_int")
+       (match_test "DUAL_IMM12_OPERAND (ival)")))
+
+(define_constraint "Lb"
+  "A signed 32-bit constant and low 16-bit is zero, which can be added
+   onto a register with addu16i.d."
+  (and (match_code "const_int")
+       (match_test "ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Lc"
+  "A signed 64-bit constant can be expressed as Lb + I, but not a single Lb
+   or I."
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (ival, DImode)")))
+
+(define_constraint "Ld"
+  "A signed 64-bit constant can be expressed as Lb + Lb, but not a single
+   Lb."
+  (and (match_code "const_int")
+       (match_test "DUAL_ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Le"
+  "A signed 32-bit constant can be expressed as Lb + I, but not a single Lb
+   or I."
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (ival, SImode)")))
+
 (define_constraint "Yd"
   "@internal
    A constant @code{move_operand} that can be safely loaded using
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 35cc77c7367..83df489c7a5 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -83,6 +83,8 @@  extern rtx loongarch_legitimize_call_address (rtx);
 extern rtx loongarch_subword (rtx, bool);
 extern bool loongarch_split_move_p (rtx, rtx);
 extern void loongarch_split_move (rtx, rtx, rtx);
+extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
+extern void loongarch_split_plus_constant (rtx *, machine_mode);
 extern const char *loongarch_output_move (rtx, rtx);
 extern bool loongarch_cfun_has_cprestore_slot_p (void);
 #ifdef RTX_CODE
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 0ecb91ca997..dfb731fca9d 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3756,6 +3756,50 @@  loongarch_split_move (rtx dest, rtx src, rtx insn_)
     }
 }
 
+/* Check if adding an integer constant value for a specific mode can be
+   performed with an addu16i.d instruction and an addi.{w/d}
+   instruction.  */
+
+bool
+loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
+{
+  /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT.  */
+  if (!TARGET_64BIT)
+    return false;
+
+  if ((value & 0xffff) == 0)
+    return false;
+
+  if (IMM12_OPERAND (value))
+    return false;
+
+  value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
+  return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
+}
+
+/* Split one integer constant op[0] into two (op[1] and op[2]) for constant
+   plus operation in a specific mode.  The splitted constants can be added
+   onto a register with a single instruction (addi.{d/w} or addu16i.d).  */
+
+void
+loongarch_split_plus_constant (rtx *op, machine_mode mode)
+{
+  HOST_WIDE_INT v = INTVAL (op[0]), a;
+
+  if (DUAL_IMM12_OPERAND (v))
+    a = (v > 0 ? 2047 : -2048);
+  else if (loongarch_addu16i_imm12_operand_p (v, mode))
+    a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
+  else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
+    a = (v > 0 ? 0x7fff : -0x8000) << 16;
+  else
+    gcc_unreachable ();
+
+  op[1] = gen_int_mode (a, mode);
+  v = v - (unsigned HOST_WIDE_INT) a;
+  op[2] = gen_int_mode (v, mode);
+}
+
 /* Return true if a move from SRC to DEST in INSN should be split.  */
 
 static bool
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f8167875646..7151d5cabb3 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -612,6 +612,25 @@  enum reg_class
 
 #define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE))
 
+/* True if VALUE can be added onto a register with one addu16i.d
+   instruction.  */
+
+#define ADDU16I_OPERAND(VALUE)			\
+  (TARGET_64BIT && (((VALUE) & 0xffff) == 0	\
+   && IMM16_OPERAND ((HOST_WIDE_INT) (VALUE) / 65536)))
+
+/* True if VALUE can be added onto a register with two addi.{d/w}
+   instructions, but not one addi.{d/w} instruction.  */
+#define DUAL_IMM12_OPERAND(VALUE) \
+  (IN_RANGE ((VALUE), -4096, 4094) && !IMM12_OPERAND (VALUE))
+
+/* True if VALUE can be added onto a register with two addu16i.d
+   instruction, but not one addu16i.d instruction.  */
+#define DUAL_ADDU16I_OPERAND(VALUE)		\
+  (TARGET_64BIT && (((VALUE) & 0xffff) == 0	\
+   && !ADDU16I_OPERAND (VALUE)			\
+   && IN_RANGE ((VALUE) / 65536, -0x10000, 0xfffe)))
+
 #define IMM12_INT(X) IMM12_OPERAND (INTVAL (X))
 #define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X))
 #define LU12I_INT(X) LU12I_OPERAND (INTVAL (X))
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 3509c3c21c1..628ecc78088 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -598,24 +598,64 @@  (define_insn "add<mode>3"
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])
 
-(define_insn "add<mode>3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r")
-	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r")
-		  (match_operand:GPR 2 "arith_operand" "r,I")))]
+(define_insn_and_split "add<mode>3"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
+		  (match_operand:GPR 2 "plus_<mode>_operand"
+				       "r,I,La,Lb,Lc,Ld,Le")))]
   ""
-  "add%i2.<d>\t%0,%1,%2";
+  "@
+   add.<d>\t%0,%1,%2
+   addi.<d>\t%0,%1,%2
+   #
+   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+     return \"addu16i.d\t%0,%1,%2\";
+   #
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  {
+    loongarch_split_plus_constant (&operands[2], <MODE>mode);
+  }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*addsi3_extended"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+   (set_attr "mode" "<MODE>")
+   (set_attr "insn_count" "1,1,2,1,2,2,2")
+   (set (attr "enabled")
+      (cond
+	[(match_test "<MODE>mode != DImode && which_alternative == 4")
+	 (const_string "no")
+	 (match_test "<MODE>mode != DImode && which_alternative == 5")
+	 (const_string "no")
+	 (match_test "<MODE>mode != SImode && which_alternative == 6")
+	 (const_string "no")]
+	(const_string "yes")))])
+
+(define_insn_and_split "*addsi3_extended"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
 	(sign_extend:DI
-	     (plus:SI (match_operand:SI 1 "register_operand" "r,r")
-		      (match_operand:SI 2 "arith_operand" "r,I"))))]
+	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
+		      (match_operand:SI 2 "plus_si_extend_operand"
+					  "r,I,La,Le"))))]
   "TARGET_64BIT"
-  "add%i2.w\t%0,%1,%2"
+  "@
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])"
+  [(set (subreg:SI (match_dup 0) 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0)
+	(sign_extend:DI (plus:SI (subreg:SI (match_dup 0) 0)
+				 (match_dup 4))))]
+  {
+    loongarch_split_plus_constant (&operands[2], SImode);
+  }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "SI")
+   (set_attr "insn_count" "1,1,2,2")])
 
 
 ;;
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 95140280f1e..510973aa339 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -39,14 +39,50 @@  (define_predicate "const_arith_operand"
   (and (match_code "const_int")
        (match_test "IMM12_OPERAND (INTVAL (op))")))
 
+(define_predicate "const_dual_imm12_operand"
+  (and (match_code "const_int")
+       (match_test "DUAL_IMM12_OPERAND (INTVAL (op))")))
+
 (define_predicate "const_imm16_operand"
   (and (match_code "const_int")
        (match_test "IMM16_OPERAND (INTVAL (op))")))
 
+(define_predicate "const_addu16i_operand"
+  (and (match_code "const_int")
+       (match_test "ADDU16I_OPERAND (INTVAL (op))")))
+
+(define_predicate "const_addu16i_imm12_di_operand"
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), DImode)")))
+
+(define_predicate "const_addu16i_imm12_si_operand"
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), SImode)")))
+
+(define_predicate "const_dual_addu16i_operand"
+  (and (match_code "const_int")
+       (match_test "DUAL_ADDU16I_OPERAND (INTVAL (op))")))
+
 (define_predicate "arith_operand"
   (ior (match_operand 0 "const_arith_operand")
        (match_operand 0 "register_operand")))
 
+(define_predicate "plus_di_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_dual_imm12_operand")
+       (match_operand 0 "const_addu16i_operand")
+       (match_operand 0 "const_addu16i_imm12_di_operand")
+       (match_operand 0 "const_dual_addu16i_operand")))
+
+(define_predicate "plus_si_extend_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_dual_imm12_operand")
+       (match_operand 0 "const_addu16i_imm12_si_operand")))
+
+(define_predicate "plus_si_operand"
+  (ior (match_operand 0 "plus_si_extend_operand")
+       (match_operand 0 "const_addu16i_operand")))
+
 (define_predicate "const_immalsl_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 1, 4)")))
diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c
new file mode 100644
index 00000000000..3a9f72fe83d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/add-const.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -mabi=lp64d" } */
+
+/* None of these functions should load the const operand into a temp
+   register.  */
+
+/* { dg-final { scan-assembler-not "add\\.[dw]" } } */
+
+unsigned long f01 (unsigned long x) { return x + 1; }
+unsigned long f02 (unsigned long x) { return x - 1; }
+unsigned long f03 (unsigned long x) { return x + 2047; }
+unsigned long f04 (unsigned long x) { return x + 4094; }
+unsigned long f05 (unsigned long x) { return x - 2048; }
+unsigned long f06 (unsigned long x) { return x - 4096; }
+unsigned long f07 (unsigned long x) { return x + 0x7fff0000; }
+unsigned long f08 (unsigned long x) { return x - 0x80000000l; }
+unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; }
+unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
+unsigned long f11 (unsigned long x) { return x - 0x80000000l * 2; }
+unsigned long f12 (unsigned long x) { return x + 0x7fff0000 + 0x1; }
+unsigned long f13 (unsigned long x) { return x + 0x7fff0000 - 0x1; }
+unsigned long f14 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned long f15 (unsigned long x) { return x + 0x7fff0000 - 0x800; }
+unsigned long f16 (unsigned long x) { return x - 0x80000000l - 1; }
+unsigned long f17 (unsigned long x) { return x - 0x80000000l + 1; }
+unsigned long f18 (unsigned long x) { return x - 0x80000000l - 0x800; }
+unsigned long f19 (unsigned long x) { return x - 0x80000000l + 0x7ff; }
+
+unsigned int g01 (unsigned int x) { return x + 1; }
+unsigned int g02 (unsigned int x) { return x - 1; }
+unsigned int g03 (unsigned int x) { return x + 2047; }
+unsigned int g04 (unsigned int x) { return x + 4094; }
+unsigned int g05 (unsigned int x) { return x - 2048; }
+unsigned int g06 (unsigned int x) { return x - 4096; }
+unsigned int g07 (unsigned int x) { return x + 0x7fff0000; }
+unsigned int g08 (unsigned int x) { return x - 0x80000000l; }
+unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; }
+unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
+unsigned int g11 (unsigned int x) { return x - 0x80000000l * 2; }
+unsigned int g12 (unsigned int x) { return x + 0x7fff0000 + 0x1; }
+unsigned int g13 (unsigned int x) { return x + 0x7fff0000 - 0x1; }
+unsigned int g14 (unsigned int x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned int g15 (unsigned int x) { return x + 0x7fff0000 - 0x800; }
+unsigned int g16 (unsigned int x) { return x - 0x80000000l - 1; }
+unsigned int g17 (unsigned int x) { return x - 0x80000000l + 1; }
+unsigned int g18 (unsigned int x) { return x - 0x80000000l - 0x800; }
+unsigned int g19 (unsigned int x) { return x - 0x80000000l + 0x7ff; }