[06/17,APX,NDD] Support APX NDD for sub insns

Message ID 20231205022948.504790-7-hongyu.wang@intel.com
State Committed
Commit c601744469390f5c66075de1cead46ed0d5c7a5d
Headers
Series Support Intel APX NDD |

Commit Message

Hongyu Wang Dec. 5, 2023, 2:29 a.m. UTC
  From: Kong Lingling <lingling.kong@intel.com>

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_fixup_binary_operands_no_copy):
	Add use_ndd parameter and parse it.
	* config/i386/i386-protos.h (ix86_fixup_binary_operands_no_copy):
	Change define.
	* config/i386/i386.md (sub<mode>3): Add new alternatives for NDD
	and adjust output templates.
	(*sub<mode>_1): Likewise.
	(*sub<mode>_2): Likewise.
	(subv<mode>4): Likewise.
	(*subv<mode>4): Likewise.
	(subv<mode>4_1): Likewise.
	(usubv<mode>4): Likewise.
	(*sub<mode>_3): Likewise.
	(*subsi_1_zext): Likewise, and use nonimmediate_operand for operands[1]
	to accept memory input for NDD alternatives.
	(*subsi_2_zext): Likewise.
	(*subsi_3_zext): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ndd.c: Add test for ndd sub.
---
 gcc/config/i386/i386-expand.cc          |   5 +-
 gcc/config/i386/i386-protos.h           |   2 +-
 gcc/config/i386/i386.md                 | 155 ++++++++++++++++--------
 gcc/testsuite/gcc.target/i386/apx-ndd.c |  13 ++
 4 files changed, 120 insertions(+), 55 deletions(-)
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 3ecda989cf8..93ecde4b4a8 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1326,9 +1326,10 @@  ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
 
 void
 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
-				    machine_mode mode, rtx operands[])
+				    machine_mode mode, rtx operands[],
+				    bool use_ndd)
 {
-  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+  rtx dst = ix86_fixup_binary_operands (code, mode, operands, use_ndd);
   gcc_assert (dst == operands[0]);
 }
 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 7dfeb6af225..481527872e8 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -111,7 +111,7 @@  extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]);
 extern rtx ix86_fixup_binary_operands (enum rtx_code,
 				       machine_mode, rtx[], bool = false);
 extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
-						machine_mode, rtx[]);
+						machine_mode, rtx[], bool = false);
 extern void ix86_expand_binary_operator (enum rtx_code,
 					 machine_mode, rtx[], bool = false);
 extern void ix86_expand_vector_logical_operator (enum rtx_code,
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 358a3857f89..ea5377a0b38 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7772,7 +7772,8 @@  (define_expand "sub<mode>3"
 	(minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
 		     (match_operand:SDWIM 2 "<general_hilo_operand>")))]
   ""
-  "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
+  "ix86_expand_binary_operator (MINUS, <MODE>mode, operands,
+				TARGET_APX_NDD); DONE;")
 
 (define_insn_and_split "*sub<dwi>3_doubleword"
   [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
@@ -7798,7 +7799,10 @@  (define_insn_and_split "*sub<dwi>3_doubleword"
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
-      ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3]);
+      if (!rtx_equal_p (operands[0], operands[1]))
+	emit_move_insn (operands[0], operands[1]);
+      ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3],
+				   TARGET_APX_NDD);
       DONE;
     }
 })
@@ -7827,25 +7831,36 @@  (define_insn_and_split "*sub<dwi>3_doubleword_zext"
   "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
 
 (define_insn "*sub<mode>_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
 	(minus:SWI
-	  (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-	  (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+	  (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+	  (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+			    TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*subsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (minus:SI (match_operand:SI 1 "register_operand" "0")
-		    (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+	  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
+		    (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands,
+					    TARGET_APX_NDD)"
+  "@
+  sub{l}\t{%2, %k0|%k0, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
@@ -7936,31 +7951,42 @@  (define_insn "*sub<mode>_2"
   [(set (reg FLAGS_REG)
 	(compare
 	  (minus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-	    (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+	    (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+	    (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
 	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
 	(minus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+			       TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*subsi_2_zext"
   [(set (reg FLAGS_REG)
 	(compare
-	  (minus:SI (match_operand:SI 1 "register_operand" "0")
-		    (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+	  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
+		    (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
 	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
 	  (minus:SI (match_dup 1)
 		    (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{l}\t{%2, %k0|%k0, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, SImode, operands,
+			       TARGET_APX_NDD)"
+  "@
+  sub{l}\t{%2, %k0|%k0, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 (define_insn "*subqi_ext<mode>_0"
@@ -8072,7 +8098,8 @@  (define_expand "subv<mode>4"
 	       (pc)))]
   ""
 {
-  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
+  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
+				      TARGET_APX_NDD);
   if (CONST_SCALAR_INT_P (operands[2]))
     operands[4] = operands[2];
   else
@@ -8083,35 +8110,45 @@  (define_insn "*subv<mode>4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (minus:<DWI>
 		   (sign_extend:<DWI>
-		      (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
+		      (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
 		   (sign_extend:<DWI>
-		      (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
+		      (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
 		(sign_extend:<DWI>
 		   (minus:SWI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
 	(minus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+			    TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "subv<mode>4_1"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (minus:<DWI>
 		   (sign_extend:<DWI>
-		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
+		      (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
 		   (match_operand:<DWI> 3 "const_int_operand"))
 		(sign_extend:<DWI>
 		   (minus:SWI
 		     (match_dup 1)
-		     (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+		     (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
 	(minus:SWI (match_dup 1) (match_dup 2)))]
-  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+			    TARGET_APX_NDD)
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[3])"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
 	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
@@ -8207,6 +8244,8 @@  (define_insn_and_split "*subv<dwi>4_doubleword_1"
   split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
   if (operands[2] == const0_rtx)
     {
+      if (!rtx_equal_p (operands[0], operands[1]))
+	emit_move_insn (operands[0], operands[1]);
       emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
 				    operands[5]));
       DONE;
@@ -8288,18 +8327,25 @@  (define_expand "usubv<mode>4"
 	       (label_ref (match_operand 3))
 	       (pc)))]
   ""
-  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
+				       TARGET_APX_NDD);")
 
 (define_insn "*sub<mode>_3"
   [(set (reg FLAGS_REG)
-	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-		 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+		 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>i,r,r")
 	(minus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCmode)
-   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
-  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+			       TARGET_APX_NDD)"
+  "@
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %0|%0, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+  sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
 (define_peephole2
@@ -8387,16 +8433,21 @@  (define_insn_and_split "*dec_cmov<mode>"
 
 (define_insn "*subsi_3_zext"
   [(set (reg FLAGS_REG)
-	(compare (match_operand:SI 1 "register_operand" "0")
-		 (match_operand:SI 2 "x86_64_general_operand" "rBMe")))
-   (set (match_operand:DI 0 "register_operand" "=r")
+	(compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
+		 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))
+   (set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
 	  (minus:SI (match_dup 1)
 		    (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
-   && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{l}\t{%2, %1|%1, %2}"
-  [(set_attr "type" "alu")
+   && ix86_binary_operator_ok (MINUS, SImode, operands,
+			       TARGET_APX_NDD)"
+  "@
+  sub{l}\t{%2, %1|%1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}
+  sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "isa" "*,apx_ndd,apx_ndd")
+   (set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
 ;; Add with carry and subtract with borrow
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index c1049022f2a..0c7952ef018 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -42,3 +42,16 @@  FOO (long, add, +)
 FOO1 (long, add, +)
 FOO2 (long, add, +)
 
+FOO (char, sub, -)
+FOO1 (char, sub, -)
+FOO (short, sub, -)
+FOO1 (short, sub, -)
+FOO (int, sub, -)
+FOO1 (int, sub, -)
+FOO (long, sub, -)
+FOO1 (long, sub, -)
+/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), %(?:|r|e)di, %(?:|r|e)a(?:x|l)" 4 } } */