[2/2,APX,CFCMOV] Support APX CFCMOV in backend

Message ID 20250109030958.1750148-2-hongyu.wang@intel.com
State New
Headers
Series [v5,1/2,APX,CFCMOV] Support APX CFCMOV in if_convert pass |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed

Commit Message

Hongyu Wang Jan. 9, 2025, 3:09 a.m. UTC
  From: Lingling Kong <lingling.kong@intel.com>

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_expand_int_cfmovcc): New function
	to expand to cfcmov pattern.
	* config/i386/i386-opts.h (enum apx_features): Add apx_cfcmov.
	* config/i386/i386-protos.h (ix86_expand_int_cfmovcc): Declare.
	* config/i386/i386.cc (ix86_rtx_costs): Add UNSPEC_APX_CFCMOV
	cost.
	* config/i386/i386.h (TARGET_APX_CFCMOV): Define.
	* config/i386/i386.md (maskload<mode>void): New define_expand.
	(maskstore<mode>void): Ditto.
	(*cfmov<mode>cc): New define_insn.
	(*cfmov<mode>cc_2): Ditto.
	(*cfmov<mode>ccz): Ditto.
	(UNSPEC_APX_CFCMOV): New unspec for cfcmov.
	* config/i386/i386.opt: Add enum value for cfcmov.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-cfcmov-1.c: New test.
	* gcc.target/i386/apx-cfcmov-2.c: Ditto.
---
 gcc/config/i386/i386-expand.cc               | 46 ++++++++++++
 gcc/config/i386/i386-opts.h                  |  4 +-
 gcc/config/i386/i386-protos.h                |  1 +
 gcc/config/i386/i386.cc                      | 16 +++--
 gcc/config/i386/i386.h                       |  1 +
 gcc/config/i386/i386.md                      | 76 +++++++++++++++++++-
 gcc/config/i386/i386.opt                     |  3 +
 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c | 73 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c | 40 +++++++++++
 9 files changed, 254 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 2ab57874234..48809b5b289 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3536,6 +3536,52 @@  ix86_expand_int_addcc (rtx operands[])
   return true;
 }
 
+/* Expand DEST = COMPARE_OP ? VTRUE : VFALSE using APX CFCMOV, wrapping each
+   arm that may trap or fault in UNSPEC_APX_CFCMOV.  */
+
+void
+ix86_expand_int_cfmovcc (rtx dest, rtx compare_op, rtx vtrue, rtx vfalse)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx cmp_op0 = XEXP (compare_op, 0);
+  rtx cmp_op1 = XEXP (compare_op, 1);
+  /* Classify both arms once, before either is wrapped in an UNSPEC.  */
+  bool true_traps = may_trap_or_fault_p (vtrue);
+  bool false_traps = may_trap_or_fault_p (vfalse);
+
+  gcc_assert (true_traps || false_traps);
+  /* For conditional store only handle "if (test) *x = a; else skip;".  */
+  if (MEM_P (dest))
+    gcc_assert (rtx_equal_p (dest, vfalse));
+
+  start_sequence ();
+  compare_op = ix86_expand_compare (GET_CODE (compare_op), cmp_op0, cmp_op1);
+  rtx_insn *compare_seq = get_insns ();
+  end_sequence ();
+
+  if (true_traps)
+    vtrue = gen_rtx_UNSPEC (mode, gen_rtvec (1, vtrue), UNSPEC_APX_CFCMOV);
+  if (false_traps)
+    vfalse = gen_rtx_UNSPEC (mode, gen_rtvec (1, vfalse), UNSPEC_APX_CFCMOV);
+  emit_insn (compare_seq);
+  /* For "if (test) x = *a; else x = *b", generate 2 cfcmov.  */
+  if (true_traps && false_traps)
+    {
+      emit_insn (gen_rtx_SET (dest,
+			      gen_rtx_IF_THEN_ELSE (mode, compare_op,
+						    vtrue, dest)));
+      emit_insn (gen_rtx_SET (dest,
+			      gen_rtx_IF_THEN_ELSE (mode, compare_op,
+						    dest, vfalse)));
+    }
+  /* For conditional load of one mem, like "if (test) x = *a; else x = b/0;"
+     and "if (test) x = b/0; else x = *b".  */
+  else
+    emit_insn (gen_rtx_SET (dest,
+			    gen_rtx_IF_THEN_ELSE (mode, compare_op,
+						  vtrue, vfalse)));
+}
+
 bool
 ix86_expand_int_movcc (rtx operands[])
 {
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index d47184e2879..899873dfeca 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -144,8 +144,10 @@  enum apx_features {
   apx_nf = 1 << 4,
   apx_ccmp = 1 << 5,
   apx_zu = 1 << 6,
+  apx_cfcmov = 1 << 7,
   apx_all = apx_egpr | apx_push2pop2 | apx_ndd
-	    | apx_ppx | apx_nf | apx_ccmp | apx_zu,
+	    | apx_ppx | apx_nf | apx_ccmp | apx_zu
+	    | apx_cfcmov,
 };
 
 #endif
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f122fd8a0a3..e98fcc8c857 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -153,6 +153,7 @@  extern bool ix86_match_ccmode (rtx, machine_mode);
 extern bool ix86_match_ptest_ccmode (rtx);
 extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
 extern void ix86_expand_setcc (rtx, enum rtx_code, rtx, rtx);
+extern void ix86_expand_int_cfmovcc (rtx, rtx, rtx, rtx);
 extern bool ix86_expand_int_movcc (rtx[]);
 extern bool ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 11770aa8a50..85af9347421 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22621,10 +22621,18 @@  ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 	  *total = COSTS_N_INSNS (1);
 	  if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
 	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	  if (!REG_P (XEXP (x, 1)))
-	    *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  if (!REG_P (XEXP (x, 2)))
-	    *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
+	  rtx op1, op2;
+	  op1 = XEXP (x, 1);
+	  op2 = XEXP (x, 2);
+	  /* Handle UNSPEC_APX_CFCMOV for cfcmov.  */
+	  if (GET_CODE (op1) == UNSPEC && XINT (op1, 1) == UNSPEC_APX_CFCMOV)
+	    op1 = XVECEXP (op1, 0, 0);
+	  if (GET_CODE (op2) == UNSPEC && XINT (op2, 1) == UNSPEC_APX_CFCMOV)
+	    op2 = XVECEXP (op2, 0, 0);
+	  if (!REG_P (op1))
+	    *total += rtx_cost (op1, mode, code, 1, speed);
+	  if (!REG_P (op2))
+	    *total += rtx_cost (op2, mode, code, 2, speed);
 	  return true;
 	}
       return false;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e8e528c7811..51f2cea5f84 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -58,6 +58,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_NF (ix86_apx_features & apx_nf)
 #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
 #define TARGET_APX_ZU (ix86_apx_features & apx_zu)
+#define TARGET_APX_CFCMOV (ix86_apx_features & apx_cfcmov)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 362b0ddcf40..e89cc0153db 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -223,6 +223,9 @@  (define_c_enum "unspec" [
   ;; For APX CCMP support
   ;; DFV = default flag value
   UNSPEC_APX_DFV
+
+  ;; For APX CFCMOV support
+  UNSPEC_APX_CFCMOV
 ])
 
 (define_c_enum "unspecv" [
@@ -584,7 +587,7 @@  (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
 		    noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
 		    avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
 		    avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
-		    vaes_avx512vl,noapx_nf,avx10_2"
+		    vaes_avx512vl,noapx_nf,avx10_2,apx_cfcmov"
   (const_string "base"))
 
 ;; The (bounding maximum) length of an instruction immediate.
@@ -995,6 +998,7 @@  (define_attr "enabled" ""
 	 (eq_attr "mmx_isa" "avx")
 	   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 	 (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
+	 (eq_attr "isa" "apx_cfcmov") (symbol_ref "TARGET_APX_CFCMOV")
 	]
 	(const_int 1)))
 
@@ -26088,6 +26092,30 @@  (define_expand "mov<mode>cc"
   ""
   "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
+(define_expand "maskload<mode>void"
+  [(set (match_operand:SWI248 0 "register_operand")
+	(if_then_else:SWI248 (match_operand 2 "comparison_operator")
+			   (match_operand:SWI248 1 "nonimm_or_0_operand")
+			   (match_operand:SWI248 3 "nonimm_or_0_operand")))]
+  "TARGET_APX_CFCMOV"
+{
+  ix86_expand_int_cfmovcc (operands[0], operands[2],
+			    operands[1], operands[3]);
+  DONE;
+})
+
+(define_expand "maskstore<mode>void"
+  [(set (match_operand:SWI248 0 "memory_operand")
+	(if_then_else:SWI248 (match_operand 2 "comparison_operator")
+			     (match_operand:SWI248 1 "register_operand")
+			     (match_dup 0)))]
+  "TARGET_APX_CFCMOV"
+{
+  ix86_expand_int_cfmovcc (operands[0], operands[2],
+			   operands[1], operands[0]);
+  DONE;
+})
+
 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
 ;; the register first winds up with `sbbl $0,reg', which is also weird.
 ;; So just document what we're doing explicitly.
@@ -26189,6 +26217,52 @@  (define_split
    (set (match_dup 0)
 	(neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
 
+(define_insn "*cfmov<mode>cc"
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+			       [(reg FLAGS_REG) (const_int 0)])
+	  (unspec:SWI248
+	   [(match_operand:SWI248 2 "memory_operand" "m,m")]
+	   UNSPEC_APX_CFCMOV)
+	  (match_operand:SWI248 3 "reg_or_0_operand" "C,r")))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "@
+  cfcmov%O2%C1\t{%2, %0|%0, %2}
+  cfcmov%O2%C1\t{%2, %3, %0|%0, %3, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cfmov<mode>cc_2"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,r,m")
+	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+			       [(reg FLAGS_REG) (const_int 0)])
+	  (match_operand:SWI248 2 "reg_or_0_operand" "r,C,r")
+	  (unspec:SWI248
+	   [(match_operand:SWI248 3 "memory_operand" "m,m,0")]
+	   UNSPEC_APX_CFCMOV)))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "@
+  cfcmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
+  cfcmov%O2%c1\t{%3, %0|%0, %3}
+  cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "apx_ndd,*,*")
+   (set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*cfmov<mode>ccz" ; reg-to-reg cfcmov, DEST = 0 when false
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+			       [(reg FLAGS_REG) (const_int 0)])
+	  (match_operand:SWI248 2 "register_operand" "r")
+	  (match_operand:SWI248 3 "const0_operand" "C")))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "type" "icmov") (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*mov<mode>cc_noc"
   [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
 	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 5c889b72cc5..1bfe372724c 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1348,6 +1348,9 @@  Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
 EnumValue
 Enum(apx_features) String(zu) Value(apx_zu) Set(8)
 
+EnumValue
+Enum(apx_features) String(cfcmov) Value(apx_cfcmov) Set(9)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
new file mode 100644
index 00000000000..4a1fb91b24c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
@@ -0,0 +1,73 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mapxf" } */
+
+/* { dg-final { scan-assembler-times "cfcmovne" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovg" 2} } */
+/* { dg-final { scan-assembler-times "cfcmove" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovl" 2 } } */
+/* { dg-final { scan-assembler-times "cfcmovle" 1 } } */
+
+/* Conditional store: *ARR is written only when A != B; D is returned.  */
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_store (int a, int b, int c, int d, int *arr)
+{
+    if (a != b)
+        *arr = c;
+    return d;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_ndd (int a, int b, int c, int *p)
+{
+  if (a > b)
+    return *p;
+  return c;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_2_trap (int a, int b, int *c, int *p)
+{
+  if (a > b)
+    return *p;
+  return *c;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_zero (int a, int b, int c)
+{
+  int sum = 0;
+  if (a == b)
+    return c;
+  return sum;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_mem (int a, int b, int *p)
+{
+    int sum = 0;
+    if (a < b )
+	sum = *p;
+    return sum;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_1 (int a, int b, int c, int *p)
+{
+  int sum = 0;
+  if (a > b)
+    sum = *p;
+  else
+    sum = a + c;
+  return sum + 1;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_2 (int a, int b, int c, int *p)
+{
+  int sum = 0;
+  if (a > b)
+    sum = a + c;
+  else
+    sum = *p;
+  return sum + 1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
new file mode 100644
index 00000000000..2b1660f64fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
@@ -0,0 +1,40 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-mapxf -march=x86-64 -O3" } */
+
+#include "apx-cfcmov-1.c"
+
+extern void abort (void);
+
+int main ()
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  int arr = 6;
+  int arr1 = 5;
+  int res = cfc_store (1, 2, 3, 4, &arr);
+  if (arr != 3 || res != 4)
+    abort ();
+  res = cfc_load_ndd (2, 1, 2, &arr);
+  if (res != 3)
+    abort ();
+  res = cfc_load_2_trap (1, 2, &arr1, &arr);
+  if (res != 5)
+    abort ();
+  /* a != b here, so cfc_load_zero must return its zero default.  */
+  res = cfc_load_zero (1, 2, 3);
+  if (res != 0)
+    abort ();
+  res = cfc_load_mem (2, 1, &arr);
+  if (res != 0)
+    abort ();
+  res = cfc_load_arith_1 (1, 2, 3, &arr);
+  if (res != 5)
+    abort ();
+  res = cfc_load_arith_2 (2, 1, 3, &arr);
+  if (res != 6)
+    abort ();
+  return 0;
+}
+