s390: Add expander for uaddc/usubc optabs

Message ID 20240918172504.3545237-1-stefansf@gcc.gnu.org
State New
Headers
Series s390: Add expander for uaddc/usubc optabs |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

Stefan Schulze Frielinghaus Sept. 18, 2024, 5:25 p.m. UTC
  Bootstrapped and regtested on s390.  Both expander are constrained to
z196 because of the conditional moves.  I guess this is reasonable
nowadays.

Would be great if you could have a second look that setting the
carry/borrow bit (bit 18 of the PSW) is indeed correct.  Brain twisted
me at first ;-)

-- 8< --

gcc/ChangeLog:

	* config/s390/s390.md (*add<mode>3_carry1_cc): Renamed to ...
	(add<mode>3_carry1_cc): this and in order to use the
	corresponding gen function, encode CC mode into pattern.
	(*sub<mode>3_borrow_cc): Renamed to ...
	(sub<mode>3_borrow_cc): this and in order to use the
	corresponding gen function, encode CC mode into pattern.
	(*add<mode>3_alc_carry1_cc): Renamed to ...
	(add<mode>3_alc_carry1_cc): this and in order to use the
	corresponding gen function, encode CC mode into pattern.
	(sub<mode>3_slb_borrow1_cc): New.
	(uaddc<mode>5): New.
	(usubc<mode>5): New.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/uaddc-1.c: New test.
	* gcc.target/s390/uaddc-2.c: New test.
	* gcc.target/s390/usubc-1.c: New test.
	* gcc.target/s390/usubc-2.c: New test.
---
 gcc/config/s390/s390.md                 | 103 +++++++++++++++++++-----
 gcc/testsuite/gcc.target/s390/uaddc-1.c |  80 ++++++++++++++++++
 gcc/testsuite/gcc.target/s390/uaddc-2.c |  25 ++++++
 gcc/testsuite/gcc.target/s390/usubc-1.c |  80 ++++++++++++++++++
 gcc/testsuite/gcc.target/s390/usubc-2.c |  25 ++++++
 5 files changed, 295 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/uaddc-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/uaddc-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/usubc-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/usubc-2.c
  

Patch

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 4a225ae24f3..6fd3f943fe1 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -6001,14 +6001,14 @@ 
                         z10_super_E1,z10_super_E1,z10_super_E1")])
 
 ; alr, alfi, slfi, al, aly, alrk, alhsik, algr, algfi, slgfi, alg, alsi, algsi, algrk, alghsik
-(define_insn "*add<mode>3_carry1_cc"
-  [(set (reg CC_REGNUM)
-        (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d, 0, 0,d,0,0,0")
-			   (match_operand:GPR 2 "general_operand"      " d,d,Op,On,K,R,T,C"))
-                 (match_dup 1)))
-   (set (match_operand:GPR 0 "nonimmediate_operand"                    "=d,d, d, d,d,d,d,d")
+(define_insn "add<mode>3_carry1_cc"
+  [(set (reg:CCL1 CC_REGNUM)
+        (compare:CCL1 (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d, 0, 0,d,0,0,0")
+				(match_operand:GPR 2 "general_operand"      " d,d,Op,On,K,R,T,C"))
+		      (match_dup 1)))
+   (set (match_operand:GPR 0 "nonimmediate_operand"                         "=d,d, d, d,d,d,d,d")
         (plus:GPR (match_dup 1) (match_dup 2)))]
-  "s390_match_ccmode (insn, CCL1mode)"
+  ""
   "@
    al<g>r\t%0,%2
    al<g>rk\t%0,%1,%2
@@ -6541,14 +6541,14 @@ 
    (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
 
 ; slr, sl, sly, slgr, slg, slrk, slgrk
-(define_insn "*sub<mode>3_borrow_cc"
-  [(set (reg CC_REGNUM)
-        (compare (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
-			    (match_operand:GPR 2 "general_operand"  "d,d,R,T"))
-                 (match_dup 1)))
-   (set (match_operand:GPR 0 "register_operand"                    "=d,d,d,d")
+(define_insn "sub<mode>3_borrow_cc"
+  [(set (reg:CCL2 CC_REGNUM)
+        (compare:CCL2 (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
+				 (match_operand:GPR 2 "general_operand"  "d,d,R,T"))
+		      (match_dup 1)))
+   (set (match_operand:GPR 0 "register_operand"                         "=d,d,d,d")
         (minus:GPR (match_dup 1) (match_dup 2)))]
-  "s390_match_ccmode (insn, CCL2mode)"
+  ""
   "@
    sl<g>r\t%0,%2
    sl<g>rk\t%0,%1,%2
@@ -6754,22 +6754,50 @@ 
 ; add(di|si)cc instruction pattern(s).
 ;
 
+(define_expand "uaddc<mode>5"
+  [(match_operand:GPR 0 "register_operand")
+   (match_operand:GPR 1 "nonimmediate_operand")
+   (match_operand:GPR 2 "nonimmediate_operand")
+   (match_operand:GPR 3 "nonimmediate_operand")
+   (match_operand:GPR 4 "general_operand")]
+  "TARGET_Z196 && (<MODE>mode != DImode || TARGET_64BIT)"
+{
+  rtx cond = gen_rtx_LTU (<MODE>mode, gen_rtx_REG (CCL1mode, CC_REGNUM), const0_rtx);
+  if (operands[4] == const0_rtx)
+    emit_insn (gen_add<mode>3_carry1_cc (operands[0], operands[2], operands[3]));
+  else
+    {
+      rtx tmp;
+      if (CONSTANT_P (operands[4]))
+	{
+	  tmp = gen_reg_rtx (SImode);
+	  emit_move_insn (tmp, operands[4]);
+	}
+      else
+	tmp = operands[4];
+      s390_emit_compare (LTU, tmp, const0_rtx);
+      emit_insn (gen_add<mode>3_alc_carry1_cc (operands[0], operands[2], operands[3], cond));
+    }
+  emit_insn (gen_mov<mode>cc (operands[1], cond, const1_rtx, const0_rtx));
+  DONE;
+})
+
 ; the following 4 patterns are used when the result of an add with
 ; carry is checked for an overflow condition
 
 ; op1 + op2 + c < op1
 
 ; alcr, alc, alcgr, alcg
-(define_insn "*add<mode>3_alc_carry1_cc"
-  [(set (reg CC_REGNUM)
-        (compare
+(define_insn "add<mode>3_alc_carry1_cc"
+  [(set (reg:CCL1 CC_REGNUM)
+        (compare:CCL1
           (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
                               (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
                     (match_operand:GPR 2 "general_operand" "d,T"))
           (match_dup 1)))
    (set (match_operand:GPR 0 "register_operand" "=d,d")
         (plus:GPR (plus:GPR (match_dup 3) (match_dup 1)) (match_dup 2)))]
-  "s390_match_ccmode (insn, CCL1mode)"
+  ""
   "@
    alc<g>r\t%0,%2
    alc<g>\t%0,%2"
@@ -6854,6 +6882,28 @@ 
    alc<g>\t%0,%2"
   [(set_attr "op_type"  "RRE,RXY")])
 
+(define_expand "usubc<mode>5"
+  [(match_operand:GPR 0 "register_operand")
+   (match_operand:GPR 1 "nonimmediate_operand")
+   (match_operand:GPR 2 "nonimmediate_operand")
+   (match_operand:GPR 3 "nonimmediate_operand")
+   (match_operand:GPR 4 "general_operand")]
+  "TARGET_Z196 && (<MODE>mode != DImode || TARGET_64BIT)"
+{
+  rtx cond = gen_rtx_GTU (<MODE>mode, gen_rtx_REG (CCL2mode, CC_REGNUM), const0_rtx);
+  if (operands[4] == const0_rtx)
+    emit_insn (gen_sub<mode>3_borrow_cc (operands[0], operands[2], operands[3]));
+  else
+    {
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_xor<mode>3 (tmp, operands[4], const1_rtx));
+      s390_emit_compare (GTU, tmp, const0_rtx);
+      emit_insn (gen_sub<mode>3_slb_borrow1_cc (operands[0], operands[2], operands[3], cond));
+    }
+  emit_insn (gen_mov<mode>cc (operands[1], cond, const1_rtx, const0_rtx));
+  DONE;
+})
+
 ; slbr, slb, slbgr, slbg
 (define_insn "*sub<mode>3_slb_cc"
   [(set (reg CC_REGNUM)
@@ -6885,6 +6935,23 @@ 
   [(set_attr "op_type"  "RRE,RXY")
    (set_attr "z10prop" "z10_c,*")])
 
+; slbr, slb, slbgr, slbg
+(define_insn "sub<mode>3_slb_borrow1_cc"
+  [(set (reg:CCL2 CC_REGNUM)
+        (compare:CCL2
+          (minus:GPR (minus:GPR (match_operand:GPR 1 "nonimmediate_operand" "0,0")
+                                (match_operand:GPR 2 "general_operand" "d,T"))
+                     (match_operand:GPR 3 "s390_slb_comparison" ""))
+          (match_dup 1)))
+   (set (match_operand:GPR 0 "register_operand" "=d,d")
+        (minus:GPR (minus:GPR (match_dup 1) (match_dup 2)) (match_dup 3)))]
+  ""
+  "@
+   slb<g>r\t%0,%2
+   slb<g>\t%0,%2"
+  [(set_attr "op_type"  "RRE,RXY")
+   (set_attr "z10prop" "z10_c,*")])
+
 (define_expand "add<mode>cc"
   [(match_operand:GPR 0 "register_operand" "")
    (match_operand 1 "comparison_operator" "")
diff --git a/gcc/testsuite/gcc.target/s390/uaddc-1.c b/gcc/testsuite/gcc.target/s390/uaddc-1.c
new file mode 100644
index 00000000000..dfad5915690
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/uaddc-1.c
@@ -0,0 +1,80 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mzarch -save-temps -fdump-tree-optimized" }  */
+/* { dg-final { scan-tree-dump-times "\\.UADDC \\(" 2 "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump-times "\\.UADDC \\(" 1 "optimized" { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "\\talcr\\t" 1 } } */
+/* { dg-final { scan-assembler-times "\\talcgr\\t" 1 { target lp64 } } } */
+
+#include <assert.h>
+
+unsigned int __attribute__ ((noipa))
+uaddc (unsigned int x, unsigned int y, _Bool carry_in, _Bool *carry_out)
+{
+  unsigned int r;
+  _Bool c1 = __builtin_add_overflow (x, y, &r);
+  _Bool c2 = __builtin_add_overflow (r, carry_in, &r);
+  *carry_out = c1 | c2;
+  return r;
+}
+
+#ifdef __s390x__
+unsigned long __attribute__ ((noipa))
+uaddcl (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out)
+{
+  unsigned long r;
+  _Bool c1 = __builtin_add_overflow (x, y, &r);
+  _Bool c2 = __builtin_add_overflow (r, carry_in, &r);
+  *carry_out = c1 | c2;
+  return r;
+}
+#endif
+
+void
+test_int (void)
+{
+  _Bool c;
+  unsigned int r;
+
+  r = uaddc (0xf0000000u, 0x0fffffffu, 0, &c);
+  assert (r == 0xffffffffu && !c);
+
+  r = uaddc (0xf0000000u, 0x0fffffffu, 1, &c);
+  assert (r == 0 && c);
+
+  r = uaddc (0xf0000001u, 0x0fffffffu, 0, &c);
+  assert (r == 0 && c);
+
+  r = uaddc (0xf0000001u, 0x0fffffffu, 1, &c);
+  assert (r == 1 && c);
+}
+
+#ifdef __s390x__
+void
+test_long (void)
+{
+  _Bool c;
+  unsigned long r;
+
+  r = uaddcl (0xf000000000000000u, 0x0fffffffffffffffu, 0, &c);
+  assert (r == 0xffffffffffffffffu && !c);
+
+  r = uaddcl (0xf000000000000000u, 0x0fffffffffffffffu, 1, &c);
+  assert (r == 0 && c);
+
+  r = uaddcl (0xf000000000000001u, 0x0fffffffffffffffu, 0, &c);
+  assert (r == 0 && c);
+
+  r = uaddcl (0xf000000000000001u, 0x0fffffffffffffffu, 1, &c);
+  assert (r == 1 && c);
+}
+#endif
+
+int
+main (void)
+{
+  test_int ();
+#ifdef __s390x__
+  test_long ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/uaddc-2.c b/gcc/testsuite/gcc.target/s390/uaddc-2.c
new file mode 100644
index 00000000000..61bcc6dee59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/uaddc-2.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mzarch -save-temps -fdump-tree-optimized" }  */
+/* { dg-final { scan-tree-dump-times "\\.UADDC \\(" 6 "optimized" } } */
+/* { dg-final { scan-assembler-times "\\talcr\\t" 6 { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "\\talcr\\t" 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "\\talcgr\\t" 3 { target lp64 } } } */
+
+#define TEST(T, V, OP) \
+  unsigned T \
+  uaddc_##T##_##V (unsigned T x, unsigned T y, _Bool carry_in, _Bool *carry_out) \
+  { \
+    unsigned T r; \
+    _Bool c1 = __builtin_add_overflow (x, y, &r); \
+    _Bool c2 = __builtin_add_overflow (r, carry_in, &r); \
+    *carry_out = c1 OP c2; \
+    return r; \
+  }
+
+TEST(int, 1, |)
+TEST(int, 2, ||)
+TEST(int, 3, ^)
+
+TEST(long, 1, |)
+TEST(long, 2, ||)
+TEST(long, 3, ^)
diff --git a/gcc/testsuite/gcc.target/s390/usubc-1.c b/gcc/testsuite/gcc.target/s390/usubc-1.c
new file mode 100644
index 00000000000..f4bee1e1534
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/usubc-1.c
@@ -0,0 +1,80 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mzarch -save-temps -fdump-tree-optimized" }  */
+/* { dg-final { scan-tree-dump-times "\\.USUBC \\(" 2 "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump-times "\\.USUBC \\(" 1 "optimized" { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "\\tslbr\\t" 1 } } */
+/* { dg-final { scan-assembler-times "\\tslbgr\\t" 1 { target lp64 } } } */
+
+#include <assert.h>
+
+unsigned int __attribute__ ((noipa))
+usubc (unsigned int x, unsigned int y, _Bool borrow_in, _Bool *borrow_out)
+{
+  unsigned int r;
+  _Bool b1 = __builtin_sub_overflow (x, y, &r);
+  _Bool b2 = __builtin_sub_overflow (r, borrow_in, &r);
+  *borrow_out = b1 | b2;
+  return r;
+}
+
+#ifdef __s390x__
+unsigned long __attribute__ ((noipa))
+usubcl (unsigned long x, unsigned long y, _Bool borrow_in, _Bool *borrow_out)
+{
+  unsigned long r;
+  _Bool b1 = __builtin_sub_overflow (x, y, &r);
+  _Bool b2 = __builtin_sub_overflow (r, borrow_in, &r);
+  *borrow_out = b1 | b2;
+  return r;
+}
+#endif
+
+void
+test_int (void)
+{
+  _Bool b;
+  unsigned int r;
+
+  r = usubc (0xfu, 0xfu, 0, &b);
+  assert (r == 0 && !b);
+
+  r = usubc (0xfu, 0xfu, 1, &b);
+  assert (r == 0xffffffffu && b);
+
+  r = usubc (0xfu, 0xffu, 0, &b);
+  assert (r == 0xffffff10u && b);
+
+  r = usubc (0xfu, 0xffu, 1, &b);
+  assert (r == 0xffffff0fu && b);
+}
+
+#ifdef __s390x__
+void
+test_long (void)
+{
+  _Bool b;
+  unsigned long r;
+
+  r = usubcl (0xfu, 0xfu, 0, &b);
+  assert (r == 0 && !b);
+
+  r = usubcl (0xfu, 0xfu, 1, &b);
+  assert (r == 0xffffffffffffffffu && b);
+
+  r = usubcl (0xfu, 0xffu, 0, &b);
+  assert (r == 0xffffffffffffff10u && b);
+
+  r = usubcl (0xfu, 0xffu, 1, &b);
+  assert (r == 0xffffffffffffff0fu && b);
+}
+#endif
+
+int
+main (void)
+{
+  test_int ();
+#ifdef __s390x__
+  test_long ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/usubc-2.c b/gcc/testsuite/gcc.target/s390/usubc-2.c
new file mode 100644
index 00000000000..b67907ed663
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/usubc-2.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mzarch -save-temps -fdump-tree-optimized" }  */
+/* { dg-final { scan-tree-dump-times "\\.USUBC \\(" 6 "optimized" } } */
+/* { dg-final { scan-assembler-times "\\tslbr\\t" 6 { target { ! lp64 } } } } */
+/* { dg-final { scan-assembler-times "\\tslbr\\t" 3 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "\\tslbgr\\t" 3 { target lp64 } } } */
+
+#define TEST(T, V, OP) \
+  unsigned T \
+  uaddc_##T##_##V (unsigned T x, unsigned T y, _Bool carry_in, _Bool *carry_out) \
+  { \
+    unsigned T r; \
+    _Bool c1 = __builtin_sub_overflow (x, y, &r); \
+    _Bool c2 = __builtin_sub_overflow (r, carry_in, &r); \
+    *carry_out = c1 OP c2; \
+    return r; \
+  }
+
+TEST(int, 1, |)
+TEST(int, 2, ||)
+TEST(int, 3, ^)
+
+TEST(long, 1, |)
+TEST(long, 2, ||)
+TEST(long, 3, ^)