[3/5] Match: Simplify branch form 8 of unsigned SAT_ADD into branchless
Checks
| Context | Check | Description |
| linaro-tcwg-bot/tcwg_gcc_build--master-arm | success | Build passed |
| linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 | success | Build passed |
Commit Message
From: Pan Li <pan2.li@intel.com>
There are several forms of the unsigned SAT_ADD. Some of them are
complicated while others are cheap. This patch simplifies the
complicated forms into the cheap one. For example:
From the form 8 (branch):
SAT_U_ADD = x > (T)(x + y) ? -1 : (x + y).
To (branchless):
SAT_U_ADD = (x + y) | -((x + y) < x).
#define T uint8_t
T sat_add_u_1 (T x, T y)
{
return x > (T)(x + y) ? -1 : (x + y);
}
Before this patch:
1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y)
2 │ {
3 │ uint8_t D.2809;
4 │
5 │ _1 = x + y;
6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>;
7 │ <D.2810>:
8 │ D.2809 = x + y;
9 │ goto <D.2812>;
10 │ <D.2811>:
11 │ D.2809 = 255;
12 │ <D.2812>:
13 │ return D.2809;
14 │ }
After this patch:
1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y)
2 │ {
3 │ uint8_t D.2809;
4 │
5 │ _1 = x + y;
6 │ _2 = x + y;
7 │ _3 = x > _2;
8 │ _4 = (unsigned char) _3;
9 │ _5 = -_4;
10 │ D.2809 = _1 | _5;
11 │ return D.2809;
12 │ }
The simplification doesn't need to check whether the target supports
SAT_ADD; it is an optimization at the GIMPLE level.
The following test suites passed with this patch.
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.
gcc/ChangeLog:
* match.pd: Remove unsigned branch form 8 for SAT_ADD, and
add simplify to branchless instead.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/sat_u_add-simplify-4-u16.c: New test.
* gcc.dg/tree-ssa/sat_u_add-simplify-4-u32.c: New test.
* gcc.dg/tree-ssa/sat_u_add-simplify-4-u64.c: New test.
* gcc.dg/tree-ssa/sat_u_add-simplify-4-u8.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/match.pd | 13 ++++++++-----
.../gcc.dg/tree-ssa/sat_u_add-simplify-4-u16.c | 15 +++++++++++++++
.../gcc.dg/tree-ssa/sat_u_add-simplify-4-u32.c | 15 +++++++++++++++
.../gcc.dg/tree-ssa/sat_u_add-simplify-4-u64.c | 15 +++++++++++++++
.../gcc.dg/tree-ssa/sat_u_add-simplify-4-u8.c | 15 +++++++++++++++
5 files changed, 68 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-4-u16.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-4-u32.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-4-u64.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-4-u8.c
@@ -3170,6 +3170,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& types_match (type, @0, @1))
(bit_ior @2 (negate (convert (lt @2 @0))))))
+/* Simplify SAT_U_ADD to the cheap form
+ From: SAT_U_ADD = x > (X + Y) ? -1 : (X + Y).
+ To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */
+(simplify (cond (gt @0 (plus:c@2 @0 @1)) integer_minus_onep @2)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))
+ (bit_ior @2 (negate (convert (lt @2 @0))))))
+
/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */
(match (unsigned_integer_sat_add @0 @1)
@@ -3182,11 +3190,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
integer_minus_onep (usadd_left_part_2 @0 @1)))
-/* Unsigned saturation add, case 8 (branch with gt):
- SAT_ADD = x > (X + Y) ? -1 : (X + Y). */
-(match (unsigned_integer_sat_add @0 @1)
- (cond^ (gt @0 (usadd_left_part_1@2 @0 @1)) integer_minus_onep @2))
-
/* Unsigned saturation add, case 9 (one op is imm):
SAT_U_ADD = (X + 3) >= x ? (X + 3) : -1. */
(match (unsigned_integer_sat_add @0 @1)
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-gimple-details" } */
+
+#include <stdint.h>
+
+#define T uint16_t
+
+T sat_add_u_1 (T x, T y)
+{
+ return x > (T)(x + y) ? -1 : (x + y);
+}
+
+/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-gimple-details" } */
+
+#include <stdint.h>
+
+#define T uint32_t
+
+T sat_add_u_1 (T x, T y)
+{
+ return x > (T)(x + y) ? -1 : (x + y);
+}
+
+/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-gimple-details" } */
+
+#include <stdint.h>
+
+#define T uint64_t
+
+T sat_add_u_1 (T x, T y)
+{
+ return x > (T)(x + y) ? -1 : (x + y);
+}
+
+/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-gimple-details" } */
+
+#include <stdint.h>
+
+#define T uint8_t
+
+T sat_add_u_1 (T x, T y)
+{
+ return x > (T)(x + y) ? -1 : (x + y);
+}
+
+/* { dg-final { scan-tree-dump-not " if " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " else " "gimple" } } */
+/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */