[v2] Match: Support form 1 for scalar signed integer .SAT_ADD
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
Commit Message
From: Pan Li <pan2.li@intel.com>
This patch adds support for form 1 of the scalar signed
integer .SAT_ADD, as shown in the example below:
Form 1:
#define DEF_SAT_S_ADD_FMT_1(T, MIN, MAX) \
T __attribute__((noinline)) \
sat_s_add_##T##_fmt_1 (T x, T y) \
{ \
T sum = x + y; \
return (x ^ y) < 0 \
? sum \
: (sum ^ x) >= 0 \
? sum \
: x < 0 ? MIN : MAX; \
}
DEF_SAT_S_ADD_FMT_1(int64_t, INT64_MIN, INT64_MAX)
If the backend implements the ssadd<m>3 pattern, the difference before
and after this patch looks similar to the dumps below.
Before this patch:
4 │ __attribute__((noinline))
5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
6 │ {
7 │ int64_t sum;
8 │ long int _1;
9 │ long int _2;
10 │ int64_t _3;
11 │ _Bool _8;
12 │ long int _9;
13 │ long int _10;
14 │ long int _11;
15 │ long int _12;
16 │ long int _13;
17 │
18 │ <bb 2> [local count: 1073741824]:
19 │ sum_6 = x_4(D) + y_5(D);
20 │ _1 = x_4(D) ^ y_5(D);
21 │ _2 = x_4(D) ^ sum_6;
22 │ _12 = ~_1;
23 │ _13 = _2 & _12;
24 │ if (_13 < 0)
25 │ goto <bb 3>; [41.00%]
26 │ else
27 │ goto <bb 4>; [59.00%]
28 │
29 │ <bb 3> [local count: 259738147]:
30 │ _8 = x_4(D) < 0;
31 │ _9 = (long int) _8;
32 │ _10 = -_9;
33 │ _11 = _10 ^ 9223372036854775807;
34 │
35 │ <bb 4> [local count: 1073741824]:
36 │ # _3 = PHI <sum_6(2), _11(3)>
37 │ return _3;
38 │
39 │ }
After this patch:
4 │ __attribute__((noinline))
5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
6 │ {
7 │ int64_t _4;
8 │
9 │ ;; basic block 2, loop depth 0
10 │ ;; pred: ENTRY
11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
12 │ return _4;
13 │ ;; succ: EXIT
14 │
15 │ }
The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.
gcc/ChangeLog:
* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the caller to use
the renamed function.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/match.pd | 17 ++++++++++++++++
gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
2 files changed, 54 insertions(+), 5 deletions(-)
Comments
Kindly ping.
Pan
-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com>
Sent: Wednesday, August 7, 2024 5:31 PM
To: gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v2] Match: Support form 1 for scalar signed integer .SAT_ADD
From: Pan Li <pan2.li@intel.com>
This patch adds support for form 1 of the scalar signed
integer .SAT_ADD, as shown in the example below:
Form 1:
#define DEF_SAT_S_ADD_FMT_1(T, MIN, MAX) \
T __attribute__((noinline)) \
sat_s_add_##T##_fmt_1 (T x, T y) \
{ \
T sum = x + y; \
return (x ^ y) < 0 \
? sum \
: (sum ^ x) >= 0 \
? sum \
: x < 0 ? MIN : MAX; \
}
DEF_SAT_S_ADD_FMT_1(int64_t, INT64_MIN, INT64_MAX)
If the backend implements the ssadd<m>3 pattern, the difference before
and after this patch looks similar to the dumps below.
Before this patch:
4 │ __attribute__((noinline))
5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
6 │ {
7 │ int64_t sum;
8 │ long int _1;
9 │ long int _2;
10 │ int64_t _3;
11 │ _Bool _8;
12 │ long int _9;
13 │ long int _10;
14 │ long int _11;
15 │ long int _12;
16 │ long int _13;
17 │
18 │ <bb 2> [local count: 1073741824]:
19 │ sum_6 = x_4(D) + y_5(D);
20 │ _1 = x_4(D) ^ y_5(D);
21 │ _2 = x_4(D) ^ sum_6;
22 │ _12 = ~_1;
23 │ _13 = _2 & _12;
24 │ if (_13 < 0)
25 │ goto <bb 3>; [41.00%]
26 │ else
27 │ goto <bb 4>; [59.00%]
28 │
29 │ <bb 3> [local count: 259738147]:
30 │ _8 = x_4(D) < 0;
31 │ _9 = (long int) _8;
32 │ _10 = -_9;
33 │ _11 = _10 ^ 9223372036854775807;
34 │
35 │ <bb 4> [local count: 1073741824]:
36 │ # _3 = PHI <sum_6(2), _11(3)>
37 │ return _3;
38 │
39 │ }
After this patch:
4 │ __attribute__((noinline))
5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
6 │ {
7 │ int64_t _4;
8 │
9 │ ;; basic block 2, loop depth 0
10 │ ;; pred: ENTRY
11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
12 │ return _4;
13 │ ;; succ: EXIT
14 │
15 │ }
The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.
gcc/ChangeLog:
* match.pd: Add the matching for signed .SAT_ADD.
* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
matching func decl.
(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
to ...
(match_saturation_add): ... here.
(math_opts_dom_walker::after_dom_children): Update the caller to use
the renamed function.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/match.pd | 17 ++++++++++++++++
gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
2 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index c9c8478d286..8b8a5dbcfe3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3311,6 +3311,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
(if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
+/* Signed saturation add, case 1:
+ T sum = X + Y;
+ SAT_S_ADD = (X ^ Y) < 0
+ ? sum
+ : (sum ^ x) >= 0
+ ? sum
+ : x < 0 ? MIN : MAX; */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (convert?@2 (plus:c (convert? @0)
+ (convert? @1))))
+ (bit_not (bit_xor:c @0 @1)))
+ integer_zerop)
+ (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+ @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..f39c88741a4 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
static void
build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
tree lhs, tree op_0, tree op_1)
@@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
}
/*
- * Try to match saturation unsigned add with PHI.
+ * Try to match saturation add with PHI.
+ * For unsigned integer:
* <bb 2> :
* _1 = x_3(D) + y_4(D);
* if (_1 >= x_3(D))
@@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
* # _2 = PHI <255(2), _1(3)>
* =>
* <bb 4> [local count: 1073741824]:
- * _2 = .SAT_ADD (x_4(D), y_5(D)); */
+ * _2 = .SAT_ADD (x_4(D), y_5(D));
+ *
+ * For signed integer:
+ * _1 = x_5(D) ^ y_6(D);
+ * _13 = x_5(D) + y_6(D);
+ * _3 = x_5(D) ^ _13;
+ * _2 = ~_1;
+ * _7 = _2 & _3;
+ * if (_7 < 0)
+ * goto <bb 3>; [59.00%]
+ * else
+ * goto <bb 4>; [41.00%]
+ * ;; succ: 4
+ * ;; 3
+ * ;; basic block 3, loop depth 0
+ * ;; pred: 2
+ * _9 = x_5(D) < 0;
+ * _10 = (long int) _9;
+ * _11 = -_10;
+ * _12 = _11 ^ 9223372036854775807;
+ * ;; succ: 4
+ * ;; basic block 4, loop depth 0
+ * ;; pred: 2
+ * ;; 3
+ * # _4 = PHI <_13(2), _12(3)>
+ * =>
+ * ;; basic block 2, loop depth 0
+ * ;; pred: ENTRY
+ * _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
static void
-match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
+match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
{
if (gimple_phi_num_args (phi) != 2)
return;
@@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
tree ops[2];
tree phi_result = gimple_phi_result (phi);
- if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
+ if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+ || gimple_signed_integer_sat_add (phi_result, ops, NULL))
build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
ops[0], ops[1]);
}
@@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
gsi_next (&psi))
{
gimple_stmt_iterator gsi = gsi_after_labels (bb);
- match_unsigned_saturation_add (&gsi, psi.phi ());
+ match_saturation_add (&gsi, psi.phi ());
match_unsigned_saturation_sub (&gsi, psi.phi ());
}
--
2.43.0
On Wed, Aug 7, 2024 at 11:31 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD. Aka below example:
>
> Form 1:
> #define DEF_SAT_S_ADD_FMT_1(T, MIN, MAX) \
> T __attribute__((noinline)) \
> sat_s_add_##T##_fmt_1 (T x, T y) \
> { \
> T sum = x + y; \
> return (x ^ y) < 0 \
> ? sum \
> : (sum ^ x) >= 0 \
> ? sum \
> : x < 0 ? MIN : MAX; \
> }
Wow. I wonder why this isn't simplified to never saturate since
signed x + y has undefined behavior on overflow? So I'd
expect instead
T sum = (unsigned T)x + (unsigned T)y;
to be used.
> DEF_SAT_S_ADD_FMT_1(int64_t, INT64_MIN, INT64_MAX)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
> 4 │ __attribute__((noinline))
> 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> 6 │ {
> 7 │ int64_t sum;
> 8 │ long int _1;
> 9 │ long int _2;
> 10 │ int64_t _3;
> 11 │ _Bool _8;
> 12 │ long int _9;
> 13 │ long int _10;
> 14 │ long int _11;
> 15 │ long int _12;
> 16 │ long int _13;
> 17 │
> 18 │ <bb 2> [local count: 1073741824]:
> 19 │ sum_6 = x_4(D) + y_5(D);
> 20 │ _1 = x_4(D) ^ y_5(D);
> 21 │ _2 = x_4(D) ^ sum_6;
> 22 │ _12 = ~_1;
> 23 │ _13 = _2 & _12;
> 24 │ if (_13 < 0)
> 25 │ goto <bb 3>; [41.00%]
> 26 │ else
> 27 │ goto <bb 4>; [59.00%]
> 28 │
> 29 │ <bb 3> [local count: 259738147]:
> 30 │ _8 = x_4(D) < 0;
> 31 │ _9 = (long int) _8;
> 32 │ _10 = -_9;
> 33 │ _11 = _10 ^ 9223372036854775807;
> 34 │
> 35 │ <bb 4> [local count: 1073741824]:
> 36 │ # _3 = PHI <sum_6(2), _11(3)>
> 37 │ return _3;
> 38 │
> 39 │ }
>
> After this patch:
> 4 │ __attribute__((noinline))
> 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> 6 │ {
> 7 │ int64_t _4;
> 8 │
> 9 │ ;; basic block 2, loop depth 0
> 10 │ ;; pred: ENTRY
> 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> 12 │ return _4;
> 13 │ ;; succ: EXIT
> 14 │
> 15 │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add the matching for signed .SAT_ADD.
> * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> matching func decl.
> (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> to ...
> (match_saturation_add): ... here.
> (math_opts_dom_walker::after_dom_children): Update the above renamed
> func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 17 ++++++++++++++++
> gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
> 2 files changed, 54 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..8b8a5dbcfe3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> }
> (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
>
> +/* Signed saturation add, case 1:
> + T sum = X + Y;
> + SAT_S_ADD = (X ^ Y) < 0
> + ? sum
> + : (sum ^ x) >= 0
> + ? sum
> + : x < 0 ? MIN : MAX; */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert?@2 (plus:c (convert? @0)
> + (convert? @1))))
> + (bit_not (bit_xor:c @0 @1)))
> + integer_zerop)
> + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
> + @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..f39c88741a4 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> static void
> build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> }
>
> /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
> * <bb 2> :
> * _1 = x_3(D) + y_4(D);
> * if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> * # _2 = PHI <255(2), _1(3)>
> * =>
> * <bb 4> [local count: 1073741824]:
> - * _2 = .SAT_ADD (x_4(D), y_5(D)); */
> + * _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + * _1 = x_5(D) ^ y_6(D);
> + * _13 = x_5(D) + y_6(D);
> + * _3 = x_5(D) ^ _13;
> + * _2 = ~_1;
> + * _7 = _2 & _3;
> + * if (_7 < 0)
> + * goto <bb 3>; [59.00%]
> + * else
> + * goto <bb 4>; [41.00%]
> + * ;; succ: 4
> + * ;; 3
> + * ;; basic block 3, loop depth 0
> + * ;; pred: 2
> + * _9 = x_5(D) < 0;
> + * _10 = (long int) _9;
> + * _11 = -_10;
> + * _12 = _11 ^ 9223372036854775807;
> + * ;; succ: 4
> + * ;; basic block 4, loop depth 0
> + * ;; pred: 2
> + * ;; 3
> + * # _4 = PHI <_13(2), _12(3)>
> + * =>
> + * ;; basic block 2, loop depth 0
> + * ;; pred: ENTRY
> + * _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
>
> static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> {
> if (gimple_phi_num_args (phi) != 2)
> return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> tree ops[2];
> tree phi_result = gimple_phi_result (phi);
>
> - if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> + || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> ops[0], ops[1]);
> }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> gsi_next (&psi))
> {
> gimple_stmt_iterator gsi = gsi_after_labels (bb);
> - match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_saturation_add (&gsi, psi.phi ());
> match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> --
> 2.43.0
>
> Wow. I wonder why this isn't simplified to never saturate since
> signed x + y has undefined behavior on overflow? So I'd
> expect instead
> T sum = (unsigned T)x + (unsigned T)y;
> to be used.
Thanks, let me update in v3.
Pan
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Thursday, August 22, 2024 5:47 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] Match: Support form 1 for scalar signed integer .SAT_ADD
On Wed, Aug 7, 2024 at 11:31 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD. Aka below example:
>
> Form 1:
> #define DEF_SAT_S_ADD_FMT_1(T, MIN, MAX) \
> T __attribute__((noinline)) \
> sat_s_add_##T##_fmt_1 (T x, T y) \
> { \
> T sum = x + y; \
> return (x ^ y) < 0 \
> ? sum \
> : (sum ^ x) >= 0 \
> ? sum \
> : x < 0 ? MIN : MAX; \
> }
Wow. I wonder why this isn't simplified to never saturate since
signed x + y has undefined behavior on overflow? So I'd
expect instead
T sum = (unsigned T)x + (unsigned T)y;
to be used.
> DEF_SAT_S_ADD_FMT_1(int64_t, INT64_MIN, INT64_MAX)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
> 4 │ __attribute__((noinline))
> 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> 6 │ {
> 7 │ int64_t sum;
> 8 │ long int _1;
> 9 │ long int _2;
> 10 │ int64_t _3;
> 11 │ _Bool _8;
> 12 │ long int _9;
> 13 │ long int _10;
> 14 │ long int _11;
> 15 │ long int _12;
> 16 │ long int _13;
> 17 │
> 18 │ <bb 2> [local count: 1073741824]:
> 19 │ sum_6 = x_4(D) + y_5(D);
> 20 │ _1 = x_4(D) ^ y_5(D);
> 21 │ _2 = x_4(D) ^ sum_6;
> 22 │ _12 = ~_1;
> 23 │ _13 = _2 & _12;
> 24 │ if (_13 < 0)
> 25 │ goto <bb 3>; [41.00%]
> 26 │ else
> 27 │ goto <bb 4>; [59.00%]
> 28 │
> 29 │ <bb 3> [local count: 259738147]:
> 30 │ _8 = x_4(D) < 0;
> 31 │ _9 = (long int) _8;
> 32 │ _10 = -_9;
> 33 │ _11 = _10 ^ 9223372036854775807;
> 34 │
> 35 │ <bb 4> [local count: 1073741824]:
> 36 │ # _3 = PHI <sum_6(2), _11(3)>
> 37 │ return _3;
> 38 │
> 39 │ }
>
> After this patch:
> 4 │ __attribute__((noinline))
> 5 │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> 6 │ {
> 7 │ int64_t _4;
> 8 │
> 9 │ ;; basic block 2, loop depth 0
> 10 │ ;; pred: ENTRY
> 11 │ _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> 12 │ return _4;
> 13 │ ;; succ: EXIT
> 14 │
> 15 │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add the matching for signed .SAT_ADD.
> * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> matching func decl.
> (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> to ...
> (match_saturation_add): ... here.
> (math_opts_dom_walker::after_dom_children): Update the above renamed
> func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 17 ++++++++++++++++
> gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
> 2 files changed, 54 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..8b8a5dbcfe3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> }
> (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
>
> +/* Signed saturation add, case 1:
> + T sum = X + Y;
> + SAT_S_ADD = (X ^ Y) < 0
> + ? sum
> + : (sum ^ x) >= 0
> + ? sum
> + : x < 0 ? MIN : MAX; */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert?@2 (plus:c (convert? @0)
> + (convert? @1))))
> + (bit_not (bit_xor:c @0 @1)))
> + integer_zerop)
> + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
> + @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> + && types_match (type, @0, @1))))
> +
> /* x > y && x != XXX_MIN --> x > y
> x > y && x == XXX_MIN --> false . */
> (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..f39c88741a4 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> static void
> build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> }
>
> /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
> * <bb 2> :
> * _1 = x_3(D) + y_4(D);
> * if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> * # _2 = PHI <255(2), _1(3)>
> * =>
> * <bb 4> [local count: 1073741824]:
> - * _2 = .SAT_ADD (x_4(D), y_5(D)); */
> + * _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + * _1 = x_5(D) ^ y_6(D);
> + * _13 = x_5(D) + y_6(D);
> + * _3 = x_5(D) ^ _13;
> + * _2 = ~_1;
> + * _7 = _2 & _3;
> + * if (_7 < 0)
> + * goto <bb 3>; [59.00%]
> + * else
> + * goto <bb 4>; [41.00%]
> + * ;; succ: 4
> + * ;; 3
> + * ;; basic block 3, loop depth 0
> + * ;; pred: 2
> + * _9 = x_5(D) < 0;
> + * _10 = (long int) _9;
> + * _11 = -_10;
> + * _12 = _11 ^ 9223372036854775807;
> + * ;; succ: 4
> + * ;; basic block 4, loop depth 0
> + * ;; pred: 2
> + * ;; 3
> + * # _4 = PHI <_13(2), _12(3)>
> + * =>
> + * ;; basic block 2, loop depth 0
> + * ;; pred: ENTRY
> + * _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
>
> static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> {
> if (gimple_phi_num_args (phi) != 2)
> return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> tree ops[2];
> tree phi_result = gimple_phi_result (phi);
>
> - if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> + || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> ops[0], ops[1]);
> }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> gsi_next (&psi))
> {
> gimple_stmt_iterator gsi = gsi_after_labels (bb);
> - match_unsigned_saturation_add (&gsi, psi.phi ());
> + match_saturation_add (&gsi, psi.phi ());
> match_unsigned_saturation_sub (&gsi, psi.phi ());
> }
>
> --
> 2.43.0
>
@@ -3311,6 +3311,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
(if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
+/* Signed saturation add, case 1:
+ T sum = X + Y;
+ SAT_S_ADD = (X ^ Y) < 0
+ ? sum
+ : (sum ^ x) >= 0
+ ? sum
+ : x < 0 ? MIN : MAX; */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (convert?@2 (plus:c (convert? @0)
+ (convert? @1))))
+ (bit_not (bit_xor:c @0 @1)))
+ integer_zerop)
+ (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+ @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
@@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
static void
build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
tree lhs, tree op_0, tree op_1)
@@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
}
/*
- * Try to match saturation unsigned add with PHI.
+ * Try to match saturation add with PHI.
+ * For unsigned integer:
* <bb 2> :
* _1 = x_3(D) + y_4(D);
* if (_1 >= x_3(D))
@@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
* # _2 = PHI <255(2), _1(3)>
* =>
* <bb 4> [local count: 1073741824]:
- * _2 = .SAT_ADD (x_4(D), y_5(D)); */
+ * _2 = .SAT_ADD (x_4(D), y_5(D));
+ *
+ * For signed integer:
+ * _1 = x_5(D) ^ y_6(D);
+ * _13 = x_5(D) + y_6(D);
+ * _3 = x_5(D) ^ _13;
+ * _2 = ~_1;
+ * _7 = _2 & _3;
+ * if (_7 < 0)
+ * goto <bb 3>; [59.00%]
+ * else
+ * goto <bb 4>; [41.00%]
+ * ;; succ: 4
+ * ;; 3
+ * ;; basic block 3, loop depth 0
+ * ;; pred: 2
+ * _9 = x_5(D) < 0;
+ * _10 = (long int) _9;
+ * _11 = -_10;
+ * _12 = _11 ^ 9223372036854775807;
+ * ;; succ: 4
+ * ;; basic block 4, loop depth 0
+ * ;; pred: 2
+ * ;; 3
+ * # _4 = PHI <_13(2), _12(3)>
+ * =>
+ * ;; basic block 2, loop depth 0
+ * ;; pred: ENTRY
+ * _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
static void
-match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
+match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
{
if (gimple_phi_num_args (phi) != 2)
return;
@@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
tree ops[2];
tree phi_result = gimple_phi_result (phi);
- if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
+ if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+ || gimple_signed_integer_sat_add (phi_result, ops, NULL))
build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
ops[0], ops[1]);
}
@@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
gsi_next (&psi))
{
gimple_stmt_iterator gsi = gsi_after_labels (bb);
- match_unsigned_saturation_add (&gsi, psi.phi ());
+ match_saturation_add (&gsi, psi.phi ());
match_unsigned_saturation_sub (&gsi, psi.phi ());
}