[v1,1/2] Match: Support form 2 for vector signed integer .SAT_ADD
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
From: Pan Li <pan2.li@intel.com>
This patch would like to support the form 2 of the vector signed
integer .SAT_ADD. Aka below example:
Form 2:
#define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
void __attribute__((noinline)) \
vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
{ \
unsigned i; \
for (i = 0; i < limit; i++) \
{ \
T x = op_1[i]; \
T y = op_2[i]; \
T sum = (UT)x + (UT)y; \
if ((x ^ y) < 0 || (sum ^ x) >= 0) \
out[i] = sum; \
else \
out[i] = x < 0 ? MIN : MAX; \
} \
}
DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)
Before this patch:
104 │ loop_len_79 = MIN_EXPR <ivtmp.51_53, POLY_INT_CST [16, 16]>;
105 │ _50 = &MEM <vector([16,16]) signed char> [(int8_t *)vectp_op_1.9_77];
106 │ vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 0);
107 │ _70 = vect_x_18.11_80 >> 7;
108 │ vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_18.11_80);
109 │ _26 = (void *) ivtmp.47_20;
110 │ _27 = &MEM <vector([16,16]) signed char> [(int8_t *)_26];
111 │ vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 0);
112 │ vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84;
113 │ mask__50.23_92 = vect__7.21_90 >= { 0, ... };
114 │ vect_y.16_85 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_20.15_84);
115 │ vect__6.17_86 = vect_x.12_81 + vect_y.16_85;
116 │ vect_sum_21.18_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_86);
117 │ vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87;
118 │ mask__45.20_89 = vect__8.19_88 < { 0, ... };
119 │ mask__44.24_93 = mask__45.20_89 & mask__50.23_92;
120 │ _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, vect_sum_21.18_87);
121 │ _60 = (void *) ivtmp.49_6;
122 │ _61 = &MEM <vector([16,16]) signed char> [(int8_t *)_60];
123 │ .MASK_LEN_STORE (_61, 8B, { -1, ... }, loop_len_79, 0, _40);
124 │ vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16];
125 │ ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16];
126 │ ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16];
127 │ ivtmp.51_98 = ivtmp.51_53;
128 │ ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 18446744073709551600];
After this patch:
88 │ _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]);
89 │ vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, _103, 0);
90 │ vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, _103, 0);
91 │ vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94);
92 │ .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, vect_patt_49.15_95);
93 │ vectp_op_1.9_89 = vectp_op_1.9_88 + _103;
94 │ vectp_op_2.12_93 = vectp_op_2.12_92 + _103;
95 │ vectp_out.16_98 = vectp_out.16_97 + _103;
96 │ ivtmp_102 = ivtmp_101 - _103;
The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.
gcc/ChangeLog:
* match.pd: Add the case 3 for signed .SAT_ADD matching.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/match.pd | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
Comments
On Fri, Sep 20, 2024 at 12:58 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 2 of the vector signed
> integer .SAT_ADD. Aka below example:
>
> Form 2:
> #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \
> void __attribute__((noinline)) \
> vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
> { \
> unsigned i; \
> for (i = 0; i < limit; i++) \
> { \
> T x = op_1[i]; \
> T y = op_2[i]; \
> T sum = (UT)x + (UT)y; \
> if ((x ^ y) < 0 || (sum ^ x) >= 0) \
> out[i] = sum; \
> else \
> out[i] = x < 0 ? MIN : MAX; \
> } \
> }
>
> DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)
>
> Before this patch:
> 104 │ loop_len_79 = MIN_EXPR <ivtmp.51_53, POLY_INT_CST [16, 16]>;
> 105 │ _50 = &MEM <vector([16,16]) signed char> [(int8_t *)vectp_op_1.9_77];
> 106 │ vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 0);
> 107 │ _70 = vect_x_18.11_80 >> 7;
> 108 │ vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_18.11_80);
> 109 │ _26 = (void *) ivtmp.47_20;
> 110 │ _27 = &MEM <vector([16,16]) signed char> [(int8_t *)_26];
> 111 │ vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 0);
> 112 │ vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84;
> 113 │ mask__50.23_92 = vect__7.21_90 >= { 0, ... };
> 114 │ vect_y.16_85 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_20.15_84);
> 115 │ vect__6.17_86 = vect_x.12_81 + vect_y.16_85;
> 116 │ vect_sum_21.18_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_86);
> 117 │ vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87;
> 118 │ mask__45.20_89 = vect__8.19_88 < { 0, ... };
> 119 │ mask__44.24_93 = mask__45.20_89 & mask__50.23_92;
> 120 │ _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, vect_sum_21.18_87);
> 121 │ _60 = (void *) ivtmp.49_6;
> 122 │ _61 = &MEM <vector([16,16]) signed char> [(int8_t *)_60];
> 123 │ .MASK_LEN_STORE (_61, 8B, { -1, ... }, loop_len_79, 0, _40);
> 124 │ vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16];
> 125 │ ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16];
> 126 │ ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16];
> 127 │ ivtmp.51_98 = ivtmp.51_53;
> 128 │ ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 18446744073709551600];
>
> After this patch:
> 88 │ _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]);
> 89 │ vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, _103, 0);
> 90 │ vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, _103, 0);
> 91 │ vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94);
> 92 │ .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, vect_patt_49.15_95);
> 93 │ vectp_op_1.9_89 = vectp_op_1.9_88 + _103;
> 94 │ vectp_op_2.12_93 = vectp_op_2.12_92 + _103;
> 95 │ vectp_out.16_98 = vectp_out.16_97 + _103;
> 96 │ ivtmp_102 = ivtmp_101 - _103;
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
OK.
Thanks,
Richard.
> gcc/ChangeLog:
>
> * match.pd: Add the case 3 for signed .SAT_ADD matching.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
> gcc/match.pd | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index fdb59ff0d44..940292d0d49 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> && types_match (type, @0, @1))))
>
> +/* Signed saturation add, case 5:
> + T sum = (T)((UT)X + (UT)Y);
> + SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
> +
> + The T and UT are type pair like T=int8_t, UT=uint8_t. */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
> + (nop_convert @1))))
> + integer_zerop)
> + (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
> + (bit_xor:c (nop_convert (negate (nop_convert (convert
> + (lt @0 integer_zerop)))))
> + max_value)
> + @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
> +
> /* Unsigned saturation sub, case 1 (branch with gt):
> SAT_U_SUB = X > Y ? X - Y : 0 */
> (match (unsigned_integer_sat_sub @0 @1)
> --
> 2.43.0
>
@@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* Signed saturation add, case 5:
+ T sum = (T)((UT)X + (UT)Y);
+ SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
+
+ The T and UT are type pair like T=int8_t, UT=uint8_t. */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
+ (nop_convert @1))))
+ integer_zerop)
+ (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
+ (bit_xor:c (nop_convert (negate (nop_convert (convert
+ (lt @0 integer_zerop)))))
+ max_value)
+ @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
+
/* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0 */
(match (unsigned_integer_sat_sub @0 @1)