[v1,1/2] Match: Support form 2 for vector signed integer .SAT_ADD

Message ID 20240920105729.1058948-1-pan2.li@intel.com
State Committed
Commit 4fc92480675bd071dd3edbaa78bb73525137c4a6
Headers
Series [v1,1/2] Match: Support form 2 for vector signed integer .SAT_ADD |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed

Commit Message

Li, Pan2 Sept. 20, 2024, 10:57 a.m. UTC
  From: Pan Li <pan2.li@intel.com>

This patch adds support for form 2 of the vector signed
integer .SAT_ADD, as shown in the example below:

Form 2:
  #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX)                     \
  void __attribute__((noinline))                                       \
  vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
  {                                                                    \
    unsigned i;                                                        \
    for (i = 0; i < limit; i++)                                        \
      {                                                                \
        T x = op_1[i];                                                 \
        T y = op_2[i];                                                 \
        T sum = (UT)x + (UT)y;                                         \
        if ((x ^ y) < 0 || (sum ^ x) >= 0)                             \
          out[i] = sum;                                                \
        else                                                           \
          out[i] = x < 0 ? MIN : MAX;                                  \
      }                                                                \
  }

DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
 104   │   loop_len_79 = MIN_EXPR <ivtmp.51_53, POLY_INT_CST [16, 16]>;
 105   │   _50 = &MEM <vector([16,16]) signed char> [(int8_t *)vectp_op_1.9_77];
 106   │   vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 0);
 107   │   _70 = vect_x_18.11_80 >> 7;
 108   │   vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_18.11_80);
 109   │   _26 = (void *) ivtmp.47_20;
 110   │   _27 = &MEM <vector([16,16]) signed char> [(int8_t *)_26];
 111   │   vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 0);
 112   │   vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84;
 113   │   mask__50.23_92 = vect__7.21_90 >= { 0, ... };
 114   │   vect_y.16_85 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_20.15_84);
 115   │   vect__6.17_86 = vect_x.12_81 + vect_y.16_85;
 116   │   vect_sum_21.18_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_86);
 117   │   vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87;
 118   │   mask__45.20_89 = vect__8.19_88 < { 0, ... };
 119   │   mask__44.24_93 = mask__45.20_89 & mask__50.23_92;
 120   │   _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, vect_sum_21.18_87);
 121   │   _60 = (void *) ivtmp.49_6;
 122   │   _61 = &MEM <vector([16,16]) signed char> [(int8_t *)_60];
 123   │   .MASK_LEN_STORE (_61, 8B, { -1, ... }, loop_len_79, 0, _40);
 124   │   vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16];
 125   │   ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16];
 126   │   ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16];
 127   │   ivtmp.51_98 = ivtmp.51_53;
 128   │   ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 18446744073709551600];

After this patch:
  88   │   _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]);
  89   │   vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, _103, 0);
  90   │   vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, _103, 0);
  91   │   vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94);
  92   │   .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, vect_patt_49.15_95);
  93   │   vectp_op_1.9_89 = vectp_op_1.9_88 + _103;
  94   │   vectp_op_2.12_93 = vectp_op_2.12_92 + _103;
  95   │   vectp_out.16_98 = vectp_out.16_97 + _103;
  96   │   ivtmp_102 = ivtmp_101 - _103;

The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

	* match.pd: Add case 5 for signed .SAT_ADD matching.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
  

Comments

Richard Biener Sept. 20, 2024, 11:43 a.m. UTC | #1
On Fri, Sep 20, 2024 at 12:58 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 2 of the vector signed
> integer .SAT_ADD.  Aka below example:
>
> Form 2:
>   #define DEF_VEC_SAT_S_ADD_FMT_2(T, UT, MIN, MAX)                     \
>   void __attribute__((noinline))                                       \
>   vec_sat_s_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
>   {                                                                    \
>     unsigned i;                                                        \
>     for (i = 0; i < limit; i++)                                        \
>       {                                                                \
>         T x = op_1[i];                                                 \
>         T y = op_2[i];                                                 \
>         T sum = (UT)x + (UT)y;                                         \
>         if ((x ^ y) < 0 || (sum ^ x) >= 0)                             \
>           out[i] = sum;                                                \
>         else                                                           \
>           out[i] = x < 0 ? MIN : MAX;                                  \
>       }                                                                \
>   }
>
> DEF_VEC_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX)
>
> Before this patch:
>  104   │   loop_len_79 = MIN_EXPR <ivtmp.51_53, POLY_INT_CST [16, 16]>;
>  105   │   _50 = &MEM <vector([16,16]) signed char> [(int8_t *)vectp_op_1.9_77];
>  106   │   vect_x_18.11_80 = .MASK_LEN_LOAD (_50, 8B, { -1, ... }, loop_len_79, 0);
>  107   │   _70 = vect_x_18.11_80 >> 7;
>  108   │   vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_18.11_80);
>  109   │   _26 = (void *) ivtmp.47_20;
>  110   │   _27 = &MEM <vector([16,16]) signed char> [(int8_t *)_26];
>  111   │   vect_y_20.15_84 = .MASK_LEN_LOAD (_27, 8B, { -1, ... }, loop_len_79, 0);
>  112   │   vect__7.21_90 = vect_x_18.11_80 ^ vect_y_20.15_84;
>  113   │   mask__50.23_92 = vect__7.21_90 >= { 0, ... };
>  114   │   vect_y.16_85 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_20.15_84);
>  115   │   vect__6.17_86 = vect_x.12_81 + vect_y.16_85;
>  116   │   vect_sum_21.18_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_86);
>  117   │   vect__8.19_88 = vect_x_18.11_80 ^ vect_sum_21.18_87;
>  118   │   mask__45.20_89 = vect__8.19_88 < { 0, ... };
>  119   │   mask__44.24_93 = mask__45.20_89 & mask__50.23_92;
>  120   │   _40 = .COND_XOR (mask__44.24_93, _70, { 127, ... }, vect_sum_21.18_87);
>  121   │   _60 = (void *) ivtmp.49_6;
>  122   │   _61 = &MEM <vector([16,16]) signed char> [(int8_t *)_60];
>  123   │   .MASK_LEN_STORE (_61, 8B, { -1, ... }, loop_len_79, 0, _40);
>  124   │   vectp_op_1.9_78 = vectp_op_1.9_77 + POLY_INT_CST [16, 16];
>  125   │   ivtmp.47_4 = ivtmp.47_20 + POLY_INT_CST [16, 16];
>  126   │   ivtmp.49_21 = ivtmp.49_6 + POLY_INT_CST [16, 16];
>  127   │   ivtmp.51_98 = ivtmp.51_53;
>  128   │   ivtmp.51_8 = ivtmp.51_53 + POLY_INT_CST [18446744073709551600, 18446744073709551600];
>
> After this patch:
>   88   │   _103 = .SELECT_VL (ivtmp_101, POLY_INT_CST [16, 16]);
>   89   │   vect_x_18.11_90 = .MASK_LEN_LOAD (vectp_op_1.9_88, 8B, { -1, ... }, _103, 0);
>   90   │   vect_y_20.14_94 = .MASK_LEN_LOAD (vectp_op_2.12_92, 8B, { -1, ... }, _103, 0);
>   91   │   vect_patt_49.15_95 = .SAT_ADD (vect_x_18.11_90, vect_y_20.14_94);
>   92   │   .MASK_LEN_STORE (vectp_out.16_97, 8B, { -1, ... }, _103, 0, vect_patt_49.15_95);
>   93   │   vectp_op_1.9_89 = vectp_op_1.9_88 + _103;
>   94   │   vectp_op_2.12_93 = vectp_op_2.12_92 + _103;
>   95   │   vectp_out.16_98 = vectp_out.16_97 + _103;
>   96   │   ivtmp_102 = ivtmp_101 - _103;
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         * match.pd: Add the case 3 for signed .SAT_ADD matching.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index fdb59ff0d44..940292d0d49 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3251,6 +3251,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Signed saturation add, case 5:
> +   T sum = (T)((UT)X + (UT)Y);
> +   SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
> +
> +   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
> +                                                        (nop_convert @1))))
> +                      integer_zerop)
> +                  (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
> +       (bit_xor:c (nop_convert (negate (nop_convert (convert
> +                                                     (lt @0 integer_zerop)))))
> +                  max_value)
> +       @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
> +
>  /* Unsigned saturation sub, case 1 (branch with gt):
>     SAT_U_SUB = X > Y ? X - Y : 0  */
>  (match (unsigned_integer_sat_sub @0 @1)
> --
> 2.43.0
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index fdb59ff0d44..940292d0d49 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3251,6 +3251,22 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Signed saturation add, case 5:
+   T sum = (T)((UT)X + (UT)Y);
+   SAT_S_ADD = (X ^ sum) < 0 & ~((X ^ Y) < 0) ? (-(T)(X < 0) ^ MAX) : sum;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
+							 (nop_convert @1))))
+		       integer_zerop)
+		   (bit_not (lt (bit_xor:c @0 @1) integer_zerop)))
+	(bit_xor:c (nop_convert (negate (nop_convert (convert
+						      (lt @0 integer_zerop)))))
+		   max_value)
+	@2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
+
 /* Unsigned saturation sub, case 1 (branch with gt):
    SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)