[v2,1/2] Match: Add form 5 of unsigned SAT_MUL for widen-mul

Message ID 20250916032320.950614-2-pan2.li@intel.com
State Committed
Commit f9c72bc02a84749b55299791b118d9fb9637f16b
Headers
Series Support unsigned scalar SAT_MUL form 5 |

Commit Message

Li, Pan2 Sept. 16, 2025, 3:19 a.m. UTC
  From: Pan Li <pan2.li@intel.com>

This patch would like to try to match the unsigned
SAT_MUL form 5, aka below:

  #define DEF_SAT_U_MUL_FMT_5(NT, WT)             \
  NT __attribute__((noinline))                    \
  sat_u_mul_##NT##_from_##WT##_fmt_5 (NT a, NT b) \
  {                                               \
    WT x = (WT)a * (WT)b;                         \
    NT hi = x >> (sizeof(NT) * 8);                \
    NT lo = (NT)x;                                \
    return lo | -!!hi;                            \
  }

  while WT is uint128_t, NT is uint8_t, uint16_t, uint32_t or uint64_t.

gcc/ChangeLog:

	* match.pd: Add pattern for SAT_MUL form 5.
	* tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children):
	Try match pattern for IOR.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd              | 25 +++++++++++++++++++++++++
 gcc/tree-ssa-math-opts.cc |  1 +
 2 files changed, 26 insertions(+)
  

Comments

Richard Biener Sept. 16, 2025, 12:05 p.m. UTC | #1
On Tue, Sep 16, 2025 at 5:22 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to try to match the the unsigned
> SAT_MUL form 4, aka below:
>
>   #define DEF_SAT_U_MUL_FMT_5(NT, WT)             \
>   NT __attribute__((noinline))                    \
>   sat_u_mul_##NT##_from_##WT##_fmt_5 (NT a, NT b) \
>   {                                               \
>     WT x = (WT)a * (WT)b;                         \
>     NT hi = x >> (sizeof(NT) * 8);                \
>     NT lo = (NT)x;                                \
>     return lo | -!!hi;                            \
>   }
>
>   while WT is uint128_t, T is uint8_t, uint16_t, uint32_t or uint64_t.

OK.

> gcc/ChangeLog:
>
>         * match.pd: Add pattern for SAT_MUL form 5.
>         * tree-ssa-math-opts.cc (math_opts_dom_walker::after_dom_children):
>         Try match pattern for IOR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 25 +++++++++++++++++++++++++
>  gcc/tree-ssa-math-opts.cc |  1 +
>  2 files changed, 26 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 53320d7614c..2e629fd31ce 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3695,6 +3695,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        bool c2_is_type_precision_p = c2 == prec;
>       }
>       (if (widen_prec > prec && c2_is_type_precision_p && c4_is_max_p)))))
> +  (match (unsigned_integer_sat_mul @0 @1)
> +   /* SAT_U_MUL (X, Y) = {
> +       WT x = (WT)a * (WT)b;
> +       NT hi = x >> (sizeof(NT) * 8);
> +       NT lo = (NT)x;
> +       return lo | -!!hi;
> +      } while WT is uint128_t, T is uint8_t, uint16_t, uint32_t or uint64_t.  */
> +   (convert1?
> +    (bit_ior (convert? (negate (convert (ne (convert (rshift @3 INTEGER_CST@2))
> +                                           integer_zerop))))
> +            (convert (widen_mult:c@3 (convert@4 @0)
> +                                     (convert@5 @1)))))
> +   (if (types_match (type, @0, @1))
> +    (with
> +     {
> +      unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3));
> +      unsigned cvt4_prec = TYPE_PRECISION (TREE_TYPE (@4));
> +      unsigned cvt5_prec = TYPE_PRECISION (TREE_TYPE (@5));
> +      bool widen_mult_p = cvt4_prec == cvt5_prec && widen_prec == cvt5_prec * 2;
> +
> +      unsigned c2 = tree_to_uhwi (@2);
> +      unsigned prec = TYPE_PRECISION (type);
> +      bool c2_is_type_precision_p = c2 == prec;
> +     }
> +     (if (widen_mult_p && c2_is_type_precision_p)))))
>  )
>
>  /* The boundary condition for case 10: IMM = 1:
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 6fafa1b4096..344ffddd385 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -6529,6 +6529,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               break;
>
>             case BIT_IOR_EXPR:
> +             match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
>               match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
>               match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
> --
> 2.43.0
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 53320d7614c..2e629fd31ce 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3695,6 +3695,31 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       bool c2_is_type_precision_p = c2 == prec;
      }
      (if (widen_prec > prec && c2_is_type_precision_p && c4_is_max_p)))))
+  (match (unsigned_integer_sat_mul @0 @1)
+   /* Form 5: SAT_U_MUL (a, b) = {
+	WT x = (WT)a * (WT)b;
+	NT hi = x >> (sizeof(NT) * 8);
+	NT lo = (NT)x;
+	return lo | -!!hi;
+      } while WT is uint128_t, NT is uint8_t, uint16_t, uint32_t or uint64_t.  */
+   (convert1?
+    (bit_ior (convert? (negate (convert (ne (convert (rshift @3 INTEGER_CST@2))
+					    integer_zerop))))
+	     (convert (widen_mult:c@3 (convert@4 @0)
+				      (convert@5 @1)))))
+   (if (types_match (type, @0, @1))
+    (with
+     {
+      unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3));
+      unsigned cvt4_prec = TYPE_PRECISION (TREE_TYPE (@4));
+      unsigned cvt5_prec = TYPE_PRECISION (TREE_TYPE (@5));
+      bool widen_mult_p = cvt4_prec == cvt5_prec && widen_prec == cvt5_prec * 2;
+
+      unsigned c2 = tree_to_uhwi (@2);  /* Shift count applied to @3.  */
+      unsigned prec = TYPE_PRECISION (type);
+      bool c2_is_type_precision_p = c2 == prec;
+     }
+     (if (widen_mult_p && c2_is_type_precision_p)))))
 )
 
 /* The boundary condition for case 10: IMM = 1:
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 6fafa1b4096..344ffddd385 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -6529,6 +6529,7 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
 	      break;
 
 	    case BIT_IOR_EXPR:
+	      match_unsigned_saturation_mul (&gsi, as_a<gassign *> (stmt));
 	      match_saturation_add_with_assign (&gsi, as_a<gassign *> (stmt));
 	      match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
 	      /* fall-through  */