vect: Missed opportunity to use [SU]ABD

Message ID DB7PR08MB34527EC55B9EA0A097EC96C3F5769@DB7PR08MB3452.eurprd08.prod.outlook.com
State New
Headers
Series vect: Missed opportunity to use [SU]ABD |

Commit Message

Oluwatamilore Adebayo May 9, 2023, 4:07 p.m. UTC
  From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
From: oluade01 <oluwatamilore.adebayo@arm.com>
Date: Fri, 14 Apr 2023 10:24:43 +0100
Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
 4 files changed, 234 insertions(+), 31 deletions(-)
  

Comments

Richard Sandiford May 10, 2023, 9:01 a.m. UTC | #1
Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> From: oluade01 <oluwatamilore.adebayo@arm.com>
> Date: Fri, 14 Apr 2023 10:24:43 +0100
> Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
>         * doc/md.texi (sabd, uabd): Document them.
>         * internal-fn.def (ABD): Use new optab.
>         * optabs.def (sabd_optab, uabd_optab): New optabs,
>         * tree-vect-patterns.cc (vect_recog_absolute_difference):
>         Recognize the following idiom abs (a - b).
>         (vect_recog_sad_pattern): Refactor to use
>         vect_recog_absolute_difference.
>         (vect_recog_abd_pattern): Use patterns found by
>         vect_recog_absolute_difference to build a new ABD
>         internal call.
> ---
>  gcc/doc/md.texi           |  10 ++
>  gcc/internal-fn.def       |   3 +
>  gcc/optabs.def            |   2 +
>  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 234 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
>  Vector shift and rotate instructions that take vectors as operand 2
>  instead of a scalar type.
>
> +@cindex @code{uabd@var{m}} instruction pattern
> +@cindex @code{sabd@var{m}} instruction pattern
> +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> +Signed and unsigned absolute difference instructions.  These
> +instructions find the difference between operands 1 and 2
> +then return the absolute value.  A C code equivalent would be:
> +@smallexample
> +op0 = abs (op0 - op1)

op0 = abs (op1 - op2)

But that isn't the correct calculation for unsigned (where abs doesn't
really work).  It also doesn't handle some cases correctly for signed.

I think it's more:

  op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1

or (conceptually) max minus min.

E.g. for 16-bit values, the absolute difference between signed 0x7fff
and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
to signed).  But, ignoring undefined behaviour:

  0x7fff - 0x8000 = -1
  abs(-1) = 1

which gives the wrong answer.

We might still be able to fold C abs(a - b) to abd for signed a and b
by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
can't do it for -fwrapv.

Richi knows better than me what would be appropriate here.

Thanks,
Richard

> +@end smallexample
> +
>  @cindex @code{avg@var{m}3_floor} instruction pattern
>  @cindex @code{uavg@var{m}3_floor} instruction pattern
>  @item @samp{avg@var{m}3_floor}
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> +                             sabd, uabd, binary)
> +
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
>                               savg_floor, uavg_floor, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
>
> +OPTAB_D (uabd_optab, "uabd$a3")
> +OPTAB_D (sabd_optab, "sabd$a3")
>  OPTAB_D (savg_floor_optab, "avg$a3_floor")
>  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
>  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>
> +/* Look for the following pattern
> +       X = x[i]
> +       Y = y[i]
> +       DIFF = X - Y
> +       DAD = ABS_EXPR<DIFF>
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +                               tree *half_type, bool reject_unsigned,
> +                               vect_unpromoted_value unprom[2],
> +                               tree diff_oprnds[2])
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> +    return false;
> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +                                                   &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    if (!reject_unsigned)
> +      return false;
> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  bool assigned_oprnds = false;
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> +  {
> +    assigned_oprnds = true;
> +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +  }
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> +                            WIDEN_MINUS_EXPR,
> +                            false, 2, unprom, half_type))
> +  {
> +    if (diff_oprnds && !assigned_oprnds)
> +    {
> +      diff_oprnds[0] = unprom[0].op;
> +      diff_oprnds[1] = unprom[1].op;
> +    }
> +  }
> +  else if (!assigned_oprnds)
> +  {
> +    return false;
> +  }
> +  else
> +  {
> +    *half_type = NULL_TREE;
> +  }
> +
> +  return true;
> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
> -
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> -
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -                                                   &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
>
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> +  vect_unpromoted_value unprom[2];
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +                                      true, unprom, NULL))
>      return NULL;
>
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> -                            false, 2, unprom, &half_type))
> +  if (!half_type)
>      return NULL;
>
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +       SABD_EXPR<x, y, out>
> +       UABD_EXPR<x, y, out>
> +
> +      UABD expressions are used when the input types are
> +      narrower than the output types or the output type is narrower
> +      than 32 bits
> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +               stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +       X = x[i]
> +       Y = y[i]
> +       DIFF = X - Y
> +       DAD = ABS_EXPR<DIFF>
> +       out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  gassign *abs_stmt = last_stmt;
> +  if (gimple_assign_cast_p (last_stmt))
> +  {
> +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +    if (!SSA_VAR_P (last_rhs))
> +      return NULL;
> +
> +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> +    if (!abs_stmt)
> +      return NULL;
> +  }
> +
> +  vect_unpromoted_value unprom[2];
> +  tree diff_oprnds[2];
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +                                      false, unprom, diff_oprnds))
> +    return NULL;
> +
> +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> +
> +  tree abd_oprnds[2];
> +  if (half_type)
> +  {
> +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> +      return NULL;
> +
> +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> +    {
> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> +                          get_vectype_for_scalar_type (vinfo, half_type));
> +    }
> +    else
> +    {
> +      abd_oprnds[0] = diff_oprnds[0];
> +      abd_oprnds[1] = diff_oprnds[1];
> +    }
> +  }
> +  else
> +  {
> +    if (unprom[0].op && unprom[1].op
> +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> +       || !SAME_TYPE (unprom[0].type, out_type)))
> +      return NULL;
> +
> +    unprom[0].op = diff_oprnds[0];
> +    unprom[1].op = diff_oprnds[1];
> +    tree signed_out = signed_type_for (out_type);
> +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +                        signed_out, unprom, signed_out_vectype);
> +
> +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> +      return NULL;
> +  }
> +
> +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +                                         OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> +
> +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +                                               abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, var);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +  return abd_stmt;
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>
> @@ -6439,6 +6626,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */
> --
> 2.25.1
  
Richard Biener May 10, 2023, 9:49 a.m. UTC | #2
On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > From: oluade01 <oluwatamilore.adebayo@arm.com>
> > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >         * doc/md.texi (sabd, uabd): Document them.
> >         * internal-fn.def (ABD): Use new optab.
> >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >         Recognize the following idiom abs (a - b).
> >         (vect_recog_sad_pattern): Refactor to use
> >         vect_recog_absolute_difference.
> >         (vect_recog_abd_pattern): Use patterns found by
> >         vect_recog_absolute_difference to build a new ABD
> >         internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 234 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = abs (op0 - op1)
>
> op0 = abs (op1 - op2)
>
> But that isn't the correct calculation for unsigned (where abs doesn't
> really work).  It also doesn't handle some cases correctly for signed.
>
> I think it's more:
>
>   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>
> or (conceptually) max minus min.
>
> E.g. for 16-bit values, the absolute difference between signed 0x7fff
> and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> to signed).  But, ignoring undefined behaviour:
>
>   0x7fff - 0x8000 = -1
>   abs(-1) = 1
>
> which gives the wrong answer.
>
> We might still be able to fold C abs(a - b) to abd for signed a and b
> by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> can't do it for -fwrapv.
>
> Richi knows better than me what would be appropriate here.

The question is what does the hardware do?  For the widening [us]sad it's
obvious since the difference is computed in a wider signed mode and the
absolute value always fits.

So what does it actually do, esp. when the difference yields 0x8000?

Richard.

>
> Thanks,
> Richard
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                             sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                               savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                               tree *half_type, bool reject_unsigned,
> > +                               vect_unpromoted_value unprom[2],
> > +                               tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                   &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type))
> > +    if (!reject_unsigned)
> > +      return false;
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  bool assigned_oprnds = false;
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +  {
> > +    assigned_oprnds = true;
> > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +  }
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                            WIDEN_MINUS_EXPR,
> > +                            false, 2, unprom, half_type))
> > +  {
> > +    if (diff_oprnds && !assigned_oprnds)
> > +    {
> > +      diff_oprnds[0] = unprom[0].op;
> > +      diff_oprnds[1] = unprom[1].op;
> > +    }
> > +  }
> > +  else if (!assigned_oprnds)
> > +  {
> > +    return false;
> > +  }
> > +  else
> > +  {
> > +    *half_type = NULL_TREE;
> > +  }
> > +
> > +  return true;
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> > -
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                   &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> >
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                            false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +       SABD_EXPR<x, y, out>
> > +       UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > +       out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +  {
> > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +    if (!SSA_VAR_P (last_rhs))
> > +      return NULL;
> > +
> > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +    if (!abs_stmt)
> > +      return NULL;
> > +  }
> > +
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +  {
> > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +      return NULL;
> > +
> > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +    {
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > +    }
> > +    else
> > +    {
> > +      abd_oprnds[0] = diff_oprnds[0];
> > +      abd_oprnds[1] = diff_oprnds[1];
> > +    }
> > +  }
> > +  else
> > +  {
> > +    if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +      return NULL;
> > +
> > +    unprom[0].op = diff_oprnds[0];
> > +    unprom[1].op = diff_oprnds[1];
> > +    tree signed_out = signed_type_for (out_type);
> > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +      return NULL;
> > +  }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                         OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                               abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */
> > --
> > 2.25.1
  
Richard Biener May 10, 2023, 9:51 a.m. UTC | #3
On Wed, May 10, 2023 at 11:49 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
> >
> > Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> > > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > > From: oluade01 <oluwatamilore.adebayo@arm.com>
> > > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> > >
> > > This adds a recognition pattern for the non-widening
> > > absolute difference (ABD).
> > >
> > > gcc/ChangeLog:
> > >
> > >         * doc/md.texi (sabd, uabd): Document them.
> > >         * internal-fn.def (ABD): Use new optab.
> > >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> > >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> > >         Recognize the following idiom abs (a - b).
> > >         (vect_recog_sad_pattern): Refactor to use
> > >         vect_recog_absolute_difference.
> > >         (vect_recog_abd_pattern): Use patterns found by
> > >         vect_recog_absolute_difference to build a new ABD
> > >         internal call.
> > > ---
> > >  gcc/doc/md.texi           |  10 ++
> > >  gcc/internal-fn.def       |   3 +
> > >  gcc/optabs.def            |   2 +
> > >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> > >  4 files changed, 234 insertions(+), 31 deletions(-)
> > >
> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > > --- a/gcc/doc/md.texi
> > > +++ b/gcc/doc/md.texi
> > > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> > >  Vector shift and rotate instructions that take vectors as operand 2
> > >  instead of a scalar type.
> > >
> > > +@cindex @code{uabd@var{m}} instruction pattern
> > > +@cindex @code{sabd@var{m}} instruction pattern
> > > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > > +Signed and unsigned absolute difference instructions.  These
> > > +instructions find the difference between operands 1 and 2
> > > +then return the absolute value.  A C code equivalent would be:
> > > +@smallexample
> > > +op0 = abs (op0 - op1)
> >
> > op0 = abs (op1 - op2)
> >
> > But that isn't the correct calculation for unsigned (where abs doesn't
> > really work).  It also doesn't handle some cases correctly for signed.
> >
> > I think it's more:
> >
> >   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
> >
> > or (conceptually) max minus min.
> >
> > E.g. for 16-bit values, the absolute difference between signed 0x7fff
> > and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> > to signed).  But, ignoring undefined behaviour:
> >
> >   0x7fff - 0x8000 = -1
> >   abs(-1) = 1
> >
> > which gives the wrong answer.
> >
> > We might still be able to fold C abs(a - b) to abd for signed a and b
> > by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> > can't do it for -fwrapv.
> >
> > Richi knows better than me what would be appropriate here.
>
> The question is what does the hardware do?  For the widening [us]sad it's
> obvious since the difference is computed in a wider signed mode and the
> absolute value always fits.
>
> So what does it actually do, esp. when the difference yields 0x8000?

A "sensible" definition would be that it works like the widening [us]sad
and applies truncation to the result (modulo-reducing when the result
isn't always unsigned).

Richard.

> Richard.
>
> >
> > Thanks,
> > Richard
> >
> > > +@end smallexample
> > > +
> > >  @cindex @code{avg@var{m}3_floor} instruction pattern
> > >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> > >  @item @samp{avg@var{m}3_floor}
> > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > > --- a/gcc/internal-fn.def
> > > +++ b/gcc/internal-fn.def
> > > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> > >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> > >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> > >
> > > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > > +                             sabd, uabd, binary)
> > > +
> > >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> > >                               savg_floor, uavg_floor, binary)
> > >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > > --- a/gcc/optabs.def
> > > +++ b/gcc/optabs.def
> > > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> > >  OPTAB_D (extract_last_optab, "extract_last_$a")
> > >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> > >
> > > +OPTAB_D (uabd_optab, "uabd$a3")
> > > +OPTAB_D (sabd_optab, "sabd$a3")
> > >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> > >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> > >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > > --- a/gcc/tree-vect-patterns.cc
> > > +++ b/gcc/tree-vect-patterns.cc
> > > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> > >      }
> > >  }
> > >
> > > +/* Look for the following pattern
> > > +       X = x[i]
> > > +       Y = y[i]
> > > +       DIFF = X - Y
> > > +       DAD = ABS_EXPR<DIFF>
> > > + */
> > > +static bool
> > > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > > +                               tree *half_type, bool reject_unsigned,
> > > +                               vect_unpromoted_value unprom[2],
> > > +                               tree diff_oprnds[2])
> > > +{
> > > +  if (!abs_stmt)
> > > +    return false;
> > > +
> > > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > > +     inside the loop (in case we are analyzing an outer-loop).  */
> > > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > > +    return false;
> > > +
> > > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > > +  if (!abs_oprnd)
> > > +    return false;
> > > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > > +    return false;
> > > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > > +    return false;
> > > +
> > > +  /* Peel off conversions from the ABS input.  This can involve sign
> > > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > > +     or signed promotion, but it can't include unsigned promotion.
> > > +     (Note that ABS of an unsigned promotion should have been folded
> > > +     away before now anyway.)  */
> > > +  vect_unpromoted_value unprom_diff;
> > > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > > +                                                   &unprom_diff);
> > > +  if (!abs_oprnd)
> > > +    return false;
> > > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > > +      && TYPE_UNSIGNED (unprom_diff.type))
> > > +    if (!reject_unsigned)
> > > +      return false;
> > > +
> > > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > > +  if (!diff_stmt_vinfo)
> > > +    return false;
> > > +
> > > +  bool assigned_oprnds = false;
> > > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > > +  {
> > > +    assigned_oprnds = true;
> > > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > > +  }
> > > +
> > > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > > +     inside the loop (in case we are analyzing an outer-loop).  */
> > > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > > +                            WIDEN_MINUS_EXPR,
> > > +                            false, 2, unprom, half_type))
> > > +  {
> > > +    if (diff_oprnds && !assigned_oprnds)
> > > +    {
> > > +      diff_oprnds[0] = unprom[0].op;
> > > +      diff_oprnds[1] = unprom[1].op;
> > > +    }
> > > +  }
> > > +  else if (!assigned_oprnds)
> > > +  {
> > > +    return false;
> > > +  }
> > > +  else
> > > +  {
> > > +    *half_type = NULL_TREE;
> > > +  }
> > > +
> > > +  return true;
> > > +}
> > > +
> > >  /* Convert UNPROM to TYPE and return the result, adding new statements
> > >     to STMT_INFO's pattern definition statements if no better way is
> > >     available.  VECTYPE is the vector form of TYPE.
> > > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> > >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > >       inside the loop (in case we are analyzing an outer-loop).  */
> > >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > > -  if (!abs_stmt
> > > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > > -    return NULL;
> > > -
> > > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > > -  if (TYPE_UNSIGNED (abs_type))
> > > -    return NULL;
> > > -
> > > -  /* Peel off conversions from the ABS input.  This can involve sign
> > > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > > -     or signed promotion, but it can't include unsigned promotion.
> > > -     (Note that ABS of an unsigned promotion should have been folded
> > > -     away before now anyway.)  */
> > > -  vect_unpromoted_value unprom_diff;
> > > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > > -                                                   &unprom_diff);
> > > -  if (!abs_oprnd)
> > > -    return NULL;
> > > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > > -      && TYPE_UNSIGNED (unprom_diff.type))
> > > -    return NULL;
> > >
> > > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > > -  if (!diff_stmt_vinfo)
> > > +  vect_unpromoted_value unprom[2];
> > > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > > +                                      true, unprom, NULL))
> > >      return NULL;
> > >
> > > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > > -     inside the loop (in case we are analyzing an outer-loop).  */
> > > -  vect_unpromoted_value unprom[2];
> > > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > > -                            false, 2, unprom, &half_type))
> > > +  if (!half_type)
> > >      return NULL;
> > >
> > >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> > >    return pattern_stmt;
> > >  }
> > >
> > > +/* Function vect_recog_abd_pattern
> > > +
> > > +   Try to find the following ABsolute Difference (ABD) pattern:
> > > +
> > > +     VTYPE x, y, out;
> > > +     type diff;
> > > +   loop i in range:
> > > +     S1 diff = x[i] - y[i]
> > > +     S2 out[i] = ABS_EXPR <diff>;
> > > +
> > > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > > +   the same size as 'type'
> > > +
> > > +   Input:
> > > +
> > > +   * STMT_VINFO: The stmt from which the pattern search begins
> > > +
> > > +   Output:
> > > +
> > > +   * TYPE_out: The type of the output of this pattern
> > > +
> > > +   * Return value: A new stmt that will be used to replace the sequence of
> > > +     stmts that constitute the pattern; either SABD or UABD:
> > > +       SABD_EXPR<x, y, out>
> > > +       UABD_EXPR<x, y, out>
> > > +
> > > +      UABD expressions are used when the input types are
> > > +      narrower than the output types or the output type is narrower
> > > +      than 32 bits
> > > + */
> > > +
> > > +static gimple *
> > > +vect_recog_abd_pattern (vec_info *vinfo,
> > > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > > +{
> > > +  /* Look for the following patterns
> > > +       X = x[i]
> > > +       Y = y[i]
> > > +       DIFF = X - Y
> > > +       DAD = ABS_EXPR<DIFF>
> > > +       out[i] = DAD
> > > +
> > > +     In which
> > > +      - X, Y, DIFF, DAD all have the same type
> > > +      - x, y, out are all vectors of the same type
> > > +  */
> > > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > > +  if (!last_stmt)
> > > +    return NULL;
> > > +
> > > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > > +
> > > +  gassign *abs_stmt = last_stmt;
> > > +  if (gimple_assign_cast_p (last_stmt))
> > > +  {
> > > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > > +    if (!SSA_VAR_P (last_rhs))
> > > +      return NULL;
> > > +
> > > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > > +    if (!abs_stmt)
> > > +      return NULL;
> > > +  }
> > > +
> > > +  vect_unpromoted_value unprom[2];
> > > +  tree diff_oprnds[2];
> > > +  tree half_type;
> > > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > > +                                      false, unprom, diff_oprnds))
> > > +    return NULL;
> > > +
> > > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > > +
> > > +  tree abd_oprnds[2];
> > > +  if (half_type)
> > > +  {
> > > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > > +      return NULL;
> > > +
> > > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > > +    {
> > > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > > +    }
> > > +    else
> > > +    {
> > > +      abd_oprnds[0] = diff_oprnds[0];
> > > +      abd_oprnds[1] = diff_oprnds[1];
> > > +    }
> > > +  }
> > > +  else
> > > +  {
> > > +    if (unprom[0].op && unprom[1].op
> > > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > > +      return NULL;
> > > +
> > > +    unprom[0].op = diff_oprnds[0];
> > > +    unprom[1].op = diff_oprnds[1];
> > > +    tree signed_out = signed_type_for (out_type);
> > > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > > +                        signed_out, unprom, signed_out_vectype);
> > > +
> > > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > > +      return NULL;
> > > +  }
> > > +
> > > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > > +    return NULL;
> > > +
> > > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > > +
> > > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > > +  if (!vectype
> > > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > > +                                         OPTIMIZE_FOR_SPEED))
> > > +    return NULL;
> > > +
> > > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > > +
> > > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > > +                                               abd_oprnds[0], abd_oprnds[1]);
> > > +  gimple_call_set_lhs (abd_stmt, var);
> > > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > > +  return abd_stmt;
> > > +}
> > > +
> > >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> > >     so that it can be treated as though it had the form:
> > >
> > > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> > >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> > >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> > >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > > +  { vect_recog_abd_pattern, "abd" },
> > >    { vect_recog_over_widening_pattern, "over_widening" },
> > >    /* Must come after over_widening, which narrows the shift as much as
> > >       possible beforehand.  */
> > > --
> > > 2.25.1
  
Oluwatamilore Adebayo May 10, 2023, 1:29 p.m. UTC | #4
When using inputs of 0x7fff and 0x8000 the result yielded is -1.
When using inputs of -1 and 0x7fff the results yielded is 0x8000.

Tami
________________________________
From: Richard Biener <richard.guenther@gmail.com>
Sent: Wednesday, May 10, 2023 10:49 AM
To: Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; richard.guenther@gmail.com <richard.guenther@gmail.com>; Richard Sandiford <Richard.Sandiford@arm.com>
Subject: Re: [PATCH] vect: Missed opportunity to use [SU]ABD

On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > From: oluade01 <oluwatamilore.adebayo@arm.com>
> > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >         * doc/md.texi (sabd, uabd): Document them.
> >         * internal-fn.def (ABD): Use new optab.
> >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >         Recognize the following idiom abs (a - b).
> >         (vect_recog_sad_pattern): Refactor to use
> >         vect_recog_absolute_difference.
> >         (vect_recog_abd_pattern): Use patterns found by
> >         vect_recog_absolute_difference to build a new ABD
> >         internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 234 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = abs (op0 - op1)
>
> op0 = abs (op1 - op2)
>
> But that isn't the correct calculation for unsigned (where abs doesn't
> really work).  It also doesn't handle some cases correctly for signed.
>
> I think it's more:
>
>   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>
> or (conceptually) max minus min.
>
> E.g. for 16-bit values, the absolute difference between signed 0x7fff
> and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> to signed).  But, ignoring undefined behaviour:
>
>   0x7fff - 0x8000 = -1
>   abs(-1) = 1
>
> which gives the wrong answer.
>
> We might still be able to fold C abs(a - b) to abd for signed a and b
> by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> can't do it for -fwrapv.
>
> Richi knows better than me what would be appropriate here.

The question is what does the hardware do?  For the widening [us]sad it's
obvious since the difference is computed in a wider signed mode and the
absolute value always fits.

So what does it actually do, esp. when the difference yields 0x8000?

Richard.

>
> Thanks,
> Richard
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                             sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                               savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                               tree *half_type, bool reject_unsigned,
> > +                               vect_unpromoted_value unprom[2],
> > +                               tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                   &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type))
> > +    if (!reject_unsigned)
> > +      return false;
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  bool assigned_oprnds = false;
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +  {
> > +    assigned_oprnds = true;
> > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +  }
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                            WIDEN_MINUS_EXPR,
> > +                            false, 2, unprom, half_type))
> > +  {
> > +    if (diff_oprnds && !assigned_oprnds)
> > +    {
> > +      diff_oprnds[0] = unprom[0].op;
> > +      diff_oprnds[1] = unprom[1].op;
> > +    }
> > +  }
> > +  else if (!assigned_oprnds)
> > +  {
> > +    return false;
> > +  }
> > +  else
> > +  {
> > +    *half_type = NULL_TREE;
> > +  }
> > +
> > +  return true;
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> > -
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                   &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> >
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                            false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +       SABD_EXPR<x, y, out>
> > +       UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > +       out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +  {
> > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +    if (!SSA_VAR_P (last_rhs))
> > +      return NULL;
> > +
> > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +    if (!abs_stmt)
> > +      return NULL;
> > +  }
> > +
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +  {
> > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +      return NULL;
> > +
> > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +    {
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > +    }
> > +    else
> > +    {
> > +      abd_oprnds[0] = diff_oprnds[0];
> > +      abd_oprnds[1] = diff_oprnds[1];
> > +    }
> > +  }
> > +  else
> > +  {
> > +    if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +      return NULL;
> > +
> > +    unprom[0].op = diff_oprnds[0];
> > +    unprom[1].op = diff_oprnds[1];
> > +    tree signed_out = signed_type_for (out_type);
> > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +      return NULL;
> > +  }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                         OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                               abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */
> > --
> > 2.25.1
  
Richard Sandiford May 10, 2023, 3:27 p.m. UTC | #5
Richard Biener <richard.guenther@gmail.com> writes:
> On Wed, May 10, 2023 at 11:49 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
>>
>> On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>> >
>> > Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
>> > > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
>> > > From: oluade01 <oluwatamilore.adebayo@arm.com>
>> > > Date: Fri, 14 Apr 2023 10:24:43 +0100
>> > > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
>> > >
>> > > This adds a recognition pattern for the non-widening
>> > > absolute difference (ABD).
>> > >
>> > > gcc/ChangeLog:
>> > >
>> > >         * doc/md.texi (sabd, uabd): Document them.
>> > >         * internal-fn.def (ABD): Use new optab.
>> > >         * optabs.def (sabd_optab, uabd_optab): New optabs,
>> > >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
>> > >         Recognize the following idiom abs (a - b).
>> > >         (vect_recog_sad_pattern): Refactor to use
>> > >         vect_recog_absolute_difference.
>> > >         (vect_recog_abd_pattern): Use patterns found by
>> > >         vect_recog_absolute_difference to build a new ABD
>> > >         internal call.
>> > > ---
>> > >  gcc/doc/md.texi           |  10 ++
>> > >  gcc/internal-fn.def       |   3 +
>> > >  gcc/optabs.def            |   2 +
>> > >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
>> > >  4 files changed, 234 insertions(+), 31 deletions(-)
>> > >
>> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> > > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
>> > > --- a/gcc/doc/md.texi
>> > > +++ b/gcc/doc/md.texi
>> > > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
>> > >  Vector shift and rotate instructions that take vectors as operand 2
>> > >  instead of a scalar type.
>> > >
>> > > +@cindex @code{uabd@var{m}} instruction pattern
>> > > +@cindex @code{sabd@var{m}} instruction pattern
>> > > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
>> > > +Signed and unsigned absolute difference instructions.  These
>> > > +instructions find the difference between operands 1 and 2
>> > > +then return the absolute value.  A C code equivalent would be:
>> > > +@smallexample
>> > > +op0 = abs (op0 - op1)
>> >
>> > op0 = abs (op1 - op2)
>> >
>> > But that isn't the correct calculation for unsigned (where abs doesn't
>> > really work).  It also doesn't handle some cases correctly for signed.
>> >
>> > I think it's more:
>> >
>> >   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>> >
>> > or (conceptually) max minus min.
>> >
>> > E.g. for 16-bit values, the absolute difference between signed 0x7fff
>> > and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
>> > to signed).  But, ignoring undefined behaviour:
>> >
>> >   0x7fff - 0x8000 = -1
>> >   abs(-1) = 1
>> >
>> > which gives the wrong answer.
>> >
>> > We might still be able to fold C abs(a - b) to abd for signed a and b
>> > by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
>> > can't do it for -fwrapv.
>> >
>> > Richi knows better than me what would be appropriate here.
>>
>> The question is what does the hardware do?  For the widening [us]sad it's
>> obvious since the difference is computed in a wider signed mode and the
>> absolute value always fits.
>>
>> So what does it actually do, esp. when the difference yields 0x8000?
>
> A "sensible" definition would be that it works like the widening [us]sad
> and applies truncation to the result (modulo-reducing when the result
> isn't always unsigned).

Yeah.  Like Tami says, this is what the instruction does.

I think all three definitions are equivalent: the extend/operate/truncate
one, the ?: one above, and the "max - min" one.  Probably just personal
preference as to which seems more natural.

Reading the patch again, it does check TYPE_OVERFLOW_WRAPS, so -fwrapv
might be handled correctly after all.  Sorry for missing it first time.

On the patch:

> +/* Look for the following pattern
> +	X = x[i]
> +	Y = y[i]
> +	DIFF = X - Y
> +	DAD = ABS_EXPR<DIFF>
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +				tree *half_type, bool reject_unsigned,
> +				vect_unpromoted_value unprom[2],
> +				tree diff_oprnds[2])

It would be good to document what the parameters mean (except VINFO,
which is obvious).

> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +						    &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    if (!reject_unsigned)
> +      return false;

I think this should instead be:

  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type)
      && TYPE_UNSIGNED (abs_type))
    return false;

The point is that if any promotion (from "A" to "B" say) happens on the
input to the ABS, it has to be signed rather than unsigned promotion,
so that the extra bits in B get filled with the sign of A.  Without that,
the ABS will always be a no-op (which is why it should have been removed
by now).

> +  bool assigned_oprnds = false;
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> +  {
> +    assigned_oprnds = true;
> +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +  }
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> +			     WIDEN_MINUS_EXPR,
> +			     false, 2, unprom, half_type))
> +  {
> +    if (diff_oprnds && !assigned_oprnds)
> +    {
> +      diff_oprnds[0] = unprom[0].op;
> +      diff_oprnds[1] = unprom[1].op;
> +    }
> +  }
> +  else if (!assigned_oprnds)
> +  {
> +    return false;
> +  }
> +  else
> +  {
> +    *half_type = NULL_TREE;
> +  }
> +
> +  return true;
> +}

I think the code would be easier to follow if it used vect_widened_op_tree
first, and only considered the unextended case on failure.

Minor formatting nit: GCC style is to indent braces by two spaces
further than an "if":

  if (...)
    {
      ...
    }

Thanks,
Richard
  
Oluwatamilore Adebayo May 15, 2023, 12:35 p.m. UTC | #6
From: Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com>
Sent: Wednesday, May 10, 2023 14:29
To: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; Richard Sandiford <Richard.Sandiford@arm.com>
Subject: Re: [PATCH] vect: Missed opportunity to use [SU]ABD

When using inputs of 0x7fff and 0x8000 the result yielded is -1.
When using inputs of -1 and 0x7fff the results yielded is 0x8000.

Tami
________________________________
From: Richard Biener <richard.guenther@gmail.com<mailto:richard.guenther@gmail.com>>
Sent: Wednesday, May 10, 2023 10:49 AM
To: Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com<mailto:Oluwatamilore.Adebayo@arm.com>>; gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org> <gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>>; richard.guenther@gmail.com<mailto:richard.guenther@gmail.com> <richard.guenther@gmail.com<mailto:richard.guenther@gmail.com>>; Richard Sandiford <Richard.Sandiford@arm.com<mailto:Richard.Sandiford@arm.com>>
Subject: Re: [PATCH] vect: Missed opportunity to use [SU]ABD

On Wed, May 10, 2023 at 11:01 AM Richard Sandiford
<richard.sandiford@arm.com<mailto:richard.sandiford@arm.com>> wrote:
>
> Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com<mailto:Oluwatamilore.Adebayo@arm.com>> writes:
> > From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> > From: oluade01 <oluwatamilore.adebayo@arm.com<mailto:oluwatamilore.adebayo@arm.com>>
> > Date: Fri, 14 Apr 2023 10:24:43 +0100
> > Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
> >
> > This adds a recognition pattern for the non-widening
> > absolute difference (ABD).
> >
> > gcc/ChangeLog:
> >
> >         * doc/md.texi (sabd, uabd): Document them.
> >         * internal-fn.def (ABD): Use new optab.
> >         * optabs.def (sabd_optab, uabd_optab): New optabs,
> >         * tree-vect-patterns.cc (vect_recog_absolute_difference):
> >         Recognize the following idiom abs (a - b).
> >         (vect_recog_sad_pattern): Refactor to use
> >         vect_recog_absolute_difference.
> >         (vect_recog_abd_pattern): Use patterns found by
> >         vect_recog_absolute_difference to build a new ABD
> >         internal call.
> > ---
> >  gcc/doc/md.texi           |  10 ++
> >  gcc/internal-fn.def       |   3 +
> >  gcc/optabs.def            |   2 +
> >  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
> >  4 files changed, 234 insertions(+), 31 deletions(-)
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
> >  Vector shift and rotate instructions that take vectors as operand 2
> >  instead of a scalar type.
> >
> > +@cindex @code{uabd@var{m}} instruction pattern
> > +@cindex @code{sabd@var{m}} instruction pattern
> > +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> > +Signed and unsigned absolute difference instructions.  These
> > +instructions find the difference between operands 1 and 2
> > +then return the absolute value.  A C code equivalent would be:
> > +@smallexample
> > +op0 = abs (op0 - op1)
>
> op0 = abs (op1 - op2)
>
> But that isn't the correct calculation for unsigned (where abs doesn't
> really work).  It also doesn't handle some cases correctly for signed.
>
> I think it's more:
>
>   op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1
>
> or (conceptually) max minus min.
>
> E.g. for 16-bit values, the absolute difference between signed 0x7fff
> and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
> to signed).  But, ignoring undefined behaviour:
>
>   0x7fff - 0x8000 = -1
>   abs(-1) = 1
>
> which gives the wrong answer.
>
> We might still be able to fold C abs(a - b) to abd for signed a and b
> by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
> can't do it for -fwrapv.
>
> Richi knows better than me what would be appropriate here.

The question is what does the hardware do?  For the widening [us]sad it's
obvious since the difference is computed in a wider signed mode and the
absolute value always fits.

So what does it actually do, esp. when the difference yields 0x8000?

Richard.

>
> Thanks,
> Richard
>
> > +@end smallexample
> > +
> >  @cindex @code{avg@var{m}3_floor} instruction pattern
> >  @cindex @code{uavg@var{m}3_floor} instruction pattern
> >  @item @samp{avg@var{m}3_floor}
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
> >  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
> >
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> > +                             sabd, uabd, binary)
> > +
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
> >                               savg_floor, uavg_floor, binary)
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> >  OPTAB_D (extract_last_optab, "extract_last_$a")
> >  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> >
> > +OPTAB_D (uabd_optab, "uabd$a3")
> > +OPTAB_D (sabd_optab, "sabd$a3")
> >  OPTAB_D (savg_floor_optab, "avg$a3_floor")
> >  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
> >  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
> >      }
> >  }
> >
> > +/* Look for the following pattern
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > + */
> > +static bool
> > +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> > +                               tree *half_type, bool reject_unsigned,
> > +                               vect_unpromoted_value unprom[2],
> > +                               tree diff_oprnds[2])
> > +{
> > +  if (!abs_stmt)
> > +    return false;
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> > +  if (code != ABS_EXPR && code != ABSU_EXPR)
> > +    return false;
> > +
> > +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > +  tree abs_type = TREE_TYPE (abs_oprnd);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> > +    return false;
> > +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> > +    return false;
> > +
> > +  /* Peel off conversions from the ABS input.  This can involve sign
> > +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> > +     or signed promotion, but it can't include unsigned promotion.
> > +     (Note that ABS of an unsigned promotion should have been folded
> > +     away before now anyway.)  */
> > +  vect_unpromoted_value unprom_diff;
> > +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > +                                                   &unprom_diff);
> > +  if (!abs_oprnd)
> > +    return false;
> > +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > +      && TYPE_UNSIGNED (unprom_diff.type))
> > +    if (!reject_unsigned)
> > +      return false;
> > +
> > +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > +  if (!diff_stmt_vinfo)
> > +    return false;
> > +
> > +  bool assigned_oprnds = false;
> > +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> > +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> > +  {
> > +    assigned_oprnds = true;
> > +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> > +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> > +  }
> > +
> > +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > +     inside the loop (in case we are analyzing an outer-loop).  */
> > +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> > +                            WIDEN_MINUS_EXPR,
> > +                            false, 2, unprom, half_type))
> > +  {
> > +    if (diff_oprnds && !assigned_oprnds)
> > +    {
> > +      diff_oprnds[0] = unprom[0].op;
> > +      diff_oprnds[1] = unprom[1].op;
> > +    }
> > +  }
> > +  else if (!assigned_oprnds)
> > +  {
> > +    return false;
> > +  }
> > +  else
> > +  {
> > +    *half_type = NULL_TREE;
> > +  }
> > +
> > +  return true;
> > +}
> > +
> >  /* Convert UNPROM to TYPE and return the result, adding new statements
> >     to STMT_INFO's pattern definition statements if no better way is
> >     available.  VECTYPE is the vector form of TYPE.
> > @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> >       inside the loop (in case we are analyzing an outer-loop).  */
> >    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> > -  if (!abs_stmt
> > -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> > -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> > -    return NULL;
> > -
> > -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> > -  tree abs_type = TREE_TYPE (abs_oprnd);
> > -  if (TYPE_UNSIGNED (abs_type))
> > -    return NULL;
> > -
> > -  /* Peel off conversions from the ABS input.  This can involve sign
> > -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> > -     or signed promotion, but it can't include unsigned promotion.
> > -     (Note that ABS of an unsigned promotion should have been folded
> > -     away before now anyway.)  */
> > -  vect_unpromoted_value unprom_diff;
> > -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> > -                                                   &unprom_diff);
> > -  if (!abs_oprnd)
> > -    return NULL;
> > -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> > -      && TYPE_UNSIGNED (unprom_diff.type))
> > -    return NULL;
> >
> > -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> > -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> > -  if (!diff_stmt_vinfo)
> > +  vect_unpromoted_value unprom[2];
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      true, unprom, NULL))
> >      return NULL;
> >
> > -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> > -     inside the loop (in case we are analyzing an outer-loop).  */
> > -  vect_unpromoted_value unprom[2];
> > -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> > -                            false, 2, unprom, &half_type))
> > +  if (!half_type)
> >      return NULL;
> >
> >    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> > @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
> >    return pattern_stmt;
> >  }
> >
> > +/* Function vect_recog_abd_pattern
> > +
> > +   Try to find the following ABsolute Difference (ABD) pattern:
> > +
> > +     VTYPE x, y, out;
> > +     type diff;
> > +   loop i in range:
> > +     S1 diff = x[i] - y[i]
> > +     S2 out[i] = ABS_EXPR <diff>;
> > +
> > +   where 'type' is a integer and 'VTYPE' is a vector of integers
> > +   the same size as 'type'
> > +
> > +   Input:
> > +
> > +   * STMT_VINFO: The stmt from which the pattern search begins
> > +
> > +   Output:
> > +
> > +   * TYPE_out: The type of the output of this pattern
> > +
> > +   * Return value: A new stmt that will be used to replace the sequence of
> > +     stmts that constitute the pattern; either SABD or UABD:
> > +       SABD_EXPR<x, y, out>
> > +       UABD_EXPR<x, y, out>
> > +
> > +      UABD expressions are used when the input types are
> > +      narrower than the output types or the output type is narrower
> > +      than 32 bits
> > + */
> > +
> > +static gimple *
> > +vect_recog_abd_pattern (vec_info *vinfo,
> > +               stmt_vec_info stmt_vinfo, tree *type_out)
> > +{
> > +  /* Look for the following patterns
> > +       X = x[i]
> > +       Y = y[i]
> > +       DIFF = X - Y
> > +       DAD = ABS_EXPR<DIFF>
> > +       out[i] = DAD
> > +
> > +     In which
> > +      - X, Y, DIFF, DAD all have the same type
> > +      - x, y, out are all vectors of the same type
> > +  */
> > +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> > +  if (!last_stmt)
> > +    return NULL;
> > +
> > +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> > +
> > +  gassign *abs_stmt = last_stmt;
> > +  if (gimple_assign_cast_p (last_stmt))
> > +  {
> > +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> > +    if (!SSA_VAR_P (last_rhs))
> > +      return NULL;
> > +
> > +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> > +    if (!abs_stmt)
> > +      return NULL;
> > +  }
> > +
> > +  vect_unpromoted_value unprom[2];
> > +  tree diff_oprnds[2];
> > +  tree half_type;
> > +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> > +                                      false, unprom, diff_oprnds))
> > +    return NULL;
> > +
> > +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> > +
> > +  tree abd_oprnds[2];
> > +  if (half_type)
> > +  {
> > +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> > +      return NULL;
> > +
> > +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> > +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> > +    {
> > +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> > +                          get_vectype_for_scalar_type (vinfo, half_type));
> > +    }
> > +    else
> > +    {
> > +      abd_oprnds[0] = diff_oprnds[0];
> > +      abd_oprnds[1] = diff_oprnds[1];
> > +    }
> > +  }
> > +  else
> > +  {
> > +    if (unprom[0].op && unprom[1].op
> > +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> > +       || !SAME_TYPE (unprom[0].type, out_type)))
> > +      return NULL;
> > +
> > +    unprom[0].op = diff_oprnds[0];
> > +    unprom[1].op = diff_oprnds[1];
> > +    tree signed_out = signed_type_for (out_type);
> > +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> > +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> > +                        signed_out, unprom, signed_out_vectype);
> > +
> > +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> > +      return NULL;
> > +  }
> > +
> > +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> > +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> > +    return NULL;
> > +
> > +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> > +  if (!vectype
> > +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +                                         OPTIMIZE_FOR_SPEED))
> > +    return NULL;
> > +
> > +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> > +
> > +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> > +                                               abd_oprnds[0], abd_oprnds[1]);
> > +  gimple_call_set_lhs (abd_stmt, var);
> > +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> > +  return abd_stmt;
> > +}
> > +
> >  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> >     so that it can be treated as though it had the form:
> >
> > @@ -6439,6 +6626,7 @@ struct vect_recog_func
> >  static vect_recog_func vect_vect_recog_func_ptrs[] = {
> >    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
> >    { vect_recog_bit_insert_pattern, "bit_insert" },
> > +  { vect_recog_abd_pattern, "abd" },
> >    { vect_recog_over_widening_pattern, "over_widening" },
> >    /* Must come after over_widening, which narrows the shift as much as
> >       possible beforehand.  */
> > --
> > 2.25.1
  
Li, Pan2 via Gcc-patches May 17, 2023, 12:21 p.m. UTC | #7
> Yeah.  Like Tami says, this is what the instruction does.
> 
> I think all three definitions are equivalent: the extend/operate/truncate
> one, the ?: one above, and the "max - min" one.  Probably just personal
> preference as to which seems more natural.

Decided to switch to using the ?: one as it makes more sense
for unsigned types.

> It would be good to document what the parameters mean (except VINFO,
> which is obvious).

Documentation added for vect_recog_absolute_difference.

> I think this should instead be:
> 
>   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
>       && TYPE_UNSIGNED (unprom_diff.type)
>       && TYPE_UNSIGNED (abs_type))
>     return false;
> 
> ....
> 
> I think the code would be easier to follow if it used vect_widened_op_tree
> first, and only considered the unextended case on failure.

Implemented Richard's suggested changes to vect_recog_absolute_difference.

> Minor formatting nit: GCC style is to indent braces by two spaces
> further than an "if":
> 
>   if (...)
>     {
>       ...
>     }

Adopted this style.

New patch will be in the reply.
  

Patch

From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
From: oluade01 <oluwatamilore.adebayo@arm.com>
Date: Fri, 14 Apr 2023 10:24:43 +0100
Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD

This adds a recognition pattern for the non-widening
absolute difference (ABD).

gcc/ChangeLog:

	* doc/md.texi (sabd, uabd): Document them.
	* internal-fn.def (ABD): Use new optab.
	* optabs.def (sabd_optab, uabd_optab): New optabs,
	* tree-vect-patterns.cc (vect_recog_absolute_difference):
	Recognize the following idiom abs (a - b).
	(vect_recog_sad_pattern): Refactor to use
	vect_recog_absolute_difference.
	(vect_recog_abd_pattern): Use patterns found by
	vect_recog_absolute_difference to build a new ABD
	internal call.
---
 gcc/doc/md.texi           |  10 ++
 gcc/internal-fn.def       |   3 +
 gcc/optabs.def            |   2 +
 gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
 4 files changed, 234 insertions(+), 31 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5778,6 +5778,16 @@  Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
+@cindex @code{uabd@var{m}} instruction pattern
+@cindex @code{sabd@var{m}} instruction pattern
+@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
+Signed and unsigned absolute difference instructions.  These
+instructions find the difference between operands 1 and 2
+then return the absolute value.  A C code equivalent would be:
+@smallexample
+op0 = abs (op0 - op1)
+@end smallexample
+
 @cindex @code{avg@var{m}3_floor} instruction pattern
 @cindex @code{uavg@var{m}3_floor} instruction pattern
 @item @samp{avg@var{m}3_floor}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -167,6 +167,9 @@  DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
+			      sabd, uabd, binary)
+
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
 			      savg_floor, uavg_floor, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -359,6 +359,8 @@  OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
 
+OPTAB_D (uabd_optab, "uabd$a3")
+OPTAB_D (sabd_optab, "sabd$a3")
 OPTAB_D (savg_floor_optab, "avg$a3_floor")
 OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
 OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -770,6 +770,89 @@  vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     }
 }
 
+/* Look for the following pattern
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+ */
+static bool
+vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
+				tree *half_type, bool reject_unsigned,
+				vect_unpromoted_value unprom[2],
+				tree diff_oprnds[2])
+{
+  if (!abs_stmt)
+    return false;
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
+  if (code != ABS_EXPR && code != ABSU_EXPR)
+    return false;
+
+  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+  tree abs_type = TREE_TYPE (abs_oprnd);
+  if (!abs_oprnd)
+    return false;
+  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
+    return false;
+  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
+    return false;
+
+  /* Peel off conversions from the ABS input.  This can involve sign
+     changes (e.g.  from an unsigned subtraction to a signed ABS input)
+     or signed promotion, but it can't include unsigned promotion.
+     (Note that ABS of an unsigned promotion should have been folded
+     away before now anyway.)  */
+  vect_unpromoted_value unprom_diff;
+  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
+						    &unprom_diff);
+  if (!abs_oprnd)
+    return false;
+  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
+      && TYPE_UNSIGNED (unprom_diff.type))
+    if (!reject_unsigned)
+      return false;
+
+  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
+  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
+  if (!diff_stmt_vinfo)
+    return false;
+
+  bool assigned_oprnds = false;
+  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
+  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
+  {
+    assigned_oprnds = true;
+    diff_oprnds[0] = gimple_assign_rhs1 (diff);
+    diff_oprnds[1] = gimple_assign_rhs2 (diff);
+  }
+
+  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
+     inside the loop (in case we are analyzing an outer-loop).  */
+  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
+			     WIDEN_MINUS_EXPR,
+			     false, 2, unprom, half_type))
+  {
+    if (diff_oprnds && !assigned_oprnds)
+    {
+      diff_oprnds[0] = unprom[0].op;
+      diff_oprnds[1] = unprom[1].op;
+    }
+  }
+  else if (!assigned_oprnds)
+  {
+    return false;
+  }
+  else
+  {
+    *half_type = NULL_TREE;
+  }
+
+  return true;
+}
+
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
    available.  VECTYPE is the vector form of TYPE.
@@ -1308,40 +1391,13 @@  vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
-  if (!abs_stmt
-      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
-	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
-    return NULL;
-
-  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
-  tree abs_type = TREE_TYPE (abs_oprnd);
-  if (TYPE_UNSIGNED (abs_type))
-    return NULL;
-
-  /* Peel off conversions from the ABS input.  This can involve sign
-     changes (e.g. from an unsigned subtraction to a signed ABS input)
-     or signed promotion, but it can't include unsigned promotion.
-     (Note that ABS of an unsigned promotion should have been folded
-     away before now anyway.)  */
-  vect_unpromoted_value unprom_diff;
-  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
-						    &unprom_diff);
-  if (!abs_oprnd)
-    return NULL;
-  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
-      && TYPE_UNSIGNED (unprom_diff.type))
-    return NULL;
 
-  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
-  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
-  if (!diff_stmt_vinfo)
+  vect_unpromoted_value unprom[2];
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       true, unprom, NULL))
     return NULL;
 
-  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
-     inside the loop (in case we are analyzing an outer-loop).  */
-  vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
-			     false, 2, unprom, &half_type))
+  if (!half_type)
     return NULL;
 
   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
@@ -1363,6 +1419,137 @@  vect_recog_sad_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+/* Function vect_recog_abd_pattern
+
+   Try to find the following ABsolute Difference (ABD) pattern:
+
+     VTYPE x, y, out;
+     type diff;
+   loop i in range:
+     S1 diff = x[i] - y[i]
+     S2 out[i] = ABS_EXPR <diff>;
+
+   where 'type' is a integer and 'VTYPE' is a vector of integers
+   the same size as 'type'
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins
+
+   Output:
+
+   * TYPE_out: The type of the output of this pattern
+
+   * Return value: A new stmt that will be used to replace the sequence of
+     stmts that constitute the pattern; either SABD or UABD:
+	SABD_EXPR<x, y, out>
+	UABD_EXPR<x, y, out>
+
+      UABD expressions are used when the input types are
+      narrower than the output types or the output type is narrower
+      than 32 bits
+ */
+
+static gimple *
+vect_recog_abd_pattern (vec_info *vinfo,
+		stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  /* Look for the following patterns
+	X = x[i]
+	Y = y[i]
+	DIFF = X - Y
+	DAD = ABS_EXPR<DIFF>
+	out[i] = DAD
+
+     In which
+      - X, Y, DIFF, DAD all have the same type
+      - x, y, out are all vectors of the same type
+  */
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt)
+    return NULL;
+
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+
+  gassign *abs_stmt = last_stmt;
+  if (gimple_assign_cast_p (last_stmt))
+  {
+    tree last_rhs = gimple_assign_rhs1 (last_stmt);
+    if (!SSA_VAR_P (last_rhs))
+      return NULL;
+
+    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
+    if (!abs_stmt)
+      return NULL;
+  }
+
+  vect_unpromoted_value unprom[2];
+  tree diff_oprnds[2];
+  tree half_type;
+  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
+				       false, unprom, diff_oprnds))
+    return NULL;
+
+#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
+
+  tree abd_oprnds[2];
+  if (half_type)
+  {
+    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
+      return NULL;
+
+    tree diff_type = TREE_TYPE (diff_oprnds[0]);
+    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
+    {
+      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
+			   get_vectype_for_scalar_type (vinfo, half_type));
+    }
+    else
+    {
+      abd_oprnds[0] = diff_oprnds[0];
+      abd_oprnds[1] = diff_oprnds[1];
+    }
+  }
+  else
+  {
+    if (unprom[0].op && unprom[1].op
+	&& (!SAME_TYPE (unprom[0].type, unprom[1].type)
+	|| !SAME_TYPE (unprom[0].type, out_type)))
+      return NULL;
+
+    unprom[0].op = diff_oprnds[0];
+    unprom[1].op = diff_oprnds[1];
+    tree signed_out = signed_type_for (out_type);
+    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
+			 signed_out, unprom, signed_out_vectype);
+
+    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
+      return NULL;
+  }
+
+  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
+      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
+  if (!vectype
+      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+					  OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
+
+  tree var = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+						abd_oprnds[0], abd_oprnds[1]);
+  gimple_call_set_lhs (abd_stmt, var);
+  gimple_set_location (abd_stmt, gimple_location (last_stmt));
+  return abd_stmt;
+}
+
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
    so that it can be treated as though it had the form:
 
@@ -6439,6 +6626,7 @@  struct vect_recog_func
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
   { vect_recog_bit_insert_pattern, "bit_insert" },
+  { vect_recog_abd_pattern, "abd" },
   { vect_recog_over_widening_pattern, "over_widening" },
   /* Must come after over_widening, which narrows the shift as much as
      possible beforehand.  */
-- 
2.25.1