[v11,11/12] Add extra conditional dump output to the vectorizer

Message ID 20260603151924.53706-12-chris.bazley@arm.com
State New
Headers
Series Extend BB SLP vectorization to use predicated tails |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

Christopher Bazley June 3, 2026, 3:19 p.m. UTC
  Add dump output at the points in the vectorizer that needed
instrumenting in order to develop predicated tails for basic block
SLP (superword-level parallelism).

gcc/ChangeLog:

	* tree-vect-loop.cc (vect_get_max_nscalars_per_iter): Dump the
	result.
	(vect_verify_full_masking): Dump the number of vectors and
	number of scalars per iteration.
	* tree-vect-slp.cc (vect_prologue_cost_for_slp): Dump the SLP
	tree node's address, vector type and group size.
	* tree-vect-stmts.cc (vectorizable_conversion): Dump the vector
	type inferred from the scalar type.
	(vectorizable_comparison_1): Dump the vector type guessed for
	an invariant comparison.
	(get_vectype_for_scalar_type): Dump the natural choice of
	vector type and instrument the fallback loop which searches for
	a narrower type.
	(vect_maybe_update_slp_op_vectype): Dump the new vector type
	and number of lanes of an operand when its vector type is
	updated.
---
 gcc/tree-vect-loop.cc  | 10 ++++++++++
 gcc/tree-vect-slp.cc   |  6 ++++++
 gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 1 deletion(-)
  

Comments

Richard Biener June 9, 2026, 11:03 a.m. UTC | #1
On Wed, Jun 3, 2026 at 5:21 PM Christopher Bazley <chris.bazley@arm.com> wrote:
>
> Add dump output at the points in the vectorizer that needed
> instrumenting in order to develop predicated tails for basic block
> SLP (superword-level parallelism).

I believe this adds too much verbosity, so I'd prefer not to merge it (the
vect dump is already far too verbose).

> gcc/ChangeLog:
>
>         * tree-vect-loop.cc (vect_get_max_nscalars_per_iter): Dump the
>         result.
>         (vect_verify_full_masking): Dump the number of vectors and
>         number of scalars per iteration.
>         * tree-vect-slp.cc (vect_prologue_cost_for_slp): Dump the SLP
>         tree node's address, vector type and group size.
>         * tree-vect-stmts.cc (vectorizable_conversion): Dump the vector
>         type inferred from the scalar type.
>         (vectorizable_comparison_1): Dump the vector type guessed for
>         an invariant comparison.
>         (get_vectype_for_scalar_type): Dump the natural choice of
>         vector type and instrument the fallback loop which searches for
>         a narrower type.
>         (vect_maybe_update_slp_op_vectype): Dump the new vector type
>         and number of lanes of an operand when its vector type is
>         updated.
> ---
>  gcc/tree-vect-loop.cc  | 10 ++++++++++
>  gcc/tree-vect-slp.cc   |  6 ++++++
>  gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++++++++++++++++++++-
>  3 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 7503fd084cf..f8e33a1d846 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -922,6 +922,11 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
>    rgroup_controls *rgm;
>    FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, i, rgm)
>      res = MAX (res, rgm->max_nscalars_per_iter);
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location, "max_nscalars_per_iter=%u\n",
> +                    res);
> +
>    return res;
>  }
>
> @@ -1023,6 +1028,11 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
>        unsigned int nscalars_per_iter
>           = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
>                        LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
> +      if (dump_enabled_p ())
> +       dump_printf_loc (
> +         MSG_NOTE, vect_location,
> +         "verify_full_masking: nvectors=%u, nscalars_per_iter=%u\n", nvectors,
> +         nscalars_per_iter);
>
>        if (rgm->max_nscalars_per_iter < nscalars_per_iter)
>         {
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 6af13e65e19..03435dc653d 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -8876,6 +8876,12 @@ vect_prologue_cost_for_slp (vec_info *vinfo, slp_tree node,
>       When all elements are the same we can use a splat.  */
>    tree vectype = SLP_TREE_VECTYPE (node);
>    unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                    "vect_prologue_cost_for_slp: node %p, vector type %T, "
> +                    "group_size %u\n",
> +                    (void *) node, vectype, group_size);
> +
>    unsigned HOST_WIDE_INT const_nunits;
>    unsigned nelt_limit;
>    unsigned nvectors = vect_get_num_copies (vinfo, node);
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 367a9c63ea4..773cc9ed37f 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -5557,7 +5557,12 @@ vectorizable_conversion (vec_info *vinfo,
>    /* If op0 is an external or constant def, infer the vector type
>       from the scalar type.  */
>    if (!vectype_in)
> -    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
> +    {
> +      vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location, "inferred vector type %T\n",
> +                        vectype_in);
> +    }
>
>    if (!cost_vec)
>      gcc_assert (vectype_in);
> @@ -12786,6 +12791,16 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
>    if (!vectype)
>      {
>        vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
> +      if (dump_enabled_p ())
> +       {
> +         if (vectype)
> +           dump_printf_loc (MSG_NOTE, vect_location,
> +                            "invariant comparison, guessed type %T\n",
> +                            vectype);
> +         else
> +           dump_printf_loc (MSG_NOTE, vect_location,
> +                            "invariant comparison, no usable vector type\n");
> +       }
>        if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
>         return false;
>      }
> @@ -13829,7 +13844,19 @@ get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
>    /* Register the natural choice of vector type, before the group size
>       has been applied.  */
>    if (vectype)
> +  {
> +    if (dump_enabled_p ())
> +      dump_printf_loc (MSG_NOTE, vect_location,
> +                      "get_vectype_for_scalar_type: natural type for %T "
> +                      "(ignoring group size %u): %T\n",
> +                      scalar_type, group_size, vectype);
>      vinfo->used_vector_modes.add (TYPE_MODE (vectype));
> +  }
> +  else if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                    "get_vectype_for_scalar_type: no natural type for %T "
> +                    "(ignoring group size %u)\n",
> +                    scalar_type, group_size);
>
>    /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
>       try again with an explicit number of elements.  A vector type satisfies
> @@ -13862,6 +13889,11 @@ get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
>         {
>           vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
>                                                          scalar_type, nunits);
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_NOTE, vect_location,
> +                            "get_vectype_for_scalar_type: trying %u elements "
> +                            "of type %T: %T\n",
> +                            nunits, scalar_type, vectype);
>           nunits /= 2;
>         }
>        while (nunits > 1 && !vectype);
> @@ -14190,6 +14222,10 @@ vect_maybe_update_slp_op_vectype (vec_info *vinfo, slp_tree op, tree vectype)
>      }
>
>    SLP_TREE_VECTYPE (op) = vectype;
> +  if (dump_enabled_p ())
> +      dump_printf_loc (MSG_NOTE, vect_location,
> +                      "updated vectype of operand %p with %u lanes to %T\n",
> +                      (void *) op, SLP_TREE_LANES (op), vectype);
>    return true;
>  }
>
> --
> 2.43.0
>
  

Patch

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 7503fd084cf..f8e33a1d846 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -922,6 +922,11 @@  vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
   rgroup_controls *rgm;
   FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, i, rgm)
     res = MAX (res, rgm->max_nscalars_per_iter);
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location, "max_nscalars_per_iter=%u\n",
+		     res);
+
   return res;
 }
 
@@ -1023,6 +1028,11 @@  vect_verify_full_masking (loop_vec_info loop_vinfo)
       unsigned int nscalars_per_iter
 	  = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
 		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+      if (dump_enabled_p ())
+	dump_printf_loc (
+	  MSG_NOTE, vect_location,
+	  "verify_full_masking: nvectors=%u, nscalars_per_iter=%u\n", nvectors,
+	  nscalars_per_iter);
 
       if (rgm->max_nscalars_per_iter < nscalars_per_iter)
 	{
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6af13e65e19..03435dc653d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8876,6 +8876,12 @@  vect_prologue_cost_for_slp (vec_info *vinfo, slp_tree node,
      When all elements are the same we can use a splat.  */
   tree vectype = SLP_TREE_VECTYPE (node);
   unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "vect_prologue_cost_for_slp: node %p, vector type %T, "
+		     "group_size %u\n",
+		     (void *) node, vectype, group_size);
+
   unsigned HOST_WIDE_INT const_nunits;
   unsigned nelt_limit;
   unsigned nvectors = vect_get_num_copies (vinfo, node);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 367a9c63ea4..773cc9ed37f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5557,7 +5557,12 @@  vectorizable_conversion (vec_info *vinfo,
   /* If op0 is an external or constant def, infer the vector type
      from the scalar type.  */
   if (!vectype_in)
-    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
+    {
+      vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location, "inferred vector type %T\n",
+			 vectype_in);
+    }
 
   if (!cost_vec)
     gcc_assert (vectype_in);
@@ -12786,6 +12791,16 @@  vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
   if (!vectype)
     {
       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
+      if (dump_enabled_p ())
+	{
+	  if (vectype)
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "invariant comparison, guessed type %T\n",
+			     vectype);
+	  else
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "invariant comparison, no usable vector type\n");
+	}
       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
 	return false;
     }
@@ -13829,7 +13844,19 @@  get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
   /* Register the natural choice of vector type, before the group size
      has been applied.  */
   if (vectype)
+  {
+    if (dump_enabled_p ())
+      dump_printf_loc (MSG_NOTE, vect_location,
+		       "get_vectype_for_scalar_type: natural type for %T "
+		       "(ignoring group size %u): %T\n",
+		       scalar_type, group_size, vectype);
     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
+  }
+  else if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "get_vectype_for_scalar_type: no natural type for %T "
+		     "(ignoring group size %u)\n",
+		     scalar_type, group_size);
 
   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
      try again with an explicit number of elements.  A vector type satisfies
@@ -13862,6 +13889,11 @@  get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
 	{
 	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
 							 scalar_type, nunits);
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "get_vectype_for_scalar_type: trying %u elements "
+			     "of type %T: %T\n",
+			     nunits, scalar_type, vectype);
 	  nunits /= 2;
 	}
       while (nunits > 1 && !vectype);
@@ -14190,6 +14222,10 @@  vect_maybe_update_slp_op_vectype (vec_info *vinfo, slp_tree op, tree vectype)
     }
 
   SLP_TREE_VECTYPE (op) = vectype;
+  if (dump_enabled_p ())
+      dump_printf_loc (MSG_NOTE, vect_location,
+		       "updated vectype of operand %p with %u lanes to %T\n",
+		       (void *) op, SLP_TREE_LANES (op), vectype);
   return true;
 }