[2/2] tree-optimization/117874 - optimize SLP discovery budget use

Message ID 20241203112618.9AF8B13A2E@imap1.dmz-prg2.suse.org
State Committed
Commit af9a3fe6a52974252516b3eea4c5ab5caae47b4b
Headers
Series [1/2] Use the number of relevant stmts to limit SLP build

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Richard Biener Dec. 3, 2024, 11:26 a.m. UTC
The following tries to avoid eating into the SLP discovery limit
when we can do cheaper checks first.  Together with the previous
patch this allows using two-lane SLP discovery for mult_su3_an
in 433.milc.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/117874
	* tree-vect-slp.cc (vect_build_slp_tree_2): Perform early
	reassoc checks before eating into discovery limit.
---
 gcc/tree-vect-slp.cc | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)
  

Patch

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 1799d5a619b..425135a9ee0 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2292,6 +2292,9 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 		}
 	    }
 	  /* 2. try to build children nodes, associating as necessary.  */
+	  /* 2a. prepare and perform early checks to avoid eating into
+	     discovery limit unnecessarily.  */
+	  vect_def_type *dts = XALLOCAVEC (vect_def_type, chain_len);
 	  for (unsigned n = 0; n < chain_len; ++n)
 	    {
 	      vect_def_type dt = chains[0][n].dt;
@@ -2319,6 +2322,7 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 		    matches[0] = false;
 		  goto out;
 		}
+	      dts[n] = dt;
 	      if (dt == vect_constant_def
 		  || dt == vect_external_def)
 		{
@@ -2333,16 +2337,6 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 		      matches[0] = false;
 		      goto out;
 		    }
-		  vec<tree> ops;
-		  ops.create (group_size);
-		  for (lane = 0; lane < group_size; ++lane)
-		    if (stmts[lane])
-		      ops.quick_push (chains[lane][n].op);
-		    else
-		      ops.quick_push (NULL_TREE);
-		  slp_tree child = vect_create_new_slp_node (ops);
-		  SLP_TREE_DEF_TYPE (child) = dt;
-		  children.safe_push (child);
 		}
 	      else if (dt != vect_internal_def)
 		{
@@ -2354,6 +2348,26 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 		  hard_fail = false;
 		  goto out;
 		}
+	    }
+	  /* 2b. do the actual build.  */
+	  for (unsigned n = 0; n < chain_len; ++n)
+	    {
+	      vect_def_type dt = dts[n];
+	      unsigned lane;
+	      if (dt == vect_constant_def
+		  || dt == vect_external_def)
+		{
+		  vec<tree> ops;
+		  ops.create (group_size);
+		  for (lane = 0; lane < group_size; ++lane)
+		    if (stmts[lane])
+		      ops.quick_push (chains[lane][n].op);
+		    else
+		      ops.quick_push (NULL_TREE);
+		  slp_tree child = vect_create_new_slp_node (ops);
+		  SLP_TREE_DEF_TYPE (child) = dt;
+		  children.safe_push (child);
+		}
 	      else
 		{
 		  vec<stmt_vec_info> op_stmts;
@@ -2396,6 +2410,11 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 				term = true;
 				break;
 			      }
+			    if (dump_enabled_p ())
+			      dump_printf_loc (MSG_NOTE, vect_location,
+					       "swapping operand %d and %d "
+					       "of lane %d\n",
+					       n, n + perms[lane] + 1, lane);
 			    std::swap (chains[lane][n],
 				       chains[lane][n + perms[lane] + 1]);
 			    perms[lane]++;