[RFC,3/5] vect: Enable lane-reducing operation that is not loop reduction statement

Message ID LV2PR01MB783953E7DB764D4FBCAF38D1F7AF2@LV2PR01MB7839.prod.exchangelabs.com
State New
Headers
Series [RFC,1/5] vect: Fix single_imm_use in tree_vect_patterns |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Feng Xue OS July 21, 2024, 9:15 a.m. UTC
  This patch extends the original vect analysis and transform to support a new
kind of lane-reducing operation that participates in loop reduction indirectly.
The operation itself is not a reduction statement, but its value is eventually
accumulated into the reduction result.

Thanks,
Feng
---
gcc/
    	* tree-vect-loop.cc (vectorizable_lane_reducing): Allow indirect lane-
	reducing operation.
	(vect_transform_reduction): Extend transform for indirect lane-reducing
	operation.
---
 gcc/tree-vect-loop.cc | 48 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 8 deletions(-)
  

Patch

From 5e65c65786d9594c172b58a6cd1af50c67efb927 Mon Sep 17 00:00:00 2001
From: Feng Xue <fxue@os.amperecomputing.com>
Date: Wed, 24 Apr 2024 16:46:49 +0800
Subject: [PATCH 3/5] vect: Enable lane-reducing operation that is not loop
 reduction statement

This patch extends the original vect analysis and transform to support a new
kind of lane-reducing operation that participates in loop reduction indirectly.
The operation itself is not a reduction statement, but its value is eventually
accumulated into the reduction result.

2024-04-24 Feng Xue <fxue@os.amperecomputing.com>

gcc/
    	* tree-vect-loop.cc (vectorizable_lane_reducing): Allow indirect lane-
	reducing operation.
	(vect_transform_reduction): Extend transform for indirect lane-reducing
	operation.
---
 gcc/tree-vect-loop.cc | 48 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d7d628efa60..c344158b419 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7520,9 +7520,7 @@  vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
 
   stmt_vec_info reduc_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
 
-  /* TODO: Support lane-reducing operation that does not directly participate
-     in loop reduction.  */
-  if (!reduc_info || STMT_VINFO_REDUC_IDX (stmt_info) < 0)
+  if (!reduc_info)
     return false;
 
   /* Lane-reducing pattern inside any inner loop of LOOP_VINFO is not
@@ -7530,7 +7528,16 @@  vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
   gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_reduction_def);
   gcc_assert (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION);
 
-  for (int i = 0; i < (int) gimple_num_ops (stmt) - 1; i++)
+  int sum_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+  int num_ops = (int) gimple_num_ops (stmt) - 1;
+
+  /* Participate in loop reduction either directly or indirectly.  */
+  if (sum_idx >= 0)
+    gcc_assert (sum_idx  == num_ops - 1);
+  else
+    sum_idx = num_ops - 1;
+
+  for (int i = 0; i < num_ops; i++)
     {
       stmt_vec_info def_stmt_info;
       slp_tree slp_op;
@@ -7573,7 +7580,24 @@  vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
 
   tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (stmt_info);
 
-  gcc_assert (vectype_in);
+  if (!vectype_in)
+    {
+      enum vect_def_type dt;
+      tree rhs1 = gimple_assign_rhs1 (stmt);
+
+      if (!vect_is_simple_use (rhs1, loop_vinfo, &dt, &vectype_in))
+	return false;
+
+      if (!vectype_in)
+	{
+	  vectype_in = get_vectype_for_scalar_type (loop_vinfo,
+						    TREE_TYPE (rhs1));
+	  if (!vectype_in)
+	    return false;
+	}
+
+      STMT_VINFO_REDUC_VECTYPE_IN (stmt_info) = vectype_in;
+    }
 
   /* Compute number of effective vector statements for costing.  */
   unsigned int ncopies_for_cost = vect_get_num_copies (loop_vinfo, slp_node,
@@ -8750,9 +8774,17 @@  vect_transform_reduction (loop_vec_info loop_vinfo,
   gcc_assert (single_defuse_cycle || lane_reducing);
 
   if (lane_reducing)
-    {
-      /* The last operand of lane-reducing op is for reduction.  */
-      gcc_assert (reduc_index == (int) op.num_ops - 1);
+    {  
+      if (reduc_index < 0)
+	{
+	  reduc_index = (int) op.num_ops - 1;
+	  single_defuse_cycle = false;
+	}
+      else
+	{
+	  /* The last operand of lane-reducing op is for reduction.  */
+	  gcc_assert (reduc_index == (int) op.num_ops - 1);
+	}
     }
 
   /* Create the destination vector  */
-- 
2.17.1