From patchwork Fri Nov 12 17:57:29 2021
From: Richard Sandiford
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 1/5] vect: Use code_helper when building SLP nodes
Date: Fri, 12 Nov 2021 17:57:29 +0000

This patch uses code_helper to represent the common (and alternative)
operations when building an SLP node.  It's not much of a saving on
its own, but it helps with later patches.

Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


gcc/
        * tree-vect-slp.c (vect_build_slp_tree_1): Use code_helper to
        record the operations performed by statements, only using
        CALL_EXPR for things that don't map to built-in or internal
        functions.  For shifts, require all shift amounts to be equal
        if optab_vector is not supported but optab_scalar is.
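[For readers unfamiliar with code_helper: it is a small wrapper that can
hold either a tree code or a combined_fn, which is what lets the SLP
builder record "the operation" of a statement uniformly whether the
statement is an assignment or a call.  The sketch below is illustrative
only (note the different class name); GCC's real class and its helpers
such as internal_fn_p and as_internal_fn differ in detail.

    /* Illustrative sketch of the code_helper idea, assuming GCC's
       tree_code and combined_fn enums: tree codes are stored as
       non-negative ints and combined_fns are stored negated, so a
       single int can represent either.  Boundary cases are glossed
       over here.  */
    class code_helper_sketch
    {
    public:
      code_helper_sketch (enum tree_code code) : rep ((int) code) {}
      code_helper_sketch (combined_fn fn) : rep (-(int) fn) {}
      bool is_tree_code () const { return rep > 0; }
      bool is_fn_code () const { return rep < 0; }
      explicit operator enum tree_code () const
        { return (enum tree_code) rep; }
      explicit operator combined_fn () const
        { return (combined_fn) -rep; }
      bool operator== (const code_helper_sketch &o) const
        { return rep == o.rep; }
    private:
      int rep;
    };
]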
--- gcc/tree-vect-slp.c | 77 +++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 51 deletions(-) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 94c75497495..f4123cf830a 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -876,17 +876,13 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, { unsigned int i; stmt_vec_info first_stmt_info = stmts[0]; - enum tree_code first_stmt_code = ERROR_MARK; - enum tree_code alt_stmt_code = ERROR_MARK; - enum tree_code rhs_code = ERROR_MARK; - enum tree_code first_cond_code = ERROR_MARK; + code_helper first_stmt_code = ERROR_MARK; + code_helper alt_stmt_code = ERROR_MARK; + code_helper rhs_code = ERROR_MARK; + code_helper first_cond_code = ERROR_MARK; tree lhs; bool need_same_oprnds = false; tree vectype = NULL_TREE, first_op1 = NULL_TREE; - optab optab; - int icode; - machine_mode optab_op2_mode; - machine_mode vec_mode; stmt_vec_info first_load = NULL, prev_first_load = NULL; bool first_stmt_load_p = false, load_p = false; bool first_stmt_phi_p = false, phi_p = false; @@ -966,13 +962,16 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, gcall *call_stmt = dyn_cast (stmt); if (call_stmt) { - rhs_code = CALL_EXPR; + combined_fn cfn = gimple_call_combined_fn (call_stmt); + if (cfn != CFN_LAST) + rhs_code = cfn; + else + rhs_code = CALL_EXPR; - if (gimple_call_internal_p (stmt, IFN_MASK_LOAD)) + if (cfn == CFN_MASK_LOAD) load_p = true; - else if ((gimple_call_internal_p (call_stmt) - && (!vectorizable_internal_fn_p - (gimple_call_internal_fn (call_stmt)))) + else if ((internal_fn_p (cfn) + && !vectorizable_internal_fn_p (as_internal_fn (cfn))) || gimple_call_tail_p (call_stmt) || gimple_call_noreturn_p (call_stmt) || gimple_call_chain (call_stmt)) @@ -1013,32 +1012,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, || rhs_code == LROTATE_EXPR || rhs_code == RROTATE_EXPR) { - vec_mode = TYPE_MODE (vectype); - /* First see if we have a vector/vector shift. */ - optab = optab_for_tree_code (rhs_code, vectype, - optab_vector); - - if (!optab - || optab_handler (optab, vec_mode) == CODE_FOR_nothing) + if (!directly_supported_p (rhs_code, vectype, optab_vector)) { /* No vector/vector shift, try for a vector/scalar shift. */ - optab = optab_for_tree_code (rhs_code, vectype, - optab_scalar); - - if (!optab) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: no optab.\n"); - if (is_a (vinfo) && i != 0) - continue; - /* Fatal mismatch. 
*/ - matches[0] = false; - return false; - } - icode = (int) optab_handler (optab, vec_mode); - if (icode == CODE_FOR_nothing) + if (!directly_supported_p (rhs_code, vectype, optab_scalar)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -1050,12 +1028,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, matches[0] = false; return false; } - optab_op2_mode = insn_data[icode].operand[2].mode; - if (!VECTOR_MODE_P (optab_op2_mode)) - { - need_same_oprnds = true; - first_op1 = gimple_assign_rhs2 (stmt); - } + need_same_oprnds = true; + first_op1 = gimple_assign_rhs2 (stmt); } } else if (rhs_code == WIDEN_LSHIFT_EXPR) @@ -1081,8 +1055,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, return false; } } - else if (call_stmt - && gimple_call_internal_p (call_stmt, IFN_DIV_POW2)) + else if (rhs_code == CFN_DIV_POW2) { need_same_oprnds = true; first_op1 = gimple_call_arg (call_stmt, 1); @@ -1139,10 +1112,10 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, continue; } - if (!load_p && rhs_code == CALL_EXPR) + if (!load_p && call_stmt) { if (!compatible_calls_p (as_a (stmts[0]->stmt), - as_a (stmt))) + call_stmt)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -1243,10 +1216,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, /* Not memory operation. */ if (!phi_p - && TREE_CODE_CLASS (rhs_code) != tcc_binary - && TREE_CODE_CLASS (rhs_code) != tcc_unary - && TREE_CODE_CLASS (rhs_code) != tcc_expression - && TREE_CODE_CLASS (rhs_code) != tcc_comparison + && rhs_code.is_tree_code () + && TREE_CODE_CLASS (tree_code (rhs_code)) != tcc_binary + && TREE_CODE_CLASS (tree_code (rhs_code)) != tcc_unary + && TREE_CODE_CLASS (tree_code (rhs_code)) != tcc_expression + && TREE_CODE_CLASS (tree_code (rhs_code)) != tcc_comparison && rhs_code != VIEW_CONVERT_EXPR && rhs_code != CALL_EXPR && rhs_code != BIT_FIELD_REF) @@ -1308,7 +1282,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, /* If we allowed a two-operation SLP node verify the target can cope with the permute we are going to use. 
*/ if (alt_stmt_code != ERROR_MARK - && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) + && (!alt_stmt_code.is_tree_code () + || TREE_CODE_CLASS (tree_code (alt_stmt_code)) != tcc_reference)) { *two_operators = true; }

From patchwork Fri Nov 12 17:59:22 2021
From: Richard Sandiford
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 2/5] vect: Use generalised accessors to build SLP nodes
Date: Fri, 12 Nov 2021 17:59:22 +0000

This patch adds:

- gimple_num_args
- gimple_arg
- gimple_arg_ptr

for accessing rhs operands of an assignment, call or PHI.  This is
similar to the existing gimple_get_lhs.

I guess there's a danger that these routines could be overused,
such as in cases where gimple_assign_rhs1 etc. would be more
appropriate.  I think the routines are still worth having though.
These days, most new operations are added as internal functions rather
than tree codes, so it's useful to be able to handle assignments and
calls in a consistent way.

The patch also generalises the way that SLP child nodes map to gimple
stmt operands.  This is useful for later patches.

Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
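[As a usage sketch before the patch itself: the walker below is
hypothetical, but gimple_num_args and gimple_arg are the accessors the
patch adds to gimple.h, and the point is that a single loop now covers
all three statement kinds.

    /* Hypothetical example: return true if TARGET appears among the
       rhs operands of STMT, where STMT may be an assignment, a call
       or a PHI.  */
    static bool
    uses_operand_p (gimple *stmt, tree target)
    {
      if (!is_a <gassign *> (stmt)
          && !is_a <gcall *> (stmt)
          && !is_a <gphi *> (stmt))
        return false;
      for (unsigned int i = 0; i < gimple_num_args (stmt); ++i)
        if (gimple_arg (stmt, i) == target)
          return true;
      return false;
    }
]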
Richard gcc/ * gimple.h (gimple_num_args, gimple_arg, gimple_arg_ptr): New functions. * tree-vect-slp.c (cond_expr_maps, arg2_map): New variables. (vect_get_operand_map): New function. (vect_get_and_check_slp_defs): Fix outdated comment. Use vect_get_operand_map and new gimple argument accessors. (vect_build_slp_tree_2): Likewise. --- gcc/gimple.h | 38 ++++++++++++ gcc/tree-vect-slp.c | 148 +++++++++++++++++++++++--------------------- 2 files changed, 114 insertions(+), 72 deletions(-) diff --git a/gcc/gimple.h b/gcc/gimple.h index 3cde3cde7fe..f7fdefc5362 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -4692,6 +4692,44 @@ gimple_phi_arg_has_location (const gphi *phi, size_t i) return gimple_phi_arg_location (phi, i) != UNKNOWN_LOCATION; } +/* Return the number of arguments that can be accessed by gimple_arg. */ + +static inline unsigned +gimple_num_args (const gimple *gs) +{ + if (auto phi = dyn_cast (gs)) + return gimple_phi_num_args (phi); + if (auto call = dyn_cast (gs)) + return gimple_call_num_args (call); + return gimple_num_ops (as_a (gs)) - 1; +} + +/* GS must be an assignment, a call, or a PHI. + If it's an assignment, return rhs operand I. + If it's a call, return function argument I. + If it's a PHI, return the value of PHI argument I. */ + +static inline tree +gimple_arg (const gimple *gs, unsigned int i) +{ + if (auto phi = dyn_cast (gs)) + return gimple_phi_arg_def (phi, i); + if (auto call = dyn_cast (gs)) + return gimple_call_arg (call, i); + return gimple_op (as_a (gs), i + 1); +} + +/* Return a pointer to gimple_arg (GS, I). */ + +static inline tree * +gimple_arg_ptr (gimple *gs, unsigned int i) +{ + if (auto phi = dyn_cast (gs)) + return gimple_phi_arg_def_ptr (phi, i); + if (auto call = dyn_cast (gs)) + return gimple_call_arg_ptr (call, i); + return gimple_op_ptr (as_a (gs), i + 1); +} /* Return the region number for GIMPLE_RESX RESX_STMT. */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index f4123cf830a..2594ab7607f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -454,15 +454,57 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb) && (dtb == vect_external_def || dtb == vect_constant_def))); } +static const int cond_expr_maps[3][5] = { + { 4, -1, -2, 1, 2 }, + { 4, -2, -1, 1, 2 }, + { 4, -1, -2, 2, 1 } +}; +static const int arg2_map[] = { 1, 2 }; + +/* For most SLP statements, there is a one-to-one mapping between + gimple arguments and child nodes. If that is not true for STMT, + return an array that contains: + + - the number of child nodes, followed by + - for each child node, the index of the argument associated with that node. + The special index -1 is the first operand of an embedded comparison and + the special index -2 is the second operand of an embedded comparison. + + SWAP is as for vect_get_and_check_slp_defs. */ + +static const int * +vect_get_operand_map (const gimple *stmt, unsigned char swap = 0) +{ + if (auto assign = dyn_cast (stmt)) + { + if (gimple_assign_rhs_code (assign) == COND_EXPR + && COMPARISON_CLASS_P (gimple_assign_rhs1 (assign))) + return cond_expr_maps[swap]; + } + gcc_assert (!swap); + if (auto call = dyn_cast (stmt)) + { + if (gimple_call_internal_p (call)) + switch (gimple_call_internal_fn (call)) + { + case IFN_MASK_LOAD: + return arg2_map; + + default: + break; + } + } + return nullptr; +} + /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that they are of a valid type and that they match the defs of the first stmt of the SLP group (stored in OPRNDS_INFO). 
This function tries to match stmts - by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP - indicates swap is required for cond_expr stmts. Specifically, *SWAP + by swapping operands of STMTS[STMT_NUM] when possible. Non-zero SWAP + indicates swap is required for cond_expr stmts. Specifically, SWAP is 1 if STMT is cond and operands of comparison need to be swapped; - *SWAP is 2 if STMT is cond and code of comparison needs to be inverted. - If there is any operand swap in this function, *SWAP is set to non-zero - value. + SWAP is 2 if STMT is cond and code of comparison needs to be inverted. + If there was a fatal error return -1; if the error could be corrected by swapping operands of father node of this one, return 1; if everything is ok return 0. */ @@ -477,76 +519,48 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, unsigned int i, number_of_oprnds; enum vect_def_type dt = vect_uninitialized_def; slp_oprnd_info oprnd_info; - int first_op_idx = 1; unsigned int commutative_op = -1U; - bool first_op_cond = false; bool first = stmt_num == 0; + if (!is_a (stmt_info->stmt) + && !is_a (stmt_info->stmt) + && !is_a (stmt_info->stmt)) + return -1; + + number_of_oprnds = gimple_num_args (stmt_info->stmt); + const int *map = vect_get_operand_map (stmt_info->stmt, swap); + if (map) + number_of_oprnds = *map++; if (gcall *stmt = dyn_cast (stmt_info->stmt)) { - number_of_oprnds = gimple_call_num_args (stmt); - first_op_idx = 3; if (gimple_call_internal_p (stmt)) { internal_fn ifn = gimple_call_internal_fn (stmt); commutative_op = first_commutative_argument (ifn); - - /* Masked load, only look at mask. */ - if (ifn == IFN_MASK_LOAD) - { - number_of_oprnds = 1; - /* Mask operand index. */ - first_op_idx = 5; - } } } else if (gassign *stmt = dyn_cast (stmt_info->stmt)) { - enum tree_code code = gimple_assign_rhs_code (stmt); - number_of_oprnds = gimple_num_ops (stmt) - 1; - /* Swap can only be done for cond_expr if asked to, otherwise we - could result in different comparison code to the first stmt. */ - if (code == COND_EXPR - && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt))) - { - first_op_cond = true; - number_of_oprnds++; - } - else - commutative_op = commutative_tree_code (code) ? 0U : -1U; + if (commutative_tree_code (gimple_assign_rhs_code (stmt))) + commutative_op = 0; } - else if (gphi *stmt = dyn_cast (stmt_info->stmt)) - number_of_oprnds = gimple_phi_num_args (stmt); - else - return -1; bool swapped = (swap != 0); bool backedge = false; - gcc_assert (!swapped || first_op_cond); enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds); for (i = 0; i < number_of_oprnds; i++) { - if (first_op_cond) - { - /* Map indicating how operands of cond_expr should be swapped. */ - int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}}; - int *map = maps[swap]; - - if (i < 2) - oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt, - first_op_idx), map[i]); - else - oprnd = gimple_op (stmt_info->stmt, map[i]); - } - else if (gphi *stmt = dyn_cast (stmt_info->stmt)) + int opno = map ? 
map[i] : int (i); + if (opno < 0) + oprnd = TREE_OPERAND (gimple_arg (stmt_info->stmt, 0), -1 - opno); + else { - oprnd = gimple_phi_arg_def (stmt, i); - backedge = dominated_by_p (CDI_DOMINATORS, - gimple_phi_arg_edge (stmt, i)->src, - gimple_bb (stmt_info->stmt)); + oprnd = gimple_arg (stmt_info->stmt, opno); + if (gphi *stmt = dyn_cast (stmt_info->stmt)) + backedge = dominated_by_p (CDI_DOMINATORS, + gimple_phi_arg_edge (stmt, opno)->src, + gimple_bb (stmt_info->stmt)); } - else - oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i)); if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR) oprnd = TREE_OPERAND (oprnd, 0); @@ -1140,9 +1154,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (need_same_oprnds) { - tree other_op1 = (call_stmt - ? gimple_call_arg (call_stmt, 1) - : gimple_assign_rhs2 (stmt)); + tree other_op1 = gimple_arg (stmt, 1); if (!operand_equal_p (first_op1, other_op1, 0)) { if (dump_enabled_p ()) @@ -1601,19 +1613,15 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, matches[0] = false; stmt_vec_info stmt_info = stmts[0]; - if (gcall *stmt = dyn_cast (stmt_info->stmt)) - nops = gimple_call_num_args (stmt); - else if (gassign *stmt = dyn_cast (stmt_info->stmt)) - { - nops = gimple_num_ops (stmt) - 1; - if (gimple_assign_rhs_code (stmt) == COND_EXPR) - nops++; - } - else if (gphi *phi = dyn_cast (stmt_info->stmt)) - nops = gimple_phi_num_args (phi); - else + if (!is_a (stmt_info->stmt) + && !is_a (stmt_info->stmt) + && !is_a (stmt_info->stmt)) return NULL; + nops = gimple_num_args (stmt_info->stmt); + if (const int *map = vect_get_operand_map (stmt_info->stmt)) + nops = map[0]; + /* If the SLP node is a PHI (induction or reduction), terminate the recursion. */ bool *skip_args = XALLOCAVEC (bool, nops); @@ -1684,11 +1692,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) { if (gcall *stmt = dyn_cast (stmt_info->stmt)) - { - /* Masked load. 
*/ - gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)); - nops = 1; - } + gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)); else { *max_nunits = this_max_nunits;

From patchwork Fri Nov 12 18:00:52 2021
From: Richard Sandiford
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 3/5] vect: Support gather loads with SLP
Date: Fri, 12 Nov 2021 18:00:52 +0000

This patch adds SLP support for IFN_GATHER_LOAD.  Like the SLP support
for IFN_MASK_LOAD, it works by treating only some of the arguments as
child nodes.  Unlike IFN_MASK_LOAD, it requires the other arguments
(base, scale, and extension type) to be the same for all calls in the
group.  It does not require/expect the loads to be in a group (which
probably wouldn't make sense for gathers).

I was worried about the possible alias effect of moving gathers around
to be part of the same SLP group.  The patch therefore makes
vect_analyze_data_ref_dependence treat gathers and scatters as a
top-level concern, punting if the accesses aren't completely
independent and if the user hasn't told us that a particular VF is
safe.  I think in practice we already punted in the same circumstances;
the idea is just to make it more explicit.
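[For concreteness, the "user has told us" case refers to loop->safelen,
which can be set with an OpenMP simd pragma.  The loop below is
illustrative, not from the patch's testsuite: compiled with
-fopenmp-simd, the safelen clause asserts that up to 8 consecutive
iterations may run concurrently, so the gather can still be vectorized
for VFs up to 8 even though y and x might alias.

    /* Illustrative example, not from the patch.  */
    void
    f (int *y, int *x, int *idx, int n)
    {
    #pragma omp simd safelen(8)
      for (int i = 0; i < n; ++i)
        y[i] = x[idx[i]] + 1;
    }
]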
Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard gcc/ * doc/sourcebuild.texi (vect_gather_load_ifn): Document. * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Commonize safelen handling. Punt for anything involving gathers and scatters unless safelen says otherwise. * tree-vect-slp.c (arg1_map): New variable. (vect_get_operand_map): Handle IFN_GATHER_LOAD. (vect_build_slp_tree_1): Likewise. (vect_build_slp_tree_2): Likewise. (compatible_calls_p): If vect_get_operand_map returns nonnull, check that any skipped arguments are equal. (vect_slp_analyze_node_operations_1): Tighten reduction check. * tree-vect-stmts.c (check_load_store_for_partial_vectors): Take an ncopies argument. (vect_get_gather_scatter_ops): Take slp_node and ncopies arguments. Handle SLP nodes. (vectorizable_store, vectorizable_load): Adjust accordingly. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_gather_load_ifn): New target test. * gcc.dg/vect/vect-gather-1.c: New test. * gcc.dg/vect/vect-gather-2.c: Likewise. * gcc.target/aarch64/sve/gather_load_11.c: Likewise. --- gcc/doc/sourcebuild.texi | 4 ++ gcc/testsuite/gcc.dg/vect/vect-gather-1.c | 60 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-gather-2.c | 36 +++++++++++ .../gcc.target/aarch64/sve/gather_load_11.c | 49 ++++++++++++++ gcc/testsuite/lib/target-supports.exp | 6 ++ gcc/tree-vect-data-refs.c | 64 +++++++++---------- gcc/tree-vect-slp.c | 29 +++++++-- gcc/tree-vect-stmts.c | 26 ++++---- 8 files changed, 223 insertions(+), 51 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-gather-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-gather-2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/gather_load_11.c diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 40b1e0d8167..702cd0c53e4 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1639,6 +1639,10 @@ Target supports vector masked loads. @item vect_masked_store Target supports vector masked stores. +@item vect_gather_load_ifn +Target supports vector gather loads using internal functions +(rather than via built-in functions or emulation). + @item vect_scatter_store Target supports vector scatter stores. 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c new file mode 100644 index 00000000000..4cee73fc775 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c @@ -0,0 +1,60 @@ +#include "tree-vect.h" + +#define N 16 + +void __attribute__((noipa)) +f (int *restrict y, int *restrict x, int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = x[indices[i * 2]] + 1; + y[i * 2 + 1] = x[indices[i * 2 + 1]] + 2; + } +} + +int y[N * 2]; +int x[N * 2] = { + 72704, 52152, 51301, 96681, + 57937, 60490, 34504, 60944, + 42225, 28333, 88336, 74300, + 29250, 20484, 38852, 91536, + 86917, 63941, 31590, 21998, + 22419, 26974, 28668, 13968, + 3451, 20247, 44089, 85521, + 22871, 87362, 50555, 85939 +}; +int indices[N * 2] = { + 15, 16, 9, 19, + 7, 22, 19, 1, + 22, 13, 15, 30, + 5, 12, 11, 11, + 10, 25, 5, 20, + 22, 24, 24, 28, + 30, 19, 6, 4, + 7, 12, 8, 21 +}; +int expected[N * 2] = { + 91537, 86919, 28334, 22000, + 60945, 28670, 21999, 52154, + 28669, 20486, 91537, 50557, + 60491, 29252, 74301, 74302, + 88337, 20249, 60491, 22421, + 28669, 3453, 3452, 22873, + 50556, 22000, 34505, 57939, + 60945, 29252, 42226, 26976 +}; + +int +main (void) +{ + check_vect (); + + f (y, x, indices); + for (int i = 0; i < 32; ++i) + if (y[i] != expected[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c new file mode 100644 index 00000000000..a1f6ba458a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ + +#define N 16 + +void +f1 (int *restrict y, int *restrict x1, int *restrict x2, + int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = x1[indices[i * 2]] + 1; + y[i * 2 + 1] = x2[indices[i * 2 + 1]] + 2; + } +} + +void +f2 (int *restrict y, int *restrict x, int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = x[indices[i * 2]] + 1; + y[i * 2 + 1] = x[indices[i * 2 + 1] * 2] + 2; + } +} + +void +f3 (int *restrict y, int *restrict x, int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = x[indices[i * 2]] + 1; + y[i * 2 + 1] = x[(unsigned int) indices[i * 2 + 1]] + 2; + } +} + +/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_11.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_11.c new file mode 100644 index 00000000000..f6f78c1c8d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_11.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-vect-cost-model" } */ + +#include + +void +f1 (int32_t *restrict y, int32_t *restrict x, int32_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = x[index[i * 2]] + 1; + y[i * 2 + 1] = x[index[i * 2 + 1]] + 2; + } +} + +void +f2 (int32_t *restrict y, int32_t *restrict x, uint32_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = x[index[i * 2]] + 1; + y[i * 2 + 1] = x[index[i * 2 + 1]] + 2; + } +} + +void +f3 (int32_t *restrict y, int32_t *restrict x, uint64_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = x[index[i * 2]] + 1; + y[i * 2 + 1] = x[index[i * 2 + 1]] + 2; + } +} + +void +f4 (int64_t *restrict y, int64_t *restrict x, uint64_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i 
* 2] = x[index[i * 2]] + 1; + y[i * 2 + 1] = x[index[i * 2 + 1]] + 2; + } +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw #?2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw #?2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl #?2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl #?3\]} 1 } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 8cbda192fe0..e3cada910ca 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7985,6 +7985,12 @@ proc check_effective_target_vect_masked_store { } { || [istarget amdgcn*-*-*] }] } +# Return 1 if the target supports vector gather loads via internal functions. + +proc check_effective_target_vect_gather_load_ifn { } { + return [expr { [check_effective_target_aarch64_sve] }] +} + # Return 1 if the target supports vector scatter stores. proc check_effective_target_vect_scatter_store { } { diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 888ad72f3a9..12a82cd694a 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -359,6 +359,20 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, lambda_vector dist_v; unsigned int loop_depth; + /* If user asserted safelen consecutive iterations can be + executed concurrently, assume independence. */ + auto apply_safelen = [&]() + { + if (loop->safelen >= 2) + { + if ((unsigned int) loop->safelen < *max_vf) + *max_vf = loop->safelen; + LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; + return true; + } + return false; + }; + /* In loop analysis all data references should be vectorizable. */ if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a) || !STMT_VINFO_VECTORIZABLE (stmtinfo_b)) @@ -393,26 +407,23 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, get_alias_set (DR_REF (drb)))) return opt_result::success (); + if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a) + || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b)) + { + if (apply_safelen ()) + return opt_result::success (); + + return opt_result::failure_at + (stmtinfo_a->stmt, + "possible alias involving gather/scatter between %T and %T\n", + DR_REF (dra), DR_REF (drb)); + } + /* Unknown data dependence. */ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) { - /* If user asserted safelen consecutive iterations can be - executed concurrently, assume independence. */ - if (loop->safelen >= 2) - { - if ((unsigned int) loop->safelen < *max_vf) - *max_vf = loop->safelen; - LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; - return opt_result::success (); - } - - if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a) - || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b)) - return opt_result::failure_at - (stmtinfo_a->stmt, - "versioning for alias not supported for: " - "can't determine dependence between %T and %T\n", - DR_REF (dra), DR_REF (drb)); + if (apply_safelen ()) + return opt_result::success (); if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt, @@ -427,23 +438,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, /* Known data dependence. */ if (DDR_NUM_DIST_VECTS (ddr) == 0) { - /* If user asserted safelen consecutive iterations can be - executed concurrently, assume independence. 
*/ - if (loop->safelen >= 2) - { - if ((unsigned int) loop->safelen < *max_vf) - *max_vf = loop->safelen; - LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; - return opt_result::success (); - } - - if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a) - || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b)) - return opt_result::failure_at - (stmtinfo_a->stmt, - "versioning for alias not supported for: " - "bad dist vector for %T and %T\n", - DR_REF (dra), DR_REF (drb)); + if (apply_safelen ()) + return opt_result::success (); if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt, diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 2594ab7607f..0f09fc1fda8 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -459,6 +459,7 @@ static const int cond_expr_maps[3][5] = { { 4, -2, -1, 1, 2 }, { 4, -1, -2, 2, 1 } }; +static const int arg1_map[] = { 1, 1 }; static const int arg2_map[] = { 1, 2 }; /* For most SLP statements, there is a one-to-one mapping between @@ -490,6 +491,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0) case IFN_MASK_LOAD: return arg2_map; + case IFN_GATHER_LOAD: + return arg1_map; + default: break; } @@ -825,6 +829,20 @@ compatible_calls_p (gcall *call1, gcall *call2) if (gimple_call_fntype (call1) != gimple_call_fntype (call2)) return false; } + + /* Check that any unvectorized arguments are equal. */ + if (const int *map = vect_get_operand_map (call1)) + { + unsigned int nkept = *map++; + unsigned int mapi = 0; + for (unsigned int i = 0; i < nargs; ++i) + if (mapi < nkept && map[mapi] == int (i)) + mapi += 1; + else if (!operand_equal_p (gimple_call_arg (call1, i), + gimple_call_arg (call2, i))) + return false; + } + return true; } @@ -982,7 +1000,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, else rhs_code = CALL_EXPR; - if (cfn == CFN_MASK_LOAD) + if (cfn == CFN_MASK_LOAD || cfn == CFN_GATHER_LOAD) load_p = true; else if ((internal_fn_p (cfn) && !vectorizable_internal_fn_p (as_internal_fn (cfn))) @@ -1126,7 +1144,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, continue; } - if (!load_p && call_stmt) + if (call_stmt && first_stmt_code != CFN_MASK_LOAD) { if (!compatible_calls_p (as_a (stmts[0]->stmt), call_stmt)) @@ -1211,7 +1229,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, } /* Grouped access. */ else { - if (load_p) + if (load_p && rhs_code != CFN_GATHER_LOAD) { /* Not grouped load. */ if (dump_enabled_p ()) @@ -1692,7 +1710,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) { if (gcall *stmt = dyn_cast (stmt_info->stmt)) - gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)); + gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD) + || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)); else { *max_nunits = this_max_nunits; @@ -4408,7 +4427,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, calculated by the recursive call). Otherwise it is the number of scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by VF divided by the number of elements in a vector. 
*/ - if (!STMT_VINFO_GROUPED_ACCESS (stmt_info) + if (!STMT_VINFO_DATA_REF (stmt_info) && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) { for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 101f61feff6..06da5a9bc13 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1674,6 +1674,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, int group_size, vect_memory_access_type memory_access_type, + unsigned int ncopies, gather_scatter_info *gs_info, tree scalar_mask) { @@ -1698,7 +1699,6 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; return; } - unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask); return; } @@ -1721,7 +1721,6 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; return; } - unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask); return; } @@ -2963,6 +2962,7 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info, static void vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, class loop *loop, stmt_vec_info stmt_info, + slp_tree slp_node, unsigned int ncopies, gather_scatter_info *gs_info, tree *dataref_ptr, vec *vec_offset) { @@ -2975,10 +2975,12 @@ vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); gcc_assert (!new_bb); } - unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype); - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies, - gs_info->offset, vec_offset, - gs_info->offset_vectype); + if (slp_node) + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset); + else + vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies, + gs_info->offset, vec_offset, + gs_info->offset_vectype); } /* Prepare to implement a grouped or strided load or store using @@ -7484,7 +7486,7 @@ vectorizable_store (vec_info *vinfo, && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type, group_size, memory_access_type, - &gs_info, mask); + ncopies, &gs_info, mask); if (slp_node && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], @@ -8147,8 +8149,8 @@ vectorizable_store (vec_info *vinfo, else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, - &gs_info, &dataref_ptr, - &vec_offsets); + slp_node, ncopies, &gs_info, + &dataref_ptr, &vec_offsets); vec_offset = vec_offsets[0]; } else @@ -8827,7 +8829,7 @@ vectorizable_load (vec_info *vinfo, && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD, group_size, memory_access_type, - &gs_info, mask); + ncopies, &gs_info, mask); if (dump_enabled_p () && memory_access_type != VMAT_ELEMENTWISE @@ -9445,8 +9447,8 @@ vectorizable_load (vec_info *vinfo, else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, - &gs_info, &dataref_ptr, - &vec_offsets); + slp_node, ncopies, &gs_info, + &dataref_ptr, &vec_offsets); } else dataref_ptr From patchwork Fri Nov 12 18:04:13 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
From: Richard Sandiford
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 4/5] if-conv: Apply VN to hoisted conversions
Date: Fri, 12 Nov 2021 18:04:13 +0000

This patch is a prerequisite for a later one.  At the moment,
if-conversion converts predicated POINTER_PLUS_EXPRs into non-wrapping
forms, which for:

    … = base + offset

becomes:

    tmp = (unsigned long) base
    … = tmp + offset

It then hoists these conversions out of the loop where possible.

However, because “base” is a valid gimple operand, there can be
multiple POINTER_PLUS_EXPRs with the same base, which can in turn lead
to multiple instances of the same conversion.  The later VN pass is
(and I think needs to be) restricted to the new if-converted code,
whereas here we're deliberately inserting the conversions before the
.LOOP_VECTORIZED condition:

    /* If we versioned loop then make sure to insert invariant
       stmts before the .LOOP_VECTORIZED check since the vectorizer
       will re-use that for things like runtime alias versioning
       whose condition can end up using those invariants.  */

We can therefore enter the vectoriser with redundant conversions.
The easiest fix seemed to be to defer the hoisting until after VN.
This catches other hoisting opportunities too.
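[To illustrate the redundancy, here is a hand-written gimple sketch,
not actual compiler output: two predicated pointer additions with the
same base each get their own conversion, and both copies used to be
hoisted.

    _1 = (unsigned long) base_5;
    _2 = _1 + 8;
    _3 = (unsigned long) base_5;   /* same conversion as _1 */
    _4 = _3 + 16;

Running VN before the hoisting folds _3 into _1, so the hoisting code
only has one conversion left to move out of the loop.]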
Hoisting the code from the (artificial) loop in pr99102.c means that it's no longer worth vectorising. The patch forces vectorisation instead of relying on the cost model. The patch also reverts pr87007-4.c and pr87007-5.c back to their original forms, undoing changes in 783dc66f9ccb0019c3dad. The code at the time the tests were added was: testl %edi, %edi je .L10 vxorps %xmm1, %xmm1, %xmm1 vsqrtsd d3(%rip), %xmm1, %xmm0 vsqrtsd d2(%rip), %xmm1, %xmm1 ... .L10: ret with the operations being hoisted, and the vxorps was specifically wanted (compared to the previous code). This patch restores the code to that form, with the hoisted operations and the vxorps. Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard gcc/ * tree-if-conv.c: Include tree-eh.h. (predicate_statements): Remove pe argument. Don't hoist statements here. (combine_blocks): Remove pe argument. (ifcvt_can_hoist, ifcvt_can_hoist_further): New functions. (ifcvt_hoist_invariants): Likewise. (tree_if_conversion): Update call to combine_blocks. Call ifcvt_hoist_invariants after VN. gcc/testsuite/ * gcc.dg/vect/pr99102.c: Add -fno-vect-cost-model. Revert: 2020-09-09 Richard Biener * gcc.target/i386/pr87007-4.c: Adjust. * gcc.target/i386/pr87007-5.c: Likewise. --- gcc/testsuite/gcc.dg/vect/pr99102.c | 2 +- gcc/testsuite/gcc.target/i386/pr87007-4.c | 2 +- gcc/testsuite/gcc.target/i386/pr87007-5.c | 2 +- gcc/tree-if-conv.c | 122 ++++++++++++++++++++-- 4 files changed, 114 insertions(+), 14 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr99102.c b/gcc/testsuite/gcc.dg/vect/pr99102.c index 6c1a13f0783..0d030d15c86 100644 --- a/gcc/testsuite/gcc.dg/vect/pr99102.c +++ b/gcc/testsuite/gcc.dg/vect/pr99102.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fdump-tree-vect-details" } */ /* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */ long a[44]; short d, e = -7; diff --git a/gcc/testsuite/gcc.target/i386/pr87007-4.c b/gcc/testsuite/gcc.target/i386/pr87007-4.c index 9c4b8005af3..e91bdcbac44 100644 --- a/gcc/testsuite/gcc.target/i386/pr87007-4.c +++ b/gcc/testsuite/gcc.target/i386/pr87007-4.c @@ -15,4 +15,4 @@ foo (int n, int k) d1 = ceil (d3); } -/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 0 } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr87007-5.c b/gcc/testsuite/gcc.target/i386/pr87007-5.c index e4d956a5d7f..20d13cf650b 100644 --- a/gcc/testsuite/gcc.target/i386/pr87007-5.c +++ b/gcc/testsuite/gcc.target/i386/pr87007-5.c @@ -15,4 +15,4 @@ foo (int n, int k) d1 = sqrt (d3); } -/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 0 } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index e88ddc9f788..0ad557a2f4d 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -121,6 +121,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-cfgcleanup.h" #include "tree-ssa-dse.h" #include "tree-vectorizer.h" +#include "tree-eh.h" /* Only handle PHIs with no more arguments unless we are asked to by simd pragma. 
*/ @@ -2496,7 +2497,7 @@ predicate_rhs_code (gassign *stmt, tree mask, tree cond, */ static void -predicate_statements (loop_p loop, edge pe) +predicate_statements (loop_p loop) { unsigned int i, orig_loop_num_nodes = loop->num_nodes; auto_vec vect_sizes; @@ -2597,13 +2598,7 @@ predicate_statements (loop_p loop, edge pe) { gassign *stmt2 = as_a (gsi_stmt (gsi2)); gsi_remove (&gsi2, false); - /* Make sure to move invariant conversions out of the - loop. */ - if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2)) - && expr_invariant_in_loop_p (loop, - gimple_assign_rhs1 (stmt2))) - gsi_insert_on_edge_immediate (pe, stmt2); - else if (first) + if (first) { gsi_insert_before (&gsi, stmt2, GSI_NEW_STMT); first = false; @@ -2684,7 +2679,7 @@ remove_conditions_and_labels (loop_p loop) blocks. Replace PHI nodes with conditional modify expressions. */ static void -combine_blocks (class loop *loop, edge pe) +combine_blocks (class loop *loop) { basic_block bb, exit_bb, merge_target_bb; unsigned int orig_loop_num_nodes = loop->num_nodes; @@ -2697,7 +2692,7 @@ combine_blocks (class loop *loop, edge pe) predicate_all_scalar_phis (loop); if (need_to_predicate || need_to_rewrite_undefined) - predicate_statements (loop, pe); + predicate_statements (loop); /* Merge basic blocks. */ exit_bb = NULL; @@ -3181,6 +3176,109 @@ ifcvt_local_dce (class loop *loop) } } +/* Return true if STMT can be hoisted from if-converted loop LOOP. */ + +static bool +ifcvt_can_hoist (class loop *loop, gimple *stmt) +{ + if (auto *call = dyn_cast (stmt)) + { + if (gimple_call_internal_p (call) + && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0) + return false; + } + else if (auto *assign = dyn_cast (stmt)) + { + if (gimple_assign_rhs_code (assign) == COND_EXPR) + return false; + } + else + return false; + + if (gimple_has_side_effects (stmt) + || gimple_could_trap_p (stmt) + || stmt_could_throw_p (cfun, stmt) + || gimple_vdef (stmt) + || gimple_vuse (stmt)) + return false; + + int num_args = gimple_num_args (stmt); + for (int i = 0; i < num_args; ++i) + if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i))) + return false; + + return true; +} + +/* PE is the preferred hoisting edge selected by tree_if_conversion, which + s known to be different from (and to dominate) the preheader edge of the + if-converted loop. We already know that STMT can be inserted on the loop + preheader edge. Return true if we prefer to insert it on PE instead. */ + +static bool +ifcvt_can_hoist_further (edge pe, gimple *stmt) +{ + /* As explained in tree_if_conversion, we want to hoist invariant + conversions further so that they can be reused by alias analysis. */ + auto *assign = dyn_cast (stmt); + if (assign + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) + { + tree rhs = gimple_assign_rhs1 (assign); + if (is_gimple_min_invariant (rhs)) + return true; + + if (TREE_CODE (rhs) == SSA_NAME) + { + basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); + if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb)) + return true; + } + } + return false; +} + +/* Hoist invariant statements from LOOP. PE is the preferred edge for + hoisting conversions, as selected by tree_if_conversion; see there + for details. 
*/ + +static void +ifcvt_hoist_invariants (class loop *loop, edge pe) +{ + gimple_stmt_iterator hoist_gsi = {}; + gimple_stmt_iterator hoist_gsi_pe = {}; + unsigned int num_blocks = loop->num_nodes; + basic_block *body = get_loop_body (loop); + for (unsigned int i = 0; i < num_blocks; ++i) + for (gimple_stmt_iterator gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);) + { + gimple *stmt = gsi_stmt (gsi); + if (ifcvt_can_hoist (loop, stmt)) + { + /* Once we've hoisted one statement, insert other statements + after it. */ + edge e = loop_preheader_edge (loop); + gimple_stmt_iterator *hoist_gsi_ptr = &hoist_gsi; + if (e != pe && ifcvt_can_hoist_further (pe, stmt)) + { + e = pe; + hoist_gsi_ptr = &hoist_gsi_pe; + } + gsi_remove (&gsi, false); + if (hoist_gsi_ptr->ptr) + gsi_insert_after (hoist_gsi_ptr, stmt, GSI_NEW_STMT); + else + { + gsi_insert_on_edge_immediate (e, stmt); + *hoist_gsi_ptr = gsi_for_stmt (stmt); + } + continue; + } + gsi_next (&gsi); + } + free (body); +} + /* If-convert LOOP when it is legal. For the moment this pass has no profitability analysis. Returns non-zero todo flags when something changed. */ @@ -3275,7 +3373,7 @@ tree_if_conversion (class loop *loop, vec *preds) /* Now all statements are if-convertible. Combine all the basic blocks into one huge basic block doing the if-conversion on-the-fly. */ - combine_blocks (loop, pe); + combine_blocks (loop); /* Perform local CSE, this esp. helps the vectorizer analysis if loads and stores are involved. CSE only the loop body, not the entry @@ -3297,6 +3395,8 @@ tree_if_conversion (class loop *loop, vec *preds) ifcvt_local_dce (loop); BITMAP_FREE (exit_bbs); + ifcvt_hoist_invariants (loop, pe); + todo |= TODO_cleanup_cfg; cleanup:

From patchwork Fri Nov 12 18:05:13 2021
From: Richard Sandiford
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 5/5] vect: Support masked gather loads with SLP
Date: Fri, 12 Nov 2021 18:05:13 +0000
This patch extends the previous SLP gather load support so that it
can handle masked loads too.

Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard

gcc/
        * tree-vect-slp.c (arg1_arg4_map): New variable.
        (vect_get_operand_map): Handle IFN_MASK_GATHER_LOAD.
        (vect_build_slp_tree_1): Likewise.
        (vect_build_slp_tree_2): Likewise.
        * tree-vect-stmts.c (vectorizable_load): Expect the mask to be the
        last SLP child node rather than the first.

gcc/testsuite/
        * gcc.dg/vect/vect-gather-3.c: New test.
        * gcc.dg/vect/vect-gather-4.c: Likewise.
        * gcc.target/aarch64/sve/mask_gather_load_8.c: Likewise.

--- gcc/testsuite/gcc.dg/vect/vect-gather-3.c | 64 ++++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-gather-4.c | 48 ++++++++++++++ .../aarch64/sve/mask_gather_load_8.c | 65 +++++++++++++++++++ gcc/tree-vect-slp.c | 15 ++++- gcc/tree-vect-stmts.c | 21 ++++-- 5 files changed, 203 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-gather-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-gather-4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_8.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c new file mode 100644 index 00000000000..738bd3f3106 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c @@ -0,0 +1,64 @@ +#include "tree-vect.h" + +#define N 16 + +void __attribute__((noipa)) +f (int *restrict y, int *restrict x, int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = (indices[i * 2] < N * 2 + ? x[indices[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2 + ?
x[indices[i * 2 + 1]] + 2 + : 2); + } +} + +int y[N * 2]; +int x[N * 2] = { + 72704, 52152, 51301, 96681, + 57937, 60490, 34504, 60944, + 42225, 28333, 88336, 74300, + 29250, 20484, 38852, 91536, + 86917, 63941, 31590, 21998, + 22419, 26974, 28668, 13968, + 3451, 20247, 44089, 85521, + 22871, 87362, 50555, 85939 +}; +int indices[N * 2] = { + 15, 0x10000, 0xcafe0, 19, + 7, 22, 19, 1, + 0x20000, 0x70000, 15, 30, + 5, 12, 11, 11, + 10, 25, 5, 20, + 22, 24, 32, 28, + 30, 19, 6, 0xabcdef, + 7, 12, 8, 21 +}; +int expected[N * 2] = { + 91537, 2, 1, 22000, + 60945, 28670, 21999, 52154, + 1, 2, 91537, 50557, + 60491, 29252, 74301, 74302, + 88337, 20249, 60491, 22421, + 28669, 3453, 1, 22873, + 50556, 22000, 34505, 2, + 60945, 29252, 42226, 26976 +}; + +int +main (void) +{ + check_vect (); + + f (y, x, indices); + for (int i = 0; i < 32; ++i) + if (y[i] != expected[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { vect_gather_load_ifn && vect_masked_load } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c new file mode 100644 index 00000000000..ee2e4e4999a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ + +#define N 16 + +void +f1 (int *restrict y, int *restrict x1, int *restrict x2, + int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = (indices[i * 2] < N * 2 + ? x1[indices[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2 + ? x2[indices[i * 2 + 1]] + 2 + : 2); + } +} + +void +f2 (int *restrict y, int *restrict x, int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = (indices[i * 2] < N * 2 + ? x[indices[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2 + ? x[indices[i * 2 + 1] * 2] + 2 + : 2); + } +} + +void +f3 (int *restrict y, int *restrict x, int *restrict indices) +{ + for (int i = 0; i < N; ++i) + { + y[i * 2] = (indices[i * 2] < N * 2 + ? x[indices[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2 + ? x[(unsigned int) indices[i * 2 + 1]] + 2 + : 2); + } +} + +/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_8.c new file mode 100644 index 00000000000..95767f30a80 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_8.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-vect-cost-model" } */ + +#include + +void +f1 (int32_t *restrict y, int32_t *restrict x, int32_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = (index[i * 2] < 128 + ? x[index[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (index[i * 2 + 1] < 128 + ? x[index[i * 2 + 1]] + 2 + : 2); + } +} + +void +f2 (int32_t *restrict y, int32_t *restrict x, uint32_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = (index[i * 2] < 128 + ? x[index[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (index[i * 2 + 1] < 128 + ? x[index[i * 2 + 1]] + 2 + : 2); + } +} + +void +f3 (int32_t *restrict y, int32_t *restrict x, uint64_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = (index[i * 2] < 128 + ? x[index[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (index[i * 2 + 1] < 128 + ? 
x[index[i * 2 + 1]] + 2 + : 2); + } +} + +void +f4 (int64_t *restrict y, int64_t *restrict x, uint64_t *restrict index) +{ + for (int i = 0; i < 100; ++i) + { + y[i * 2] = (index[i * 2] < 128 + ? x[index[i * 2]] + 1 + : 1); + y[i * 2 + 1] = (index[i * 2 + 1] < 128 + ? x[index[i * 2 + 1]] + 2 + : 2); + } +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw #?2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw #?2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl #?2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl #?3\]} 1 } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 0f09fc1fda8..35ec2e2ad5e 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -461,6 +461,7 @@ static const int cond_expr_maps[3][5] = { }; static const int arg1_map[] = { 1, 1 }; static const int arg2_map[] = { 1, 2 }; +static const int arg1_arg4_map[] = { 2, 1, 4 }; /* For most SLP statements, there is a one-to-one mapping between gimple arguments and child nodes. If that is not true for STMT, @@ -494,6 +495,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0) case IFN_GATHER_LOAD: return arg1_map; + case IFN_MASK_GATHER_LOAD: + return arg1_arg4_map; + default: break; } @@ -1000,7 +1004,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, else rhs_code = CALL_EXPR; - if (cfn == CFN_MASK_LOAD || cfn == CFN_GATHER_LOAD) + if (cfn == CFN_MASK_LOAD + || cfn == CFN_GATHER_LOAD + || cfn == CFN_MASK_GATHER_LOAD) load_p = true; else if ((internal_fn_p (cfn) && !vectorizable_internal_fn_p (as_internal_fn (cfn))) @@ -1229,7 +1235,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, } /* Grouped access. */ else { - if (load_p && rhs_code != CFN_GATHER_LOAD) + if (load_p + && rhs_code != CFN_GATHER_LOAD + && rhs_code != CFN_MASK_GATHER_LOAD) { /* Not grouped load. */ if (dump_enabled_p ()) @@ -1711,7 +1719,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, { if (gcall *stmt = dyn_cast (stmt_info->stmt)) gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD) - || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)); + || gimple_call_internal_p (stmt, IFN_GATHER_LOAD) + || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)); else { *max_nunits = this_max_nunits; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 06da5a9bc13..8642acbc0b4 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -8595,6 +8595,7 @@ vectorizable_load (vec_info *vinfo, return false; tree mask = NULL_TREE, mask_vectype = NULL_TREE; + int mask_index = -1; if (gassign *assign = dyn_cast (stmt_info->stmt)) { scalar_dest = gimple_assign_lhs (assign); @@ -8626,12 +8627,12 @@ vectorizable_load (vec_info *vinfo, if (!scalar_dest) return false; - int mask_index = internal_fn_mask_index (ifn); + mask_index = internal_fn_mask_index (ifn); + /* ??? For SLP the mask operand is always last. */ + if (mask_index >= 0 && slp_node) + mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1; if (mask_index >= 0 - && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, - /* ??? For SLP we only have operands for - the mask operand. */ - slp_node ? 
0 : mask_index, + && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, &mask, NULL, &mask_dt, &mask_vectype)) return false; } @@ -9393,8 +9394,14 @@ vectorizable_load (vec_info *vinfo, vec vec_offsets = vNULL; auto_vec vec_masks; if (mask) - vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, - mask, &vec_masks, mask_vectype, NULL_TREE); + { + if (slp_node) + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], + &vec_masks); + else + vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask, + &vec_masks, mask_vectype); + } tree vec_mask = NULL_TREE; poly_uint64 group_elt = 0; for (j = 0; j < ncopies; j++)