[5/8] fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]

Message ID 20240731200735.229898-6-morin-mikael@orange.fr
State New
Headers
Series fortran: Inline MINLOC/MAXLOC without DIM argument [PR90608] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed

Commit Message

Mikael Morin July 31, 2024, 8:07 p.m. UTC
  From: Mikael Morin <mikael@gcc.gnu.org>

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Enable generation of inline code for the MINLOC and MAXLOC intrinsic,
if the ARRAY argument is of integral type and of any rank (only the rank 1
case was previously inlined), and neither DIM nor MASK arguments are
present.

This needs a few adjustments in gfc_conv_intrinsic_minmaxloc,
mainly to replace the single variables POS and OFFSET, with collections
of variables, one variable per dimension each.

The restriction to integral ARRAY and absent MASK limits the scope of
the change to the cases where we generate single loop inline code.  The
code generation for the second loop is only accessible with ARRAY of rank
1, so it can continue using a single variable.  A later change will extend
inlining to the double loop cases.

There is some bounds checking code that was previously handled by the
library, and that needed some changes in the scalarizer to avoid regressing.
The bounds check code generation was already supported by the scalarizer,
but it was only applying to array reference sections, checking both
for array bound violation and for shape conformability between all the
involved arrays.  With this change, for MINLOC or MAXLOC, enable the
conformability check between all the scalarized arrays, and disable the
array bound violation check.

	PR fortran/90608

gcc/fortran/ChangeLog:

	* trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC
	result upper bound using the rank of the ARRAY argument.  Ajdust
	the error message for intrinsic result arrays.  Only check array
	bounds for array references.  Move bound check decision code...
	(bounds_check_needed): ... here as a new predicate.  Allow bound
	check for MINLOC/MAXLOC intrinsic results.
	* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the
	result array upper bound to the rank of ARRAY.  Update the NONEMPTY
	variable to depend on the non-empty extent of every dimension.  Use
	one variable per dimension instead of a single variable for the
	position and the offset.  Update their declaration, initialization,
	and update to affect the variable of each dimension.  Use the first
	variable only in areas only accessed with rank 1 ARRAY argument.
	Set every element of the result using its corresponding variable.
	(gfc_inline_intrinsic_function_p): Return true for integral ARRAY
	and absent DIM and MASK.

gcc/testsuite/ChangeLog:

	* gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error
	message emitted by the scalarizer.
---
 gcc/fortran/trans-array.cc                    |  70 ++++++--
 gcc/fortran/trans-intrinsic.cc                | 150 +++++++++++++-----
 gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 |   4 +-
 3 files changed, 166 insertions(+), 58 deletions(-)
  

Patch

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 99a603a3afb..76448c8ac0e 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4876,6 +4876,35 @@  add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info,
 }
 
 
+/* Tells whether we need to generate bounds checking code for the array
+   associated with SS.  */
+
+bool
+bounds_check_needed (gfc_ss *ss)
+{
+  /* Catch allocatable lhs in f2003.  */
+  if (flag_realloc_lhs && ss->no_bounds_check)
+    return false;
+
+  gfc_ss_info *ss_info = ss->info;
+  if (ss_info->type == GFC_SS_SECTION)
+    return true;
+
+  if (!(ss_info->type == GFC_SS_INTRINSIC
+	&& ss_info->expr
+	&& ss_info->expr->expr_type == EXPR_FUNCTION))
+    return false;
+
+  gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym;
+  if (!(isym
+	&& (isym->id == GFC_ISYM_MAXLOC
+	    || isym->id == GFC_ISYM_MINLOC)))
+    return false;
+
+  return gfc_inline_intrinsic_function_p (ss_info->expr);
+}
+
+
 /* Calculates the range start and stride for a SS chain.  Also gets the
    descriptor and data pointer.  The range of vector subscripts is the size
    of the vector.  Array bounds are also checked.  */
@@ -4977,10 +5006,17 @@  done:
 		info->data = gfc_conv_array_data (info->descriptor);
 		info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
 
-		info->offset = gfc_index_zero_node;
+		gfc_expr *array = expr->value.function.actual->expr;
+		tree rank = build_int_cst (gfc_array_index_type, array->rank);
+
+		tree tmp = fold_build2_loc (input_location, MINUS_EXPR,
+					    gfc_array_index_type, rank,
+					    gfc_index_one_node);
+
+		info->end[0] = gfc_evaluate_now (tmp, &outer_loop->pre);
 		info->start[0] = gfc_index_zero_node;
-		info->end[0] = gfc_index_zero_node;
 		info->stride[0] = gfc_index_one_node;
+		info->offset = gfc_index_zero_node;
 		continue;
 	      }
 
@@ -5098,14 +5134,10 @@  done:
 	  const char *expr_name;
 	  char *ref_name = NULL;
 
+	  if (!bounds_check_needed (ss))
+	    continue;
+
 	  ss_info = ss->info;
-	  if (ss_info->type != GFC_SS_SECTION)
-	    continue;
-
-	  /* Catch allocatable lhs in f2003.  */
-	  if (flag_realloc_lhs && ss->no_bounds_check)
-	    continue;
-
 	  expr = ss_info->expr;
 	  expr_loc = &expr->where;
 	  if (expr->ref)
@@ -5123,10 +5155,13 @@  done:
 	  for (n = 0; n < loop->dimen; n++)
 	    {
 	      dim = ss->dim[n];
-	      if (info->ref->u.ar.dimen_type[dim] != DIMEN_RANGE)
-		continue;
+	      if (ss_info->type == GFC_SS_SECTION)
+		{
+		  if (info->ref->u.ar.dimen_type[dim] != DIMEN_RANGE)
+		    continue;
 
-	      add_check_section_in_array_bounds (&inner, ss_info, dim);
+		  add_check_section_in_array_bounds (&inner, ss_info, dim);
+		}
 
 	      /* Check the section sizes match.  */
 	      tmp = fold_build2_loc (input_location, MINUS_EXPR,
@@ -5147,9 +5182,14 @@  done:
 		{
 		  tmp3 = fold_build2_loc (input_location, NE_EXPR,
 					  logical_type_node, tmp, size[n]);
-		  msg = xasprintf ("Array bound mismatch for dimension %d "
-				   "of array '%s' (%%ld/%%ld)",
-				   dim + 1, expr_name);
+		  if (ss_info->type == GFC_SS_INTRINSIC)
+		    msg = xasprintf ("Extent mismatch for dimension %d of the "
+				     "result of intrinsic '%s' (%%ld/%%ld)",
+				     dim + 1, expr_name);
+		  else
+		    msg = xasprintf ("Array bound mismatch for dimension %d "
+				     "of array '%s' (%%ld/%%ld)",
+				     dim + 1, expr_name);
 
 		  gfc_trans_runtime_check (true, false, tmp3, &inner,
 					   expr_loc, msg,
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index a947dd1ba0b..ac8bd2d4812 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5332,9 +5332,30 @@  strip_kind_from_actual (gfc_actual_arglist * actual)
 	   if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
 	   S++;
 	 }
-   B) ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
+   B: ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
       case and wrap the result in an array.
-   C) Otherwise, a call is generated
+   C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent.
+      Generate code similar to the single loop scalar case, but using one
+      variable per dimension, for example if ARRAY has rank 2:
+      4) NAN's aren't supported, no MASK:
+	 limit = infinities_supported ? Infinity : huge (limit);
+	 pos0 = (from0 <= to0 && from1 <= to1) ? 1 : 0;
+	 pos1 = (from0 <= to0 && from1 <= to1) ? 1 : 0;
+	 S1 = from1;
+	 while (S1 <= to1) {
+	   S0 = from0;
+	   while (S0 <= to0) {
+	     if (a[S1][S0] < limit) {
+	       limit = a[S1][S0];
+	       pos0 = S + (1 - from0);
+	       pos1 = S + (1 - from1);
+	     }
+	     S0++;
+	   }
+	   S1++;
+	 }
+	 result = { pos0, pos1 };
+   D: Otherwise, a call is generated.
    For 2) and 4), if mask is scalar, this all goes into a conditional,
    setting pos = 0; in the else branch.
 
@@ -5348,8 +5369,8 @@  strip_kind_from_actual (gfc_actual_arglist * actual)
    if (cond) {
      ....
 
-     The optimizer is smart enough to move the condition out of the loop.
-     They are now marked as unlikely too for further speedup.  */
+   The optimizer is smart enough to move the condition out of the loop.
+   They are now marked as unlikely too for further speedup.  */
 
 static void
 gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
@@ -5364,7 +5385,7 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
   tree cond;
   tree elsetmp;
   tree ifbody;
-  tree offset;
+  tree offset[GFC_MAX_DIMENSIONS];
   tree nonempty;
   tree lab1, lab2;
   tree b_if, b_else;
@@ -5379,7 +5400,7 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
   gfc_expr *maskexpr;
   gfc_expr *backexpr;
   gfc_se backse;
-  tree pos;
+  tree pos[GFC_MAX_DIMENSIONS];
   tree result_var = NULL_TREE;
   int n;
   bool optional_mask;
@@ -5447,7 +5468,7 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
 				      HOST_WIDE_INT_1);
       as.upper[0] = gfc_get_int_expr (gfc_index_integer_kind,
 				      &arrayexpr->where,
-				      HOST_WIDE_INT_1);
+				      arrayexpr->rank);
 
       tree array = gfc_get_nodesc_array_type (type, &as, PACKED_STATIC, true);
 
@@ -5455,8 +5476,13 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
     }
 
   /* Initialize the result.  */
-  pos = gfc_create_var (gfc_array_index_type, "pos");
-  offset = gfc_create_var (gfc_array_index_type, "offset");
+  for (int i = 0; i < arrayexpr->rank; i++)
+    {
+      pos[i] = gfc_create_var (gfc_array_index_type,
+			       gfc_get_string ("pos%d", i));
+      offset[i] = gfc_create_var (gfc_array_index_type,
+				  gfc_get_string ("offset%d", i));
+    }
 
   /* Walk the arguments.  */
   arrayss = gfc_walk_expr (arrayexpr);
@@ -5573,10 +5599,26 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
   loop.temp_dim = loop.dimen;
   gfc_conv_loop_setup (&loop, &expr->where);
 
-  gcc_assert (loop.dimen == 1);
-  if (nonempty == NULL && maskss == NULL && loop.from[0] && loop.to[0])
-    nonempty = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
-				loop.from[0], loop.to[0]);
+  if (nonempty == NULL && maskss == NULL)
+    {
+      nonempty = logical_true_node;
+
+      for (int i = 0; i < loop.dimen; i++)
+	{
+	  if (!(loop.from[i] && loop.to[i]))
+	    {
+	      nonempty = NULL;
+	      break;
+	    }
+
+	  tree tmp = fold_build2_loc (input_location, LE_EXPR,
+				      logical_type_node, loop.from[i],
+				      loop.to[i]);
+
+	  nonempty = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+				      logical_type_node, nonempty, tmp);
+	}
+    }
 
   lab1 = NULL;
   lab2 = NULL;
@@ -5586,14 +5628,18 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
      is non-empty and no MASK is used, we can initialize to 1 to simplify
      the inner loop.  */
   if (nonempty != NULL && !HONOR_NANS (DECL_MODE (limit)))
-    gfc_add_modify (&loop.pre, pos,
-		    fold_build3_loc (input_location, COND_EXPR,
-				     gfc_array_index_type,
-				     nonempty, gfc_index_one_node,
-				     gfc_index_zero_node));
+    {
+      tree init = fold_build3_loc (input_location, COND_EXPR,
+				   gfc_array_index_type, nonempty,
+				   gfc_index_one_node,
+				   gfc_index_zero_node);
+      for (int i = 0; i < loop.dimen; i++)
+	gfc_add_modify (&loop.pre, pos[i], init);
+    }
   else
     {
-      gfc_add_modify (&loop.pre, pos, gfc_index_zero_node);
+      gcc_assert (loop.dimen == 1);
+      gfc_add_modify (&loop.pre, pos[0], gfc_index_zero_node);
       lab1 = gfc_build_label_decl (NULL_TREE);
       TREE_USED (lab1) = 1;
       lab2 = gfc_build_label_decl (NULL_TREE);
@@ -5602,11 +5648,14 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
 
   /* An offset must be added to the loop
      counter to obtain the required position.  */
-  gcc_assert (loop.from[0]);
+  for (int i = 0; i < loop.dimen; i++)
+    {
+      gcc_assert (loop.from[i]);
 
-  tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
-			 gfc_index_one_node, loop.from[0]);
-  gfc_add_modify (&loop.pre, offset, tmp);
+      tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
+			     gfc_index_one_node, loop.from[i]);
+      gfc_add_modify (&loop.pre, offset[i], tmp);
+    }
 
   gfc_mark_ss_chain_used (arrayss, lab1 ? 3 : 1);
   if (maskss)
@@ -5647,20 +5696,23 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
       tree ifbody2;
 
       gfc_start_block (&ifblock2);
-      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos),
-			     loop.loopvar[0], offset);
-      gfc_add_modify (&ifblock2, pos, tmp);
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[0]),
+			     loop.loopvar[0], offset[0]);
+      gfc_add_modify (&ifblock2, pos[0], tmp);
       ifbody2 = gfc_finish_block (&ifblock2);
-      cond = fold_build2_loc (input_location, EQ_EXPR, logical_type_node, pos,
-			      gfc_index_zero_node);
+      cond = fold_build2_loc (input_location, EQ_EXPR, logical_type_node,
+			      pos[0], gfc_index_zero_node);
       tmp = build3_v (COND_EXPR, cond, ifbody2,
 		      build_empty_stmt (input_location));
       gfc_add_expr_to_block (&block, tmp);
     }
 
-  tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos),
-			 loop.loopvar[0], offset);
-  gfc_add_modify (&ifblock, pos, tmp);
+  for (int i = 0; i < loop.dimen; i++)
+    {
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[i]),
+			     loop.loopvar[i], offset[i]);
+      gfc_add_modify (&ifblock, pos[i], tmp);
+    }
 
   if (lab1)
     gfc_add_expr_to_block (&ifblock, build1_v (GOTO_EXPR, lab1));
@@ -5724,13 +5776,15 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
 
   if (lab1)
     {
+      gcc_assert (loop.dimen == 1);
+
       gfc_trans_scalarized_loop_boundary (&loop, &body);
 
       if (HONOR_NANS (DECL_MODE (limit)))
 	{
 	  if (nonempty != NULL)
 	    {
-	      ifbody = build2_v (MODIFY_EXPR, pos, gfc_index_one_node);
+	      ifbody = build2_v (MODIFY_EXPR, pos[0], gfc_index_one_node);
 	      tmp = build3_v (COND_EXPR, nonempty, ifbody,
 			      build_empty_stmt (input_location));
 	      gfc_add_expr_to_block (&loop.code[0], tmp);
@@ -5767,9 +5821,9 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
       /* Assign the value to the limit...  */
       gfc_add_modify (&ifblock, limit, arrayse.expr);
 
-      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos),
-			     loop.loopvar[0], offset);
-      gfc_add_modify (&ifblock, pos, tmp);
+      tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE (pos[0]),
+			     loop.loopvar[0], offset[0]);
+      gfc_add_modify (&ifblock, pos[0], tmp);
 
       ifbody = gfc_finish_block (&ifblock);
 
@@ -5832,6 +5886,7 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
   /* For a scalar mask, enclose the loop in an if statement.  */
   if (maskexpr && maskss == NULL)
     {
+      gcc_assert (loop.dimen == 1);
       tree ifmask;
 
       gfc_init_se (&maskse, NULL);
@@ -5846,7 +5901,7 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
 	 the pos variable the same way as above.  */
 
       gfc_init_block (&elseblock);
-      gfc_add_modify (&elseblock, pos, gfc_index_zero_node);
+      gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node);
       elsetmp = gfc_finish_block (&elseblock);
       ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask);
       tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp);
@@ -5860,18 +5915,22 @@  gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
     }
   gfc_cleanup_loop (&loop);
 
-  tree value = convert (type, pos);
   if (expr->rank > 0)
     {
-      tree res_arr_ref = gfc_build_array_ref (result_var, gfc_index_zero_node,
-					      NULL_TREE, true);
+      for (int i = 0; i < arrayexpr->rank; i++)
+	{
+	  tree res_idx = build_int_cst (gfc_array_index_type, i);
+	  tree res_arr_ref = gfc_build_array_ref (result_var, res_idx,
+						  NULL_TREE, true);
 
-      gfc_add_modify (&se->pre, res_arr_ref, value);
+	  tree value = convert (type, pos[i]);
+	  gfc_add_modify (&se->pre, res_arr_ref, value);
+	}
 
       se->expr = result_var;
     }
   else
-    se->expr = value;
+    se->expr = convert (type, pos[0]);
 }
 
 /* Emit code for findloc.  */
@@ -11722,8 +11781,12 @@  gfc_inline_intrinsic_function_p (gfc_expr *expr)
 	  return false;
 
 	gfc_actual_arglist *array_arg = expr->value.function.actual;
+	gfc_actual_arglist *dim_arg = array_arg->next;
+	gfc_actual_arglist *mask_arg = dim_arg->next;
 
 	gfc_expr *array = array_arg->expr;
+	gfc_expr *dim = dim_arg->expr;
+	gfc_expr *mask = mask_arg->expr;
 
 	if (!(array->ts.type == BT_INTEGER
 	      || array->ts.type == BT_REAL))
@@ -11732,6 +11795,11 @@  gfc_inline_intrinsic_function_p (gfc_expr *expr)
 	if (array->rank == 1)
 	  return true;
 
+	if (array->ts.type == BT_INTEGER
+	    && dim == nullptr
+	    && mask == nullptr)
+	  return true;
+
 	return false;
       }
 
diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 b/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90
index b1c7ca752d0..17f6cd86dc2 100644
--- a/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90
+++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90
@@ -1,6 +1,6 @@ 
 ! { dg-do run }
 ! { dg-options "-fbounds-check" }
-! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2" }
+! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
 module tst
 contains
   subroutine foo(res)
@@ -18,4 +18,4 @@  program main
   integer :: res(3)
   call foo(res)
 end program main
-! { dg-output "Fortran runtime error: Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2" }
+! { dg-output "Fortran runtime error: Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }