[3/3,RFC] fsra: support SET_RET_PART

Message ID 20240227070412.3471038-4-guojiufu@linux.ibm.com
State New
Headers
Series fsra: Add final gimple sra before expander |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Testing failed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Testing failed

Commit Message

Jiufu Guo Feb. 27, 2024, 7:04 a.m. UTC
  This patch adds IFN_SET_RET_PARTS and generates this IFN for accesses to the
'returns' in the fsra pass.  The IFN is expanded according to the outgoing
registers of the 'return'.  The access analysis in "fsra" is tuned for
'returns'.

'IFN_SET_RET_LAST_PARTS' exists only for this prototype; it helps to
reuse the decl information of the 'return var'.  Once the implementation
is enhanced, this IFN may be removed.

	PR target/65421
	PR target/69143

gcc/ChangeLog:

	* cfgexpand.cc (expand_value_return): Update.
	(expand_return): Update for returns expand.
	* internal-fn.cc (store_outgoing_element): New function.
	(expand_SET_RET_PARTS): New IFN expand function.
	(expand_SET_RET_LAST_PARTS): New IFN expand function.
	* internal-fn.def (SET_RET_PARTS): New IFN.
	(SET_RET_LAST_PARTS): New IFN.
	* tree-sra.cc (analyze_access_subtree): Update for returns in fsra.
	(generate_subtree_copies): Generate IFN for returns.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr65421.c: New test.
	* gcc.target/powerpc/pr69143.c: New test.

---
 gcc/cfgexpand.cc                           |  6 +-
 gcc/internal-fn.cc                         | 84 ++++++++++++++++++++++
 gcc/internal-fn.def                        |  6 ++
 gcc/tree-sra.cc                            | 39 ++++++++--
 gcc/testsuite/gcc.target/powerpc/pr65421.c | 10 +++
 gcc/testsuite/gcc.target/powerpc/pr69143.c | 23 ++++++
 6 files changed, 163 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr69143.c
  

Patch

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index eef565eddb5..1ec6c2d8102 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -3759,7 +3759,7 @@  expand_value_return (rtx val)
 
   tree decl = DECL_RESULT (current_function_decl);
   rtx return_reg = DECL_RTL (decl);
-  if (return_reg != val)
+  if (!rtx_equal_p (return_reg, val))
     {
       tree funtype = TREE_TYPE (current_function_decl);
       tree type = TREE_TYPE (decl);
@@ -3832,6 +3832,10 @@  expand_return (tree retval)
      been stored into it, so we don't have to do anything special.  */
   if (TREE_CODE (retval_rhs) == RESULT_DECL)
     expand_value_return (result_rtl);
+  /* return is scalarized by fsra: TODO use FLAG. */
+  else if (VAR_P (retval_rhs)
+	   && rtx_equal_p (result_rtl, DECL_RTL (retval_rhs)))
+    expand_null_return_1 ();
 
   /* If the result is an aggregate that is being returned in one (or more)
      registers, load the registers here.  */
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index ee19e155628..be06dc3a16c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3557,6 +3557,90 @@  expand_ARG_PARTS (internal_fn, gcall *stmt)
     }
 }
 
+static bool
+store_outgoing_element (rtx regs, HOST_WIDE_INT bitpos, HOST_WIDE_INT bitsize,
+			tree rhs)
+{
+  if (GET_CODE (regs) != PARALLEL)
+    return false;
+
+  int start_index = -1;
+  int end_index = -1;
+  HOST_WIDE_INT left_bits = 0;
+  HOST_WIDE_INT right_bits = 0;
+  query_position_in_parallel (bitpos, bitsize, regs, start_index, end_index,
+			      left_bits, right_bits);
+
+  if (start_index < 0 || end_index < 0)
+    return false;
+
+  if (end_index != start_index)
+    return false;
+
+  if (!((left_bits == 0 && !BITS_BIG_ENDIAN)
+	|| (right_bits == 0 && BITS_BIG_ENDIAN)))
+    return false;
+
+  /* Just need one reg for the access.  */
+  rtx dest = XEXP (XVECEXP (regs, 0, start_index), 0);
+  machine_mode mode = GET_MODE (dest);
+
+  if (left_bits != 0 || right_bits != 0)
+    {
+      machine_mode small_mode;
+      if (!SCALAR_INT_MODE_P (mode)
+	  || !mode_for_size (bitsize, GET_MODE_CLASS (mode), 0)
+		.exists (&small_mode))
+	return false;
+
+      dest = gen_lowpart (small_mode, dest);
+      mode = small_mode;
+    }
+
+  rtx src = expand_expr (rhs, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+  if (!src)
+    return false;
+
+  machine_mode src_mode = GET_MODE (src);
+  if (mode != src_mode)
+    src = gen_lowpart (mode, src);
+
+  emit_move_insn (dest, src);
+
+  return true;
+}
+
+static void
+expand_SET_RET_PARTS (internal_fn, gcall *stmt)
+{
+  HOST_WIDE_INT offset = tree_to_shwi (gimple_call_arg (stmt, 1));
+  HOST_WIDE_INT size = tree_to_shwi (gimple_call_arg (stmt, 2));
+  tree decl = DECL_RESULT (current_function_decl);
+  rtx dest_regs = decl->decl_with_rtl.rtl; // DECL_RTL (base);
+  tree rhs = gimple_call_arg (stmt, 3);
+  bool res = store_outgoing_element (dest_regs, offset, size, rhs);
+  if (!res)
+    {
+      tree base = gimple_call_arg (stmt, 0);
+      tree lhs = gimple_call_lhs (stmt);
+      expand_assignment (base, decl, false);
+      expand_assignment (lhs, rhs, false);
+      expand_assignment (decl, base, false);
+    }
+}
+
+static void
+expand_SET_RET_LAST_PARTS (internal_fn, gcall *stmt)
+{
+  expand_SET_RET_PARTS (IFN_SET_RET_PARTS, stmt);
+
+  tree decl = DECL_RESULT (current_function_decl);
+  rtx dest_regs = decl->decl_with_rtl.rtl; // DECL_RTL (base);
+  tree base = gimple_call_arg (stmt, 0);
+  base->decl_with_rtl.rtl = dest_regs; // SET_DECL_RTL
+}
+
+
 /* The size of an OpenACC compute dimension.  */
 
 static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 2bbf70dd6a1..e6fab4671d5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -513,6 +513,12 @@  DEF_INTERNAL_FN (DEFERRED_INIT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 /* A function to extract elemet(s) from an aggregate argument in fsra. */
 DEF_INTERNAL_FN (ARG_PARTS, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 
+/* Functions to set/construct element(s) for a 'return' aggregate. */
+DEF_INTERNAL_FN (SET_RET_PARTS, ECF_LEAF | ECF_NOTHROW, NULL)
+/* Functions to set/construct element(s) for a 'return' aggregate just before
+the return statement. */
+DEF_INTERNAL_FN (SET_RET_LAST_PARTS, ECF_LEAF | ECF_NOTHROW, NULL)
+
 /* DIM_SIZE and DIM_POS return the size of a particular compute
    dimension and the executing thread's position within that
    dimension.  DIM_POS is pure (and not const) so that it isn't
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index 0bbb8940921..d78a2cc4b02 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -2777,6 +2777,15 @@  analyze_access_subtree (struct access *root, struct access *parent,
     if ((root->grp_scalar_read || root->grp_assignment_read)
 	&& TREE_CODE (root->base) == PARM_DECL)
       return true;
+    /* Now in fsra (SRA_MODE_FINAL_INTRA), only PARAM and RETURNS
+       are candidates, so if "VAR_P (root->base)", then it is used by
+       a return stmt.
+       TODO: add a flag to root->base to indicate it is used by return
+       stmt.*/
+    if ((root->grp_scalar_write || root->grp_assignment_write)
+	&& VAR_P (root->base))
+      return true;
+
     return false;
   };
 
@@ -2853,9 +2862,13 @@  analyze_access_subtree (struct access *root, struct access *parent,
   else if (root->grp_write || comes_initialized_p (root->base))
     root->grp_unscalarized_data = 1; /* not covered and written to */
 
-  if (sra_mode == SRA_MODE_FINAL_INTRA && root->grp_write
-      && TREE_CODE (root->base) == PARM_DECL)
-    return false;
+  if (sra_mode == SRA_MODE_FINAL_INTRA)
+    {/* Does not support writes to PARAM and partial-unscalarized RET yet.  */
+      if (root->grp_unscalarized_data && (VAR_P (root->base)))
+	return false;
+      if (root->grp_write && TREE_CODE (root->base) == PARM_DECL)
+	return false;
+    }
 
   return sth_created;
 }
@@ -3853,7 +3866,25 @@  generate_subtree_copies (struct access *access, tree agg,
 						 !insert_after,
 						 insert_after ? GSI_NEW_STMT
 						 : GSI_SAME_STMT);
-	      stmt = gimple_build_assign (expr, repl);
+	      if (sra_mode == SRA_MODE_FINAL_INTRA && VAR_P (access->base)
+		  && (access->grp_scalar_write || access->grp_assignment_write))
+		{
+		  enum internal_fn fcode;
+		  if (access->first_child == NULL
+		      && access->next_sibling == NULL)
+		    fcode = IFN_SET_RET_LAST_PARTS;
+		  else
+		    fcode = IFN_SET_RET_PARTS;
+
+		  gimple *call = gimple_build_call_internal (
+		    fcode, 4, access->base,
+		    wide_int_to_tree (sizetype, access->offset),
+		    wide_int_to_tree (sizetype, access->size), repl);
+		  gimple_call_set_lhs (call, expr);
+		  stmt = call;
+		}
+	      else
+		stmt = gimple_build_assign (expr, repl);
 	    }
 	  gimple_set_location (stmt, loc);
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c
new file mode 100644
index 00000000000..ea86b53afbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c
@@ -0,0 +1,10 @@ 
+/* { dg-require-effective-target hard_float } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler-times {\mlfd\M} 4 {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mstd\M} {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mld\M} {target { lp64 && has_arch_pwr8 } } } } */
+
+typedef struct { double a[4]; } A;
+A foo (const A *a) { return *a; }
diff --git a/gcc/testsuite/gcc.target/powerpc/pr69143.c b/gcc/testsuite/gcc.target/powerpc/pr69143.c
new file mode 100644
index 00000000000..216a270fb7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr69143.c
@@ -0,0 +1,23 @@ 
+/* { dg-require-effective-target hard_float } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler-times {\mfmr\M} 3 {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mxscvspdpn\M} {target { lp64 && has_arch_pwr8 } } } } */
+
+struct foo1
+{
+  float x;
+  float y;
+};
+
+struct foo1
+blah1 (struct foo1 y)
+{
+  struct foo1 x;
+
+  x.x = y.y;
+  x.y = y.x;
+
+  return x;
+}