@@ -3759,7 +3759,7 @@ expand_value_return (rtx val)
tree decl = DECL_RESULT (current_function_decl);
rtx return_reg = DECL_RTL (decl);
- if (return_reg != val)
+ if (!rtx_equal_p (return_reg, val))
{
tree funtype = TREE_TYPE (current_function_decl);
tree type = TREE_TYPE (decl);
@@ -3832,6 +3832,10 @@ expand_return (tree retval)
been stored into it, so we don't have to do anything special. */
if (TREE_CODE (retval_rhs) == RESULT_DECL)
expand_value_return (result_rtl);
+ /* The return value was scalarized by fsra.  TODO: use a flag to detect this.  */
+ else if (VAR_P (retval_rhs)
+ && rtx_equal_p (result_rtl, DECL_RTL (retval_rhs)))
+ expand_null_return_1 ();
/* If the result is an aggregate that is being returned in one (or more)
registers, load the registers here. */
@@ -3557,6 +3557,90 @@ expand_ARG_PARTS (internal_fn, gcall *stmt)
}
}
+/* Try to store RHS directly into one register of the PARALLEL REGS.
+   BITPOS and BITSIZE are passed to query_position_in_parallel to locate
+   the element(s) of REGS that the access touches.
+
+   Return true if a move into that register was emitted.  Return false,
+   without emitting the move, when REGS is not a PARALLEL, when the lookup
+   yields a negative index or two different indexes, when the leftover
+   bit count selected by BITS_BIG_ENDIAN is nonzero, when a partial access
+   has no suitable narrower integer mode, or when expanding RHS gives no
+   rtx.  */
+static bool
+store_outgoing_element (rtx regs, HOST_WIDE_INT bitpos, HOST_WIDE_INT bitsize,
+                        tree rhs)
+{
+  if (GET_CODE (regs) != PARALLEL)
+    return false;
+
+  /* NOTE(review): presumably FIRST/LAST are the indexes of the first and
+     last touched elements and LEAD/TRAIL the register bits left uncovered
+     on either side -- confirm against query_position_in_parallel.  */
+  int first = -1;
+  int last = -1;
+  HOST_WIDE_INT lead = 0;
+  HOST_WIDE_INT trail = 0;
+  query_position_in_parallel (bitpos, bitsize, regs, first, last, lead,
+                              trail);
+
+  /* Only an access that resolves to exactly one element is handled.  */
+  if (first < 0 || last < 0 || last != first)
+    return false;
+
+  /* With BITS_BIG_ENDIAN the trailing count must be zero, otherwise the
+     leading one must be.  */
+  if ((BITS_BIG_ENDIAN ? trail : lead) != 0)
+    return false;
+
+  /* A single register is enough for the access.  */
+  rtx target = XEXP (XVECEXP (regs, 0, first), 0);
+  machine_mode target_mode = GET_MODE (target);
+
+  if (lead != 0 || trail != 0)
+    {
+      /* Partial access: narrow the register to a BITSIZE-wide mode of the
+         same class; only scalar integer modes are handled.  */
+      if (!SCALAR_INT_MODE_P (target_mode))
+        return false;
+
+      machine_mode narrow_mode;
+      if (!mode_for_size (bitsize, GET_MODE_CLASS (target_mode), 0)
+             .exists (&narrow_mode))
+        return false;
+
+      target = gen_lowpart (narrow_mode, target);
+      target_mode = narrow_mode;
+    }
+
+  rtx value = expand_expr (rhs, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+  if (!value)
+    return false;
+
+  /* Bring the value to the destination mode before moving it.  */
+  if (GET_MODE (value) != target_mode)
+    value = gen_lowpart (target_mode, value);
+
+  emit_move_insn (target, value);
+  return true;
+}
+
+/* Expand a call to internal function SET_RET_PARTS.
+
+   The arguments of STMT are read as follows: argument 0 is the aggregate
+   BASE, arguments 1 and 2 are constants handed to store_outgoing_element
+   as bit position and bit size, and argument 3 is the value RHS.  The
+   call's LHS is the destination used by the fallback path.
+
+   First try to move RHS straight into the matching register of the
+   RTL of the current function's DECL_RESULT.  If that fails, copy the
+   result into BASE, perform the assignment LHS = RHS, and copy BASE back
+   into the result.  */
+static void
+expand_SET_RET_PARTS (internal_fn, gcall *stmt)
+{
+  HOST_WIDE_INT offset = tree_to_shwi (gimple_call_arg (stmt, 1));
+  HOST_WIDE_INT size = tree_to_shwi (gimple_call_arg (stmt, 2));
+  tree rhs = gimple_call_arg (stmt, 3);
+  tree decl = DECL_RESULT (current_function_decl);
+
+  /* Read the result's RTL through the checked DECL_RTL accessor instead
+     of the raw decl_with_rtl field.  */
+  rtx dest_regs = DECL_RTL (decl);
+
+  if (store_outgoing_element (dest_regs, offset, size, rhs))
+    return;
+
+  /* Fallback: round-trip through BASE so that the generic assignment
+     expander performs the partial store.  */
+  tree base = gimple_call_arg (stmt, 0);
+  tree lhs = gimple_call_lhs (stmt);
+  expand_assignment (base, decl, false);
+  expand_assignment (lhs, rhs, false);
+  expand_assignment (decl, base, false);
+}
+
+/* Expand a call to internal function SET_RET_LAST_PARTS.
+
+   STMT is first expanded exactly like SET_RET_PARTS.  Afterwards the RTL
+   of argument 0 (the aggregate BASE) is set to the RTL of the current
+   function's DECL_RESULT, so both decls share the same RTL from here on.  */
+static void
+expand_SET_RET_LAST_PARTS (internal_fn, gcall *stmt)
+{
+  expand_SET_RET_PARTS (IFN_SET_RET_PARTS, stmt);
+
+  tree decl = DECL_RESULT (current_function_decl);
+
+  /* Read the result's RTL through the checked DECL_RTL accessor instead
+     of the raw decl_with_rtl field.  */
+  rtx dest_regs = DECL_RTL (decl);
+
+  tree base = gimple_call_arg (stmt, 0);
+
+  /* NOTE(review): the field is stored directly rather than through
+     SET_DECL_RTL -- confirm whether the accessor can be used here.  */
+  base->decl_with_rtl.rtl = dest_regs;
+}
+
+
/* The size of an OpenACC compute dimension. */
static void
@@ -513,6 +513,12 @@ DEF_INTERNAL_FN (DEFERRED_INIT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
/* A function to extract elemet(s) from an aggregate argument in fsra. */
DEF_INTERNAL_FN (ARG_PARTS, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+/* Functions to set/construct element(s) of a 'return' aggregate.  */
+DEF_INTERNAL_FN (SET_RET_PARTS, ECF_LEAF | ECF_NOTHROW, NULL)
+/* Functions to set/construct element(s) of a 'return' aggregate just before
+the return statement.  */
+DEF_INTERNAL_FN (SET_RET_LAST_PARTS, ECF_LEAF | ECF_NOTHROW, NULL)
+
/* DIM_SIZE and DIM_POS return the size of a particular compute
dimension and the executing thread's position within that
dimension. DIM_POS is pure (and not const) so that it isn't
@@ -2777,6 +2777,15 @@ analyze_access_subtree (struct access *root, struct access *parent,
if ((root->grp_scalar_read || root->grp_assignment_read)
&& TREE_CODE (root->base) == PARM_DECL)
return true;
+ /* In fsra (SRA_MODE_FINAL_INTRA) only parameters and returned
+ variables are candidates, so if "VAR_P (root->base)" holds, the base
+ is used by a return stmt.
+ TODO: add a flag to root->base to indicate that it is used by a
+ return stmt.  */
+ if ((root->grp_scalar_write || root->grp_assignment_write)
+ && VAR_P (root->base))
+ return true;
+
return false;
};
@@ -2853,9 +2862,13 @@ analyze_access_subtree (struct access *root, struct access *parent,
else if (root->grp_write || comes_initialized_p (root->base))
root->grp_unscalarized_data = 1; /* not covered and written to */
- if (sra_mode == SRA_MODE_FINAL_INTRA && root->grp_write
- && TREE_CODE (root->base) == PARM_DECL)
- return false;
+ if (sra_mode == SRA_MODE_FINAL_INTRA)
+ {/* Writes to a PARAM and a partially unscalarized RET are not supported yet. */
+ if (root->grp_unscalarized_data && (VAR_P (root->base)))
+ return false;
+ if (root->grp_write && TREE_CODE (root->base) == PARM_DECL)
+ return false;
+ }
return sth_created;
}
@@ -3853,7 +3866,25 @@ generate_subtree_copies (struct access *access, tree agg,
!insert_after,
insert_after ? GSI_NEW_STMT
: GSI_SAME_STMT);
- stmt = gimple_build_assign (expr, repl);
+ if (sra_mode == SRA_MODE_FINAL_INTRA && VAR_P (access->base)
+ && (access->grp_scalar_write || access->grp_assignment_write))
+ {
+ enum internal_fn fcode;
+ if (access->first_child == NULL
+ && access->next_sibling == NULL)
+ fcode = IFN_SET_RET_LAST_PARTS;
+ else
+ fcode = IFN_SET_RET_PARTS;
+
+ gimple *call = gimple_build_call_internal (
+ fcode, 4, access->base,
+ wide_int_to_tree (sizetype, access->offset),
+ wide_int_to_tree (sizetype, access->size), repl);
+ gimple_call_set_lhs (call, expr);
+ stmt = call;
+ }
+ else
+ stmt = gimple_build_assign (expr, repl);
}
gimple_set_location (stmt, loc);
new file mode 100644
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target hard_float } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler-times {\mlfd\M} 4 {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mstd\M} {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mld\M} {target { lp64 && has_arch_pwr8 } } } } */
+
+/* Struct wrapping an array of four doubles.  */
+typedef struct { double a[4]; } A;
+/* Return *A by value.  The dg-final directives in this file expect exactly
+   four lfd instructions and no std/ld for this function.  */
+A foo (const A *a) { return *a; }
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target hard_float } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler-times {\mfmr\M} 3 {target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mxscvspdpn\M} {target { lp64 && has_arch_pwr8 } } } } */
+
+/* Aggregate of two floats; used as both the parameter type and the return
+   type of blah1.  */
+struct foo1
+{
+ float x;
+ float y;
+};
+
+/* Return a struct foo1 whose fields are those of Y swapped.  The dg-final
+   directives in this file expect exactly three fmr instructions and no
+   xscvspdpn.  */
+struct foo1
+blah1 (struct foo1 y)
+{
+ struct foo1 x;
+
+ /* Build the result field by field, swapping the two floats.  */
+ x.x = y.y;
+ x.y = y.x;
+
+ return x;
+}