[V3] extract DF/SF/SI/HI/QI subreg from parameter word on stack

Message ID 20230317032722.1548833-1-guojiufu@linux.ibm.com
State New
Headers
Series [V3] extract DF/SF/SI/HI/QI subreg from parameter word on stack |

Commit Message

Jiufu Guo March 17, 2023, 3:27 a.m. UTC
  Hi,

This patch fixes an issue with accessing a parameter that has struct
type and is passed through integer registers, when a floating-point
member of that parameter is accessed.  For example:

typedef struct DF {double a[4]; long l; } DF;
double foo_df (DF arg){return arg.a[3];}

On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
generated, while the single instruction "mtvsrd 1, 6" would be
enough for this case.

This patch updates the behavior when loading floating-point members of
a parameter: if the floating-point member was stored via an integer
register, it is first loaded in integer mode and then converted to the
floating-point mode.

Compared with the previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html
this version supports a non-zero stack offset for the parameter
argument.

I also tried to enhance CSE/DSE for this issue.  But because of their
limitations (e.g. CSE does not like new pseudos, and DSE is not good
at working across blocks), some cases (such as the one in this patch)
cannot be handled that way.

Bootstrap and regtest pass on ppc64{,le}.
Is this OK for trunk?  Thanks for any comments!


BR,
Jeff (Jiufu)


	PR target/108073

gcc/ChangeLog:

	* expr.cc (extract_subreg_from_loading_word): New function.
	(expand_expr_real_1): Call extract_subreg_from_loading_word.

gcc/testsuite/ChangeLog:

	* g++.target/powerpc/pr102024.C: Updated.
	* gcc.target/powerpc/pr108073.c: New test.

---
 gcc/expr.cc                                 | 85 +++++++++++++++++++++
 gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr108073.c | 29 +++++++
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c
  

Patch

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 3917fc24c8c..57bc29c5678 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10711,6 +10711,77 @@  stmt_is_replaceable_p (gimple *stmt)
   return false;
 }
 
+/* Return the content of the memory slot SOURCE as MODE, or NULL_RTX if this
+   is not possible.  SOURCE is based on BASE, a memory block that is stored
+   via words; its offset from BASE must be known to be >= BYTEPOS.
+
+   To get the content: first load the word of memory that covers the SOURCE
+   slot; next shift the bytes of the slot down to the word's low part; then
+   take that low part and convert it to MODE as necessary.  */
+
+static rtx
+extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base, poly_uint64 bytepos)
+{
+  rtx src_base = XEXP (source, 0);
+  poly_uint64 offset = MEM_OFFSET (source);
+  /* Fold a constant displacement into OFFSET; INTVAL needs a CONST_INT.  */
+  if (GET_CODE (src_base) == PLUS && CONST_INT_P (XEXP (src_base, 1)))
+    {
+      offset += INTVAL (XEXP (src_base, 1));
+      src_base = XEXP (src_base, 0);
+    }
+  /* Likewise make OFFSET relative to the address of BASE.  */
+  base = XEXP (base, 0);
+  if (GET_CODE (base) == PLUS && CONST_INT_P (XEXP (base, 1)))
+    {
+      poly_uint64 offset_on_base = INTVAL (XEXP (base, 1));
+      base = XEXP (base, 0);
+      offset -= offset_on_base;
+    }
+  /* Both addresses must share one base, with OFFSET not below BYTEPOS.  */
+  if (!rtx_equal_p (base, src_base) || !known_ge (offset, bytepos))
+    return NULL_RTX;
+
+  /* Subreg (DI, n) -> DF/SF/SI/HI/QI.  */
+  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
+  poly_uint64 mode_size = GET_MODE_SIZE (mode);
+  poly_uint64 byte_off;
+  unsigned int start;
+  machine_mode int_mode;
+  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
+      && int_mode_for_mode (mode).exists (&int_mode)
+      && can_div_trunc_p (offset, word_size, &start, &byte_off)
+      && multiple_p (byte_off, mode_size))
+    {
+      rtx word_mem = copy_rtx (source);
+      PUT_MODE (word_mem, word_mode);
+      word_mem = adjust_address (word_mem, word_mode, -byte_off);
+      /* Load that word into a new pseudo.  */
+      rtx word_reg = gen_reg_rtx (word_mode);
+      emit_move_insn (word_reg, word_mem);
+      /* Shift the slot's bytes into the low part unless already there.  */
+      poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
+      if (!known_eq (byte_off, low_off))
+	{
+	  poly_uint64 shift_bytes = known_gt (byte_off, low_off)
+				      ? byte_off - low_off
+				      : low_off - byte_off;
+	  word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
+				   shift_bytes * BITS_PER_UNIT, word_reg, 0);
+	}
+      /* Take the integer low part; reinterpret it when MODE differs.  */
+      rtx int_subreg = gen_lowpart (int_mode, word_reg);
+      if (mode == int_mode)
+	return int_subreg;
+
+      rtx int_mode_reg = gen_reg_rtx (int_mode);
+      emit_move_insn (int_mode_reg, int_subreg);
+      return gen_lowpart (mode, int_mode_reg);
+    }
+
+  return NULL_RTX;
+}
+
 rtx
 expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 		    enum expand_modifier modifier, rtx *alt_rtl,
@@ -11892,6 +11963,20 @@  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	    && modifier != EXPAND_WRITE)
 	  op0 = flip_storage_order (mode1, op0);
 
+	/* Accessing a sub-field of a struct parameter that is passed via
+	   integer registers.  */
+	if (mode == mode1 && TREE_CODE (tem) == PARM_DECL
+	    && DECL_INCOMING_RTL (tem) && REG_P (DECL_INCOMING_RTL (tem))
+	    && GET_MODE (DECL_INCOMING_RTL (tem)) == BLKmode && MEM_P (op0)
+	    && MEM_OFFSET_KNOWN_P (op0))
+	  {
+	    rtx subreg
+	      = extract_subreg_from_loading_word (mode, op0, DECL_RTL (tem),
+						  bytepos);
+	    if (subreg)
+	      op0 = subreg;
+	  }
+
 	if (mode == mode1 || mode1 == BLKmode || mode1 == tmode
 	    || modifier == EXPAND_CONST_ADDRESS
 	    || modifier == EXPAND_INITIALIZER)
diff --git a/gcc/testsuite/g++.target/powerpc/pr102024.C b/gcc/testsuite/g++.target/powerpc/pr102024.C
index 769585052b5..c8995cae707 100644
--- a/gcc/testsuite/g++.target/powerpc/pr102024.C
+++ b/gcc/testsuite/g++.target/powerpc/pr102024.C
@@ -5,7 +5,7 @@ 
 // Test that a zero-width bit field in an otherwise homogeneous aggregate
 // generates a psabi warning and passes arguments in GPRs.
 
-// { dg-final { scan-assembler-times {\mstd\M} 4 } }
+// { dg-final { scan-assembler-times {\mmtvsrd\M} 4 } }
 
 struct a_thing
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108073.c b/gcc/testsuite/gcc.target/powerpc/pr108073.c
new file mode 100644
index 00000000000..7dd1a4a326a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108073.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+
+typedef struct DF {double a[4]; short s1; short s2; short s3; short s4; } DF;
+typedef struct SF {float a[4]; int i1; int i2; } SF;
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlwz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-not {\mlhz\M} {target { has_arch_ppc64 && has_arch_pwr8 } } } } */
+short  __attribute__ ((noipa)) foo_hi (DF a, int flag){if (flag == 2)return a.s2+a.s3;return 0;}
+int  __attribute__ ((noipa)) foo_si (SF a, int flag){if (flag == 2)return a.i2+a.i1;return 0;}
+double __attribute__ ((noipa)) foo_df (DF arg, int flag){if (flag == 2)return arg.a[3];else return 0.0;}
+float  __attribute__ ((noipa)) foo_sf (SF arg, int flag){if (flag == 2)return arg.a[2]; return 0;}
+float  __attribute__ ((noipa)) foo_sf1 (SF arg, int flag){if (flag == 2)return arg.a[1];return 0;}
+
+DF gdf = {{1.0,2.0,3.0,4.0}, 1, 2, 3, 4};
+SF gsf = {{1.0f,2.0f,3.0f,4.0f}, 1, 2};
+
+int main()
+{
+  if (!(foo_hi (gdf, 2) == 5 && foo_si (gsf, 2) == 3 && foo_df (gdf, 2) == 4.0
+	&& foo_sf (gsf, 2) == 3.0 && foo_sf1 (gsf, 2) == 2.0))
+    __builtin_abort ();
+  if (!(foo_hi (gdf, 1) == 0 && foo_si (gsf, 1) == 0 && foo_df (gdf, 1) == 0
+	&& foo_sf (gsf, 1) == 0 && foo_sf1 (gsf, 1) == 0))
+    __builtin_abort ();
+  return 0;
+}
+