[48/61] Performance degradation for iDCT-4M example

Message ID 20250131171232.1018281-50-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: "dragan.mladjenovic" <dragan.mladjenovic@rt-rk.com>

This workaround adds the -mfuse-vect-init option, which causes the back end
to emit a single vector load for a vect_init when all of the initializer
elements come from consecutive memory locations and are in the right order.

gcc/
	* config/mips/mips.cc (mips_fuse_vect_init_p): New function.
	(mips_expand_vector_init): Detect init sequence that can be
	fused into a single load.
	* config/mips/mips.opt (mfuse-vect-init): New option.

gcc/testsuite/
	* gcc.target/mips/msa-fuse-vect-init.c: New file.

Cherry-picked 4f440a87ad32b3549be8a0b89900d656ac70d4f8
and 1eb9d22dc480c962027eed522e0b26d0ebbd3e0b
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic <dragan.mladjenovic@rt-rk.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/mips.cc                       | 61 +++++++++++++++++++
 gcc/config/mips/mips.opt                      |  3 +
 .../gcc.target/mips/msa-fuse-vect-init.c      | 18 ++++++
 3 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
  

Patch

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index bd62b8b7823..51d9812151a 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -24873,6 +24873,57 @@  mips_expand_vi_general (machine_mode vmode, machine_mode imode,
   emit_move_insn (target, mem);
 }
 
+/* Return true if the NELT elements of vector initializer VALS can be
+   loaded by a single "fused" vector load: -mfuse-vect-init is in effect
+   and every element is a non-volatile, sufficiently aligned IMODE memory
+   reference at consecutive, increasing offsets from one base register.  */
+
+static bool
+mips_fuse_vect_init_p (machine_mode imode, unsigned nelt, rtx vals)
+{
+  /* MEM_ALIGN is compared in bits, address offsets in bytes.  */
+  unsigned min_align = GET_MODE_BITSIZE (imode);
+  HOST_WIDE_INT step_size = GET_MODE_SIZE (imode);
+  HOST_WIDE_INT offset = 0;
+  rtx base = NULL_RTX;
+
+  if (!flag_fuse_vect_init)
+    return false;
+
+  for (unsigned i = 0; i < nelt; ++i)
+    {
+      rtx elt = XVECEXP (vals, 0, i);
+      rtx elt_base;
+      HOST_WIDE_INT elt_offset;
+
+      /* Test MEM_P first so the MEM_* accessors below only ever see a MEM,
+	 even if a caller has not pre-filtered VALS.  */
+      if (!MEM_P (elt)
+	  || MEM_VOLATILE_P (elt)
+	  || MEM_ALIGN (elt) < min_align
+	  || GET_MODE (elt) != imode)
+	return false;
+
+      /* Split the address into a base and a constant offset.  */
+      mips_split_plus (XEXP (elt, 0), &elt_base, &elt_offset);
+
+      if (i == 0)
+	{
+	  /* The first element fixes the base register of the fused load.  */
+	  if (!REG_P (elt_base))
+	    return false;
+	  base = elt_base;
+	}
+      else if (!rtx_equal_p (base, elt_base)
+	       || elt_offset - offset != step_size)
+	return false;
+
+      offset = elt_offset;
+    }
+
+  return true;
+}
+
 /* Expand a vector initialization.  */
 
 void
@@ -24883,6 +24934,7 @@  mips_expand_vector_init (rtx target, rtx vals)
   unsigned i, nelt = GET_MODE_NUNITS (vmode);
   unsigned nvar = 0, one_var = -1u;
   bool all_same = true;
+  bool all_mem = true;
   rtx x;
 
   for (i = 0; i < nelt; ++i)
@@ -24890,6 +24942,8 @@  mips_expand_vector_init (rtx target, rtx vals)
       x = XVECEXP (vals, 0, i);
       if (!mips_constant_elt_p (x))
 	nvar++, one_var = i;
+      if (!MEM_P (x))
+	all_mem = false;
       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
 	all_same = false;
     }
@@ -24950,6 +25004,13 @@  mips_expand_vector_init (rtx target, rtx vals)
 	}
       else
 	{
+	  if (all_mem && mips_fuse_vect_init_p (imode, nelt, vals))
+	    {
+	      rtx mem = widen_memory_access (XVECEXP (vals, 0, 0), vmode, 0);
+	      emit_move_insn (target, mem);
+	      return;
+	    }
+
 	  emit_move_insn (target, CONST0_RTX (vmode));
 
 	  for (i = 0; i < nelt; ++i)
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index d162702c220..be347155286 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -576,3 +576,6 @@  Allow inlining even if the compression flags differ between caller and callee.
 
 msched-weight
 Target Var(TARGET_SCHED_WEIGHT) Undocumented
+
+mfuse-vect-init
+Target Var(flag_fuse_vect_init) Undocumented Init(-1)
diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
new file mode 100644
index 00000000000..faa1ff4eee6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mfp64 -mhard-float -mmsa" } */
+/* { dg-additional-options "-mfuse-vect-init" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+
+typedef int v4i32 __attribute__ ((vector_size(16)));
+
+void
+copy (int *src, v4i32 *dst)
+{
+  v4i32 loaded = (v4i32) { src[0], src[1], src[2], src[3] };
+  *dst = loaded;
+}
+
+/* { dg-final { scan-assembler-not "insert" } } */
+/* { dg-final { scan-assembler-times "\tld\\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "\tst\\\.w" 1 } } */
+