Remove -mpower8-fusion options

Message ID Ynx8pW7+H6R6TwNn@toto.the-meissners.org
State New
Headers
Series Remove -mpower8-fusion options |

Commit Message

Michael Meissner May 12, 2022, 3:19 a.m. UTC
  Eliminate power8-fusion and power8-fusion-sign options.

As part of PR target/102059, one of the things that came up is that we should
eliminate the power8 fusion options altogether.  This patch eliminates the
-mpower8-fusion option.  It does enable power8 fusion if the code is being
tuned for power8.

This patch gives a warning if the user explicitly uses -mpower8-fusion
and silently ignores -mno-power8-fusion.

In addition, there used to be an option (-mpower8-fusion-sign) which split loads
with sign extension into an unsigned load (which can be fused with the
code setting up the address) and then an explicit sign extension.  This option
used to be enabled by default at -O3.  Because this option was never
documented, I didn't bother adding compatibility switches to warn if the option
was used.

I did some testing with Spec 2017.  I found that -mpower8-fusion-sign only
helped one benchmark (fotonik3d_r by 2.8%), but it slowed down three other
benchmarks (gcc_r by 1.5%, omnetpp_r by 10.7%, and deepsjeng_r by 1.3%).
Overall, this seems like a win not to do the optimization.

I also measured whether the fusion helps at all.  There are 3 benchmarks that
are slower if I disabled fusion compared to the code this patch generates
(perlbench_r slows down by 2.8%, bwaves_r slows down by 5.4%, and roms_r slows
down by 2%).  So it does seem to help to a limited extent to keep the fusion
support in (but not do the sign fusion support).

In addition to running the Spec 2017 tests, I have tested this patch on the
following systems with bootstraps and regression testing.  There were no
errors:

   1)	Power10, little endian, built with --with-cpu=power10;
   2)	Power9, little endian, built with --with-cpu=power9; (and)
   3)	Power8, big endian, built with --with-cpu=power8, and tested both
	32-bit and 64-bit code.

Can I install this patch into the GCC 13 trunk?  At the moment, I don't plan to
back port this to older branches (since Peter has already installed a patch
that fixes PR target/102059).  But if desired, I can certainly do the
backport.

2022-05-11   Michael Meissner  <meissner@linux.ibm.com>

gcc/

	PR target/102059
	* config/rs6000/predicates.md (fusion_gpr_mem_load): Remove
	support for fusing load with sign extend.
	* config/rs6000/rs6000-cpus.def (OTHER_FUSION_MASKS): Delete.
	(ISA_3_0_MASKS_SERVER): Don't reset fusion masks.
	(POWERPC_MASKS): Delete -mpower8-fusion option.
	* config/rs6000/rs6000.cc (rs6000_debug_reg_global): Delete code
	to print out power8 fusion status.
	(rs6000_option_override_internal): Delete support for power8
	fusion options.
	(rs6000_opt_masks): Delete power8-fusion and power8-fusion-sign
	options.
	(rs6000_can_inline_p): Delete resetting power8 fusion.
	(fusion_gpr_load_p): Don't fuse load with sign extend.
	(expand_fusion_gpr_load): Likewise.
	* config/rs6000/rs6000.h (MASK_P8_FUSION): Delete.
	(TARGET_P8_FUSION): New macro.
	* config/rs6000/rs6000.opt (-mpower8-fusion): Delete option, allow
	-mno-power8-fusion without warning.
	(-mpower8-fusion-sign): Delete option.
	* doc/invoke.texi (RS/6000 and PowerPC Options): Delete
	-mpower8-fusion.

gcc/testsuite/

	PR target/102059
	* gcc.target/powerpc/fusion.c: Remove load + sign extend fusion
	tests.
	* gcc.target/powerpc/pr102059-3.c: Remove -mno-power8-fusion
	option.
---
 gcc/config/rs6000/predicates.md               |  3 +-
 gcc/config/rs6000/rs6000-cpus.def             | 18 ++----
 gcc/config/rs6000/rs6000.cc                   | 64 ++-----------------
 gcc/config/rs6000/rs6000.h                    |  4 +-
 gcc/config/rs6000/rs6000.opt                  | 11 ++--
 gcc/doc/invoke.texi                           | 12 +---
 gcc/testsuite/gcc.target/powerpc/fusion.c     |  9 +--
 gcc/testsuite/gcc.target/powerpc/pr102059-3.c |  2 +-
 8 files changed, 28 insertions(+), 95 deletions(-)
  

Patch

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b1fcc69bb60..55b771d142c 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1877,8 +1877,7 @@  (define_predicate "fusion_gpr_mem_load"
   rtx addr, base, offset;
 
   /* Handle sign/zero extend.  */
-  if (GET_CODE (op) == ZERO_EXTEND
-      || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND))
+  if (GET_CODE (op) == ZERO_EXTEND)
     {
       op = XEXP (op, 0);
       mode = GET_MODE (op);
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 963947f6939..d913a3d6b73 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -54,19 +54,14 @@ 
 				 | OPTION_MASK_QUAD_MEMORY		\
 				 | OPTION_MASK_QUAD_MEMORY_ATOMIC)
 
-/* ISA masks setting fusion options.  */
-#define OTHER_FUSION_MASKS	(OPTION_MASK_P8_FUSION			\
-				 | OPTION_MASK_P8_FUSION_SIGN)
-
 /* Add ISEL back into ISA 3.0, since it is supposed to be a win.  Do not add
    FLOAT128_HW here until we are ready to make -mfloat128 on by default.  */
-#define ISA_3_0_MASKS_SERVER	((ISA_2_7_MASKS_SERVER			\
-				  | OPTION_MASK_ISEL			\
-				  | OPTION_MASK_MODULO			\
-				  | OPTION_MASK_P9_MINMAX		\
-				  | OPTION_MASK_P9_MISC			\
-				  | OPTION_MASK_P9_VECTOR)		\
-				 & ~OTHER_FUSION_MASKS)
+#define ISA_3_0_MASKS_SERVER	(ISA_2_7_MASKS_SERVER			\
+				 | OPTION_MASK_ISEL			\
+				 | OPTION_MASK_MODULO			\
+				 | OPTION_MASK_P9_MINMAX		\
+				 | OPTION_MASK_P9_MISC			\
+				 | OPTION_MASK_P9_VECTOR)
 
 /* Support for the IEEE 128-bit floating point hardware requires a lot of the
    VSX instructions that are part of ISA 3.0.  */
@@ -140,7 +135,6 @@ 
 				 | OPTION_MASK_MODULO			\
 				 | OPTION_MASK_MULHW			\
 				 | OPTION_MASK_NO_UPDATE		\
-				 | OPTION_MASK_P8_FUSION		\
 				 | OPTION_MASK_P8_VECTOR		\
 				 | OPTION_MASK_P9_MINMAX		\
 				 | OPTION_MASK_P9_MISC			\
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 5cb8a53e9ce..24d9ec2f8b0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2535,17 +2535,6 @@  rs6000_debug_reg_global (void)
   if (TARGET_LINK_STACK)
     fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
 
-  if (TARGET_P8_FUSION)
-    {
-      char options[80];
-
-      strcpy (options, "power8");
-      if (TARGET_P8_FUSION_SIGN)
-	strcat (options, ", sign");
-
-      fprintf (stderr, DEBUG_FMT_S, "fusion", options);
-    }
-
   fprintf (stderr, DEBUG_FMT_S, "plt-format",
 	   TARGET_SECURE_PLT ? "secure" : "bss");
   fprintf (stderr, DEBUG_FMT_S, "struct-return",
@@ -4030,41 +4019,6 @@  rs6000_option_override_internal (bool global_init_p)
       && optimize_function_for_speed_p (cfun))
     rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
 
-  /* Enable power8 fusion if we are tuning for power8, even if we aren't
-     generating power8 instructions.  Power9 does not optimize power8 fusion
-     cases.  */
-  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
-    {
-      if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
-	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
-      else
-	rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
-    }
-
-  /* Setting additional fusion flags turns on base fusion.  */
-  if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
-    {
-      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
-	{
-	  if (TARGET_P8_FUSION_SIGN)
-	    error ("%qs requires %qs", "-mpower8-fusion-sign",
-		   "-mpower8-fusion");
-
-	  rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
-	}
-      else
-	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
-    }
-
-  /* Power8 does not fuse sign extended loads with the addis.  If we are
-     optimizing at high levels for speed, convert a sign extended load into a
-     zero extending load, and an explicit sign extension.  */
-  if (TARGET_P8_FUSION
-      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
-      && optimize_function_for_speed_p (cfun)
-      && optimize >= 3)
-    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
-
   /* ISA 3.0 vector instructions include ISA 2.07.  */
   if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
     {
@@ -24000,8 +23954,6 @@  static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "pcrel-opt",		OPTION_MASK_PCREL_OPT,		false, true  },
   { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
   { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
-  { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
-  { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true  },
   { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true  },
   { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
   { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
@@ -25340,10 +25292,9 @@  rs6000_can_inline_p (tree caller, tree callee)
 	    }
 	}
 
-      /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
-	 purposes.  */
-      callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
-      explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
+      /* Ignore -mpower10-fusion option for inlining purposes.  */
+      callee_isa &= ~OPTION_MASK_P10_FUSION;
+      explicit_isa &= ~OPTION_MASK_P10_FUSION;
 
       /* The callee's options must be a subset of the caller's options, i.e.
 	 a vsx function may inline an altivec function, but a no-vsx function
@@ -27403,9 +27354,9 @@  fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
   if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
     return false;
 
-  /* Allow sign/zero extension.  */
-  if (GET_CODE (mem) == ZERO_EXTEND
-      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
+  /* Allow zero extension.  Do not allow sign extension, since the hardware does
+     not fuse load with sign extend instructions.  */
+  if (GET_CODE (mem) == ZERO_EXTEND)
     mem = XEXP (mem, 0);
 
   if (!MEM_P (mem))
@@ -27468,8 +27419,7 @@  expand_fusion_gpr_load (rtx *operands)
   machine_mode ptr_mode = Pmode;
   enum rtx_code extend = UNKNOWN;
 
-  if (GET_CODE (orig_mem) == ZERO_EXTEND
-      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
+  if (GET_CODE (orig_mem) == ZERO_EXTEND)
     {
       extend = GET_CODE (orig_mem);
       orig_mem = XEXP (orig_mem, 0);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 20b9d11424d..4a1085052e2 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -517,7 +517,6 @@  extern int rs6000_vector_align[];
 #define MASK_FLOAT128_KEYWORD		OPTION_MASK_FLOAT128_KEYWORD
 #define MASK_FLOAT128_HW		OPTION_MASK_FLOAT128_HW
 #define MASK_FPRND			OPTION_MASK_FPRND
-#define MASK_P8_FUSION			OPTION_MASK_P8_FUSION
 #define MASK_HARD_FLOAT			OPTION_MASK_HARD_FLOAT
 #define MASK_HTM			OPTION_MASK_HTM
 #define MASK_ISEL			OPTION_MASK_ISEL
@@ -631,6 +630,9 @@  extern unsigned char rs6000_recip_bits[];
 #define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \
   (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE)
 
+/* Whether we need to enable power8 fusion.  */
+#define TARGET_P8_FUSION (rs6000_tune == PROCESSOR_POWER8)
+
 /* The default CPU for TARGET_OPTION_OVERRIDE.  */
 #define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT
 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 4931d781c4e..a6ce59fc18d 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -474,13 +474,12 @@  Save the TOC in the prologue for indirect calls rather than inline.
 mvsx-timode
 Target RejectNegative Undocumented Ignore
 
-mpower8-fusion
-Target Mask(P8_FUSION) Var(rs6000_isa_flags)
-Fuse certain integer operations together for better performance on power8.
+;; This option existed in the past, but now is silently ignored.
+mno-power8-fusion
+Target RejectNegative Undocumented Ignore
 
-mpower8-fusion-sign
-Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
-Allow sign extension in fusion operations.
+mpower8-fusion
+Target RejectNegative Undocumented WarnRemoved
 
 mpower8-vector
 Target Mask(P8_VECTOR) Var(rs6000_isa_flags)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7a35d9613a4..3d31086af46 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1266,7 +1266,7 @@  See RS/6000 and PowerPC Options.
 -mveclibabi=@var{type}  -mfriz  -mno-friz @gol
 -mpointers-to-nested-functions  -mno-pointers-to-nested-functions @gol
 -msave-toc-indirect  -mno-save-toc-indirect @gol
--mpower8-fusion  -mno-mpower8-fusion  -mpower8-vector  -mno-power8-vector @gol
+-mpower8-vector  -mno-power8-vector @gol
 -mcrypto  -mno-crypto  -mhtm  -mno-htm @gol
 -mquad-memory  -mno-quad-memory @gol
 -mquad-memory-atomic  -mno-quad-memory-atomic @gol
@@ -28353,7 +28353,7 @@  following options:
 -mpopcntb  -mpopcntd  -mpowerpc64 @gol
 -mpowerpc-gpopt  -mpowerpc-gfxopt @gol
 -mmulhw  -mdlmzb  -mmfpgpr  -mvsx @gol
--mcrypto  -mhtm  -mpower8-fusion  -mpower8-vector @gol
+-mcrypto  -mhtm  -mpower8-vector @gol
 -mquad-memory  -mquad-memory-atomic  -mfloat128 @gol
 -mfloat128-hardware -mprefixed -mpcrel -mmma @gol
 -mrop-protect}
@@ -28468,14 +28468,6 @@  Enable (disable) the use of the built-in functions that allow direct
 access to the Hardware Transactional Memory (HTM) instructions that
 were added in version 2.07 of the PowerPC ISA.
 
-@item -mpower8-fusion
-@itemx -mno-power8-fusion
-@opindex mpower8-fusion
-@opindex mno-power8-fusion
-Generate code that keeps (does not keeps) some integer operations
-adjacent so that the instructions can be fused together on power8 and
-later processors.
-
 @item -mpower8-vector
 @itemx -mno-power8-vector
 @opindex mpower8-vector
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion.c b/gcc/testsuite/gcc.target/powerpc/fusion.c
index 2a115580439..595dd556fe7 100644
--- a/gcc/testsuite/gcc.target/powerpc/fusion.c
+++ b/gcc/testsuite/gcc.target/powerpc/fusion.c
@@ -8,13 +8,10 @@ 
 int fusion_uchar (unsigned char *p){ return p[LARGE]; }
 int fusion_schar (signed char *p){ return p[LARGE]; }
 int fusion_ushort (unsigned short *p){ return p[LARGE]; }
-int fusion_short (short *p){ return p[LARGE]; }
-int fusion_int (int *p){ return p[LARGE]; }
 unsigned fusion_uns (unsigned *p){ return p[LARGE]; }
 
-/* { dg-final { scan-assembler-times "fusion_gpr_load"    6 } } */
+/* { dg-final { scan-assembler-times "fusion_gpr_load"    4 } } */
 /* { dg-final { scan-assembler-times "lbz"                2 } } */
 /* { dg-final { scan-assembler-times "extsb"              1 } } */
-/* { dg-final { scan-assembler-times "lhz"                2 } } */
-/* { dg-final { scan-assembler-times "extsh"              1 } } */
-/* { dg-final { scan-assembler-times "lwz"                2 } } */
+/* { dg-final { scan-assembler-times "lhz"                1 } } */
+/* { dg-final { scan-assembler-times "lwz"                1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr102059-3.c b/gcc/testsuite/gcc.target/powerpc/pr102059-3.c
index 21c982d93f0..0cb3a4cb9f9 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr102059-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr102059-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-power8-fusion -fdump-tree-einline-optimized" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -fdump-tree-einline-optimized" } */
 
 /* Like pr102059-1.c, to verify the inlining still happens
    even without always_inline attribute.  */