Eliminate power8-fusion and power8-fusion-sign options.
As part of PR target/102059, one of the things that came up is that we should
eliminate the power8 fusion options altogether. This patch eliminates the
-mpower8-fusion option. It does enable power8 fusion if the code is being
tuned for power8.
This patch gives a warning if the user explicitly uses -mpower8-fusion, and
silently ignores -mno-power8-fusion.
In addition, there used to be an option (-mpower8-fusion-sign) which split loads
with sign extension into an unsigned load (which can be fused with the
code setting up the address) and then an explicit sign extension. This option
used to be enabled by default at -O3. Because this option was never
documented, I didn't bother adding compatibility switches to warn if the option
was used.
I did some testing with Spec 2017. I found that -mpower8-fusion-sign only
helped one benchmark (fotonik3d_r by 2.8%), but it slowed down three other
benchmarks (gcc_r by 1.5%, omnetpp_r by 10.7%, and deepsjeng_r by 1.3%).
Overall, it seems like a win not to do the optimization.
I also measured whether the fusion helps at all. There are 3 benchmarks that
are slower if I disable fusion compared to the code this patch generates
(perlbench_r slows down by 2.8%, bwaves_r slows down by 5.4%, and roms_r slows
down by 2%). So it does seem to help to a limited extent to keep the fusion
support in (but not the sign fusion support).
In addition to running the Spec 2017 tests, I have tested this patch on the
following systems with bootstraps and regression testing. There were no
errors:
1) Power10, little endian, built with --with-cpu=power10;
2) Power9, little endian, built with --with-cpu=power9; (and)
3) Power8, big endian, built with --with-cpu=power8, and tested both
32-bit and 64-bit code.
Can I install this patch into the GCC 13 trunk? At the moment, I don't plan to
back port this to older branches (since Peter has already installed a patch
that fixes PR target/102059). But if desired, I can certainly do the
backport.
2022-05-11 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/102059
* config/rs6000/predicates.md (fusion_gpr_mem_load): Remove
support for fusing load with sign extend.
* config/rs6000/rs6000-cpus.def (OTHER_FUSION_MASKS): Delete.
(ISA_3_0_MASKS_SERVER): Don't reset fusion masks.
(POWERPC_MASKS): Delete -mpower8-fusion option.
* config/rs6000/rs6000.cc (rs6000_debug_reg_global): Delete code
to print out power8 fusion status.
(rs6000_option_override_internal): Delete support for power8
fusion options.
(rs6000_opt_masks): Delete power8-fusion and power8-fusion-sign
options.
(rs6000_can_inline_p): Delete resetting power8 fusion.
(fusion_gpr_load_p): Don't fuse load with sign extend.
(expand_fusion_gpr_load): Likewise.
* config/rs6000/rs6000.h (MASK_P8_FUSION): Delete.
(TARGET_P8_FUSION): New macro.
* config/rs6000/rs6000.opt (-mpower8-fusion): Delete option, allow
-mno-power8-fusion without warning.
(-mpower8-fusion-sign): Delete option.
* doc/invoke.texi (RS/6000 and PowerPC Options): Delete
-mpower8-fusion.
gcc/testsuite/
PR target/102059
* gcc.target/powerpc/fusion.c: Remove load + sign extend fusion
tests.
* gcc.target/powerpc/pr102059-3.c: Remove -mno-power8-fusion
option.
---
gcc/config/rs6000/predicates.md | 3 +-
gcc/config/rs6000/rs6000-cpus.def | 18 ++----
gcc/config/rs6000/rs6000.cc | 64 ++-----------------
gcc/config/rs6000/rs6000.h | 4 +-
gcc/config/rs6000/rs6000.opt | 11 ++--
gcc/doc/invoke.texi | 12 +---
gcc/testsuite/gcc.target/powerpc/fusion.c | 9 +--
gcc/testsuite/gcc.target/powerpc/pr102059-3.c | 2 +-
8 files changed, 28 insertions(+), 95 deletions(-)
@@ -1877,8 +1877,7 @@ (define_predicate "fusion_gpr_mem_load"
rtx addr, base, offset;
/* Handle sign/zero extend. */
- if (GET_CODE (op) == ZERO_EXTEND
- || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND))
+ if (GET_CODE (op) == ZERO_EXTEND)
{
op = XEXP (op, 0);
mode = GET_MODE (op);
@@ -54,19 +54,14 @@
| OPTION_MASK_QUAD_MEMORY \
| OPTION_MASK_QUAD_MEMORY_ATOMIC)
-/* ISA masks setting fusion options. */
-#define OTHER_FUSION_MASKS (OPTION_MASK_P8_FUSION \
- | OPTION_MASK_P8_FUSION_SIGN)
-
/* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add
FLOAT128_HW here until we are ready to make -mfloat128 on by default. */
-#define ISA_3_0_MASKS_SERVER ((ISA_2_7_MASKS_SERVER \
- | OPTION_MASK_ISEL \
- | OPTION_MASK_MODULO \
- | OPTION_MASK_P9_MINMAX \
- | OPTION_MASK_P9_MISC \
- | OPTION_MASK_P9_VECTOR) \
- & ~OTHER_FUSION_MASKS)
+#define ISA_3_0_MASKS_SERVER (ISA_2_7_MASKS_SERVER \
+ | OPTION_MASK_ISEL \
+ | OPTION_MASK_MODULO \
+ | OPTION_MASK_P9_MINMAX \
+ | OPTION_MASK_P9_MISC \
+ | OPTION_MASK_P9_VECTOR)
/* Support for the IEEE 128-bit floating point hardware requires a lot of the
VSX instructions that are part of ISA 3.0. */
@@ -140,7 +135,6 @@
| OPTION_MASK_MODULO \
| OPTION_MASK_MULHW \
| OPTION_MASK_NO_UPDATE \
- | OPTION_MASK_P8_FUSION \
| OPTION_MASK_P8_VECTOR \
| OPTION_MASK_P9_MINMAX \
| OPTION_MASK_P9_MISC \
@@ -2535,17 +2535,6 @@ rs6000_debug_reg_global (void)
if (TARGET_LINK_STACK)
fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
- if (TARGET_P8_FUSION)
- {
- char options[80];
-
- strcpy (options, "power8");
- if (TARGET_P8_FUSION_SIGN)
- strcat (options, ", sign");
-
- fprintf (stderr, DEBUG_FMT_S, "fusion", options);
- }
-
fprintf (stderr, DEBUG_FMT_S, "plt-format",
TARGET_SECURE_PLT ? "secure" : "bss");
fprintf (stderr, DEBUG_FMT_S, "struct-return",
@@ -4030,41 +4019,6 @@ rs6000_option_override_internal (bool global_init_p)
&& optimize_function_for_speed_p (cfun))
rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
- /* Enable power8 fusion if we are tuning for power8, even if we aren't
- generating power8 instructions. Power9 does not optimize power8 fusion
- cases. */
- if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
- {
- if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
- rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
- else
- rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
- }
-
- /* Setting additional fusion flags turns on base fusion. */
- if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
- {
- if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
- {
- if (TARGET_P8_FUSION_SIGN)
- error ("%qs requires %qs", "-mpower8-fusion-sign",
- "-mpower8-fusion");
-
- rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
- }
- else
- rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
- }
-
- /* Power8 does not fuse sign extended loads with the addis. If we are
- optimizing at high levels for speed, convert a sign extended load into a
- zero extending load, and an explicit sign extension. */
- if (TARGET_P8_FUSION
- && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
- && optimize_function_for_speed_p (cfun)
- && optimize >= 3)
- rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
-
/* ISA 3.0 vector instructions include ISA 2.07. */
if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
{
@@ -24000,8 +23954,6 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
- { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
- { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
{ "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
{ "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
{ "power9-misc", OPTION_MASK_P9_MISC, false, true },
@@ -25340,10 +25292,9 @@ rs6000_can_inline_p (tree caller, tree callee)
}
}
- /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
- purposes. */
- callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
- explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
+ /* Ignore -mpower10-fusion option for inlining purposes. */
+ callee_isa &= ~OPTION_MASK_P10_FUSION;
+ explicit_isa &= ~OPTION_MASK_P10_FUSION;
/* The callee's options must be a subset of the caller's options, i.e.
a vsx function may inline an altivec function, but a no-vsx function
@@ -27403,9 +27354,9 @@ fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
return false;
- /* Allow sign/zero extension. */
- if (GET_CODE (mem) == ZERO_EXTEND
- || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
+ /* Allow zero extension. Do not allow sign extension, since the hardware does
+ not fuse load with sign extend instructions. */
+ if (GET_CODE (mem) == ZERO_EXTEND)
mem = XEXP (mem, 0);
if (!MEM_P (mem))
@@ -27468,8 +27419,7 @@ expand_fusion_gpr_load (rtx *operands)
machine_mode ptr_mode = Pmode;
enum rtx_code extend = UNKNOWN;
- if (GET_CODE (orig_mem) == ZERO_EXTEND
- || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
+ if (GET_CODE (orig_mem) == ZERO_EXTEND)
{
extend = GET_CODE (orig_mem);
orig_mem = XEXP (orig_mem, 0);
@@ -517,7 +517,6 @@ extern int rs6000_vector_align[];
#define MASK_FLOAT128_KEYWORD OPTION_MASK_FLOAT128_KEYWORD
#define MASK_FLOAT128_HW OPTION_MASK_FLOAT128_HW
#define MASK_FPRND OPTION_MASK_FPRND
-#define MASK_P8_FUSION OPTION_MASK_P8_FUSION
#define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT
#define MASK_HTM OPTION_MASK_HTM
#define MASK_ISEL OPTION_MASK_ISEL
@@ -631,6 +630,9 @@ extern unsigned char rs6000_recip_bits[];
#define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \
(rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE)
+/* Whether we need to enable power8 fusion. */
+#define TARGET_P8_FUSION (rs6000_tune == PROCESSOR_POWER8)
+
/* The default CPU for TARGET_OPTION_OVERRIDE. */
#define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT
@@ -474,13 +474,12 @@ Save the TOC in the prologue for indirect calls rather than inline.
mvsx-timode
Target RejectNegative Undocumented Ignore
-mpower8-fusion
-Target Mask(P8_FUSION) Var(rs6000_isa_flags)
-Fuse certain integer operations together for better performance on power8.
;; This option existed in the past, but is now accepted and silently ignored.
+mno-power8-fusion
+Target RejectNegative Undocumented Ignore
-mpower8-fusion-sign
-Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
-Allow sign extension in fusion operations.
+mpower8-fusion
+Target RejectNegative Undocumented WarnRemoved
mpower8-vector
Target Mask(P8_VECTOR) Var(rs6000_isa_flags)
@@ -1266,7 +1266,7 @@ See RS/6000 and PowerPC Options.
-mveclibabi=@var{type} -mfriz -mno-friz @gol
-mpointers-to-nested-functions -mno-pointers-to-nested-functions @gol
-msave-toc-indirect -mno-save-toc-indirect @gol
--mpower8-fusion -mno-mpower8-fusion -mpower8-vector -mno-power8-vector @gol
+-mpower8-vector -mno-power8-vector @gol
-mcrypto -mno-crypto -mhtm -mno-htm @gol
-mquad-memory -mno-quad-memory @gol
-mquad-memory-atomic -mno-quad-memory-atomic @gol
@@ -28353,7 +28353,7 @@ following options:
-mpopcntb -mpopcntd -mpowerpc64 @gol
-mpowerpc-gpopt -mpowerpc-gfxopt @gol
-mmulhw -mdlmzb -mmfpgpr -mvsx @gol
--mcrypto -mhtm -mpower8-fusion -mpower8-vector @gol
+-mcrypto -mhtm @gol
-mquad-memory -mquad-memory-atomic -mfloat128 @gol
-mfloat128-hardware -mprefixed -mpcrel -mmma @gol
-mrop-protect}
@@ -28468,14 +28468,6 @@ Enable (disable) the use of the built-in functions that allow direct
access to the Hardware Transactional Memory (HTM) instructions that
were added in version 2.07 of the PowerPC ISA.
-@item -mpower8-fusion
-@itemx -mno-power8-fusion
-@opindex mpower8-fusion
-@opindex mno-power8-fusion
-Generate code that keeps (does not keeps) some integer operations
-adjacent so that the instructions can be fused together on power8 and
-later processors.
-
@item -mpower8-vector
@itemx -mno-power8-vector
@opindex mpower8-vector
@@ -8,13 +8,10 @@
int fusion_uchar (unsigned char *p){ return p[LARGE]; }
int fusion_schar (signed char *p){ return p[LARGE]; }
int fusion_ushort (unsigned short *p){ return p[LARGE]; }
-int fusion_short (short *p){ return p[LARGE]; }
-int fusion_int (int *p){ return p[LARGE]; }
unsigned fusion_uns (unsigned *p){ return p[LARGE]; }
-/* { dg-final { scan-assembler-times "fusion_gpr_load" 6 } } */
+/* { dg-final { scan-assembler-times "fusion_gpr_load" 4 } } */
/* { dg-final { scan-assembler-times "lbz" 2 } } */
/* { dg-final { scan-assembler-times "extsb" 1 } } */
-/* { dg-final { scan-assembler-times "lhz" 2 } } */
-/* { dg-final { scan-assembler-times "extsh" 1 } } */
-/* { dg-final { scan-assembler-times "lwz" 2 } } */
+/* { dg-final { scan-assembler-times "lhz" 1 } } */
+/* { dg-final { scan-assembler-times "lwz" 1 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-power8-fusion -fdump-tree-einline-optimized" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -fdump-tree-einline-optimized" } */
/* Like pr102059-1.c, to verify the inlining still happens
even without always_inline attribute. */