Message ID | YZe6WugqvxPKNQJj@toto.the-meissners.org |
---|---|
State | New |
Headers | show |
Series | Add zero cycle move support | expand |
Hi Mike, Thanks for this patch! On 11/19/21 8:53 AM, Michael Meissner wrote: > Add power10 zero cycle moves for switches. > > Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero > cycle moves. This code exploits this fusion opportunity. > > I have built bootstrapped compilers with this patch on little endian power9 and > power10 systems with no regressions. Can I install this into the master > branch? > > 2021-11-19 Michael Meissner <meissner@the-meissners.org> > > * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add > support for -mpower10-fusion-zero-cycle. > (POWERPC_MASKS): Likewise. > * config/rs6000/rs6000.c (rs6000_option_override_internal): > Likewise. > * config/rs6000/rs6000.md (indirect_jump): Support zero cycle > moves. > (indirect_jump<mode>_zero_cycle): New insns. > (tablejump<mode>_normal): Likewise. > (tablejump<mode>_absolute): Likewise. > (tablejump<mode>_insn_zero_cycle): New insn. > * config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New > debug switch. > --- > gcc/config/rs6000/rs6000-cpus.def | 4 ++- > gcc/config/rs6000/rs6000.c | 4 +++ > gcc/config/rs6000/rs6000.md | 52 ++++++++++++++++++++++++++++--- > gcc/config/rs6000/rs6000.opt | 4 +++ > 4 files changed, 59 insertions(+), 5 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def > index f5812da0184..cc072ee94ea 100644 > --- a/gcc/config/rs6000/rs6000-cpus.def > +++ b/gcc/config/rs6000/rs6000-cpus.def > @@ -91,7 +91,8 @@ > | OPTION_MASK_P10_FUSION_LOGADD \ > | OPTION_MASK_P10_FUSION_ADDLOG \ > | OPTION_MASK_P10_FUSION_2ADD \ > - | OPTION_MASK_P10_FUSION_2STORE) > + | OPTION_MASK_P10_FUSION_2STORE \ > + | OPTION_MASK_P10_FUSION_ZERO_CYCLE) I guess it's fine to introduce one more for now, but ultimately we want all these to get collapsed down to one. No worries from me. > > /* Flags that need to be turned off if -mno-power9-vector. */ > #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ > @@ -145,6 +146,7 @@ > | OPTION_MASK_P10_FUSION_ADDLOG \ > | OPTION_MASK_P10_FUSION_2ADD \ > | OPTION_MASK_P10_FUSION_2STORE \ > + | OPTION_MASK_P10_FUSION_ZERO_CYCLE \ > | OPTION_MASK_HTM \ > | OPTION_MASK_ISEL \ > | OPTION_MASK_MFCRF \ > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index e4843eb0f1c..6780304a5eb 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p) > && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0) > rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE; > > + if (TARGET_POWER10 > + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 0) > + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE; > + > /* Turn off vector pair/mma options on non-power10 systems. */ > else if (!TARGET_POWER10 && TARGET_MMA) > { > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 6bec2bddbde..ea41eb4ada3 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump" > emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg)); > DONE; > } > + if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE) > + { > + emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0])); > + DONE; > + } > }) > > (define_insn "*indirect_jump<mode>" > [(set (pc) > (match_operand:P 0 "register_operand" "c,*l"))] > - "rs6000_speculate_indirect_jumps" > + "rs6000_speculate_indirect_jumps > + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" > "b%T0" > [(set_attr "type" "jmpreg")]) > > +(define_insn "@indirect_jump<mode>_zero_cycle" I don't know why this is an "@" pattern, but honestly I don't know why @indirect_jump<mode>_nospec is an "@" pattern either. The documentation for such things is hard for me to understand, so I'm probably just missing something obvious, but I don't immediately see why we would need the @ here. > + [(set (pc) > + (match_operand:P 0 "register_operand" "r,r,!cl")) > + (clobber (match_scratch:P 1 "=c,*l,X"))] Do we need the *l and X alternatives if we're only doing this for mtctr/bctr? > + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION > + && TARGET_P10_FUSION_ZERO_CYCLE" > + "@ > + mt%T1 %0\;b%T1 > + mt%T1 %0\;b%T1 > + b%T0" > + [(set_attr "type" "jmpreg") > + (set_attr "length" "8,8,4")]) > + > (define_insn "@indirect_jump<mode>_nospec" > [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) > (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))] > @@ -13050,7 +13069,11 @@ (define_expand "@tablejump<mode>_normal" > rtx addr = gen_reg_rtx (Pmode); > > emit_insn (gen_add<mode>3 (addr, off, lab)); > - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE > + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1]) > + : gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + > + emit_jump_insn (insn); > DONE; > }) > > @@ -13062,7 +13085,11 @@ (define_expand "@tablejump<mode>_absolute" > rtx addr = gen_reg_rtx (Pmode); > emit_move_insn (addr, operands[0]); > > - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE > + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1]) > + : gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + > + emit_jump_insn (insn); > DONE; > }) > > @@ -13107,10 +13134,27 @@ (define_insn "@tablejump<mode>_insn_normal" > [(set (pc) > (match_operand:P 0 "register_operand" "c,*l")) > (use (label_ref (match_operand 1)))] > - "rs6000_speculate_indirect_jumps" > + "rs6000_speculate_indirect_jumps > + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" > "b%T0" > [(set_attr "type" "jmpreg")]) > > +;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle > +;; moves on Power10. > +(define_insn "@tablejump<mode>_insn_zero_cycle" Same question about @. > + [(set (pc) > + (match_operand:P 0 "register_operand" "r,r,!cl")) > + (use (label_ref (match_operand 1))) > + (clobber (match_scratch:P 2 "=c,*l,X"))] Same question about 2nd and 3rd alternatives. Otherwise LGTM... over to the maintainers. :) Thanks! Bill > + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION > + && TARGET_P10_FUSION_ZERO_CYCLE" > + "@ > + mt%T2 %0\;b%T2 > + mt%T2 %0\;b%T2 > + b%T0" > + [(set_attr "type" "jmpreg") > + (set_attr "length" "8,8,4")]) > + > (define_insn "@tablejump<mode>_insn_nospec" > [(set (pc) > (match_operand:P 0 "register_operand" "c,*l")) > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > index 9d7878f144a..ba674947557 100644 > --- a/gcc/config/rs6000/rs6000.opt > +++ b/gcc/config/rs6000/rs6000.opt > @@ -518,6 +518,10 @@ mpower10-fusion-2store > Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags) > Fuse certain store operations together for better performance on power10. > > +mpower10-fusion-zero-cycle > +Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags) > +Fuse move to special register and jump for better performance on power10. > + > mcrypto > Target Mask(CRYPTO) Var(rs6000_isa_flags) > Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
On Mon, Nov 22, 2021 at 10:36:13AM -0600, Bill Schmidt wrote: > Hi Mike, > > Thanks for this patch! > > --- a/gcc/config/rs6000/rs6000.md > > +++ b/gcc/config/rs6000/rs6000.md > > @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump" > > emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg)); > > DONE; > > } > > + if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE) > > + { > > + emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0])); > > + DONE; > > + } > > }) > > > > (define_insn "*indirect_jump<mode>" > > [(set (pc) > > (match_operand:P 0 "register_operand" "c,*l"))] > > - "rs6000_speculate_indirect_jumps" > > + "rs6000_speculate_indirect_jumps > > + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" > > "b%T0" > > [(set_attr "type" "jmpreg")]) > > > > +(define_insn "@indirect_jump<mode>_zero_cycle" > > I don't know why this is an "@" pattern, but honestly I don't > know why @indirect_jump<mode>_nospec is an "@" pattern either. > The documentation for such things is hard for me to understand, > so I'm probably just missing something obvious, but I don't > immediately see why we would need the @ here. I didn't know about it either. Basically the next insn used it: (define_insn "@indirect_jump<mode>_nospec" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))] "!rs6000_speculate_indirect_jumps" "crset %E1\;beq%T0- %1\;b $" [(set_attr "type" "jmpreg") (set_attr "length" "12")]) This creates a function: gen_indirect_jump_nospec (machine_mode arg0, rtx x0, rtx x1) where the mode of the P iterator is passed as argument. I.e. you can do: rtx foo = gen_indirect_jump_nospec (Pmode, op0, op1); instead of: rtx foo; if (Pmode == SImode) foo = gen_indirect_jumpsi_nospec (op0, op1); else if (Pmode == DImode) foo = gen_indirect_jumpdi_nospec (op0, op1); else gcc_unreachable (); > > + [(set (pc) > > + (match_operand:P 0 "register_operand" "r,r,!cl")) > > + (clobber (match_scratch:P 1 "=c,*l,X"))] > > Do we need the *l and X alternatives if we're only doing this for > mtctr/bctr? Probably not, but I recall back before the current allocator, that it would cause crashes if we didn't have LR. I could certainly eliminate the *l alternative.
Ping patch. | Date: Fri, 19 Nov 2021 09:53:14 -0500 | From: Michael Meissner <meissner@linux.ibm.com> | Subject: [PATCH 1/3] Add power10 zero cycle moves for switches & indirect jumps | Message-ID: <YZe6WugqvxPKNQJj@toto.the-meissners.org> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585005.html Note, I will on-line through December 20th. I will be off-line from December 21st through January 1st.
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index f5812da0184..cc072ee94ea 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -91,7 +91,8 @@ | OPTION_MASK_P10_FUSION_LOGADD \ | OPTION_MASK_P10_FUSION_ADDLOG \ | OPTION_MASK_P10_FUSION_2ADD \ - | OPTION_MASK_P10_FUSION_2STORE) + | OPTION_MASK_P10_FUSION_2STORE \ + | OPTION_MASK_P10_FUSION_ZERO_CYCLE) /* Flags that need to be turned off if -mno-power9-vector. */ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ @@ -145,6 +146,7 @@ | OPTION_MASK_P10_FUSION_ADDLOG \ | OPTION_MASK_P10_FUSION_2ADD \ | OPTION_MASK_P10_FUSION_2STORE \ + | OPTION_MASK_P10_FUSION_ZERO_CYCLE \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF \ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e4843eb0f1c..6780304a5eb 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p) && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE; + if (TARGET_POWER10 + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 0) + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE; + /* Turn off vector pair/mma options on non-power10 systems. */ else if (!TARGET_POWER10 && TARGET_MMA) { diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6bec2bddbde..ea41eb4ada3 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump" emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg)); DONE; } + if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE) + { + emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0])); + DONE; + } }) (define_insn "*indirect_jump<mode>" [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))] - "rs6000_speculate_indirect_jumps" + "rs6000_speculate_indirect_jumps + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" "b%T0" [(set_attr "type" "jmpreg")]) +(define_insn "@indirect_jump<mode>_zero_cycle" + [(set (pc) + (match_operand:P 0 "register_operand" "r,r,!cl")) + (clobber (match_scratch:P 1 "=c,*l,X"))] + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION + && TARGET_P10_FUSION_ZERO_CYCLE" + "@ + mt%T1 %0\;b%T1 + mt%T1 %0\;b%T1 + b%T0" + [(set_attr "type" "jmpreg") + (set_attr "length" "8,8,4")]) + (define_insn "@indirect_jump<mode>_nospec" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))] @@ -13050,7 +13069,11 @@ (define_expand "@tablejump<mode>_normal" rtx addr = gen_reg_rtx (Pmode); emit_insn (gen_add<mode>3 (addr, off, lab)); - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1]) + : gen_tablejump_insn_normal (Pmode, addr, operands[1])); + + emit_jump_insn (insn); DONE; }) @@ -13062,7 +13085,11 @@ (define_expand "@tablejump<mode>_absolute" rtx addr = gen_reg_rtx (Pmode); emit_move_insn (addr, operands[0]); - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1]) + : gen_tablejump_insn_normal (Pmode, addr, operands[1])); + + emit_jump_insn (insn); DONE; }) @@ -13107,10 +13134,27 @@ (define_insn "@tablejump<mode>_insn_normal" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (use (label_ref (match_operand 1)))] - "rs6000_speculate_indirect_jumps" + "rs6000_speculate_indirect_jumps + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" "b%T0" [(set_attr "type" "jmpreg")]) +;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle +;; moves on Power10. +(define_insn "@tablejump<mode>_insn_zero_cycle" + [(set (pc) + (match_operand:P 0 "register_operand" "r,r,!cl")) + (use (label_ref (match_operand 1))) + (clobber (match_scratch:P 2 "=c,*l,X"))] + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION + && TARGET_P10_FUSION_ZERO_CYCLE" + "@ + mt%T2 %0\;b%T2 + mt%T2 %0\;b%T2 + b%T0" + [(set_attr "type" "jmpreg") + (set_attr "length" "8,8,4")]) + (define_insn "@tablejump<mode>_insn_nospec" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 9d7878f144a..ba674947557 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -518,6 +518,10 @@ mpower10-fusion-2store Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags) Fuse certain store operations together for better performance on power10. +mpower10-fusion-zero-cycle +Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags) +Fuse move to special register and jump for better performance on power10. + mcrypto Target Mask(CRYPTO) Var(rs6000_isa_flags) Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.