diff mbox series

[1/3] Add power10 zero cycle moves for switches & indirect jumps

Message ID YZe6WugqvxPKNQJj@toto.the-meissners.org
State New
Headers show
Series Add zero cycle move support | expand

Commit Message

Michael Meissner Nov. 19, 2021, 2:53 p.m. UTC
Add power10 zero cycle moves for switches.

Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero
cycle moves.  This code exploits this fusion opportunity.

I have built bootstrapped compilers with this patch on little endian power9 and
power10 systems with no regressions.  Can I install this into the master
branch?

2021-11-19  Michael Meissner  <meissner@the-meissners.org>

	* config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
	support for -mpower10-fusion-zero-cycle.
	(POWERPC_MASKS): Likewise.
	* config/rs6000/rs6000.c (rs6000_option_override_internal):
	Likewise.
	* config/rs6000/rs6000.md (indirect_jump): Support zero cycle
	moves.
	(indirect_jump<mode>_zero_cycle): New insns.
	(tablejump<mode>_normal): Likewise.
	(tablejump<mode>_absolute): Likewise.
	(tablejump<mode>_insn_zero_cycle): New insn.
	* config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New
	debug switch.
---
 gcc/config/rs6000/rs6000-cpus.def |  4 ++-
 gcc/config/rs6000/rs6000.c        |  4 +++
 gcc/config/rs6000/rs6000.md       | 52 ++++++++++++++++++++++++++++---
 gcc/config/rs6000/rs6000.opt      |  4 +++
 4 files changed, 59 insertions(+), 5 deletions(-)

Comments

Pop, Sebastian via Gcc-patches Nov. 22, 2021, 4:36 p.m. UTC | #1
Hi Mike,

Thanks for this patch!

On 11/19/21 8:53 AM, Michael Meissner wrote:
> Add power10 zero cycle moves for switches.
>
> Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero
> cycle moves.  This code exploits this fusion opportunity.
>
> I have built bootstrapped compilers with this patch on little endian power9 and
> power10 systems with no regressions.  Can I install this into the master
> branch?
>
> 2021-11-19  Michael Meissner  <meissner@the-meissners.org>
>
> 	* config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
> 	support for -mpower10-fusion-zero-cycle.
> 	(POWERPC_MASKS): Likewise.
> 	* config/rs6000/rs6000.c (rs6000_option_override_internal):
> 	Likewise.
> 	* config/rs6000/rs6000.md (indirect_jump): Support zero cycle
> 	moves.
> 	(indirect_jump<mode>_zero_cycle): New insns.
> 	(tablejump<mode>_normal): Likewise.
> 	(tablejump<mode>_absolute): Likewise.
> 	(tablejump<mode>_insn_zero_cycle): New insn.
> 	* config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New
> 	debug switch.
> ---
>  gcc/config/rs6000/rs6000-cpus.def |  4 ++-
>  gcc/config/rs6000/rs6000.c        |  4 +++
>  gcc/config/rs6000/rs6000.md       | 52 ++++++++++++++++++++++++++++---
>  gcc/config/rs6000/rs6000.opt      |  4 +++
>  4 files changed, 59 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
> index f5812da0184..cc072ee94ea 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -91,7 +91,8 @@
>  				 | OPTION_MASK_P10_FUSION_LOGADD 	\
>  				 | OPTION_MASK_P10_FUSION_ADDLOG	\
>  				 | OPTION_MASK_P10_FUSION_2ADD		\
> -				 | OPTION_MASK_P10_FUSION_2STORE)
> +				 | OPTION_MASK_P10_FUSION_2STORE	\
> +				 | OPTION_MASK_P10_FUSION_ZERO_CYCLE)

I guess it's fine to introduce one more for now, but ultimately we want
all these to get collapsed down to one.  No worries from me.

>  
>  /* Flags that need to be turned off if -mno-power9-vector.  */
>  #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
> @@ -145,6 +146,7 @@
>  				 | OPTION_MASK_P10_FUSION_ADDLOG	\
>  				 | OPTION_MASK_P10_FUSION_2ADD    	\
>  				 | OPTION_MASK_P10_FUSION_2STORE	\
> +				 | OPTION_MASK_P10_FUSION_ZERO_CYCLE	\
>  				 | OPTION_MASK_HTM			\
>  				 | OPTION_MASK_ISEL			\
>  				 | OPTION_MASK_MFCRF			\
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index e4843eb0f1c..6780304a5eb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p)
>        && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
>      rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
>  
> +  if (TARGET_POWER10
> +      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 0)
> +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE;
> +
>    /* Turn off vector pair/mma options on non-power10 systems.  */
>    else if (!TARGET_POWER10 && TARGET_MMA)
>      {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..ea41eb4ada3 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump"
>      emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
>      DONE;
>    }
> +  if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
> +    {
> +      emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
> +      DONE;
> +    }
>  })
>  
>  (define_insn "*indirect_jump<mode>"
>    [(set (pc)
>  	(match_operand:P 0 "register_operand" "c,*l"))]
> -  "rs6000_speculate_indirect_jumps"
> +  "rs6000_speculate_indirect_jumps
> +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
>    "b%T0"
>    [(set_attr "type" "jmpreg")])
>  
> +(define_insn "@indirect_jump<mode>_zero_cycle"

I don't know why this is an "@" pattern, but honestly I don't
know why @indirect_jump<mode>_nospec is an "@" pattern either.
The documentation for such things is hard for me to understand,
so I'm probably just missing something obvious, but I don't
immediately see why we would need the @ here.

> +  [(set (pc)
> +	(match_operand:P 0 "register_operand" "r,r,!cl"))
> +   (clobber (match_scratch:P 1 "=c,*l,X"))]

Do we need the *l and X alternatives if we're only doing this for
mtctr/bctr?

> +  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> +   && TARGET_P10_FUSION_ZERO_CYCLE"
> +  "@
> +   mt%T1 %0\;b%T1
> +   mt%T1 %0\;b%T1
> +   b%T0"
> +  [(set_attr "type" "jmpreg")
> +   (set_attr "length" "8,8,4")])
> +
>  (define_insn "@indirect_jump<mode>_nospec"
>    [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))
>     (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))]
> @@ -13050,7 +13069,11 @@ (define_expand "@tablejump<mode>_normal"
>    rtx addr = gen_reg_rtx (Pmode);
>  
>    emit_insn (gen_add<mode>3 (addr, off, lab));
> -  emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +  rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
> +	      ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
> +	      : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +
> +  emit_jump_insn (insn);
>    DONE;
>  })
>  
> @@ -13062,7 +13085,11 @@ (define_expand "@tablejump<mode>_absolute"
>    rtx addr = gen_reg_rtx (Pmode);
>    emit_move_insn (addr, operands[0]);
>  
> -  emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +  rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
> +	      ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
> +	      : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +
> +  emit_jump_insn (insn);
>    DONE;
>  })
>  
> @@ -13107,10 +13134,27 @@ (define_insn "@tablejump<mode>_insn_normal"
>    [(set (pc)
>  	(match_operand:P 0 "register_operand" "c,*l"))
>     (use (label_ref (match_operand 1)))]
> -  "rs6000_speculate_indirect_jumps"
> +  "rs6000_speculate_indirect_jumps
> +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
>    "b%T0"
>    [(set_attr "type" "jmpreg")])
>  
> +;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle
> +;; moves on Power10.
> +(define_insn "@tablejump<mode>_insn_zero_cycle"

Same question about @.

> +  [(set (pc)
> +	(match_operand:P 0 "register_operand" "r,r,!cl"))
> +   (use (label_ref (match_operand 1)))
> +   (clobber (match_scratch:P 2 "=c,*l,X"))]

Same question about 2nd and 3rd alternatives.

Otherwise LGTM... over to the maintainers. :)

Thanks!
Bill

> +  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> +   && TARGET_P10_FUSION_ZERO_CYCLE"
> +  "@
> +   mt%T2 %0\;b%T2
> +   mt%T2 %0\;b%T2
> +   b%T0"
> +  [(set_attr "type" "jmpreg")
> +   (set_attr "length" "8,8,4")])
> +
>  (define_insn "@tablejump<mode>_insn_nospec"
>    [(set (pc)
>  	(match_operand:P 0 "register_operand" "c,*l"))
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index 9d7878f144a..ba674947557 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -518,6 +518,10 @@ mpower10-fusion-2store
>  Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags)
>  Fuse certain store operations together for better performance on power10.
>  
> +mpower10-fusion-zero-cycle
> +Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags)
> +Fuse move to special register and jump for better performance on power10.
> +
>  mcrypto
>  Target Mask(CRYPTO) Var(rs6000_isa_flags)
>  Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
Michael Meissner Nov. 22, 2021, 9:12 p.m. UTC | #2
On Mon, Nov 22, 2021 at 10:36:13AM -0600, Bill Schmidt wrote:
> Hi Mike,
> 
> Thanks for this patch!
> > --- a/gcc/config/rs6000/rs6000.md
> > +++ b/gcc/config/rs6000/rs6000.md
> > @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump"
> >      emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
> >      DONE;
> >    }
> > +  if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
> > +    {
> > +      emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
> > +      DONE;
> > +    }
> >  })
> >  
> >  (define_insn "*indirect_jump<mode>"
> >    [(set (pc)
> >  	(match_operand:P 0 "register_operand" "c,*l"))]
> > -  "rs6000_speculate_indirect_jumps"
> > +  "rs6000_speculate_indirect_jumps
> > +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
> >    "b%T0"
> >    [(set_attr "type" "jmpreg")])
> >  
> > +(define_insn "@indirect_jump<mode>_zero_cycle"
> 
> I don't know why this is an "@" pattern, but honestly I don't
> know why @indirect_jump<mode>_nospec is an "@" pattern either.
> The documentation for such things is hard for me to understand,
> so I'm probably just missing something obvious, but I don't
> immediately see why we would need the @ here.

I didn't know about it either.  Basically the next insn used it:

(define_insn "@indirect_jump<mode>_nospec"
  [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))
   (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))]
  "!rs6000_speculate_indirect_jumps"
  "crset %E1\;beq%T0- %1\;b $"
  [(set_attr "type" "jmpreg")
   (set_attr "length" "12")])

This creates a function:

	gen_indirect_jump_nospec (machine_mode arg0, rtx x0, rtx x1)

where the mode of the P iterator is passed as argument.  I.e. you can do:

	rtx foo = gen_indirect_jump_nospec (Pmode, op0, op1);

instead of:

	rtx foo;
	if (Pmode == SImode)
	  foo = gen_indirect_jumpsi_nospec (op0, op1);
	else if (Pmode == DImode)
	  foo = gen_indirect_jumpdi_nospec (op0, op1);
	else
	  gcc_unreachable ();

> > +  [(set (pc)
> > +	(match_operand:P 0 "register_operand" "r,r,!cl"))
> > +   (clobber (match_scratch:P 1 "=c,*l,X"))]
> 
> Do we need the *l and X alternatives if we're only doing this for
> mtctr/bctr?

Probably not, but I recall back before the current allocator, that it would
cause crashes if we didn't have LR.  I could certainly eliminate the *l
alternative.
Michael Meissner Dec. 13, 2021, 5:10 p.m. UTC | #3
Ping patch.

| Date: Fri, 19 Nov 2021 09:53:14 -0500
| From: Michael Meissner <meissner@linux.ibm.com>
| Subject: [PATCH 1/3] Add power10 zero cycle moves for switches & indirect jumps
| Message-ID: <YZe6WugqvxPKNQJj@toto.the-meissners.org>

https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585005.html

Note, I will on-line through December 20th.  I will be off-line from December
21st through January 1st.
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index f5812da0184..cc072ee94ea 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -91,7 +91,8 @@ 
 				 | OPTION_MASK_P10_FUSION_LOGADD 	\
 				 | OPTION_MASK_P10_FUSION_ADDLOG	\
 				 | OPTION_MASK_P10_FUSION_2ADD		\
-				 | OPTION_MASK_P10_FUSION_2STORE)
+				 | OPTION_MASK_P10_FUSION_2STORE	\
+				 | OPTION_MASK_P10_FUSION_ZERO_CYCLE)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -145,6 +146,7 @@ 
 				 | OPTION_MASK_P10_FUSION_ADDLOG	\
 				 | OPTION_MASK_P10_FUSION_2ADD    	\
 				 | OPTION_MASK_P10_FUSION_2STORE	\
+				 | OPTION_MASK_P10_FUSION_ZERO_CYCLE	\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_ISEL			\
 				 | OPTION_MASK_MFCRF			\
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e4843eb0f1c..6780304a5eb 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4497,6 +4497,10 @@  rs6000_option_override_internal (bool global_init_p)
       && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
     rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
 
+  if (TARGET_POWER10
+      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE;
+
   /* Turn off vector pair/mma options on non-power10 systems.  */
   else if (!TARGET_POWER10 && TARGET_MMA)
     {
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6bec2bddbde..ea41eb4ada3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -12988,15 +12988,34 @@  (define_expand "indirect_jump"
     emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
     DONE;
   }
+  if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
+    {
+      emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
+      DONE;
+    }
 })
 
 (define_insn "*indirect_jump<mode>"
   [(set (pc)
 	(match_operand:P 0 "register_operand" "c,*l"))]
-  "rs6000_speculate_indirect_jumps"
+  "rs6000_speculate_indirect_jumps
+   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
   "b%T0"
   [(set_attr "type" "jmpreg")])
 
+(define_insn "@indirect_jump<mode>_zero_cycle"
+  [(set (pc)
+	(match_operand:P 0 "register_operand" "r,r,!cl"))
+   (clobber (match_scratch:P 1 "=c,*l,X"))]
+  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
+   && TARGET_P10_FUSION_ZERO_CYCLE"
+  "@
+   mt%T1 %0\;b%T1
+   mt%T1 %0\;b%T1
+   b%T0"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "8,8,4")])
+
 (define_insn "@indirect_jump<mode>_nospec"
   [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))
    (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))]
@@ -13050,7 +13069,11 @@  (define_expand "@tablejump<mode>_normal"
   rtx addr = gen_reg_rtx (Pmode);
 
   emit_insn (gen_add<mode>3 (addr, off, lab));
-  emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
+  rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
+	      ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
+	      : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
+
+  emit_jump_insn (insn);
   DONE;
 })
 
@@ -13062,7 +13085,11 @@  (define_expand "@tablejump<mode>_absolute"
   rtx addr = gen_reg_rtx (Pmode);
   emit_move_insn (addr, operands[0]);
 
-  emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
+  rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
+	      ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
+	      : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
+
+  emit_jump_insn (insn);
   DONE;
 })
 
@@ -13107,10 +13134,27 @@  (define_insn "@tablejump<mode>_insn_normal"
   [(set (pc)
 	(match_operand:P 0 "register_operand" "c,*l"))
    (use (label_ref (match_operand 1)))]
-  "rs6000_speculate_indirect_jumps"
+  "rs6000_speculate_indirect_jumps
+   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
   "b%T0"
   [(set_attr "type" "jmpreg")])
 
+;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle
+;; moves on Power10.
+(define_insn "@tablejump<mode>_insn_zero_cycle"
+  [(set (pc)
+	(match_operand:P 0 "register_operand" "r,r,!cl"))
+   (use (label_ref (match_operand 1)))
+   (clobber (match_scratch:P 2 "=c,*l,X"))]
+  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
+   && TARGET_P10_FUSION_ZERO_CYCLE"
+  "@
+   mt%T2 %0\;b%T2
+   mt%T2 %0\;b%T2
+   b%T0"
+  [(set_attr "type" "jmpreg")
+   (set_attr "length" "8,8,4")])
+
 (define_insn "@tablejump<mode>_insn_nospec"
   [(set (pc)
 	(match_operand:P 0 "register_operand" "c,*l"))
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 9d7878f144a..ba674947557 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -518,6 +518,10 @@  mpower10-fusion-2store
 Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags)
 Fuse certain store operations together for better performance on power10.
 
+mpower10-fusion-zero-cycle
+Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags)
+Fuse move to special register and jump for better performance on power10.
+
 mcrypto
 Target Mask(CRYPTO) Var(rs6000_isa_flags)
 Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.