[v2] rtl-optimization: ppc backend generates unnecessary extension.

Message ID a22c34d6-07c5-80a5-f6d5-8aa49869a03d@linux.ibm.com
State New
Headers
Series [v2] rtl-optimization: ppc backend generates unnecessary extension. |

Commit Message

Ajit Agarwal March 28, 2023, 4:49 p.m. UTC
  Hello All:

This patch makes REE pass as a default pass in rs6000 target. And
add necessary subroutines to eliminate extensions across basic blocks.

Bootstrapped and regtested on powerpc64-linu-gnu.

Thanks & Regards
Ajit


	rtl-optimization: ppc backend generates unnecessary
 	extension.

	Eliminate unnecessary redundant zero extension across basic
	blocks.

	2023-03-28  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	* ree.cc(insn_s_zext_p): New function.
	* ree.cc(is_feasible_elim_across_basic_blocks):
	New function.
	* ree.cc(merge_def_and_ext): Add call to
	is_feasible_elim_across_basic_blocks to check feasibility
	of extension elimination across basic blocks.
	* common/config/rs6000/rs6000-common.cc: Add free pass
	as default pass in rs6000 target.
---
 gcc/common/config/rs6000/rs6000-common.cc |   3 +-
 gcc/ree.cc                                | 269 +++++++++++++++++-----
 2 files changed, 209 insertions(+), 63 deletions(-)
  

Comments

Ajit Agarwal March 30, 2023, noon UTC | #1
Hello All:

This patch added enhancement to extimination elimination for rs6000 target.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit
	rtl-optimization: ppc backend generates unnecessary extension.

	Eliminate unnecessary redundant zero extension within basic
	blocks.

	2023-03-30  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	* ree.cc(is_feasible_elim_across_basic_blocks):
	Add checks to enable extension elimination..
	* ree.cc(combine_reaching_defs): Add checks to enable
	extension elimination.
---
 gcc/ree.cc | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 116 insertions(+), 5 deletions(-)

diff --git a/gcc/ree.cc b/gcc/ree.cc
index d05d37f9a23..7e4cce5cee4 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,56 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Get all the reaching definitions of an instruction.  The definitions are
+   desired for REG used in INSN.  Return the definition list or NULL if a
+   definition is missing.  If DEST is non-NULL, additionally push the INSN
+   of the definitions onto DEST.  */
+
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
+{
+  df_ref use;
+  struct df_link *ref_chain, *ref_link;
+
+  FOR_EACH_INSN_USE (use, insn)
+    {
+      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
+	return NULL;
+      if (REGNO (DF_REF_REG (use)) == REGNO (reg))
+	break;
+    }
+
+  if (use == NULL) return NULL;
+
+  gcc_assert (use != NULL);
+
+  ref_chain = DF_REF_CHAIN (use);
+
+  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+    {
+      /* Problem getting some definition for this instruction.  */
+      if (ref_link->ref == NULL)
+	return NULL;
+      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
+	return NULL;
+      /* As global regs are assumed to be defined at each function call
+	 dataflow can report a call_insn as being a definition of REG.
+	 But we can't do anything with that in this pass so proceed only
+	 if the instruction really sets REG in a way that can be deduced
+	 from the RTL structure.  */
+      if (global_regs[REGNO (reg)]
+	  && !set_of (reg, DF_REF_INSN (ref_link->ref)))
+	return NULL;
+    }
+
+  if (dest)
+    for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+      dest->safe_push (DF_REF_INSN (ref_link->ref));
+
+  return ref_chain;
+}
+
+
 /* Identify instruction AND with identical zero extension.  */
 
 static unsigned int
@@ -393,6 +443,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
   rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
+
   if (insn_is_zext_p(cand->insn)
        && CONST_INT_P (orig_src) && INTVAL (orig_src) != 0)
     return false;
@@ -401,6 +452,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
      then we need to change the original load to reference the destination
      of the extension.  Then we need to emit a copy from that destination
      to the original destination of the load.  */
+ // print_rtl_single(stdout, cand_pat);
   rtx new_reg;
   bool copy_needed
     = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
@@ -547,6 +599,7 @@ transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
   return false;
 }
 
+#if 0
 /* Get all the reaching definitions of an instruction.  The definitions are
    desired for REG used in INSN.  Return the definition list or NULL if a
    definition is missing.  If DEST is non-NULL, additionally push the INSN
@@ -593,7 +646,7 @@ get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
 
   return ref_chain;
 }
-
+#endif
 /* Get all the reaching uses of an instruction.  The uses are desired for REG
    set in INSN.  Return use list or NULL if a use is missing or irregular.  */
 
@@ -848,6 +901,36 @@ is_feasible_elim_across_basic_blocks (ext_cand *cand,
 	&& REGNO (XEXP (PATTERN (BB_END (bb)), 0)) != REGNO (SET_DEST (cand->expr)))
      return false;
 
+   if (cand->code == ZERO_EXTEND
+	&& GET_CODE ((PATTERN (def_insn))) == PARALLEL)
+     return false;
+
+   if (cand->code == ZERO_EXTEND
+	&& GET_CODE ((PATTERN (def_insn))) == SET
+	&& GET_CODE (SET_SRC (PATTERN (def_insn))) != XOR)
+     return false;
+
+   if (cand->code == ZERO_EXTEND
+	&& GET_CODE ((PATTERN (def_insn))) == SET
+	&& GET_CODE (SET_SRC (PATTERN (def_insn))) == XOR)
+     {
+	vec<rtx_insn *> *dest = XCNEWVEC (vec<rtx_insn *>, 4);
+	if (!get_defs (def_insn, XEXP (SET_SRC (PATTERN(def_insn)), 0), dest))
+	  return false;
+
+	int i;
+	rtx_insn *def_insn;
+
+	FOR_EACH_VEC_ELT (*dest, i, def_insn)
+	  {
+	    if ((GET_CODE (PATTERN (def_insn)) == SET
+		&& GET_CODE (SET_SRC (PATTERN (def_insn))) == ASHIFT)
+		|| GET_CODE (PATTERN (def_insn)) == PARALLEL)
+	       return false;
+	  }
+	XDELETEVEC (dest);
+      }
+
    return true;
 }
 
@@ -873,7 +956,8 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
 
   if (!feasible) return false;
 
-  if (((!copy_needed && (insn_is_zext_p (cand->insn))
+  if (((!copy_needed && (insn_is_zext_p (cand->insn)
+	|| (cand->code == ZERO_EXTEND && ext_src_mode == QImode))
 	&& (GET_MODE (SET_DEST (*sub_rtx)) != ext_src_mode
 	&& state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE))
 	|| ((state->modified[INSN_UID (def_insn)].kind
@@ -1211,15 +1295,22 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
 	 definitions could be merged.  */
       if (apply_change_group ())
         {
-          if (dump_file)
-            fprintf (dump_file, "All merges were successful.\n");
+	  if (state->modified_list.length() == 0) return false;
+
+	  if (cand->code == ZERO_EXTEND
+	      && GET_CODE (PATTERN (state->modified_list[0])) == SET
+	      && GET_CODE (SET_SRC (PATTERN (state->modified_list[0]))) != XOR)
+	     return false;
+
+	   if (dump_file)
+	     fprintf (dump_file, "All merges were successful.\n");
 
 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
 	    {
 	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
 	      if (modified->kind == EXT_MODIFIED_NONE)
 		modified->kind = (cand->code == ZERO_EXTEND  ? EXT_MODIFIED_ZEXT
-						            : EXT_MODIFIED_SEXT);
+							     : EXT_MODIFIED_SEXT);
 
 	      if (copy_needed)
 		modified->do_not_reextend = 1;
@@ -1228,6 +1319,26 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
         }
 	else
 	  {
+	    if (state->modified_list.length() == 0) return false;
+
+	    if (cand->code == ZERO_EXTEND
+		&& GET_CODE (PATTERN(state->modified_list[0])) == SET
+		&& GET_CODE (SET_SRC (PATTERN (state->modified_list[0]))) != XOR)
+	      return false;
+
+	    if (cand->code == ZERO_EXTEND)
+	      {
+		FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
+		  {
+		    ext_modified *modified = &state->modified[INSN_UID (def_insn)];
+		    if (modified->kind == EXT_MODIFIED_NONE)
+		      modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
+								  : EXT_MODIFIED_SEXT);
+
+		     modified->do_not_reextend = 1;
+		   }
+		  return true;
+	      }
 	    /* Changes need not be cancelled explicitly as apply_change_group
 		does it.  Print list of definitions in the dump_file for debug
 		purposes.  This extension cannot be deleted.  */
  
Segher Boessenkool March 30, 2023, 1:41 p.m. UTC | #2
Hi!

On Tue, Mar 28, 2023 at 10:19:27PM +0530, Ajit Agarwal wrote:
> This patch makes REE pass as a default pass in rs6000 target. And
> add necessary subroutines to eliminate extensions across basic blocks.

Please wait with this until stage 1?

Some comments:

> 	rtl-optimization: ppc backend generates unnecessary
>  	extension.

Subject can start with "ree: ", but not "rtl-optimization: ".  Subject
should start with a capital (the part after the colon, that is).  There
should not be a full stop in the subject.

Subjects should be in the mail subject, not elsewhere in the patch.

> 	Eliminate unnecessary redundant zero extension across basic
> 	blocks.

Not sure where you want this?  It doesn't belong in the changelog.

> 	* ree.cc(insn_s_zext_p): New function.

Space before (.

> 	* ree.cc(is_feasible_elim_across_basic_blocks):
> 	New function.

This is the same file as the one before this.  You write that as
	(is_feasible_elim_across_basic_blocks): New function.

(and no early line breaks please, certainly not after a colon).

> 	* common/config/rs6000/rs6000-common.cc: Add free pass
> 	as default pass in rs6000 target.

Changelog lines are 80 positions.  The leading tab is 8 positions.

> @@ -30,6 +30,8 @@
>  /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
>  static const struct default_options rs6000_option_optimization_table[] =
>    {
> +    /* Enable -free for zero extension and sign extension elimination.*/
> +    { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },

>      /* Split multi-word types early.  */
>      { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
>      /* Enable -fsched-pressure for first pass instruction scheduling.  */
> @@ -42,7 +44,6 @@ static const struct default_options rs6000_option_optimization_table[] =
>      /* -frename-registers leads to non-optimal codegen and performance
>         on rs6000, turn it off by default.  */
>      { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
> -

Don't remove random lines please.  Never do this as part of unrelated
patches, too.

> +/* Identify instruction AND with identical zero extension.  */

What does that mean?

> +
> +static unsigned int
> +insn_is_zext_p(rtx insn)

Space before (.  If it is an insn, you probably want rtx_insn?

> +{
> +  if (GET_CODE (insn) == AND)
> +   {

Wrong indentation.

> +     rtx set = XEXP (insn, 0);
> +     if (REG_P(set))
> +       {
> +	 if (CONST_INT_P (XEXP (insn, 1))

You can see that here: all indentations are a multiple of two, so a
single space indent is always wrong.

> +	     && INTVAL (XEXP (insn, 1)) == 1)

This easily fits on the previous line.

> +	   return 1;
> +       }
> +     else
> +       return 0;
> +   }
> +
> +  return 0;
> +}

Don't "else return 0" if you have a catch-all anyway?

> +/* Identify instruction AND with identical zero extension.  */
> +
> +static unsigned int
> +insn_is_zext_p(rtx_insn * insn)

No space after * for a pointer.

> +{
> +  rtx body = PATTERN (insn);
> +
> +  if (GET_CODE (body) == PARALLEL) return 0;

Always a new line before an "if" body.

> +  if (GET_CODE(body) == SET && GET_CODE (SET_SRC (body)) == AND)
> +   {
> +     rtx set = XEXP (SET_SRC(body), 0);
> +
> +     if (REG_P(set) && GET_MODE(SET_DEST(body))
> +			 == GET_MODE(set))

What is this for?

> +       {
> +	 if (CONST_INT_P (XEXP (SET_SRC (body), 1))
> +	      && INTVAL (XEXP (SET_SRC (body), 1)) == 1)

The && should align with the previous "C".

You could just say
  if (XEXP (SET_SRC (body), 1) == const1_rtx)
.

> +  if (insn_is_zext_p(cand->insn)
> +       && CONST_INT_P (orig_src) && INTVAL (orig_src) != 0)
> +    return false;

  if (insn_is_zext_p (cand->insn) && orig_src != const0_rtx)
    return false;


So what is this patch doing?  At least two things, right?  It aims to
improve what REE does, and also enables it by default for rs6000?  So
make it two patches, please.

Can you talk a bit about what improvements to generated code you see?


Segher
  
Bernhard Reutner-Fischer March 30, 2023, 2:28 p.m. UTC | #3
On Thu, 30 Mar 2023 17:30:43 +0530
Ajit Agarwal via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

> Hello All:

Just some nits.

And this seems to be an incremental diff ontop of your v2.
You might want check for coding-style errors by running:
 contrib/check_GNU_style.py /tmp/ree-v2bis.patch

> 
> This patch added enhancement to extimination elimination for rs6000 target.

I think REE means redundant extension elimination.

> gcc/ChangeLog:
> 
> 	* ree.cc(is_feasible_elim_across_basic_blocks):

We often use the lispy _p suffix for predicates.
Maybe eliminate_across_bbs_p would be shorter.

> 	Add checks to enable extension elimination..
> 	* ree.cc(combine_reaching_defs): Add checks to enable
> 	extension elimination.
> ---
>  gcc/ree.cc | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 116 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index d05d37f9a23..7e4cce5cee4 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -253,6 +253,56 @@ struct ext_cand
>  
>  static int max_insn_uid;
>  
> +/* Get all the reaching definitions of an instruction.  The definitions are
> +   desired for REG used in INSN.  Return the definition list or NULL if a
> +   definition is missing.  If DEST is non-NULL, additionally push the INSN
> +   of the definitions onto DEST.  */
> +
> +static struct df_link *
> +get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
> +{
> +  df_ref use;
> +  struct df_link *ref_chain, *ref_link;
> +
> +  FOR_EACH_INSN_USE (use, insn)
> +    {
> +      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
> +	return NULL;
> +      if (REGNO (DF_REF_REG (use)) == REGNO (reg))
> +	break;
> +    }
> +
> +  if (use == NULL) return NULL;

Missing newline before return.

> +
> +  gcc_assert (use != NULL);

Either the assert or the if but both is a bit much.

> +
> +  ref_chain = DF_REF_CHAIN (use);
> +
> +  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
> +    {
> +      /* Problem getting some definition for this instruction.  */
> +      if (ref_link->ref == NULL)
> +	return NULL;
> +      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
> +	return NULL;
> +      /* As global regs are assumed to be defined at each function call
> +	 dataflow can report a call_insn as being a definition of REG.
> +	 But we can't do anything with that in this pass so proceed only
> +	 if the instruction really sets REG in a way that can be deduced
> +	 from the RTL structure.  */
> +      if (global_regs[REGNO (reg)]
> +	  && !set_of (reg, DF_REF_INSN (ref_link->ref)))
> +	return NULL;
> +    }
> +
> +  if (dest)
> +    for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
> +      dest->safe_push (DF_REF_INSN (ref_link->ref));
> +
> +  return ref_chain;
> +}
> +
> +
>  /* Identify instruction AND with identical zero extension.  */
>  
>  static unsigned int
> @@ -393,6 +443,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
>    machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
>    rtx new_set = NULL_RTX;
>    rtx cand_pat = single_set (cand->insn);
> +

superfluous whitespace change.

>    if (insn_is_zext_p(cand->insn)
>         && CONST_INT_P (orig_src) && INTVAL (orig_src) != 0)
>      return false;
> @@ -401,6 +452,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
>       then we need to change the original load to reference the destination
>       of the extension.  Then we need to emit a copy from that destination
>       to the original destination of the load.  */
> + // print_rtl_single(stdout, cand_pat);

Please remove that debug comment.

>    rtx new_reg;
>    bool copy_needed
>      = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
> @@ -547,6 +599,7 @@ transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
>    return false;
>  }
>  
> +#if 0
>  /* Get all the reaching definitions of an instruction.  The definitions are
>     desired for REG used in INSN.  Return the definition list or NULL if a
>     definition is missing.  If DEST is non-NULL, additionally push the INSN
> @@ -593,7 +646,7 @@ get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
>  
>    return ref_chain;
>  }
> -
> +#endif

Why did you move get_defs?

>  /* Get all the reaching uses of an instruction.  The uses are desired for REG
>     set in INSN.  Return use list or NULL if a use is missing or irregular.  */
>  
> @@ -848,6 +901,36 @@ is_feasible_elim_across_basic_blocks (ext_cand *cand,
>  	&& REGNO (XEXP (PATTERN (BB_END (bb)), 0)) != REGNO (SET_DEST (cand->expr)))
>       return false;
>  
> +   if (cand->code == ZERO_EXTEND
> +	&& GET_CODE ((PATTERN (def_insn))) == PARALLEL)
> +     return false;

Excess braces above.

> +
> +   if (cand->code == ZERO_EXTEND
> +	&& GET_CODE ((PATTERN (def_insn))) == SET

Excess braces above.

> +	&& GET_CODE (SET_SRC (PATTERN (def_insn))) != XOR)
> +     return false;
> +
> +   if (cand->code == ZERO_EXTEND
> +	&& GET_CODE ((PATTERN (def_insn))) == SET
> +	&& GET_CODE (SET_SRC (PATTERN (def_insn))) == XOR)
> +     {
> +	vec<rtx_insn *> *dest = XCNEWVEC (vec<rtx_insn *>, 4);
> +	if (!get_defs (def_insn, XEXP (SET_SRC (PATTERN(def_insn)), 0), dest))
> +	  return false;

missing space between PATTERN and the brace

This leaks dest.

The above looks a bit redundant.
I'd have a single test for cand->code == ZERO_EXTEND and then
  rtx def = PATTERN (def_insn);

  if (def == PARALLEL)
    return false;
  if (def == SET)
    {
	rtx src = SET_SRC (def);
	if (GET_CODE (src) != XOR)
	  return false;
	/* maybe use an auto_vec here */
...


> +
> +	int i;
> +	rtx_insn *def_insn;
> +
> +	FOR_EACH_VEC_ELT (*dest, i, def_insn)
> +	  {
> +	    if ((GET_CODE (PATTERN (def_insn)) == SET
> +		&& GET_CODE (SET_SRC (PATTERN (def_insn))) == ASHIFT)
> +		|| GET_CODE (PATTERN (def_insn)) == PARALLEL)

leaks dest.

> +	       return false;
> +	  }
> +	XDELETEVEC (dest);
> +      }
> +
>     return true;
>  }
>  
> @@ -873,7 +956,8 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
>  
>    if (!feasible) return false;
>  
> -  if (((!copy_needed && (insn_is_zext_p (cand->insn))
> +  if (((!copy_needed && (insn_is_zext_p (cand->insn)
> +	|| (cand->code == ZERO_EXTEND && ext_src_mode == QImode))
>  	&& (GET_MODE (SET_DEST (*sub_rtx)) != ext_src_mode
>  	&& state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE))
>  	|| ((state->modified[INSN_UID (def_insn)].kind
> @@ -1211,15 +1295,22 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>  	 definitions could be merged.  */
>        if (apply_change_group ())
>          {
> -          if (dump_file)
> -            fprintf (dump_file, "All merges were successful.\n");
> +	  if (state->modified_list.length() == 0) return false;

Missing space after length before the opening brace, and missing
newline before return.

> +
> +	  if (cand->code == ZERO_EXTEND
> +	      && GET_CODE (PATTERN (state->modified_list[0])) == SET
> +	      && GET_CODE (SET_SRC (PATTERN (state->modified_list[0]))) != XOR)
> +	     return false;
> +
> +	   if (dump_file)
> +	     fprintf (dump_file, "All merges were successful.\n");
>  
>  	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
>  	    {
>  	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
>  	      if (modified->kind == EXT_MODIFIED_NONE)
>  		modified->kind = (cand->code == ZERO_EXTEND  ? EXT_MODIFIED_ZEXT
> -						            : EXT_MODIFIED_SEXT);
> +							     : EXT_MODIFIED_SEXT);
>  
>  	      if (copy_needed)
>  		modified->do_not_reextend = 1;
> @@ -1228,6 +1319,26 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>          }
>  	else
>  	  {
> +	    if (state->modified_list.length() == 0) return false;

same comment as above

> +
> +	    if (cand->code == ZERO_EXTEND
> +		&& GET_CODE (PATTERN(state->modified_list[0])) == SET
> +		&& GET_CODE (SET_SRC (PATTERN (state->modified_list[0]))) != XOR)
> +	      return false;

Missing space before '('

> +
> +	    if (cand->code == ZERO_EXTEND)
> +	      {
> +		FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
> +		  {
> +		    ext_modified *modified = &state->modified[INSN_UID (def_insn)];
> +		    if (modified->kind == EXT_MODIFIED_NONE)
> +		      modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
> +								  : EXT_MODIFIED_SEXT);

The whole loop is guarded by code == ZERO_EXTEND, so the rhs condition
above is redundant.
thanks,

> +
> +		     modified->do_not_reextend = 1;
> +		   }
> +		  return true;
> +	      }
>  	    /* Changes need not be cancelled explicitly as apply_change_group
>  		does it.  Print list of definitions in the dump_file for debug
>  		purposes.  This extension cannot be deleted.  */
  
Segher Boessenkool March 30, 2023, 3:28 p.m. UTC | #4
Hi!

On Thu, Mar 30, 2023 at 04:28:51PM +0200, Bernhard Reutner-Fischer wrote:
> On Thu, 30 Mar 2023 17:30:43 +0530
> Ajit Agarwal via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> > 	* ree.cc(is_feasible_elim_across_basic_blocks):
> 
> We often use the lispy _p suffix for predicates.
> Maybe eliminate_across_bbs_p would be shorter.

A bit shorter, but much less clear, and that is the actual reason to
keep names short -- so this misses the goal.  Also, many things
currently called _p are not predicates (and/or in some cases are not
even functions!)

> > +  if (use == NULL) return NULL;
> 
> Missing newline before return.

And better style is
  if (!use)

> > +#if 0
> >  /* Get all the reaching definitions of an instruction.  The definitions are
> >     desired for REG used in INSN.  Return the definition list or NULL if a
> >     definition is missing.  If DEST is non-NULL, additionally push the INSN
> > @@ -593,7 +646,7 @@ get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
> >  
> >    return ref_chain;
> >  }
> > -
> > +#endif
> 
> Why did you move get_defs?

And we should not normally have #if 0, too.


Segher
  

Patch

diff --git a/gcc/common/config/rs6000/rs6000-common.cc b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..e7780dc0c5d 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -30,6 +30,8 @@ 
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options rs6000_option_optimization_table[] =
   {
+    /* Enable -free for zero extension and sign extension elimination.*/
+    { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
     /* Split multi-word types early.  */
     { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Enable -fsched-pressure for first pass instruction scheduling.  */
@@ -42,7 +44,6 @@  static const struct default_options rs6000_option_optimization_table[] =
     /* -frename-registers leads to non-optimal codegen and performance
        on rs6000, turn it off by default.  */
     { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
-
     /* Double growth factor to counter reduced min jump length.  */
     { OPT_LEVELS_ALL, OPT__param_max_grow_copy_bb_insns_, NULL, 16 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 63d8cf9f237..d05d37f9a23 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,53 @@  struct ext_cand
 
 static int max_insn_uid;
 
+/* Identify instruction AND with identical zero extension.  */
+
+static unsigned int
+insn_is_zext_p(rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+   {
+     rtx set = XEXP (insn, 0);
+     if (REG_P(set))
+       {
+	 if (CONST_INT_P (XEXP (insn, 1))
+	     && INTVAL (XEXP (insn, 1)) == 1)
+	   return 1;
+       }
+     else
+       return 0;
+   }
+
+  return 0;
+}
+
+/* Identify instruction AND with identical zero extension.  */
+
+static unsigned int
+insn_is_zext_p(rtx_insn * insn)
+{
+  rtx body = PATTERN (insn);
+
+  if (GET_CODE (body) == PARALLEL) return 0;
+
+  if (GET_CODE(body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+     rtx set = XEXP (SET_SRC(body), 0);
+
+     if (REG_P(set) && GET_MODE(SET_DEST(body))
+			 == GET_MODE(set))
+       {
+	 if (CONST_INT_P (XEXP (SET_SRC (body), 1))
+	      && INTVAL (XEXP (SET_SRC (body), 1)) == 1)
+	   return 1;
+       }
+     else
+       return 0;
+   }
+   return 0;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -297,6 +344,31 @@  update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode,
   return true;
 }
 
+/* Return true if INSN is
+     (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
+   and store x1 and x2 in REG_1 and REG_2.  */
+
+static bool
+is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
+{
+  rtx expr = single_set (insn);
+
+  if (expr != NULL_RTX
+      && GET_CODE (expr) == SET
+      && GET_CODE (SET_DEST (expr)) == REG
+      && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
+      && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
+      && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
+    {
+      *reg1 = XEXP (SET_SRC (expr), 1);
+      *reg2 = XEXP (SET_SRC (expr), 2);
+      return true;
+    }
+
+  return false;
+}
+
+
 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
    and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
    this code modifies the SET rtx to a new SET rtx that extends the
@@ -321,6 +393,9 @@  combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
   rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
+  if (insn_is_zext_p(cand->insn)
+       && CONST_INT_P (orig_src) && INTVAL (orig_src) != 0)
+    return false;
 
   /* If the extension's source/destination registers are not the same
      then we need to change the original load to reference the destination
@@ -359,8 +434,14 @@  combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
     {
       /* Here is a sequence of two extensions.  Try to merge them.  */
-      rtx temp_extension
-	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+      rtx temp_extension = NULL_RTX;
+      if (GET_CODE (SET_SRC (cand_pat)) == AND)
+	temp_extension
+	= gen_rtx_fmt_ee (cand->code, cand->mode, XEXP (orig_src, 0),
+			  XEXP (orig_src, 1));
+      else
+	temp_extension
+	 = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
       rtx simplified_temp_extension = simplify_rtx (temp_extension);
       if (simplified_temp_extension)
         temp_extension = simplified_temp_extension;
@@ -370,8 +451,9 @@  combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
     {
       /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
-         in general, IF_THEN_ELSE should not be combined.  */
-      return true;
+	in general, IF_THEN_ELSE should not be combined.  Relaxed
+	cases with IF_THEN_ELSE across basic blocls */
+	return true;
     }
   else
     {
@@ -541,30 +623,6 @@  get_uses (rtx_insn *insn, rtx reg)
   return ref_chain;
 }
 
-/* Return true if INSN is
-     (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
-   and store x1 and x2 in REG_1 and REG_2.  */
-
-static bool
-is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
-{
-  rtx expr = single_set (insn);
-
-  if (expr != NULL_RTX
-      && GET_CODE (expr) == SET
-      && GET_CODE (SET_DEST (expr)) == REG
-      && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
-      && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
-      && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
-    {
-      *reg1 = XEXP (SET_SRC (expr), 1);
-      *reg2 = XEXP (SET_SRC (expr), 2);
-      return true;
-    }
-
-  return false;
-}
-
 enum ext_modified_kind
 {
   /* The insn hasn't been modified by ree pass yet.  */
@@ -709,6 +767,90 @@  get_sub_rtx (rtx_insn *def_insn)
   return sub_rtx;
 }
 
+/* Find feasibility of extension elimination
+   across basic blocks.  */
+
+static bool
+is_feasible_elim_across_basic_blocks (ext_cand *cand,
+				      rtx_insn *def_insn)
+{
+  basic_block bb = BLOCK_FOR_INSN (cand->insn);
+  edge fallthru_edge;
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      rtx_insn *insn = BB_END (e->src) ? PREV_INSN (BB_END (e->src)) : NULL;
+
+      if (insn && NONDEBUG_INSN_P (insn)
+	  && GET_CODE (PATTERN (insn)) == SET && SET_SRC (PATTERN(insn))
+	  && GET_CODE (SET_SRC (PATTERN (insn))) == IF_THEN_ELSE)
+	{
+	  if (e->dest == bb)
+	    {
+	      basic_block jump_block = e->dest;
+	      if (jump_block == bb)
+		{
+		  if (single_succ_p (BLOCK_FOR_INSN (def_insn)))
+		    {
+		      fallthru_edge = single_succ_edge (BLOCK_FOR_INSN (def_insn));
+		      if (BB_END (fallthru_edge->dest) && (bb != fallthru_edge->dest
+			|| e->dest != fallthru_edge->dest))
+			return false;
+		    }
+		   else
+		     return false;
+		}
+		else return false;
+	      }
+	      else
+		{
+		  if (single_succ_p (e->dest))
+		    {
+		      fallthru_edge = single_succ_edge (e->dest);
+		      if (BB_END (fallthru_edge->dest) && bb != fallthru_edge->dest)
+			return false;
+		    }
+		}
+	     }
+	  }
+  if (single_succ_p (BLOCK_FOR_INSN (def_insn)))
+    {
+      fallthru_edge = single_succ_edge (BLOCK_FOR_INSN (def_insn));
+      if (BB_END (fallthru_edge->dest) && bb != fallthru_edge->dest)
+	return false;
+    }
+   else
+     return false;
+
+   rtx set = single_set(cand->insn);
+   /* The destination register of the extension insn must not be
+	 used or set between the def_insn and cand->insn exclusive.  */
+   if (INSN_CHAIN_CODE_P (GET_CODE (def_insn))
+       && INSN_CHAIN_CODE_P (cand->code))
+     if ((cand->code == ZERO_EXTEND)
+	  && REG_P (SET_DEST (set)) && NEXT_INSN (def_insn)
+	  && (reg_used_between_p (SET_DEST (set), def_insn, cand->insn)
+		|| reg_set_between_p (SET_DEST (set), def_insn, cand->insn)))
+	return false;
+
+   if (cand->code == ZERO_EXTEND && (bb != BLOCK_FOR_INSN (def_insn)
+	|| DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn)))
+     return false;
+
+   if (insn_is_zext_p (cand->insn)
+	&& GET_CODE (PATTERN (BB_END (bb))) != USE)
+     return false;
+
+   if (insn_is_zext_p (cand->insn)
+	&& GET_CODE (PATTERN (BB_END (bb))) == USE
+	&& REGNO (XEXP (PATTERN (BB_END (bb)), 0)) != REGNO (SET_DEST (cand->expr)))
+     return false;
+
+   return true;
+}
+
 /* Merge the DEF_INSN with an extension.  Calls combine_set_extension
    on the SET pattern.  */
 
@@ -727,12 +869,18 @@  merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
   bool copy_needed
     = (REGNO (SET_DEST (cand->expr)) != REGNO (XEXP (SET_SRC (cand->expr), 0)));
 
-  if (!copy_needed || (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
-	  || ((state->modified[INSN_UID (def_insn)].kind
-	       == (cand->code == ZERO_EXTEND || cand->code == AND
+  bool feasible = is_feasible_elim_across_basic_blocks (cand, def_insn);
+
+  if (!feasible) return false;
+
+  if (((!copy_needed && (insn_is_zext_p (cand->insn))
+	&& (GET_MODE (SET_DEST (*sub_rtx)) != ext_src_mode
+	&& state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE))
+	|| ((state->modified[INSN_UID (def_insn)].kind
+		== (cand->code == ZERO_EXTEND
 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
-	      && state->modified[INSN_UID (def_insn)].mode
-		 == ext_src_mode)))
+	     && state->modified[INSN_UID (def_insn)].mode
+		== ext_src_mode)))
     {
       if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
 	  >= GET_MODE_UNIT_SIZE (cand->mode))
@@ -759,7 +907,7 @@  static inline rtx
 get_extended_src_reg (rtx src)
 {
   while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND
-	|| GET_CODE (src) == AND)
+	|| insn_is_zext_p(src))
     src = XEXP (src, 0);
   gcc_assert (REG_P (src));
   return src;
@@ -1008,7 +1156,7 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
       machine_mode mode;
 
       if (state->modified[INSN_UID (cand->insn)].kind
-	  != (cand->code == ZERO_EXTEND || cand->code == AND
+	  != (cand->code == ZERO_EXTEND
 	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
 	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
 	  || (set == NULL_RTX))
@@ -1019,7 +1167,7 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
       cand->mode = mode;
     }
 
-  merge_successful = true;
+  merge_successful = false;
 
   /* Go through the defs vector and try to merge all the definitions
      in this vector.  */
@@ -1027,7 +1175,10 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
   FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
     {
       if (merge_def_and_ext (cand, def_insn, state))
-	state->modified_list.safe_push (def_insn);
+	{
+	  merge_successful = true;
+	  state->modified_list.safe_push (def_insn);
+	}
       else
         {
           merge_successful = false;
@@ -1058,14 +1209,7 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
 	 cannot be merged, we entirely give up.  In the future, we should allow
 	 extensions to be partially eliminated along those paths where the
 	 definitions could be merged.  */
-       int num_clobbers = 0;
-       int icode = recog (cand->insn, cand->insn,
-			  (GET_CODE (cand->expr) == SET
-			   && ! reload_completed
-			   && ! reload_in_progress)
-			   ? &num_clobbers : 0);
-
-      if (apply_change_group () || (icode < 0))
+      if (apply_change_group ())
         {
           if (dump_file)
             fprintf (dump_file, "All merges were successful.\n");
@@ -1074,7 +1218,7 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
 	    {
 	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
 	      if (modified->kind == EXT_MODIFIED_NONE)
-		modified->kind = (cand->code == ZERO_EXTEND || cand->code == AND  ? EXT_MODIFIED_ZEXT
+		modified->kind = (cand->code == ZERO_EXTEND  ? EXT_MODIFIED_ZEXT
 						            : EXT_MODIFIED_SEXT);
 
 	      if (copy_needed)
@@ -1082,19 +1226,19 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
 	    }
           return true;
         }
-      else
-        {
-          /* Changes need not be cancelled explicitly as apply_change_group
-             does it.  Print list of definitions in the dump_file for debug
-             purposes.  This extension cannot be deleted.  */
-          if (dump_file)
-            {
-	      fprintf (dump_file,
-		       "Merge cancelled, non-mergeable definitions:\n");
-	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
-	        print_rtl_single (dump_file, def_insn);
-            }
-        }
+	else
+	  {
+	    /* Changes need not be cancelled explicitly as apply_change_group
+		does it.  Print list of definitions in the dump_file for debug
+		purposes.  This extension cannot be deleted.  */
+	    if (dump_file)
+	      {
+		fprintf (dump_file,
+			"Merge cancelled, non-mergeable definitions:\n");
+		FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
+		  print_rtl_single (dump_file, def_insn);
+	      }
+	   }
     }
   else
     {
@@ -1128,7 +1272,7 @@  add_removable_extension (const_rtx expr, rtx_insn *insn,
   mode = GET_MODE (dest);
 
   if (REG_P (dest)
-      && (code == SIGN_EXTEND || code == ZERO_EXTEND || code == AND)
+      && (code == SIGN_EXTEND || code == ZERO_EXTEND || insn_is_zext_p(src))
       && REG_P (XEXP (src, 0)))
     {
       rtx reg = XEXP (src, 0);
@@ -1140,7 +1284,7 @@  add_removable_extension (const_rtx expr, rtx_insn *insn,
 	 a path from the entry to this zero-extension that leaves this register
 	 uninitialized, removing the extension could change the behavior of
 	 correct programs.  So first, check it is not the case.  */
-      if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
+      if ((code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg))))
 	{
 	  if (dump_file)
 	    {
@@ -1390,9 +1534,10 @@  find_and_remove_re (void)
   reinsn_list.release ();
   XDELETEVEC (state.modified);
 
-  if (dump_file && num_re_opportunities > 0)
+  if (dump_file &&  num_re_opportunities > 0)
     fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
 	     num_re_opportunities, num_realized);
+
 }
 
 /* Find and remove redundant extensions.  */