PATCH v6 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

Message ID 65ed79a3-9964-dd50-39cb-98d5dbc72881@linux.ibm.com
State New
Headers
Series PATCH v6 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Testing failed
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Testing failed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Testing failed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Testing failed

Commit Message

Ajit Agarwal Sept. 18, 2023, 5:59 a.m. UTC
  This new version of patch 6 use improve ree pass for rs6000 target using defined ABI interfaces.
Bootstrapped and regtested on power64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit


ree: Improve ree pass for rs6000 target using defined abi interfaces

For rs6000 target we see redundant zero and sign extension and done to
improve ree pass to eliminate such redundant zero and sign extension
using defined ABI interfaces.

2023-09-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
	defined abi interfaces.
	(add_removable_extension): Use of defined abi interfaces for no
	reaching defs.
	(abi_extension_candidate_return_reg_p): New function.
	(abi_extension_candidate_p): New function.
	(abi_extension_candidate_argno_p): New function.
	(abi_handle_regs_without_defs_p): New function.
	(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

        * g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc                                    | 145 +++++++++++++++++-
 .../g++.target/powerpc/zext-elim-3.C          |  13 ++
 2 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
  

Comments

Maxim Kuvyrkov Sept. 18, 2023, 9:06 a.m. UTC | #1
Hi Ajit,

Is this patch supposed to be applied on top of another patch?  As is, this patch fails build on AArch64 and AArch32, and Linaro TCWG CI have sent notifications about the failures for v5 [1] and v6 [2] of this patch to you.  Did you receive the notifications?

Kind regards,

[1] https://patchwork.sourceware.org/project/gcc/patch/5ad7cdca-63e1-73af-b38d-d58898e21ef9@linux.ibm.com/
[2] https://patchwork.sourceware.org/project/gcc/patch/65ed79a3-9964-dd50-39cb-98d5dbc72881@linux.ibm.com/

--
Maxim Kuvyrkov
https://www.linaro.org

> On Sep 18, 2023, at 09:59, Ajit Agarwal via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> 
> This new version of patch 6 use improve ree pass for rs6000 target using defined ABI interfaces.
> Bootstrapped and regtested on power64-linux-gnu.
> 
> Review comments incorporated.
> 
> Thanks & Regards
> Ajit
> 
> 
> ree: Improve ree pass for rs6000 target using defined abi interfaces
> 
> For rs6000 target we see redundant zero and sign extension and done to
> improve ree pass to eliminate such redundant zero and sign extension
> using defined ABI interfaces.
> 
> 2023-09-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> 
> gcc/ChangeLog:
> 
> * ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
> defined abi interfaces.
> (add_removable_extension): Use of defined abi interfaces for no
> reaching defs.
> (abi_extension_candidate_return_reg_p): New function.
> (abi_extension_candidate_p): New function.
> (abi_extension_candidate_argno_p): New function.
> (abi_handle_regs_without_defs_p): New function.
> (abi_target_promote_function_mode): New function.
> 
> gcc/testsuite/ChangeLog:
> 
>        * g++.target/powerpc/zext-elim-3.C
> ---
> gcc/ree.cc                                    | 145 +++++++++++++++++-
> .../g++.target/powerpc/zext-elim-3.C          |  13 ++
> 2 files changed, 155 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> 
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index fc04249fa84..e395af6b1bd 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>     if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>       break;
> 
> -  gcc_assert (def != NULL);
> +  if (def == NULL)
> +    return NULL;
> 
>   ref_chain = DF_REF_CHAIN (def);
> 
> @@ -750,6 +751,118 @@ get_extended_src_reg (rtx src)
>   return src;
> }
> 
> +/* Return TRUE if target mode is equal to source mode of zero_extend
> +   or sign_extend otherwise false.  */
> +
> +static bool
> +abi_target_promote_function_mode (machine_mode mode)
> +{
> +  int unsignedp;
> +  machine_mode tgt_mode =
> +    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
> + NULL_TREE, 1);
> +
> +  if (tgt_mode == mode)
> +    return true;
> +  else
> +    return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an return  registers.  */
> +
> +static bool
> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
> +    return false;
> +
> +  if (FUNCTION_VALUE_REGNO_P (regno))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if reg source operand of zero_extend is argument registers
> +   and not return registers and source and destination operand are same
> +   and mode of source and destination operand are not same.  */
> +
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
> +    return false;
> +
> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
> +  rtx orig_src = XEXP (SET_SRC (set),0);
> +
> +  bool copy_needed
> +    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> +
> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
> +      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an argument registers.  */
> +
> +static bool
> +abi_extension_candidate_argno_p (rtx_code code, int regno)
> +{
> +  if (code != ZERO_EXTEND)
> +    return false;
> +
> +  if (FUNCTION_ARG_REGNO_P (regno))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn doesn't have defs and have
> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
> +
> +static bool
> +abi_handle_regs_without_defs_p (rtx_insn *insn)
> +{
> +  if (side_effects_p (PATTERN (insn)))
> +    return false;
> +
> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
> +
> +  if (!uses)
> +    return false;
> +
> +  for (df_link *use = uses; use; use = use->next)
> +    {
> +      if (!use->ref)
> + return false;
> +
> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
> + return false;
> +
> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
> +
> +      if (GET_CODE (PATTERN (use_insn)) == SET)
> + {
> +  rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
> +
> +  if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
> +      || GET_RTX_CLASS (code) == RTX_COMM_ARITH
> +      || GET_RTX_CLASS (code) == RTX_UNARY)
> +    return false;
> + }
> +     }
> +  return true;
> +}
> +
> /* This function goes through all reaching defs of the source
>    of the candidate for elimination (CAND) and tries to combine
>    the extension with the definition instruction.  The changes
> @@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
> 
>   state->defs_list.truncate (0);
>   state->copies_list.truncate (0);
> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
> +
> +  if (abi_extension_candidate_p (cand->insn)
> +      && (!get_defs (cand->insn, orig_src, NULL)))
> +    return abi_handle_regs_without_defs_p (cand->insn);
> 
>   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
> 
> @@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>         }
>     }
> 
> +  rtx insn_set = single_set (cand->insn);
> +
> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
> +
> +  bool promote_p = abi_target_promote_function_mode (mode);
> +
> +  if (promote_p)
> +    return true;
> +
>   if (merge_successful)
>     {
>       /* Commit the changes here if possible
> @@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>       rtx reg = XEXP (src, 0);
>       struct df_link *defs, *def;
>       ext_cand *cand;
> +      defs = get_defs (insn, reg, NULL);
> 
>       /* Zero-extension of an undefined value is partly defined (it's
> completely undefined for sign-extension, though).  So if there exists
> a path from the entry to this zero-extension that leaves this register
> uninitialized, removing the extension could change the behavior of
> correct programs.  So first, check it is not the case.  */
> +      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
> + {
> +  ext_cand e = {expr, code, mode, insn};
> +  insn_list->safe_push (e);
> +  return;
> + }
> +
>       if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
> {
>  if (dump_file)
> @@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
> }
> 
>       /* Second, make sure we can get all the reaching definitions.  */
> -      defs = get_defs (insn, reg, NULL);
>       if (!defs)
> {
>  if (dump_file)
> @@ -1321,7 +1455,8 @@ find_and_remove_re (void)
>      && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>    {
>               reinsn_copy_list.safe_push (curr_cand->insn);
> -              reinsn_copy_list.safe_push (state.defs_list[0]);
> +      if (state.defs_list.length () != 0)
> + reinsn_copy_list.safe_push (state.defs_list[0]);
>    }
>  reinsn_del_list.safe_push (curr_cand->insn);
>  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
> @@ -1345,6 +1480,10 @@ find_and_remove_re (void)
>   for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>     {
>       rtx_insn *curr_insn = reinsn_copy_list[i];
> +
> +      if ((i+1) >= reinsn_copy_list.length ())
> + continue;
> +
>       rtx_insn *def_insn = reinsn_copy_list[i + 1];
> 
>       /* Use the mode of the destination of the defining insn
> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> new file mode 100644
> index 00000000000..5a050df06ff
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> @@ -0,0 +1,13 @@
> +/* { dg-options "-mcpu=power9 -O2" } */
> +
> +void *memset(void *b, int c, unsigned long len)
> +{
> +  unsigned long i;
> +
> +  for (i = 0; i < len; i++)
> +    ((unsigned char *)b)[i] = c;
> +
> +   return b;
> +}
> +
> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
> -- 
> 2.39.3
>
  
Vineet Gupta Sept. 18, 2023, 8:27 p.m. UTC | #2
Hi Ajit,

On 9/17/23 22:59, Ajit Agarwal wrote:
> This new version of patch 6 use improve ree pass for rs6000 target using defined ABI interfaces.
> Bootstrapped and regtested on power64-linux-gnu.
>
> Review comments incorporated.
>
> Thanks & Regards
> Ajit

Nit: This seems to belong to "what changed in v6" between the two "---" 
lines right before start of source diff.

> ree: Improve ree pass for rs6000 target using defined abi interfaces
>
> For rs6000 target we see redundant zero and sign extension and done to
> improve ree pass to eliminate such redundant zero and sign extension
> using defined ABI interfaces.

It seems you have redundant "redundant zero and sign extension" - pun 
intended  ;-)

On a serious note, when debugging your code for a possible RISC-V 
benefit, it seems what it is trying to do is address REE giving up due 
to "missing definition(s)". Perhaps mentioning that in commitlog would 
give the reader more context.

> +/* Return TRUE if target mode is equal to source mode of zero_extend
> +   or sign_extend otherwise false.  */
> +
> +static bool
> +abi_target_promote_function_mode (machine_mode mode)
> +{
> +  int unsignedp;
> +  machine_mode tgt_mode =
> +    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
> +					 NULL_TREE, 1);
> +
> +  if (tgt_mode == mode)
> +    return true;
> +  else
> +    return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an return  registers.  */

Additional Whitespace and grammer
s/an return  registers/a return register

Please *run* contrib/check_gnu_style on your patch before sending out on 
mailing lists, saves reviewers time and they can focus more on technical 
content.

> +
> +static bool
> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
> +    return false;

This still has ABI assumptions: RISC-V generates SIGN_EXTEND for 
functions args and return reg.
This is not a deficiency of patch per-se, but something we would like to 
address - even if as an addon-patch.

> +
> +  if (FUNCTION_VALUE_REGNO_P (regno))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if reg source operand of zero_extend is argument registers
> +   and not return registers and source and destination operand are same
> +   and mode of source and destination operand are not same.  */
> +
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
> +    return false;
Ditto: ABI assumption.

> +
> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));

why not simply @dst_mode

> +  rtx orig_src = XEXP (SET_SRC (set),0);
> +
> +  bool copy_needed
> +    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));

Maybe use @orig_src here, rather than duplicating XEXP (SET_SRC (set),0)

> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)

The bailing out for copy_needed needs extra commentary, why ?

> +      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
> +    return true;
> +
> +  return false;

Consider this bike-shed but I would arrange this code differently. The 
main case here is check for function args and then the not so imp reasons

+  rtx orig_src = XEXP (src, 0);
+
+  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      || abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+    return false;
+
+  /* commentary as to why .... */
+  if (dst_mode == GET_MODE (orig_src))
+    return false;

-   bool copy_needed
-    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+  /* copy needed  ..... */
+  if (REGNO (SET_DEST (set)) != REGNO (orig_src))
+    return false;
+
+ return true;

> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an argument registers.  */
> +
> +static bool
> +abi_extension_candidate_argno_p (rtx_code code, int regno)
> +{
> +  if (code != ZERO_EXTEND)
> +    return false;

ABI assumption still.

> +
> +  if (FUNCTION_ARG_REGNO_P (regno))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn doesn't have defs and have
> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
> +
> +static bool
> +abi_handle_regs_without_defs_p (rtx_insn *insn)
> +{
> +  if (side_effects_p (PATTERN (insn)))
> +    return false;
> +
> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
> +
> +  if (!uses)
> +    return false;
> +
> +  for (df_link *use = uses; use; use = use->next)
> +    {
> +      if (!use->ref)
> +	return false;
> +
> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
> +	return false;
> +
> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
> +
> +      if (GET_CODE (PATTERN (use_insn)) == SET)
> +	{
> +	  rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
> +
> +	  if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
> +	      || GET_RTX_CLASS (code) == RTX_COMM_ARITH
> +	      || GET_RTX_CLASS (code) == RTX_UNARY)
> +	    return false;
> +	}
> +     }
> +  return true;
> +}
> +
>   /* This function goes through all reaching defs of the source
>      of the candidate for elimination (CAND) and tries to combine
>      the extension with the definition instruction.  The changes
> @@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>   
>     state->defs_list.truncate (0);
>     state->copies_list.truncate (0);
> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
> +
> +  if (abi_extension_candidate_p (cand->insn)
> +      && (!get_defs (cand->insn, orig_src, NULL)))
> +    return abi_handle_regs_without_defs_p (cand->insn);
>   
>     outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>   
> @@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>           }
>       }
>   
> +  rtx insn_set = single_set (cand->insn);
> +
> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
> +
> +  bool promote_p = abi_target_promote_function_mode (mode);
> +
> +  if (promote_p)
> +    return true;
> +
>     if (merge_successful)
>       {
>         /* Commit the changes here if possible
> @@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>         rtx reg = XEXP (src, 0);
>         struct df_link *defs, *def;
>         ext_cand *cand;
> +      defs = get_defs (insn, reg, NULL);
>   
>         /* Zero-extension of an undefined value is partly defined (it's
>   	 completely undefined for sign-extension, though).  So if there exists
>   	 a path from the entry to this zero-extension that leaves this register
>   	 uninitialized, removing the extension could change the behavior of
>   	 correct programs.  So first, check it is not the case.  */
> +      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
> +	{
> +	  ext_cand e = {expr, code, mode, insn};
> +	  insn_list->safe_push (e);
> +	  return;
> +	}
> +
Naively I would expect this change to go in the existing !defs { } block 
below, after the uninitialized case but it seems this is deliberate - 
you could be bypassing the uninitialized data case ... (see below as well)

>         if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
>   	{
>   	  if (dump_file)
> @@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>   	}
>   
>         /* Second, make sure we can get all the reaching definitions.  */
> -      defs = get_defs (insn, reg, NULL);
>         if (!defs)
>   	{
>   	  if (dump_file)
> @@ -1321,7 +1455,8 @@ find_and_remove_re (void)
>   	      && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>   	    {
>                 reinsn_copy_list.safe_push (curr_cand->insn);
> -              reinsn_copy_list.safe_push (state.defs_list[0]);
> +	      if (state.defs_list.length () != 0)
> +		reinsn_copy_list.safe_push (state.defs_list[0]);
>   	    }
>   	  reinsn_del_list.safe_push (curr_cand->insn);
>   	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
> @@ -1345,6 +1480,10 @@ find_and_remove_re (void)
>     for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>       {
>         rtx_insn *curr_insn = reinsn_copy_list[i];
> +
> +      if ((i+1) >= reinsn_copy_list.length ())
> +	continue;
> +
>         rtx_insn *def_insn = reinsn_copy_list[i + 1];
>   
>         /* Use the mode of the destination of the defining insn
> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> new file mode 100644
> index 00000000000..5a050df06ff
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> @@ -0,0 +1,13 @@
> +/* { dg-options "-mcpu=power9 -O2" } */
> +
> +void *memset(void *b, int c, unsigned long len)
> +{
> +  unsigned long i;
> +
> +  for (i = 0; i < len; i++)
> +    ((unsigned char *)b)[i] = c;
> +
> +   return b;
> +}

So I did a simple cc1 build for powerpc on gcc tip w/ and w/o your patch.
W/o the patch this test generates the rlwinm insn and REE spits out.

| Cannot eliminate extension:
| (insn 20 18 22 3 (set (reg:SI 4 4)
|        (zero_extend:SI (reg:QI 4 4 [orig:120 c+3 ] [120]))) 
"zext-elim-3.c":8:29 7 {zero_extendqisi2}
|     (nil))
| because it can operate on uninitialized data

With the patch obviously the extension insn goes away and so does the 
diagnostic, but it wonder your abi_extension_candidate_argno_p () check 
before uniinitialized data check is correct/safe ? I've not looked 
closely what the scope of that check is

IOW, is this test case sufficient or do we need a different test which 
would cure a genuine "missing definitiion(s)" bailout of REE ?

> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */

Thanks,
-Vineet
  
Ajit Agarwal Sept. 19, 2023, 9 a.m. UTC | #3
Hello Maxim:

Version 7 of the patch solves this problem. Sorry for the inconvenience caused.

Thanks & Regards
Ajit

On 18/09/23 2:36 pm, Maxim Kuvyrkov wrote:
> Hi Ajit,
> 
> Is this patch supposed to be applied on top of another patch?  As is, this patch fails build on AArch64 and AArch32, and Linaro TCWG CI have sent notifications about the failures for v5 [1] and v6 [2] of this patch to you.  Did you receive the notifications?
> 
> Kind regards,
> 
> [1] https://patchwork.sourceware.org/project/gcc/patch/5ad7cdca-63e1-73af-b38d-d58898e21ef9@linux.ibm.com/
> [2] https://patchwork.sourceware.org/project/gcc/patch/65ed79a3-9964-dd50-39cb-98d5dbc72881@linux.ibm.com/
> 
> --
> Maxim Kuvyrkov
> https://www.linaro.org
> 
>> On Sep 18, 2023, at 09:59, Ajit Agarwal via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>>
>> This new version of patch 6 use improve ree pass for rs6000 target using defined ABI interfaces.
>> Bootstrapped and regtested on power64-linux-gnu.
>>
>> Review comments incorporated.
>>
>> Thanks & Regards
>> Ajit
>>
>>
>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>
>> For rs6000 target we see redundant zero and sign extension and done to
>> improve ree pass to eliminate such redundant zero and sign extension
>> using defined ABI interfaces.
>>
>> 2023-09-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>
>> gcc/ChangeLog:
>>
>> * ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
>> defined abi interfaces.
>> (add_removable_extension): Use of defined abi interfaces for no
>> reaching defs.
>> (abi_extension_candidate_return_reg_p): New function.
>> (abi_extension_candidate_p): New function.
>> (abi_extension_candidate_argno_p): New function.
>> (abi_handle_regs_without_defs_p): New function.
>> (abi_target_promote_function_mode): New function.
>>
>> gcc/testsuite/ChangeLog:
>>
>>        * g++.target/powerpc/zext-elim-3.C
>> ---
>> gcc/ree.cc                                    | 145 +++++++++++++++++-
>> .../g++.target/powerpc/zext-elim-3.C          |  13 ++
>> 2 files changed, 155 insertions(+), 3 deletions(-)
>> create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>>
>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>> index fc04249fa84..e395af6b1bd 100644
>> --- a/gcc/ree.cc
>> +++ b/gcc/ree.cc
>> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>>     if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>>       break;
>>
>> -  gcc_assert (def != NULL);
>> +  if (def == NULL)
>> +    return NULL;
>>
>>   ref_chain = DF_REF_CHAIN (def);
>>
>> @@ -750,6 +751,118 @@ get_extended_src_reg (rtx src)
>>   return src;
>> }
>>
>> +/* Return TRUE if target mode is equal to source mode of zero_extend
>> +   or sign_extend otherwise false.  */
>> +
>> +static bool
>> +abi_target_promote_function_mode (machine_mode mode)
>> +{
>> +  int unsignedp;
>> +  machine_mode tgt_mode =
>> +    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
>> + NULL_TREE, 1);
>> +
>> +  if (tgt_mode == mode)
>> +    return true;
>> +  else
>> +    return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an return  registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
>> +    return false;
>> +
>> +  if (FUNCTION_VALUE_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>> +   and not return registers and source and destination operand are same
>> +   and mode of source and destination operand are not same.  */
>> +
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
>> +    return false;
>> +
>> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
>> +  rtx orig_src = XEXP (SET_SRC (set),0);
>> +
>> +  bool copy_needed
>> +    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
>> +
>> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
>> +      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an argument registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_argno_p (rtx_code code, int regno)
>> +{
>> +  if (code != ZERO_EXTEND)
>> +    return false;
>> +
>> +  if (FUNCTION_ARG_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn doesn't have defs and have
>> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
>> +
>> +static bool
>> +abi_handle_regs_without_defs_p (rtx_insn *insn)
>> +{
>> +  if (side_effects_p (PATTERN (insn)))
>> +    return false;
>> +
>> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
>> +
>> +  if (!uses)
>> +    return false;
>> +
>> +  for (df_link *use = uses; use; use = use->next)
>> +    {
>> +      if (!use->ref)
>> + return false;
>> +
>> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
>> + return false;
>> +
>> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
>> +
>> +      if (GET_CODE (PATTERN (use_insn)) == SET)
>> + {
>> +  rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
>> +
>> +  if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
>> +      || GET_RTX_CLASS (code) == RTX_COMM_ARITH
>> +      || GET_RTX_CLASS (code) == RTX_UNARY)
>> +    return false;
>> + }
>> +     }
>> +  return true;
>> +}
>> +
>> /* This function goes through all reaching defs of the source
>>    of the candidate for elimination (CAND) and tries to combine
>>    the extension with the definition instruction.  The changes
>> @@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>
>>   state->defs_list.truncate (0);
>>   state->copies_list.truncate (0);
>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>> +
>> +  if (abi_extension_candidate_p (cand->insn)
>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>> +    return abi_handle_regs_without_defs_p (cand->insn);
>>
>>   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>
>> @@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>         }
>>     }
>>
>> +  rtx insn_set = single_set (cand->insn);
>> +
>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>> +
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  if (promote_p)
>> +    return true;
>> +
>>   if (merge_successful)
>>     {
>>       /* Commit the changes here if possible
>> @@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>       rtx reg = XEXP (src, 0);
>>       struct df_link *defs, *def;
>>       ext_cand *cand;
>> +      defs = get_defs (insn, reg, NULL);
>>
>>       /* Zero-extension of an undefined value is partly defined (it's
>> completely undefined for sign-extension, though).  So if there exists
>> a path from the entry to this zero-extension that leaves this register
>> uninitialized, removing the extension could change the behavior of
>> correct programs.  So first, check it is not the case.  */
>> +      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
>> + {
>> +  ext_cand e = {expr, code, mode, insn};
>> +  insn_list->safe_push (e);
>> +  return;
>> + }
>> +
>>       if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
>> {
>>  if (dump_file)
>> @@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>> }
>>
>>       /* Second, make sure we can get all the reaching definitions.  */
>> -      defs = get_defs (insn, reg, NULL);
>>       if (!defs)
>> {
>>  if (dump_file)
>> @@ -1321,7 +1455,8 @@ find_and_remove_re (void)
>>      && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>>    {
>>               reinsn_copy_list.safe_push (curr_cand->insn);
>> -              reinsn_copy_list.safe_push (state.defs_list[0]);
>> +      if (state.defs_list.length () != 0)
>> + reinsn_copy_list.safe_push (state.defs_list[0]);
>>    }
>>  reinsn_del_list.safe_push (curr_cand->insn);
>>  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
>> @@ -1345,6 +1480,10 @@ find_and_remove_re (void)
>>   for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>>     {
>>       rtx_insn *curr_insn = reinsn_copy_list[i];
>> +
>> +      if ((i+1) >= reinsn_copy_list.length ())
>> + continue;
>> +
>>       rtx_insn *def_insn = reinsn_copy_list[i + 1];
>>
>>       /* Use the mode of the destination of the defining insn
>> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> new file mode 100644
>> index 00000000000..5a050df06ff
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> @@ -0,0 +1,13 @@
>> +/* { dg-options "-mcpu=power9 -O2" } */
>> +
>> +void *memset(void *b, int c, unsigned long len)
>> +{
>> +  unsigned long i;
>> +
>> +  for (i = 0; i < len; i++)
>> +    ((unsigned char *)b)[i] = c;
>> +
>> +   return b;
>> +}
>> +
>> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
>> -- 
>> 2.39.3
>>
>
  
Ajit Agarwal Sept. 19, 2023, 9:01 a.m. UTC | #4
Hello Vineet:

Version 7 of the patch incorporates the below review comments.
Please review.

Thanks & Regards
Ajit

On 19/09/23 1:57 am, Vineet Gupta wrote:
> Hi Ajit,
> 
> On 9/17/23 22:59, Ajit Agarwal wrote:
>> This new version of patch 6 use improve ree pass for rs6000 target using defined ABI interfaces.
>> Bootstrapped and regtested on power64-linux-gnu.
>>
>> Review comments incorporated.
>>
>> Thanks & Regards
>> Ajit
> 
> Nit: This seems to belong to "what changed in v6" between the two "---" lines right before start of source diff.
> 
>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>
>> For rs6000 target we see redundant zero and sign extension and done to
>> improve ree pass to eliminate such redundant zero and sign extension
>> using defined ABI interfaces.
> 
> It seems you have redundant "redundant zero and sign extension" - pun intended  ;-)
> 
> On a serious note, when debugging your code for a possible RISC-V benefit, it seems what it is trying to do is address REE giving up due to "missing definition(s)". Perhaps mentioning that in commitlog would give the reader more context.
> 
>> +/* Return TRUE if target mode is equal to source mode of zero_extend
>> +   or sign_extend otherwise false.  */
>> +
>> +static bool
>> +abi_target_promote_function_mode (machine_mode mode)
>> +{
>> +  int unsignedp;
>> +  machine_mode tgt_mode =
>> +    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
>> +                     NULL_TREE, 1);
>> +
>> +  if (tgt_mode == mode)
>> +    return true;
>> +  else
>> +    return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an return  registers.  */
> 
> Additional Whitespace and grammer
> s/an return  registers/a return register
> 
> Please *run* contrib/check_gnu_style on your patch before sending out on mailing lists, saves reviewers time and they can focus more on technical content.
> 
>> +
>> +static bool
>> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
>> +    return false;
> 
> This still has ABI assumptions: RISC-V generates SIGN_EXTEND for functions args and return reg.
> This is not a deficiency of patch per-se, but something we would like to address - even if as an addon-patch.
> 
>> +
>> +  if (FUNCTION_VALUE_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>> +   and not return registers and source and destination operand are same
>> +   and mode of source and destination operand are not same.  */
>> +
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
>> +    return false;
> Ditto: ABI assumption.
> 
>> +
>> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
> 
> why not simply @dst_mode
> 
>> +  rtx orig_src = XEXP (SET_SRC (set),0);
>> +
>> +  bool copy_needed
>> +    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> 
> Maybe use @orig_src here, rather than duplicating XEXP (SET_SRC (set),0)
> 
>> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
> 
> The bailing out for copy_needed needs extra commentary, why ?
> 
>> +      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
>> +    return true;
>> +
>> +  return false;
> 
> Consider this bike-shed but I would arrange this code differently. The main case here is check for function args and then the not so imp reasons
> 
> +  rtx orig_src = XEXP (src, 0);
> +
> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      || abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
> +    return false;
> +
> +  /* commentary as to why .... */
> +  if (dst_mode == GET_MODE (orig_src))
> +    return false;
> 
> -   bool copy_needed
> -    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> +  /* copy needed  ..... */
> +  if (REGNO (SET_DEST (set)) != REGNO (orig_src))
> +    return false;
> +
> + return true;
> 
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an argument registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_argno_p (rtx_code code, int regno)
>> +{
>> +  if (code != ZERO_EXTEND)
>> +    return false;
> 
> ABI assumption still.
> 
>> +
>> +  if (FUNCTION_ARG_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn doesn't have defs and have
>> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
>> +
>> +static bool
>> +abi_handle_regs_without_defs_p (rtx_insn *insn)
>> +{
>> +  if (side_effects_p (PATTERN (insn)))
>> +    return false;
>> +
>> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
>> +
>> +  if (!uses)
>> +    return false;
>> +
>> +  for (df_link *use = uses; use; use = use->next)
>> +    {
>> +      if (!use->ref)
>> +    return false;
>> +
>> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
>> +    return false;
>> +
>> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
>> +
>> +      if (GET_CODE (PATTERN (use_insn)) == SET)
>> +    {
>> +      rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
>> +
>> +      if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
>> +          || GET_RTX_CLASS (code) == RTX_COMM_ARITH
>> +          || GET_RTX_CLASS (code) == RTX_UNARY)
>> +        return false;
>> +    }
>> +     }
>> +  return true;
>> +}
>> +
>>   /* This function goes through all reaching defs of the source
>>      of the candidate for elimination (CAND) and tries to combine
>>      the extension with the definition instruction.  The changes
>> @@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>       state->defs_list.truncate (0);
>>     state->copies_list.truncate (0);
>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>> +
>> +  if (abi_extension_candidate_p (cand->insn)
>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>> +    return abi_handle_regs_without_defs_p (cand->insn);
>>       outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>   @@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>           }
>>       }
>>   +  rtx insn_set = single_set (cand->insn);
>> +
>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>> +
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  if (promote_p)
>> +    return true;
>> +
>>     if (merge_successful)
>>       {
>>         /* Commit the changes here if possible
>> @@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>         rtx reg = XEXP (src, 0);
>>         struct df_link *defs, *def;
>>         ext_cand *cand;
>> +      defs = get_defs (insn, reg, NULL);
>>           /* Zero-extension of an undefined value is partly defined (it's
>>        completely undefined for sign-extension, though).  So if there exists
>>        a path from the entry to this zero-extension that leaves this register
>>        uninitialized, removing the extension could change the behavior of
>>        correct programs.  So first, check it is not the case.  */
>> +      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
>> +    {
>> +      ext_cand e = {expr, code, mode, insn};
>> +      insn_list->safe_push (e);
>> +      return;
>> +    }
>> +
> Naively I would expect this change to go in the existing !defs { } block below, after the uninitialized case but it seems this is deliberate - you could be bypassing the uninitialized data case ... (see below as well)
> 
>>         if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
>>       {
>>         if (dump_file)
>> @@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>       }
>>           /* Second, make sure we can get all the reaching definitions.  */
>> -      defs = get_defs (insn, reg, NULL);
>>         if (!defs)
>>       {
>>         if (dump_file)
>> @@ -1321,7 +1455,8 @@ find_and_remove_re (void)
>>             && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>>           {
>>                 reinsn_copy_list.safe_push (curr_cand->insn);
>> -              reinsn_copy_list.safe_push (state.defs_list[0]);
>> +          if (state.defs_list.length () != 0)
>> +        reinsn_copy_list.safe_push (state.defs_list[0]);
>>           }
>>         reinsn_del_list.safe_push (curr_cand->insn);
>>         state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
>> @@ -1345,6 +1480,10 @@ find_and_remove_re (void)
>>     for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>>       {
>>         rtx_insn *curr_insn = reinsn_copy_list[i];
>> +
>> +      if ((i+1) >= reinsn_copy_list.length ())
>> +    continue;
>> +
>>         rtx_insn *def_insn = reinsn_copy_list[i + 1];
>>           /* Use the mode of the destination of the defining insn
>> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> new file mode 100644
>> index 00000000000..5a050df06ff
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> @@ -0,0 +1,13 @@
>> +/* { dg-options "-mcpu=power9 -O2" } */
>> +
>> +void *memset(void *b, int c, unsigned long len)
>> +{
>> +  unsigned long i;
>> +
>> +  for (i = 0; i < len; i++)
>> +    ((unsigned char *)b)[i] = c;
>> +
>> +   return b;
>> +}
> 
> So I did a simple cc1 build for powerpc on gcc tip w/ and w/o your patch.
> W/o the patch this test generates the rlwinm insn and REE spits out.
> 
> | Cannot eliminate extension:
> | (insn 20 18 22 3 (set (reg:SI 4 4)
> |        (zero_extend:SI (reg:QI 4 4 [orig:120 c+3 ] [120]))) "zext-elim-3.c":8:29 7 {zero_extendqisi2}
> |     (nil))
> | because it can operate on uninitialized data
> 
> With the patch obviously the extension insn goes away and so does the diagnostic, but it wonder your abi_extension_candidate_argno_p () check before uniinitialized data check is correct/safe ? I've not looked closely what the scope of that check is
> 
> IOW, is this test case sufficient or do we need a different test which would cure a genuine "missing definitiion(s)" bailout of REE ?
> 
>> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
> 
> Thanks,
> -Vineet
  
Bernhard Reutner-Fischer Sept. 19, 2023, 2:27 p.m. UTC | #5
On Mon, 18 Sep 2023 13:27:38 -0700
Vineet Gupta <vineetg@rivosinc.com> wrote:


> > @@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
> >         rtx reg = XEXP (src, 0);
> >         struct df_link *defs, *def;
> >         ext_cand *cand;
> > +      defs = get_defs (insn, reg, NULL);
> >   
> >         /* Zero-extension of an undefined value is partly defined (it's
> >   	 completely undefined for sign-extension, though).  So if there exists
> >   	 a path from the entry to this zero-extension that leaves this register
> >   	 uninitialized, removing the extension could change the behavior of
> >   	 correct programs.  So first, check it is not the case.  */
> > +      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
> > +	{
> > +	  ext_cand e = {expr, code, mode, insn};
> > +	  insn_list->safe_push (e);
> > +	  return;
> > +	}
> > +  
> Naively I would expect this change to go in the existing !defs { } block 
> below, after the uninitialized case but it seems this is deliberate - 
> you could be bypassing the uninitialized data case ... (see below as well)

Especially because the comment is now detached from the
corresponding code, which is a no-go.

thanks,
  
Ajit Agarwal Oct. 24, 2023, 9:39 a.m. UTC | #6
On 19/09/23 1:57 am, Vineet Gupta wrote:
> Hi Ajit,
> 
> On 9/17/23 22:59, Ajit Agarwal wrote:
>> This new version of patch 6 use improve ree pass for rs6000 target using defined ABI interfaces.
>> Bootstrapped and regtested on power64-linux-gnu.
>>
>> Review comments incorporated.
>>
>> Thanks & Regards
>> Ajit
> 
> Nit: This seems to belong to "what changed in v6" between the two "---" lines right before start of source diff.

Addressed in V13 of the patch.
> 
>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>
>> For rs6000 target we see redundant zero and sign extension and done to
>> improve ree pass to eliminate such redundant zero and sign extension
>> using defined ABI interfaces.
> 
> It seems you have redundant "redundant zero and sign extension" - pun intended  ;-)
> 
> On a serious note, when debugging your code for a possible RISC-V benefit, it seems what it is trying to do is address REE giving up due to "missing definition(s)". Perhaps mentioning that in commitlog would give the reader more context.

Addressed in V13 of the patch.
> 
>> +/* Return TRUE if target mode is equal to source mode of zero_extend
>> +   or sign_extend otherwise false.  */
>> +
>> +static bool
>> +abi_target_promote_function_mode (machine_mode mode)
>> +{
>> +  int unsignedp;
>> +  machine_mode tgt_mode =
>> +    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
>> +                     NULL_TREE, 1);
>> +
>> +  if (tgt_mode == mode)
>> +    return true;
>> +  else
>> +    return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an return  registers.  */
> 
> Additional Whitespace and grammer
> s/an return  registers/a return register
> 

Addressed in V12 of the patch.

> Please *run* contrib/check_gnu_style on your patch before sending out on mailing lists, saves reviewers time and they can focus more on technical content.
> 
>> +
>> +static bool
>> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
>> +    return false;
> 
> This still has ABI assumptions: RISC-V generates SIGN_EXTEND for functions args and return reg.
> This is not a deficiency of patch per-se, but something we would like to address - even if as an addon-patch.
>

Already addressed in V13 of the patch.
 
>> +
>> +  if (FUNCTION_VALUE_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>> +   and not return registers and source and destination operand are same
>> +   and mode of source and destination operand are not same.  */
>> +
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
>> +    return false;
> Ditto: ABI assumption.
> 

Already addressed in V12 of the patch.

>> +
>> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
> 
> why not simply @dst_mode
> 
>> +  rtx orig_src = XEXP (SET_SRC (set),0);
>> +
>> +  bool copy_needed
>> +    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> 
> Maybe use @orig_src here, rather than duplicating XEXP (SET_SRC (set),0)
>

Already addressed.
 
>> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
> 
> The bailing out for copy_needed needs extra commentary, why ?
> 
>> +      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
>> +    return true;
>> +
>> +  return false;
> 
> Consider this bike-shed but I would arrange this code differently. The main case here is check for function args and then the not so imp reasons
> 
> +  rtx orig_src = XEXP (src, 0);
> +
> +  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +      || abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
> +    return false;
> +
> +  /* commentary as to why .... */
> +  if (dst_mode == GET_MODE (orig_src))
> +    return false;
> 
> -   bool copy_needed
> -    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> +  /* copy needed  ..... */
> +  if (REGNO (SET_DEST (set)) != REGNO (orig_src))
> +    return false;
> +
> + return true;
> 

Already addressed.


>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an argument registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_argno_p (rtx_code code, int regno)
>> +{
>> +  if (code != ZERO_EXTEND)
>> +    return false;
> 
> ABI assumption still.
>

Already addressed.
 
>> +
>> +  if (FUNCTION_ARG_REGNO_P (regno))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn doesn't have defs and have
>> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
>> +
>> +static bool
>> +abi_handle_regs_without_defs_p (rtx_insn *insn)
>> +{
>> +  if (side_effects_p (PATTERN (insn)))
>> +    return false;
>> +
>> +  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
>> +
>> +  if (!uses)
>> +    return false;
>> +
>> +  for (df_link *use = uses; use; use = use->next)
>> +    {
>> +      if (!use->ref)
>> +    return false;
>> +
>> +      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
>> +    return false;
>> +
>> +      rtx_insn *use_insn = DF_REF_INSN (use->ref);
>> +
>> +      if (GET_CODE (PATTERN (use_insn)) == SET)
>> +    {
>> +      rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
>> +
>> +      if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
>> +          || GET_RTX_CLASS (code) == RTX_COMM_ARITH
>> +          || GET_RTX_CLASS (code) == RTX_UNARY)
>> +        return false;
>> +    }
>> +     }
>> +  return true;
>> +}
>> +
>>   /* This function goes through all reaching defs of the source
>>      of the candidate for elimination (CAND) and tries to combine
>>      the extension with the definition instruction.  The changes
>> @@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>       state->defs_list.truncate (0);
>>     state->copies_list.truncate (0);
>> +  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
>> +
>> +  if (abi_extension_candidate_p (cand->insn)
>> +      && (!get_defs (cand->insn, orig_src, NULL)))
>> +    return abi_handle_regs_without_defs_p (cand->insn);
>>       outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
>>   @@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
>>           }
>>       }
>>   +  rtx insn_set = single_set (cand->insn);
>> +
>> +  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
>> +
>> +  bool promote_p = abi_target_promote_function_mode (mode);
>> +
>> +  if (promote_p)
>> +    return true;
>> +
>>     if (merge_successful)
>>       {
>>         /* Commit the changes here if possible
>> @@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>         rtx reg = XEXP (src, 0);
>>         struct df_link *defs, *def;
>>         ext_cand *cand;
>> +      defs = get_defs (insn, reg, NULL);
>>           /* Zero-extension of an undefined value is partly defined (it's
>>        completely undefined for sign-extension, though).  So if there exists
>>        a path from the entry to this zero-extension that leaves this register
>>        uninitialized, removing the extension could change the behavior of
>>        correct programs.  So first, check it is not the case.  */
>> +      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
>> +    {
>> +      ext_cand e = {expr, code, mode, insn};
>> +      insn_list->safe_push (e);
>> +      return;
>> +    }
>> +
> Naively I would expect this change to go in the existing !defs { } block below, after the uninitialized case but it seems this is deliberate - you could be bypassing the uninitialized data case ... (see below as well)
> 

Already addressed.
_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
>>       {
>>         if (dump_file)
>> @@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
>>       }
>>           /* Second, make sure we can get all the reaching definitions.  */
>> -      defs = get_defs (insn, reg, NULL);
>>         if (!defs)
>>       {
>>         if (dump_file)
>> @@ -1321,7 +1455,8 @@ find_and_remove_re (void)
>>             && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
>>           {
>>                 reinsn_copy_list.safe_push (curr_cand->insn);
>> -              reinsn_copy_list.safe_push (state.defs_list[0]);
>> +          if (state.defs_list.length () != 0)
>> +        reinsn_copy_list.safe_push (state.defs_list[0]);
>>           }
>>         reinsn_del_list.safe_push (curr_cand->insn);
>>         state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
>> @@ -1345,6 +1480,10 @@ find_and_remove_re (void)
>>     for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
>>       {
>>         rtx_insn *curr_insn = reinsn_copy_list[i];
>> +
>> +      if ((i+1) >= reinsn_copy_list.length ())
>> +    continue;
>> +
>>         rtx_insn *def_insn = reinsn_copy_list[i + 1];
>>           /* Use the mode of the destination of the defining insn
>> diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> new file mode 100644
>> index 00000000000..5a050df06ff
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>> @@ -0,0 +1,13 @@
>> +/* { dg-options "-mcpu=power9 -O2" } */
>> +
>> +void *memset(void *b, int c, unsigned long len)
>> +{
>> +  unsigned long i;
>> +
>> +  for (i = 0; i < len; i++)
>> +    ((unsigned char *)b)[i] = c;
>> +
>> +   return b;
>> +}
> 
> So I did a simple cc1 build for powerpc on gcc tip w/ and w/o your patch.
> W/o the patch this test generates the rlwinm insn and REE spits out.
> 
> | Cannot eliminate extension:
> | (insn 20 18 22 3 (set (reg:SI 4 4)
> |        (zero_extend:SI (reg:QI 4 4 [orig:120 c+3 ] [120]))) "zext-elim-3.c":8:29 7 {zero_extendqisi2}
> |     (nil))
> | because it can operate on uninitialized data
> 
> With the patch obviously the extension insn goes away and so does the diagnostic, but it wonder your abi_extension_candidate_argno_p () check before uniinitialized data check is correct/safe ? I've not looked closely what the scope of that check is
> 
> IOW, is this test case sufficient or do we need a different test which would cure a genuine "missing definitiion(s)" bailout of REE ?
> 
>> +/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
> 
> Thanks,
> -Vineet

Thanks & Regards
Ajit
  

Patch

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..e395af6b1bd 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@  get_uses (rtx_insn *insn, rtx reg)
     if (REGNO (DF_REF_REG (def)) == REGNO (reg))
       break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+    return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,118 @@  get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
+					 NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+    return true;
+  else
+    return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+    return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+    return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code != ZERO_EXTEND)
+    return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+    return false;
+
+  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+    return false;
+
+  for (df_link *use = uses; use; use = use->next)
+    {
+      if (!use->ref)
+	return false;
+
+      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+	return false;
+
+      rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+      if (GET_CODE (PATTERN (use_insn)) == SET)
+	{
+	  rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+	  if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+	      || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+	      || GET_RTX_CLASS (code) == RTX_UNARY)
+	    return false;
+	}
+     }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source
    of the candidate for elimination (CAND) and tries to combine
    the extension with the definition instruction.  The changes
@@ -770,6 +883,11 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
 
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+
+  if (abi_extension_candidate_p (cand->insn)
+      && (!get_defs (cand->insn, orig_src, NULL)))
+    return abi_handle_regs_without_defs_p (cand->insn);
 
   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
 
@@ -1036,6 +1154,15 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
         }
     }
 
+  rtx insn_set = single_set (cand->insn);
+
+  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
+
+  bool promote_p = abi_target_promote_function_mode (mode);
+
+  if (promote_p)
+    return true;
+
   if (merge_successful)
     {
       /* Commit the changes here if possible
@@ -1112,12 +1239,20 @@  add_removable_extension (const_rtx expr, rtx_insn *insn,
       rtx reg = XEXP (src, 0);
       struct df_link *defs, *def;
       ext_cand *cand;
+      defs = get_defs (insn, reg, NULL);
 
       /* Zero-extension of an undefined value is partly defined (it's
 	 completely undefined for sign-extension, though).  So if there exists
 	 a path from the entry to this zero-extension that leaves this register
 	 uninitialized, removing the extension could change the behavior of
 	 correct programs.  So first, check it is not the case.  */
+      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
+	{
+	  ext_cand e = {expr, code, mode, insn};
+	  insn_list->safe_push (e);
+	  return;
+	}
+
       if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
 	{
 	  if (dump_file)
@@ -1131,7 +1266,6 @@  add_removable_extension (const_rtx expr, rtx_insn *insn,
 	}
 
       /* Second, make sure we can get all the reaching definitions.  */
-      defs = get_defs (insn, reg, NULL);
       if (!defs)
 	{
 	  if (dump_file)
@@ -1321,7 +1455,8 @@  find_and_remove_re (void)
 	      && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
 	    {
               reinsn_copy_list.safe_push (curr_cand->insn);
-              reinsn_copy_list.safe_push (state.defs_list[0]);
+	      if (state.defs_list.length () != 0)
+		reinsn_copy_list.safe_push (state.defs_list[0]);
 	    }
 	  reinsn_del_list.safe_push (curr_cand->insn);
 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
@@ -1345,6 +1480,10 @@  find_and_remove_re (void)
   for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
     {
       rtx_insn *curr_insn = reinsn_copy_list[i];
+
+      if ((i+1) >= reinsn_copy_list.length ())
+	continue;
+
       rtx_insn *def_insn = reinsn_copy_list[i + 1];
 
       /* Use the mode of the destination of the defining insn
diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
new file mode 100644
index 00000000000..5a050df06ff
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
@@ -0,0 +1,13 @@ 
+/* { dg-options "-mcpu=power9 -O2" } */
+
+void *memset(void *b, int c, unsigned long len)
+{
+  unsigned long i;
+
+  for (i = 0; i < len; i++)
+    ((unsigned char *)b)[i] = c;
+
+   return b;
+}
+
+/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */