try multi dest registers in default_zero_call_used_regs

Message ID orczi75965.fsf@lxoliva.fsfla.org
State New
Headers
Series try multi dest registers in default_zero_call_used_regs |

Commit Message

Alexandre Oliva March 27, 2022, 10:21 p.m. UTC
  When the mode of regno_reg_rtx is not hard_regno_mode_ok for the
target, try grouping the register with subsequent ones.  This enables
s16 to s31 and their hidden pairs to be zeroed with the default logic
on some arm variants.

Regstrapped on x86_64-linux-gnu, also tested on an affected arm
configuration.  Ok to install?


for  gcc/ChangeLog

	* targhooks.cc (default_zero_call_used_regs): Attempt to group
	regs that the target refuses to use in their natural modes.
---
 gcc/targhooks.cc |   79 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 9 deletions(-)
  

Comments

Richard Sandiford March 31, 2022, 12:10 p.m. UTC | #1
Alexandre Oliva via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> When the mode of regno_reg_rtx is not hard_regno_mode_ok for the
> target, try grouping the register with subsequent ones.  This enables
> s16 to s31 and their hidden pairs to be zeroed with the default logic
> on some arm variants.
>
> Regstrapped on x86_64-linux-gnu, also tested on an affected arm
> configuration.  Ok to install?
>
>
> for  gcc/ChangeLog
>
> 	* targhooks.c (default_zero_call_used_regs): Attempt to group
> 	regs that the target refuses to use in their natural modes.

Thanks for doing this.  Some comments below…

> ---
>  gcc/targhooks.cc |   79 ++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 70 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> index fc49235eb38ee..bdaab9c63c7ee 100644
> --- a/gcc/targhooks.cc
> +++ b/gcc/targhooks.cc
> @@ -1035,16 +1035,45 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>      if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>        {
>  	rtx_insn *last_insn = get_last_insn ();
> -	machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
> +	rtx regno_rtx = regno_reg_rtx[regno];
> +	machine_mode mode = GET_MODE (regno_rtx);
> +
> +	/* If the natural mode doesn't work, try some wider mode.  */
> +	if (!targetm.hard_regno_mode_ok (regno, mode))
> +	  {
> +	    for (int nregs = 2;
> +		 regno + nregs <= FIRST_PSEUDO_REGISTER
> +		   && TEST_HARD_REG_BIT (need_zeroed_hardregs,
> +					 regno + nregs - 1);
> +		 nregs++)
> +	      {
> +		mode = choose_hard_reg_mode (regno, nregs, 0);

I like the idea, but it would be good to avoid the large:

  FIRST_PSEUDO_REGISTER * FIRST_PSEUDO_REGISTER * NUM_MACHINE_MODES

constant factor.  How about if init_reg_modes_target recorded the
maximum value of x_hard_regno_nregs?

> +		if (mode == E_VOIDmode)
> +		  continue;
> +		gcc_checking_assert (targetm.hard_regno_mode_ok (regno, mode));
> +		regno_rtx = gen_rtx_REG (mode, regno);
> +		break;
> +	      }
> +	    if (mode != GET_MODE (regno_rtx)
> +		|| regno_rtx == regno_reg_rtx[regno])
> +	      {
> +		SET_HARD_REG_BIT (failed, regno);
> +		continue;
> +	      }
> +	  }
> +
>  	rtx zero = CONST0_RTX (mode);
> -	rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
> +	rtx_insn *insn = emit_move_insn (regno_rtx, zero);
>  	if (!valid_insn_p (insn))
>  	  {
>  	    SET_HARD_REG_BIT (failed, regno);
>  	    delete_insns_since (last_insn);
>  	  }
>  	else
> -	  progress = true;
> +	  {
> +	    progress = true;
> +	    regno += hard_regno_nregs (regno, mode) - 1;
> +	  }
>        }
>  
>    /* Now retry with copies from zeroed registers, as long as we've
> @@ -1060,7 +1089,34 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>        for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>  	if (TEST_HARD_REG_BIT (retrying, regno))
>  	  {
> -	    machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
> +	    rtx regno_rtx = regno_reg_rtx[regno];
> +	    machine_mode mode = GET_MODE (regno_rtx);
> +
> +	    /* If the natural mode doesn't work, try some wider mode.  */
> +	    if (!targetm.hard_regno_mode_ok (regno, mode))
> +	      {
> +		for (int nregs = 2;
> +		     regno + nregs <= FIRST_PSEUDO_REGISTER
> +		       && TEST_HARD_REG_BIT (need_zeroed_hardregs,
> +					     regno + nregs - 1);
> +		     nregs++)
> +		  {
> +		    mode = choose_hard_reg_mode (regno, nregs, 0);
> +		    if (mode == E_VOIDmode)
> +		      continue;
> +		    gcc_checking_assert (targetm.hard_regno_mode_ok (regno,
> +								     mode));
> +		    regno_rtx = gen_rtx_REG (mode, regno);
> +		    break;
> +		  }
> +		if (mode != GET_MODE (regno_rtx)
> +		    || regno_rtx == regno_reg_rtx[regno])
> +		  {
> +		    SET_HARD_REG_BIT (failed, regno);
> +		    continue;
> +		  }
> +	      }
> +	    

This seems big enough to be worth splitting out into a helper, rather
than repeating.  That should also simplify the failure detection:
the helper can return nonnull on success and null on failure.

>  	    bool success = false;
>  	    /* Look for a source.  */
>  	    for (unsigned int src = 0; src < FIRST_PSEUDO_REGISTER; src++)
> @@ -1086,8 +1142,10 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>  
>  		/* SRC is usable, try to copy from it.  */
>  		rtx_insn *last_insn = get_last_insn ();
> -		rtx zsrc = gen_rtx_REG (mode, src);
> -		rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zsrc);
> +		rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
> +			       ? regno_reg_rtx[src]
> +			       : gen_rtx_REG (mode, src));

Is this needed?  The original gen_rtx_REG (mode, src) seems OK.

Thanks,
Richard

> +		rtx_insn *insn = emit_move_insn (regno_rtx, src_rtx);
>  		if (!valid_insn_p (insn))
>  		  /* It didn't work, remove any inserts.  We'll look
>  		     for another SRC.  */
> @@ -1100,13 +1158,16 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>  		  }
>  	      }
>  
> -	    /* If nothing worked for REGNO this round, marked it to be
> +	    /* If nothing worked for REGNO this round, mark it to be
>  	       retried if we get another round.  */
>  	    if (!success)
>  	      SET_HARD_REG_BIT (failed, regno);
>  	    else
> -	      /* Take note so as to enable another round if needed.  */
> -	      progress = true;
> +	      {
> +		/* Take note so as to enable another round if needed.  */
> +		progress = true;
> +		regno += hard_regno_nregs (regno, mode) - 1;
> +	      }
>  	  }
>      }
  
Alexandre Oliva April 1, 2022, 5:56 a.m. UTC | #2
Hello, Richard,

Thanks for the review!

On Mar 31, 2022, Richard Sandiford <richard.sandiford@arm.com> wrote:

>> +	/* If the natural mode doesn't work, try some wider mode.  */
>> +	if (!targetm.hard_regno_mode_ok (regno, mode))
>> +	  {
>> +	    for (int nregs = 2;
>> +		 regno + nregs <= FIRST_PSEUDO_REGISTER
>> +		   && TEST_HARD_REG_BIT (need_zeroed_hardregs,
>> +					 regno + nregs - 1);
>> +		 nregs++)
>> +	      {
>> +		mode = choose_hard_reg_mode (regno, nregs, 0);

> I like the idea, but it would be good to avoid the large:

>   FIRST_PSEUDO_REGISTER * FIRST_PSEUDO_REGISTER * NUM_MACHINE_MODES

> constant factor.

Entering the nregs loop, because the register can't be used in its
natural mode, is supposed to be an unusual case, not worth optimizing
much under Amdahl's law.  I gather the aggregate trip counts are
unlikely to hit the theoretical O(n^2) because registers that would take
the loop are rare and expected to be paired/grouped up.  If that
assumption doesn't hold, then a cap would indeed be desirable.

> How about if init_reg_modes_target recorded the maximum value of
> x_hard_regno_nregs?

I had thought of a cap but couldn't find one I was happy with, and in
the end I thought we didn't need one.  But this is indeed a good one to
use.  Thanks, I'm implementing it.

> This seems big enough to be worth splitting out into a helper, rather
> than repeating.

I had considered that, but it didn't seem to me it would bring an
improvement.  As it turns out, it does.  Thanks.

>> -		rtx zsrc = gen_rtx_REG (mode, src);
>> +		rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
>> +			       ? regno_reg_rtx[src]
>> +			       : gen_rtx_REG (mode, src));

> Is this needed?  The original gen_rtx_REG (mode, src) seems OK.

No, it's not needed, it's just an attempt to avoid allocating RTL that
we have handy.  This function could in theory make several attempts at
allocating rtl for each register in the shrinking pending set.  I
thought every saved bit could help.


Here's what I'm regstrapping on x86_64-linux-gnu, after verifying that
it does the job on the affected arm variant.  Ok to install, assuming no
surprises in the testing?


try multi-reg dest in default_zero_call_used_regs

From: Alexandre Oliva <oliva@adacore.com>

When the mode of regno_reg_rtx is not hard_regno_mode_ok for the
target, try grouping the register with subsequent ones.  This enables
s16 to s31 and their hidden pairs to be zeroed with the default logic
on some arm variants.


for  gcc/ChangeLog

	* targhooks.cc (default_zero_call_used_regs): Attempt to group
	regs that the target refuses to use in their natural modes.
	(zcur_select_mode_rtx): New.
	* regs.h (struct target_regs): Add x_hard_regno_max_nregs.
	(hard_regno_max_nregs): Define.
	* reginfo.cc (init_reg_modes_target): Set hard_regno_max_nregs.
---
 gcc/reginfo.cc   |    9 ++++--
 gcc/regs.h       |    5 +++
 gcc/targhooks.cc |   86 ++++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/gcc/reginfo.cc b/gcc/reginfo.cc
index 234f72eceeb25..67e30cab42855 100644
--- a/gcc/reginfo.cc
+++ b/gcc/reginfo.cc
@@ -441,10 +441,15 @@ init_reg_modes_target (void)
 {
   int i, j;
 
+  this_target_regs->x_hard_regno_max_nregs = 1;
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     for (j = 0; j < MAX_MACHINE_MODE; j++)
-      this_target_regs->x_hard_regno_nregs[i][j]
-	= targetm.hard_regno_nregs (i, (machine_mode) j);
+      {
+	unsigned char nregs = targetm.hard_regno_nregs (i, (machine_mode) j);
+	this_target_regs->x_hard_regno_nregs[i][j] = nregs;
+	if (nregs > this_target_regs->x_hard_regno_max_nregs)
+	  this_target_regs->x_hard_regno_max_nregs = nregs;
+      }
 
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     {
diff --git a/gcc/regs.h b/gcc/regs.h
index 74f1f63770322..f72b06fb56508 100644
--- a/gcc/regs.h
+++ b/gcc/regs.h
@@ -202,6 +202,9 @@ struct target_regs {
      registers that a given machine mode occupies.  */
   unsigned char x_hard_regno_nregs[FIRST_PSEUDO_REGISTER][MAX_MACHINE_MODE];
 
+  /* The max value found in x_hard_regno_nregs.  */
+  unsigned char x_hard_regno_max_nregs;
+
   /* For each hard register, the widest mode object that it can contain.
      This will be a MODE_INT mode if the register can hold integers.  Otherwise
      it will be a MODE_FLOAT or a MODE_CC mode, whichever is valid for the
@@ -235,6 +238,8 @@ extern struct target_regs *this_target_regs;
 #else
 #define this_target_regs (&default_target_regs)
 #endif
+#define hard_regno_max_nregs \
+  (this_target_regs->x_hard_regno_max_nregs)
 #define reg_raw_mode \
   (this_target_regs->x_reg_raw_mode)
 #define have_regs_of_mode \
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index fc49235eb38ee..2681833e2ce79 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1017,6 +1017,45 @@ default_function_value_regno_p (const unsigned int regno ATTRIBUTE_UNUSED)
 #endif
 }
 
+/* Choose the mode and rtx to use to zero REGNO, storing them in PMODE and
+   PREGNO_RTX and returning TRUE if successful, otherwise returning FALSE.  If
+   the natural mode for REGNO doesn't work, attempt to group it with subsequent
+   adjacent registers set in TOZERO.  */
+
+static inline bool
+zcur_select_mode_rtx (unsigned int regno, machine_mode *pmode,
+		      rtx *pregno_rtx, HARD_REG_SET tozero)
+{
+  rtx regno_rtx = regno_reg_rtx[regno];
+  machine_mode mode = GET_MODE (regno_rtx);
+
+  /* If the natural mode doesn't work, try some wider mode.  */
+  if (!targetm.hard_regno_mode_ok (regno, mode))
+    {
+      bool found = false;
+      for (int nregs = 2;
+	   !found && nregs <= hard_regno_max_nregs
+	     && regno + nregs <= FIRST_PSEUDO_REGISTER
+	     && TEST_HARD_REG_BIT (tozero,
+				   regno + nregs - 1);
+	   nregs++)
+	{
+	  mode = choose_hard_reg_mode (regno, nregs, 0);
+	  if (mode == E_VOIDmode)
+	    continue;
+	  gcc_checking_assert (targetm.hard_regno_mode_ok (regno, mode));
+	  regno_rtx = gen_rtx_REG (mode, regno);
+	  found = true;
+	}
+      if (!found)
+	return false;
+    }
+
+  *pmode = mode;
+  *pregno_rtx = regno_rtx;
+  return true;
+}
+
 /* The default hook for TARGET_ZERO_CALL_USED_REGS.  */
 
 HARD_REG_SET
@@ -1035,16 +1074,28 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
     if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
       {
 	rtx_insn *last_insn = get_last_insn ();
-	machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+	rtx regno_rtx;
+	machine_mode mode;
+
+	if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
+				   need_zeroed_hardregs))
+	  {
+	    SET_HARD_REG_BIT (failed, regno);
+	    continue;
+	  }
+
 	rtx zero = CONST0_RTX (mode);
-	rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
+	rtx_insn *insn = emit_move_insn (regno_rtx, zero);
 	if (!valid_insn_p (insn))
 	  {
 	    SET_HARD_REG_BIT (failed, regno);
 	    delete_insns_since (last_insn);
 	  }
 	else
-	  progress = true;
+	  {
+	    progress = true;
+	    regno += hard_regno_nregs (regno, mode) - 1;
+	  }
       }
 
   /* Now retry with copies from zeroed registers, as long as we've
@@ -1060,7 +1111,18 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
       for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 	if (TEST_HARD_REG_BIT (retrying, regno))
 	  {
-	    machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+	    rtx regno_rtx;
+	    machine_mode mode;
+
+	    /* This might select registers we've already zeroed.  If grouping
+	       with them is what it takes to get regno zeroed, so be it.  */
+	    if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
+				       need_zeroed_hardregs))
+	      {
+		SET_HARD_REG_BIT (failed, regno);
+		continue;
+	      }
+
 	    bool success = false;
 	    /* Look for a source.  */
 	    for (unsigned int src = 0; src < FIRST_PSEUDO_REGISTER; src++)
@@ -1086,8 +1148,11 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 
 		/* SRC is usable, try to copy from it.  */
 		rtx_insn *last_insn = get_last_insn ();
-		rtx zsrc = gen_rtx_REG (mode, src);
-		rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zsrc);
+		/* Try to avoid allocating RTL that we have handy.  */
+		rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
+			       ? regno_reg_rtx[src]
+			       : gen_rtx_REG (mode, src));
+		rtx_insn *insn = emit_move_insn (regno_rtx, src_rtx);
 		if (!valid_insn_p (insn))
 		  /* It didn't work, remove any inserts.  We'll look
 		     for another SRC.  */
@@ -1100,13 +1165,16 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 		  }
 	      }
 
-	    /* If nothing worked for REGNO this round, marked it to be
+	    /* If nothing worked for REGNO this round, mark it to be
 	       retried if we get another round.  */
 	    if (!success)
 	      SET_HARD_REG_BIT (failed, regno);
 	    else
-	      /* Take note so as to enable another round if needed.  */
-	      progress = true;
+	      {
+		/* Take note so as to enable another round if needed.  */
+		progress = true;
+		regno += hard_regno_nregs (regno, mode) - 1;
+	      }
 	  }
     }
  
Richard Sandiford April 4, 2022, 12:29 p.m. UTC | #3
Alexandre Oliva <oliva@adacore.com> writes:
> Hello, Richard,
>
> Thanks for the review!
>
> On Mar 31, 2022, Richard Sandiford <richard.sandiford@arm.com> wrote:
>
>>> +	/* If the natural mode doesn't work, try some wider mode.  */
>>> +	if (!targetm.hard_regno_mode_ok (regno, mode))
>>> +	  {
>>> +	    for (int nregs = 2;
>>> +		 regno + nregs <= FIRST_PSEUDO_REGISTER
>>> +		   && TEST_HARD_REG_BIT (need_zeroed_hardregs,
>>> +					 regno + nregs - 1);
>>> +		 nregs++)
>>> +	      {
>>> +		mode = choose_hard_reg_mode (regno, nregs, 0);
>
>> I like the idea, but it would be good to avoid the large:
>
>>   FIRST_PSEUDO_REGISTER * FIRST_PSEUDO_REGISTER * NUM_MACHINE_MODES
>
>> constant factor.
>
> Enteringf the nregs loop, because the register can't be used in its
> natural mode, is supposed to be an unusual case, not worth optimizing
> much under Amdahl's law.  I gather the aggregate trip counts are
> unlikely to hit the theoretical O(n^2) because registers that would take
> the loop are rare and expected to be paired/grouped up.  If that
> assumption doesn't hold, then a cap would indeed be desirable.
>
>> How about if init_reg_modes_target recorded the maximum value of
>> x_hard_regno_nregs?
>
> I had thought of a cap but couldn't find one I was happy with, and in
> the end I thought we didn't need one.  But this is indeed a good one to
> use.  Thanks, I'm implementing it.
>
>> This seems big enough to be worth splitting out into a helper, rather
>> than repeating.
>
> I had considered that, but it didn't seem to me it would bring an
> improvement.  As it turns out, it does.  Thanks.
>
>>> -		rtx zsrc = gen_rtx_REG (mode, src);
>>> +		rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
>>> +			       ? regno_reg_rtx[src]
>>> +			       : gen_rtx_REG (mode, src));
>
>> Is this needed?  The original gen_rtx_REG (mode, src) seems OK.
>
> No, it's not needed, it's just an attempt to avoid allocating RTL that
> we have handy.  This function could in theory make several attempts at
> allocating rtl for each register in the shrinking pending set.  I
> thought every saved bit could help.

But if that's true, it should happen in gen_rtx_REG.  It already has:

#if 0
  /* If the per-function register table has been set up, try to re-use
     an existing entry in that table to avoid useless generation of RTL.

     This code is disabled for now until we can fix the various backends
     which depend on having non-shared hard registers in some cases.   Long
     term we want to re-enable this code as it can significantly cut down
     on the amount of useless RTL that gets generated.

     We'll also need to fix some code that runs after reload that wants to
     set ORIGINAL_REGNO.  */

  if (cfun
      && cfun->emit
      && regno_reg_rtx
      && regno < FIRST_PSEUDO_REGISTER
      && reg_raw_mode[regno] == mode)
    return regno_reg_rtx[regno];
#endif

Having the special case here in targhooks.c would set a precedent that
efficiency-conscious callers should always do the ?: rather than call
gen_rtx_REG directly.  Keeping the code in gen_rtx_REG means that we can
flip the switch when backends have been fixed (maybe they already have).

OK without the introduction of the ?:, thanks.

Richard

>
>
> Here's what I'm regstrapping on x86_64-linux-gnu, after verifying that
> it does the job on the affected arm variant.  Ok to install, assuming no
> surprises in the testing?
>
>
> try multi-reg dest in default_zero_call_used_regs
>
> From: Alexandre Oliva <oliva@adacore.com>
>
> When the mode of regno_reg_rtx is not hard_regno_mode_ok for the
> target, try grouping the register with subsequent ones.  This enables
> s16 to s31 and their hidden pairs to be zeroed with the default logic
> on some arm variants.
>
>
> for  gcc/ChangeLog
>
> 	* targhooks.c (default_zero_call_used_regs): Attempt to group
> 	regs that the target refuses to use in their natural modes.
> 	(zcur_select_mode_rtx): New.
> 	* regs.h (struct target_regs): Add x_hard_regno_max_nregs.
> 	(hard_regno_max_nregs): Define.
> 	* reginfo.c (init_reg_modes_target): Set hard_regno_max_nregs.
> ---
>  gcc/reginfo.cc   |    9 ++++--
>  gcc/regs.h       |    5 +++
>  gcc/targhooks.cc |   86 ++++++++++++++++++++++++++++++++++++++++++++++++------
>  3 files changed, 89 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/reginfo.cc b/gcc/reginfo.cc
> index 234f72eceeb25..67e30cab42855 100644
> --- a/gcc/reginfo.cc
> +++ b/gcc/reginfo.cc
> @@ -441,10 +441,15 @@ init_reg_modes_target (void)
>  {
>    int i, j;
>  
> +  this_target_regs->x_hard_regno_max_nregs = 1;
>    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>      for (j = 0; j < MAX_MACHINE_MODE; j++)
> -      this_target_regs->x_hard_regno_nregs[i][j]
> -	= targetm.hard_regno_nregs (i, (machine_mode) j);
> +      {
> +	unsigned char nregs = targetm.hard_regno_nregs (i, (machine_mode) j);
> +	this_target_regs->x_hard_regno_nregs[i][j] = nregs;
> +	if (nregs > this_target_regs->x_hard_regno_max_nregs)
> +	  this_target_regs->x_hard_regno_max_nregs = nregs;
> +      }
>  
>    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>      {
> diff --git a/gcc/regs.h b/gcc/regs.h
> index 74f1f63770322..f72b06fb56508 100644
> --- a/gcc/regs.h
> +++ b/gcc/regs.h
> @@ -202,6 +202,9 @@ struct target_regs {
>       registers that a given machine mode occupies.  */
>    unsigned char x_hard_regno_nregs[FIRST_PSEUDO_REGISTER][MAX_MACHINE_MODE];
>  
> +  /* The max value found in x_hard_regno_nregs.  */
> +  unsigned char x_hard_regno_max_nregs;
> +
>    /* For each hard register, the widest mode object that it can contain.
>       This will be a MODE_INT mode if the register can hold integers.  Otherwise
>       it will be a MODE_FLOAT or a MODE_CC mode, whichever is valid for the
> @@ -235,6 +238,8 @@ extern struct target_regs *this_target_regs;
>  #else
>  #define this_target_regs (&default_target_regs)
>  #endif
> +#define hard_regno_max_nregs \
> +  (this_target_regs->x_hard_regno_max_nregs)
>  #define reg_raw_mode \
>    (this_target_regs->x_reg_raw_mode)
>  #define have_regs_of_mode \
> diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> index fc49235eb38ee..2681833e2ce79 100644
> --- a/gcc/targhooks.cc
> +++ b/gcc/targhooks.cc
> @@ -1017,6 +1017,45 @@ default_function_value_regno_p (const unsigned int regno ATTRIBUTE_UNUSED)
>  #endif
>  }
>  
> +/* Choose the mode and rtx to use to zero REGNO, storing tem in PMODE and
> +   PREGNO_RTX and returning TRUE if successful, otherwise returning FALSE.  If
> +   the natural mode for REGNO doesn't work, attempt to group it with subsequent
> +   adjacent registers set in TOZERO.  */
> +
> +static inline bool
> +zcur_select_mode_rtx (unsigned int regno, machine_mode *pmode,
> +		      rtx *pregno_rtx, HARD_REG_SET tozero)
> +{
> +  rtx regno_rtx = regno_reg_rtx[regno];
> +  machine_mode mode = GET_MODE (regno_rtx);
> +
> +  /* If the natural mode doesn't work, try some wider mode.  */
> +  if (!targetm.hard_regno_mode_ok (regno, mode))
> +    {
> +      bool found = false;
> +      for (int nregs = 2;
> +	   !found && nregs <= hard_regno_max_nregs
> +	     && regno + nregs <= FIRST_PSEUDO_REGISTER
> +	     && TEST_HARD_REG_BIT (tozero,
> +				   regno + nregs - 1);
> +	   nregs++)
> +	{
> +	  mode = choose_hard_reg_mode (regno, nregs, 0);
> +	  if (mode == E_VOIDmode)
> +	    continue;
> +	  gcc_checking_assert (targetm.hard_regno_mode_ok (regno, mode));
> +	  regno_rtx = gen_rtx_REG (mode, regno);
> +	  found = true;
> +	}
> +      if (!found)
> +	return false;
> +    }
> +
> +  *pmode = mode;
> +  *pregno_rtx = regno_rtx;
> +  return true;
> +}
> +
>  /* The default hook for TARGET_ZERO_CALL_USED_REGS.  */
>  
>  HARD_REG_SET
> @@ -1035,16 +1074,28 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>      if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
>        {
>  	rtx_insn *last_insn = get_last_insn ();
> -	machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
> +	rtx regno_rtx;
> +	machine_mode mode;
> +
> +	if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
> +				   need_zeroed_hardregs))
> +	  {
> +	    SET_HARD_REG_BIT (failed, regno);
> +	    continue;
> +	  }
> +
>  	rtx zero = CONST0_RTX (mode);
> -	rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
> +	rtx_insn *insn = emit_move_insn (regno_rtx, zero);
>  	if (!valid_insn_p (insn))
>  	  {
>  	    SET_HARD_REG_BIT (failed, regno);
>  	    delete_insns_since (last_insn);
>  	  }
>  	else
> -	  progress = true;
> +	  {
> +	    progress = true;
> +	    regno += hard_regno_nregs (regno, mode) - 1;
> +	  }
>        }
>  
>    /* Now retry with copies from zeroed registers, as long as we've
> @@ -1060,7 +1111,18 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>        for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>  	if (TEST_HARD_REG_BIT (retrying, regno))
>  	  {
> -	    machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
> +	    rtx regno_rtx;
> +	    machine_mode mode;
> +
> +	    /* This might select registers we've already zeroed.  If grouping
> +	       with them is what it takes to get regno zeroed, so be it.  */
> +	    if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
> +				       need_zeroed_hardregs))
> +	      {
> +		SET_HARD_REG_BIT (failed, regno);
> +		continue;
> +	      }
> +
>  	    bool success = false;
>  	    /* Look for a source.  */
>  	    for (unsigned int src = 0; src < FIRST_PSEUDO_REGISTER; src++)
> @@ -1086,8 +1148,11 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>  
>  		/* SRC is usable, try to copy from it.  */
>  		rtx_insn *last_insn = get_last_insn ();
> -		rtx zsrc = gen_rtx_REG (mode, src);
> -		rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zsrc);
> +		/* Try to avoid allocating RTL that we have handy.  */
> +		rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
> +			       ? regno_reg_rtx[src]
> +			       : gen_rtx_REG (mode, src));
> +		rtx_insn *insn = emit_move_insn (regno_rtx, src_rtx);
>  		if (!valid_insn_p (insn))
>  		  /* It didn't work, remove any inserts.  We'll look
>  		     for another SRC.  */
> @@ -1100,13 +1165,16 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
>  		  }
>  	      }
>  
> -	    /* If nothing worked for REGNO this round, marked it to be
> +	    /* If nothing worked for REGNO this round, mark it to be
>  	       retried if we get another round.  */
>  	    if (!success)
>  	      SET_HARD_REG_BIT (failed, regno);
>  	    else
> -	      /* Take note so as to enable another round if needed.  */
> -	      progress = true;
> +	      {
> +		/* Take note so as to enable another round if needed.  */
> +		progress = true;
> +		regno += hard_regno_nregs (regno, mode) - 1;
> +	      }
>  	  }
>      }
  
Alexandre Oliva April 5, 2022, 4:26 a.m. UTC | #4
On Apr  4, 2022, Richard Sandiford <richard.sandiford@arm.com> wrote:

> But if that's true, it should happen in gen_rtx_REG.

Yeah, I agree, that makes sense.

> OK without the introduction of the ?:, thanks.

Thanks, here's what I'm checking in.


try multi-reg dest in default_zero_call_used_regs

From: Alexandre Oliva <oliva@adacore.com>

When the mode of regno_reg_rtx is not hard_regno_mode_ok for the
target, try grouping the register with subsequent ones.  This enables
s16 to s31 and their hidden pairs to be zeroed with the default logic
on some arm variants.


for  gcc/ChangeLog

	* targhooks.cc (default_zero_call_used_regs): Attempt to group
	regs that the target refuses to use in their natural modes.
	(zcur_select_mode_rtx): New.
	* regs.h (struct target_regs): Add x_hard_regno_max_nregs.
	(hard_regno_max_nregs): Define.
	* reginfo.cc (init_reg_modes_target): Set hard_regno_max_nregs.
---
 gcc/reginfo.cc   |    9 +++++-
 gcc/regs.h       |    5 +++
 gcc/targhooks.cc |   83 ++++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 86 insertions(+), 11 deletions(-)

diff --git a/gcc/reginfo.cc b/gcc/reginfo.cc
index 234f72eceeb25..67e30cab42855 100644
--- a/gcc/reginfo.cc
+++ b/gcc/reginfo.cc
@@ -441,10 +441,15 @@ init_reg_modes_target (void)
 {
   int i, j;
 
+  this_target_regs->x_hard_regno_max_nregs = 1;
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     for (j = 0; j < MAX_MACHINE_MODE; j++)
-      this_target_regs->x_hard_regno_nregs[i][j]
-	= targetm.hard_regno_nregs (i, (machine_mode) j);
+      {
+	unsigned char nregs = targetm.hard_regno_nregs (i, (machine_mode) j);
+	this_target_regs->x_hard_regno_nregs[i][j] = nregs;
+	if (nregs > this_target_regs->x_hard_regno_max_nregs)
+	  this_target_regs->x_hard_regno_max_nregs = nregs;
+      }
 
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     {
diff --git a/gcc/regs.h b/gcc/regs.h
index 74f1f63770322..f72b06fb56508 100644
--- a/gcc/regs.h
+++ b/gcc/regs.h
@@ -202,6 +202,9 @@ struct target_regs {
      registers that a given machine mode occupies.  */
   unsigned char x_hard_regno_nregs[FIRST_PSEUDO_REGISTER][MAX_MACHINE_MODE];
 
+  /* The max value found in x_hard_regno_nregs.  */
+  unsigned char x_hard_regno_max_nregs;
+
   /* For each hard register, the widest mode object that it can contain.
      This will be a MODE_INT mode if the register can hold integers.  Otherwise
      it will be a MODE_FLOAT or a MODE_CC mode, whichever is valid for the
@@ -235,6 +238,8 @@ extern struct target_regs *this_target_regs;
 #else
 #define this_target_regs (&default_target_regs)
 #endif
+#define hard_regno_max_nregs \
+  (this_target_regs->x_hard_regno_max_nregs)
 #define reg_raw_mode \
   (this_target_regs->x_reg_raw_mode)
 #define have_regs_of_mode \
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index fc49235eb38ee..e22bc66a6c896 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1017,6 +1017,45 @@ default_function_value_regno_p (const unsigned int regno ATTRIBUTE_UNUSED)
 #endif
 }
 
+/* Choose the mode and rtx to use to zero REGNO, storing them in PMODE and
+   PREGNO_RTX and returning TRUE if successful, otherwise returning FALSE.  If
+   the natural mode for REGNO doesn't work, attempt to group it with subsequent
+   adjacent registers set in TOZERO.  */
+
+static inline bool
+zcur_select_mode_rtx (unsigned int regno, machine_mode *pmode,
+		      rtx *pregno_rtx, HARD_REG_SET tozero)
+{
+  rtx regno_rtx = regno_reg_rtx[regno];
+  machine_mode mode = GET_MODE (regno_rtx);
+
+  /* If the natural mode doesn't work, try some wider mode.  */
+  if (!targetm.hard_regno_mode_ok (regno, mode))
+    {
+      bool found = false;
+      for (int nregs = 2;
+	   !found && nregs <= hard_regno_max_nregs
+	     && regno + nregs <= FIRST_PSEUDO_REGISTER
+	     && TEST_HARD_REG_BIT (tozero,
+				   regno + nregs - 1);
+	   nregs++)
+	{
+	  mode = choose_hard_reg_mode (regno, nregs, 0);
+	  if (mode == E_VOIDmode)
+	    continue;
+	  gcc_checking_assert (targetm.hard_regno_mode_ok (regno, mode));
+	  regno_rtx = gen_rtx_REG (mode, regno);
+	  found = true;
+	}
+      if (!found)
+	return false;
+    }
+
+  *pmode = mode;
+  *pregno_rtx = regno_rtx;
+  return true;
+}
+
 /* The default hook for TARGET_ZERO_CALL_USED_REGS.  */
 
 HARD_REG_SET
@@ -1035,16 +1074,28 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
     if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
       {
 	rtx_insn *last_insn = get_last_insn ();
-	machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+	rtx regno_rtx;
+	machine_mode mode;
+
+	if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
+				   need_zeroed_hardregs))
+	  {
+	    SET_HARD_REG_BIT (failed, regno);
+	    continue;
+	  }
+
 	rtx zero = CONST0_RTX (mode);
-	rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
+	rtx_insn *insn = emit_move_insn (regno_rtx, zero);
 	if (!valid_insn_p (insn))
 	  {
 	    SET_HARD_REG_BIT (failed, regno);
 	    delete_insns_since (last_insn);
 	  }
 	else
-	  progress = true;
+	  {
+	    progress = true;
+	    regno += hard_regno_nregs (regno, mode) - 1;
+	  }
       }
 
   /* Now retry with copies from zeroed registers, as long as we've
@@ -1060,7 +1111,18 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
       for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 	if (TEST_HARD_REG_BIT (retrying, regno))
 	  {
-	    machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+	    rtx regno_rtx;
+	    machine_mode mode;
+
+	    /* This might select registers we've already zeroed.  If grouping
+	       with them is what it takes to get regno zeroed, so be it.  */
+	    if (!zcur_select_mode_rtx (regno, &mode, &regno_rtx,
+				       need_zeroed_hardregs))
+	      {
+		SET_HARD_REG_BIT (failed, regno);
+		continue;
+	      }
+
 	    bool success = false;
 	    /* Look for a source.  */
 	    for (unsigned int src = 0; src < FIRST_PSEUDO_REGISTER; src++)
@@ -1086,8 +1148,8 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 
 		/* SRC is usable, try to copy from it.  */
 		rtx_insn *last_insn = get_last_insn ();
-		rtx zsrc = gen_rtx_REG (mode, src);
-		rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zsrc);
+		rtx src_rtx = gen_rtx_REG (mode, src);
+		rtx_insn *insn = emit_move_insn (regno_rtx, src_rtx);
 		if (!valid_insn_p (insn))
 		  /* It didn't work, remove any inserts.  We'll look
 		     for another SRC.  */
@@ -1100,13 +1162,16 @@ default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 		  }
 	      }
 
-	    /* If nothing worked for REGNO this round, marked it to be
+	    /* If nothing worked for REGNO this round, mark it to be
 	       retried if we get another round.  */
 	    if (!success)
 	      SET_HARD_REG_BIT (failed, regno);
 	    else
-	      /* Take note so as to enable another round if needed.  */
-	      progress = true;
+	      {
+		/* Take note so as to enable another round if needed.  */
+		progress = true;
+		regno += hard_regno_nregs (regno, mode) - 1;
+	      }
 	  }
     }
  

Patch

diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index fc49235eb38ee..bdaab9c63c7ee 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -1035,16 +1035,45 @@  default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
     if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
       {
 	rtx_insn *last_insn = get_last_insn ();
-	machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+	rtx regno_rtx = regno_reg_rtx[regno];
+	machine_mode mode = GET_MODE (regno_rtx);
+
+	/* If the natural mode doesn't work, try some wider mode.  */
+	if (!targetm.hard_regno_mode_ok (regno, mode))
+	  {
+	    for (int nregs = 2;
+		 regno + nregs <= FIRST_PSEUDO_REGISTER
+		   && TEST_HARD_REG_BIT (need_zeroed_hardregs,
+					 regno + nregs - 1);
+		 nregs++)
+	      {
+		mode = choose_hard_reg_mode (regno, nregs, 0);
+		if (mode == E_VOIDmode)
+		  continue;
+		gcc_checking_assert (targetm.hard_regno_mode_ok (regno, mode));
+		regno_rtx = gen_rtx_REG (mode, regno);
+		break;
+	      }
+	    if (mode != GET_MODE (regno_rtx)
+		|| regno_rtx == regno_reg_rtx[regno])
+	      {
+		SET_HARD_REG_BIT (failed, regno);
+		continue;
+	      }
+	  }
+
 	rtx zero = CONST0_RTX (mode);
-	rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zero);
+	rtx_insn *insn = emit_move_insn (regno_rtx, zero);
 	if (!valid_insn_p (insn))
 	  {
 	    SET_HARD_REG_BIT (failed, regno);
 	    delete_insns_since (last_insn);
 	  }
 	else
-	  progress = true;
+	  {
+	    progress = true;
+	    regno += hard_regno_nregs (regno, mode) - 1;
+	  }
       }
 
   /* Now retry with copies from zeroed registers, as long as we've
@@ -1060,7 +1089,34 @@  default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
       for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 	if (TEST_HARD_REG_BIT (retrying, regno))
 	  {
-	    machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
+	    rtx regno_rtx = regno_reg_rtx[regno];
+	    machine_mode mode = GET_MODE (regno_rtx);
+
+	    /* If the natural mode doesn't work, try some wider mode.  */
+	    if (!targetm.hard_regno_mode_ok (regno, mode))
+	      {
+		for (int nregs = 2;
+		     regno + nregs <= FIRST_PSEUDO_REGISTER
+		       && TEST_HARD_REG_BIT (need_zeroed_hardregs,
+					     regno + nregs - 1);
+		     nregs++)
+		  {
+		    mode = choose_hard_reg_mode (regno, nregs, 0);
+		    if (mode == E_VOIDmode)
+		      continue;
+		    gcc_checking_assert (targetm.hard_regno_mode_ok (regno,
+								     mode));
+		    regno_rtx = gen_rtx_REG (mode, regno);
+		    break;
+		  }
+		if (mode != GET_MODE (regno_rtx)
+		    || regno_rtx == regno_reg_rtx[regno])
+		  {
+		    SET_HARD_REG_BIT (failed, regno);
+		    continue;
+		  }
+	      }
+	    
 	    bool success = false;
 	    /* Look for a source.  */
 	    for (unsigned int src = 0; src < FIRST_PSEUDO_REGISTER; src++)
@@ -1086,8 +1142,10 @@  default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 
 		/* SRC is usable, try to copy from it.  */
 		rtx_insn *last_insn = get_last_insn ();
-		rtx zsrc = gen_rtx_REG (mode, src);
-		rtx_insn *insn = emit_move_insn (regno_reg_rtx[regno], zsrc);
+		rtx src_rtx = (mode == GET_MODE (regno_reg_rtx[src])
+			       ? regno_reg_rtx[src]
+			       : gen_rtx_REG (mode, src));
+		rtx_insn *insn = emit_move_insn (regno_rtx, src_rtx);
 		if (!valid_insn_p (insn))
 		  /* It didn't work, remove any inserts.  We'll look
 		     for another SRC.  */
@@ -1100,13 +1158,16 @@  default_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 		  }
 	      }
 
-	    /* If nothing worked for REGNO this round, marked it to be
+	    /* If nothing worked for REGNO this round, mark it to be
 	       retried if we get another round.  */
 	    if (!success)
 	      SET_HARD_REG_BIT (failed, regno);
 	    else
-	      /* Take note so as to enable another round if needed.  */
-	      progress = true;
+	      {
+		/* Take note so as to enable another round if needed.  */
+		progress = true;
+		regno += hard_regno_nregs (regno, mode) - 1;
+	      }
 	  }
     }