PR target/56102: Improve rtx_costs from -mthumb on ARM.

Message ID 001401dcf2c0$91ce6ee0$b56b4ca0$@nextmovesoftware.com
State New
Headers
Series PR target/56102: Improve rtx_costs from -mthumb on ARM. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap success Build passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap success Build passed

Commit Message

Roger Sayle June 2, 2026, 6:49 p.m. UTC
  This patch provides improved (more accurate) RTX costs for -mthumb on ARM.
My recent patch for double word multiplication, PR 122871, revealed that
the current costs for THUMB code on ARM are... let's say a little dubious.

To demonstrate the code generation improvements provided by better
thumb1_rtx_costs, consider the function below (from PR middle-end/122871).

long long foo (long long a)
{
  long long c = a << 33;
  c += a;
  return c;
}

With the ARM backend's current costs, this produces 11 instructions with
-O2 -mthumb.

Before: movs    r3, r0
        movs    r2, #0
        adds    r2, r2, r0
        adcs    r3, r3, r1
        adds    r2, r2, r2
        adcs    r3, r3, r3
        subs    r2, r2, r0
        sbcs    r3, r3, r1
        movs    r0, r2
        movs    r1, r3
        bx      lr

With sane RTX costs, GCC now generates the much more reasonable 5 insns:

After:  movs    r2, #0
        lsls    r3, r0, #1
        adds    r0, r0, r2
        adcs    r1, r1, r3
        bx      lr


Tested on arm-unknown-linux-gnueabihf with make bootstrap and make -k check
with no new failures.  I also noticed that arm.cc triggers a compiler warning
when GCC 11 is used as the host compiler; this is trivially silenced with a
one-line change.  Ok for mainline?


2026-06-02  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        PR target/56102
        PR middle-end/122871
        * config/arm/arm.cc (thumb1_rtx_costs): Provide reasonable costs
        for PLUS, MINUS, COMPARE, AND, XOR, IOR, NEG, NOT, ASHIFT,
        ASHIFTRT, LSHIFTRT and ROTATERT for SImode, DImode, HImode and
        QImode.
        (thumb1_size_rtx_costs): Likewise.

        (comp_not_to_clear_mask_str_un): Silence host compiler warning.


Thanks in advance,
Roger
--
  

Comments

Richard Earnshaw June 3, 2026, 2:25 p.m. UTC | #1
On 02/06/2026 19:49, Roger Sayle wrote:
> 
> This patch provides improved (more accurate) RTX costs for -mthumb on ARM.
> My recent patch for double word multiplication, PR 122871, revealed that
> the current costs for THUMB code on ARM are... let's say a little dubious.
> 
> To demonstrate the code generation improvements provided by better
> thumb1_rtx_costs consider the function below (from PR middle-end/122871).
> 
> long long foo (long long a)
> {
>   long long c = a << 33;
>   c += a;
>   return c;
> }
> 
> With the ARM backend's current costs, this produces 11 instructions with
> -O2 -mthumb.
> 
> Before: movs    r3, r0
>         movs    r2, #0
>         adds    r2, r2, r0
>         adcs    r3, r3, r1
>         adds    r2, r2, r2
>         adcs    r3, r3, r3
>         subs    r2, r2, r0
>         sbcs    r3, r3, r1
>         movs    r0, r2
>         movs    r1, r3
>         bx      lr
> 
> With sane RTX costs, GCC now generates the much more reasonable 5 insns:
> 
> After:  movs    r2, #0
>         lsls    r3, r0, #1
>         adds    r0, r0, r2
>         adcs    r1, r1, r3
>         bx      lr
> 
> 
> Tested on arm-unknown-linux-gnueabihf with make bootstrap and make -k check
> with no new failures.  I also noticed that arm.cc has a compiler warning
> when using GCC 11 as the host compiler; trivially silenced with a one line
> change.  Ok for mainline?
> 
> 
> 2026-06-02  Roger Sayle  <roger@nextmovesoftware.com>
> 
> gcc/ChangeLog
>         PR target/56102
>         PR middle-end/122871
>         * config/arm/arm.cc (thumb1_rtx_costs): Provide reasonable costs
>         for PLUS, MINUS, COMPARE, AND, XOR, IOR, NEG, NOT, ASHIFT,
>         ASHIFTRT and ROTATERT for SImode, DImode, HImode and QImode.
>         (thumb1_size_rtx_costs): Likewise.
> 
>         (comp_not_to_clear_mask_str_un): Silence host compiler warning.
> 
> 
> Thanks in advance,
> Roger
> --
> 

Thanks.

This is OK.

We could do a little better for DImode shifts by a constant >=32, since we know in that case that we don't have to merge partial results from two sources.  But the code you've posted is still an improvement.

E.g.:

uint64_t x;

x <<= 33;

becomes
LSLS Xhi, Xlo, #1
MOVS Xlo, #0

(2 insns instead of 4).

R.
  

Patch

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 41808e42ab1..6da4d7eed25 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -9812,18 +9812,127 @@  thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
 
   switch (code)
     {
-    case ASHIFT:
-    case ASHIFTRT:
-    case LSHIFTRT:
-    case ROTATERT:
-      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
-
     case PLUS:
     case MINUS:
     case COMPARE:
-    case NEG:
+    case AND:
+    case XOR:
+    case IOR:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  total = COSTS_N_INSNS (1);
+	  if (GET_CODE (XEXP (x, 0)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+	    total += COSTS_N_INSNS (1);
+	  if (GET_CODE (XEXP (x, 1)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+	    total += COSTS_N_INSNS (1);
+	  return total;
+	case E_DImode:
+	  total = COSTS_N_INSNS (2);
+	  if (GET_CODE (XEXP (x, 0)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+	    total += COSTS_N_INSNS (4);
+	  if (GET_CODE (XEXP (x, 1)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+	    total += COSTS_N_INSNS (4);
+	  return total;
+	case E_HImode:
+	case E_QImode:
+	  total = COSTS_N_INSNS (3);
+	  if (GET_CODE (XEXP (x, 0)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+	    total += COSTS_N_INSNS (3);
+	  if (GET_CODE (XEXP (x, 1)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+	    total += COSTS_N_INSNS (3);
+	  return total;
+	}
+      break;
+
     case NOT:
-      return COSTS_N_INSNS (1);
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (2);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (3);
+	}
+      break;
+
+    case NEG:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (3);
+	}
+      break;
+
+    case ASHIFT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (4);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (3);
+	}
+      break;
+
+    case ASHIFTRT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (4);
+	}
+      break;
+
+    case LSHIFTRT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (4);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (2);
+	}
+      break;
+
+    case ROTATERT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (2);
+	case E_DImode:
+	  return COSTS_N_INSNS (6);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (5);
+	}
+      break;
 
     case MULT:
       if (arm_arch6m && arm_m_profile_small_mul)
@@ -9899,12 +10008,6 @@  thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
     case TRUNCATE:
       return 99;
 
-    case AND:
-    case XOR:
-    case IOR:
-      /* XXX guess.  */
-      return 8;
-
     case MEM:
       /* XXX another guess.  */
       /* Memory costs quite a lot for the first word, but subsequent words
@@ -9950,27 +10053,127 @@  thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
 
   switch (code)
     {
-    case ASHIFT:
-    case ASHIFTRT:
-    case LSHIFTRT:
-    case ROTATERT:
-      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
-
     case PLUS:
     case MINUS:
-      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
-	 defined by RTL expansion, especially for the expansion of
-	 multiplication.  */
-      if ((GET_CODE (XEXP (x, 0)) == MULT
-	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
-	  || (GET_CODE (XEXP (x, 1)) == MULT
-	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
-	return COSTS_N_INSNS (2);
-      /* Fall through.  */
     case COMPARE:
-    case NEG:
+    case AND:
+    case XOR:
+    case IOR:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  cost = COSTS_N_INSNS (1);
+	  if (GET_CODE (XEXP (x, 0)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+	    cost += COSTS_N_INSNS (1);
+	  if (GET_CODE (XEXP (x, 1)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+	    cost += COSTS_N_INSNS (1);
+	  return cost;
+	case E_DImode:
+	  cost = COSTS_N_INSNS (2);
+	  if (GET_CODE (XEXP (x, 0)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+	    cost += COSTS_N_INSNS (4);
+	  if (GET_CODE (XEXP (x, 1)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+	    cost += COSTS_N_INSNS (4);
+	  return cost;
+	case E_HImode:
+	case E_QImode:
+	  cost = COSTS_N_INSNS (3);
+	  if (GET_CODE (XEXP (x, 0)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+	    cost += COSTS_N_INSNS (3);
+	  if (GET_CODE (XEXP (x, 1)) == MULT
+	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+	    cost += COSTS_N_INSNS (3);
+	  return cost;
+	}
+      break;
+
     case NOT:
-      return COSTS_N_INSNS (1);
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (2);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (3);
+	}
+      break;
+
+    case NEG:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (3);
+	}
+      break;
+
+    case ASHIFT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (4);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (3);
+	}
+      break;
+
+    case ASHIFTRT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (4);
+	}
+      break;
+
+    case LSHIFTRT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (1);
+	case E_DImode:
+	  return COSTS_N_INSNS (4);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (2);
+	}
+      break;
+
+    case ROTATERT:
+      switch (mode)
+	{
+	default:
+	case E_SImode:
+	  return COSTS_N_INSNS (2);
+	case E_DImode:
+	  return COSTS_N_INSNS (6);
+	case E_HImode:
+	case E_QImode:
+	  return COSTS_N_INSNS (5);
+	}
+      break;
 
     case MULT:
       if (CONST_INT_P (XEXP (x, 1)))
@@ -10058,11 +10261,6 @@  thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
     case TRUNCATE:
       return 99;
 
-    case AND:
-    case XOR:
-    case IOR:
-      return COSTS_N_INSNS (1);
-
     case MEM:
       return (COSTS_N_INSNS (1)
 	      + COSTS_N_INSNS (1)
@@ -18609,7 +18807,7 @@  comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
       int max_bit = -1;
       uint32_t mask;
       uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
-	= {-1, -1, -1, -1};
+	= {~0U, ~0U, ~0U, ~0U};
 
       /* To compute the padding bits in a union we only consider bits as
 	 padding bits if they are always either a padding bit or fall outside a