PR target/56102: Improve rtx_costs for -mthumb on ARM.
Checks
| Context | Check | Description |
|---|---|---|
| linaro-tcwg-bot/tcwg_gcc_build--master-arm | success | Build passed |
| linaro-tcwg-bot/tcwg_gcc_check--master-arm | success | Test passed |
| linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 | success | Build passed |
| linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap | success | Build passed |
| linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap | success | Build passed |
Commit Message
This patch provides improved (more accurate) RTX costs for -mthumb on ARM.
My recent patch for double word multiplication, PR 122871, revealed that
the current costs for THUMB code on ARM are... let's say a little dubious.
To demonstrate the code generation improvements provided by better
thumb1_rtx_costs consider the function below (from PR middle-end/122871).
/* Test case from PR middle-end/122871: return A plus A shifted left
   by 33 bits.  */
long long foo (long long a)
{
  long long shifted = a << 33;
  return shifted + a;
}
With the ARM backend's current costs, this produces 11 instructions with
-O2 -mthumb.
Before: movs    r3, r0
        movs    r2, #0
        adds    r2, r2, r0
        adcs    r3, r3, r1
        adds    r2, r2, r2
        adcs    r3, r3, r3
        subs    r2, r2, r0
        sbcs    r3, r3, r1
        movs    r0, r2
        movs    r1, r3
        bx      lr
With sane RTX costs, GCC now generates the much more reasonable 5 insns:
After:  movs    r2, #0
        lsls    r3, r0, #1
        adds    r0, r0, r2
        adcs    r1, r1, r3
        bx      lr
Tested on arm-unknown-linux-gnueabihf with make bootstrap and make -k check
with no new failures. I also noticed that arm.cc has a compiler warning
when using GCC 11 as the host compiler; trivially silenced with a one line
change. Ok for mainline?
2026-06-02 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
PR target/56102
PR middle-end/122871
* config/arm/arm.cc (thumb1_rtx_costs): Provide reasonable costs
for PLUS, MINUS, COMPARE, AND, XOR, IOR, NEG, NOT, ASHIFT,
ASHIFTRT, LSHIFTRT and ROTATERT for SImode, DImode, HImode and
QImode.
(thumb1_size_rtx_costs): Likewise.
(comp_not_to_clear_mask_str_un): Silence host compiler warning.
Thanks in advance,
Roger
--
Comments
On 02/06/2026 19:49, Roger Sayle wrote:
>
> This patch provides improved (more accurate) RTX costs for -mthumb on ARM.
> My recent patch for double word multiplication, PR 122871, revealed that
> the current costs for THUMB code on ARM are... let's say a little dubious.
>
> To demonstrate the code generation improvements provided by better
> thumb1_rtx_costs consider the function below (from PR middle-end/122871).
>
> long long foo (long long a)
> {
> long long c = a << 33;
> c += a;
> return c;
> }
>
> With the ARM backend's current costs, this produces 11 instructions with
> -O2 -mthumb.
>
> Before: movs r3, r0
> movs r2, #0
> adds r2, r2, r0
> adcs r3, r3, r1
> adds r2, r2, r2
> adcs r3, r3, r3
> subs r2, r2, r0
> sbcs r3, r3, r1
> movs r0, r2
> movs r1, r3
> bx lr
>
> With sane RTX costs, GCC now generates the much more reasonable 5 insns:
>
> After: movs r2, #0
> lsls r3, r0, #1
> adds r0, r0, r2
> adcs r1, r1, r3
> bx lr
>
>
> Tested on arm-unknown-linux-gnueabihf with make bootstrap and make -k check
> with no new failures. I also noticed that arm.cc has a compiler warning
> when using GCC 11 as the host compiler; trivially silenced with a one line
> change. Ok for mainline?
>
>
> 2026-06-02 Roger Sayle <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
> PR target/56102
> PR middle-end/122871
> * config/arm/arm.cc (thumb1_rtx_costs): Provide reasonable costs
> for PLUS, MINUS, COMPARE, AND, XOR, IOR, NEG, NOT, ASHIFT,
> ASHIFTRT and ROTATERT for SImode, DImode, HImode and QImode.
> (thumb1_size_rtx_costs): Likewise.
>
> (comp_not_to_clear_mask_str_un): Silence host compiler warning.
>
>
> Thanks in advance,
> Roger
> --
>
Thanks.
This is OK.
We could do a little better for DImode shifts by a constant >=32, since we know in that case that we don't have to merge partial results from two sources. But the code you've posted is still an improvement.
For example:
uint64_t x;
x <<= 33;
becomes
LSLS Xhi, Xlo, #1
MOVS Xlo, #0
(2 insns instead of 4).
R.
@@ -9812,18 +9812,127 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
switch (code)
{
- case ASHIFT:
- case ASHIFTRT:
- case LSHIFTRT:
- case ROTATERT:
- return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
-
case PLUS:
case MINUS:
case COMPARE:
- case NEG:
+ case AND:
+ case XOR:
+ case IOR:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+ total += COSTS_N_INSNS (1);
+ return total;
+ case E_DImode:
+ total = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ total += COSTS_N_INSNS (4);
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+ total += COSTS_N_INSNS (4);
+ return total;
+ case E_HImode:
+ case E_QImode:
+ total = COSTS_N_INSNS (3);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ total += COSTS_N_INSNS (3);
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+ total += COSTS_N_INSNS (3);
+ return total;
+ }
+ break;
+
case NOT:
- return COSTS_N_INSNS (1);
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (2);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (3);
+ }
+ break;
+
+ case NEG:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (3);
+ }
+ break;
+
+ case ASHIFT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (4);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (3);
+ }
+ break;
+
+ case ASHIFTRT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (4);
+ }
+ break;
+
+ case LSHIFTRT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (4);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (2);
+ }
+ break;
+
+ case ROTATERT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (2);
+ case E_DImode:
+ return COSTS_N_INSNS (6);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (5);
+ }
+ break;
case MULT:
if (arm_arch6m && arm_m_profile_small_mul)
@@ -9899,12 +10008,6 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
case TRUNCATE:
return 99;
- case AND:
- case XOR:
- case IOR:
- /* XXX guess. */
- return 8;
-
case MEM:
/* XXX another guess. */
/* Memory costs quite a lot for the first word, but subsequent words
@@ -9950,27 +10053,127 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
switch (code)
{
- case ASHIFT:
- case ASHIFTRT:
- case LSHIFTRT:
- case ROTATERT:
- return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
-
case PLUS:
case MINUS:
- /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
- defined by RTL expansion, especially for the expansion of
- multiplication. */
- if ((GET_CODE (XEXP (x, 0)) == MULT
- && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
- || (GET_CODE (XEXP (x, 1)) == MULT
- && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
- return COSTS_N_INSNS (2);
- /* Fall through. */
case COMPARE:
- case NEG:
+ case AND:
+ case XOR:
+ case IOR:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ cost = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ cost += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+ cost += COSTS_N_INSNS (1);
+ return cost;
+ case E_DImode:
+ cost = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ cost += COSTS_N_INSNS (4);
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+ cost += COSTS_N_INSNS (4);
+ return cost;
+ case E_HImode:
+ case E_QImode:
+ cost = COSTS_N_INSNS (3);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ cost += COSTS_N_INSNS (3);
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), mode))
+ cost += COSTS_N_INSNS (3);
+ return cost;
+ }
+ break;
+
case NOT:
- return COSTS_N_INSNS (1);
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (2);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (3);
+ }
+ break;
+
+ case NEG:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (3);
+ }
+ break;
+
+ case ASHIFT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (4);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (3);
+ }
+ break;
+
+ case ASHIFTRT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (4);
+ }
+ break;
+
+ case LSHIFTRT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (1);
+ case E_DImode:
+ return COSTS_N_INSNS (4);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (2);
+ }
+ break;
+
+ case ROTATERT:
+ switch (mode)
+ {
+ default:
+ case E_SImode:
+ return COSTS_N_INSNS (2);
+ case E_DImode:
+ return COSTS_N_INSNS (6);
+ case E_HImode:
+ case E_QImode:
+ return COSTS_N_INSNS (5);
+ }
+ break;
case MULT:
if (CONST_INT_P (XEXP (x, 1)))
@@ -10058,11 +10261,6 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
case TRUNCATE:
return 99;
- case AND:
- case XOR:
- case IOR:
- return COSTS_N_INSNS (1);
-
case MEM:
return (COSTS_N_INSNS (1)
+ COSTS_N_INSNS (1)
@@ -18609,7 +18807,7 @@ comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
int max_bit = -1;
uint32_t mask;
uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
- = {-1, -1, -1, -1};
+ = {~0U, ~0U, ~0U, ~0U};
/* To compute the padding bits in a union we only consider bits as
padding bits if they are always either a padding bit or fall outside a