[v2] ld: fix alignment issue for ARM thumb long branch stub using PureCode section
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_binutils_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-arm |
success
|
Test passed
|
Commit Message
Changes since v1:
- Simplified new test case to fix issue reported by Linaro CI.
Ok for trunk?
--
When the pure-code option is activated, the linker creates for M-profile architectures
a 2-byte branch instruction. This causes the section alignment to be set to 2-byte
alignment instead of 4-byte alignment. This is a problem for a long branch stub
without a pure-code section, as it contains a 32-bit address as data, which is expected
to be 4-byte aligned. Hence creating a long branch stub for a PureCode section followed
by a non-pure-code long branch stub will result in a misalignment of the 32-bit address.
An easy fix is to add a nop instruction after the branch to keep the section alignment
to 4 bytes.
Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
Co-authored-by: Guillaume VACHERIAS <guillaume.vacherias@st.com>
---
bfd/elf32-arm.c | 1 +
ld/testsuite/ld-arm/arm-elf.exp | 3 ++
...rcall-thumb2-purecode-consecutive-veneer.d | 28 +++++++++++++++++++
...rcall-thumb2-purecode-consecutive-veneer.s | 25 +++++++++++++++++
ld/testsuite/ld-arm/farcall-thumb2-purecode.d | 2 +-
5 files changed, 58 insertions(+), 1 deletion(-)
create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
Comments
Hi,
Gentle ping :)
Kind regards,
Torbjörn
On 2024-12-18 10:29, Torbjörn SVENSSON wrote:
> Changes since v1:
>
> - Simplified new test case to fix issue reported by Linaro CI.
>
> Ok for trunk?
>
> --
>
> When pure-code option is activated. The linker creates for M-profile architecures
> a 2-bytes branch instruction. This causes the section alignment to be set to 2-byte
> alignment instead of 4-byte alignment. This is a problem for long branch stub
> without pure-code section as it contains a 32-bit address as data, which is expected
> to be 4-byte aligned. Hence creating a long branch stub for PureCode section followed
> by a long branch stub will result in a misalignment for the 32-bit address.
>
> An easy fix is to add a nop instruction after the branch to keep the section alignment
> to 4 bytes.
>
> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
> Co-authored-by: Guillaume VACHERIAS <guillaume.vacherias@st.com>
> ---
> bfd/elf32-arm.c | 1 +
> ld/testsuite/ld-arm/arm-elf.exp | 3 ++
> ...rcall-thumb2-purecode-consecutive-veneer.d | 28 +++++++++++++++++++
> ...rcall-thumb2-purecode-consecutive-veneer.s | 25 +++++++++++++++++
> ld/testsuite/ld-arm/farcall-thumb2-purecode.d | 2 +-
> 5 files changed, 58 insertions(+), 1 deletion(-)
> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>
> diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
> index a4c23216c68..b22ca579493 100644
> --- a/bfd/elf32-arm.c
> +++ b/bfd/elf32-arm.c
> @@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
> THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
> THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
> THUMB16_INSN (0x4760), /* bx ip */
> + THUMB16_INSN (0xbf00), /* nop */
> };
>
> /* V4T Thumb -> Thumb long branch stub. Using the stack is not
> diff --git a/ld/testsuite/ld-arm/arm-elf.exp b/ld/testsuite/ld-arm/arm-elf.exp
> index 7fa4520fb15..0f8beb3236c 100644
> --- a/ld/testsuite/ld-arm/arm-elf.exp
> +++ b/ld/testsuite/ld-arm/arm-elf.exp
> @@ -604,6 +604,9 @@ set armeabitests_nonacl {
> {"Thumb2 purecode farcall" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode.s}
> {{objdump -d farcall-thumb2-purecode.d}}
> "farcall-thumb2-purecode"}
> + {"Thumb2 purecode farcall consecutive veneer" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode-consecutive-veneer.s}
> + {{objdump -d farcall-thumb2-purecode-consecutive-veneer.d}}
> + "farcall-thumb2-purecode-consecutive-veneer"}
>
> {"Thumb-ARM farcall" "-Ttext 0x1c01010 --section-start .foo=0x2001014" "" "-W" {farcall-thumb-arm.s}
> {{objdump -d farcall-thumb-arm.d}}
> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
> new file mode 100644
> index 00000000000..b89da03eec0
> --- /dev/null
> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
> @@ -0,0 +1,28 @@
> +.*: file format .*
> +
> +Disassembly of section .text:
> +
> +00001000 <bar>:
> + 1000: 4770 bx lr
> +
> +00001002 <baz>:
> + 1002: 4770 bx lr
> +
> +Disassembly of section .foo:
> +
> +02001020 <_start>:
> + 2001020: f000 f802 bl 2001028 <__bar_veneer>
> + 2001024: f000 f806 bl 2001034 <__baz_veneer>
> +
> +02001028 <__bar_veneer>:
> + 2001028: f241 0c01 movw ip, #4097 @ 0x1001
> + 200102c: f2c0 0c00 movt ip, #0
> + 2001030: 4760 bx ip
> + 2001032: bf00 nop
> +
> +02001034 <__baz_veneer>:
> + 2001034: f241 0c03 movw ip, #4099 @ 0x1003
> + 2001038: f2c0 0c00 movt ip, #0
> + 200103c: 4760 bx ip
> + 200103e: bf00 nop
> + \.\.\.
> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
> new file mode 100644
> index 00000000000..ee3d6c10b7b
> --- /dev/null
> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
> @@ -0,0 +1,25 @@
> +@ Test to ensure that a purecode Thumb2 call exceeding 4Mb generates a stub.
> +
> + .global _start
> + .syntax unified
> + .arch armv7-m
> + .thumb
> + .thumb_func
> +
> +@ We will place the section .text at 0x1000.
> +
> + .section .text, "0x20000006"
> + .type bar, %function
> + .type baz, %function
> +bar:
> + bx lr
> +
> +baz:
> + bx lr
> +
> +@ We will place the section .foo at 0x02001014.
> +
> + .section .foo, "0x20000006"
> +_start:
> + bl bar
> + bl baz
> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
> index 451832678e9..00a070eed5b 100644
> --- a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
> @@ -16,7 +16,7 @@ Disassembly of section .foo:
> 2001028: f241 0c01 movw ip, #4097 @ 0x1001
> 200102c: f2c0 0c00 movt ip, #0
> 2001030: 4760 bx ip
> - 2001032: 0000 movs r0, r0
> + 2001032: bf00 nop
> 2001034: 0000 movs r0, r0
> \.\.\.
>
Hi Torbjorn,
> Gentle ping :)
Gentle pong. :-)
>> - Simplified new test case to fix issue reported by Linaro CI.
>>
>> Ok for trunk?
Approved with one small change:
>> +++ b/bfd/elf32-arm.c
>> @@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
>> THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
>> THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
>> THUMB16_INSN (0x4760), /* bx ip */
>> + THUMB16_INSN (0xbf00), /* nop */
>> };
Please could you add a comment here explaining why the NOP is present
and hopefully preventing future optimizers from deleting it.
Cheers
Nick
On 18/12/2024 09:29, Torbjörn SVENSSON wrote:
> Changes since v1:
>
> - Simplified new test case to fix issue reported by Linaro CI.
>
> Ok for trunk?
I wonder if this is fall-out from a recent change Jan made to suppress some default alignments?
My feeling is that this patch is possibly fragile - it will work, provided that the initial address at which we start adding stubs is 4-byte aligned, but will fail if that does not hold true.
R.
>
> --
>
> When pure-code option is activated. The linker creates for M-profile architecures
> a 2-bytes branch instruction. This causes the section alignment to be set to 2-byte
> alignment instead of 4-byte alignment. This is a problem for long branch stub
> without pure-code section as it contains a 32-bit address as data, which is expected
> to be 4-byte aligned. Hence creating a long branch stub for PureCode section followed
> by a long branch stub will result in a misalignment for the 32-bit address.
>
> An easy fix is to add a nop instruction after the branch to keep the section alignment
> to 4 bytes.
>
> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
> Co-authored-by: Guillaume VACHERIAS <guillaume.vacherias@st.com>
> ---
> bfd/elf32-arm.c | 1 +
> ld/testsuite/ld-arm/arm-elf.exp | 3 ++
> ...rcall-thumb2-purecode-consecutive-veneer.d | 28 +++++++++++++++++++
> ...rcall-thumb2-purecode-consecutive-veneer.s | 25 +++++++++++++++++
> ld/testsuite/ld-arm/farcall-thumb2-purecode.d | 2 +-
> 5 files changed, 58 insertions(+), 1 deletion(-)
> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>
> diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
> index a4c23216c68..b22ca579493 100644
> --- a/bfd/elf32-arm.c
> +++ b/bfd/elf32-arm.c
> @@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
> THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
> THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
> THUMB16_INSN (0x4760), /* bx ip */
> + THUMB16_INSN (0xbf00), /* nop */
> };
>
> /* V4T Thumb -> Thumb long branch stub. Using the stack is not
> diff --git a/ld/testsuite/ld-arm/arm-elf.exp b/ld/testsuite/ld-arm/arm-elf.exp
> index 7fa4520fb15..0f8beb3236c 100644
> --- a/ld/testsuite/ld-arm/arm-elf.exp
> +++ b/ld/testsuite/ld-arm/arm-elf.exp
> @@ -604,6 +604,9 @@ set armeabitests_nonacl {
> {"Thumb2 purecode farcall" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode.s}
> {{objdump -d farcall-thumb2-purecode.d}}
> "farcall-thumb2-purecode"}
> + {"Thumb2 purecode farcall consecutive veneer" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode-consecutive-veneer.s}
> + {{objdump -d farcall-thumb2-purecode-consecutive-veneer.d}}
> + "farcall-thumb2-purecode-consecutive-veneer"}
>
> {"Thumb-ARM farcall" "-Ttext 0x1c01010 --section-start .foo=0x2001014" "" "-W" {farcall-thumb-arm.s}
> {{objdump -d farcall-thumb-arm.d}}
> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
> new file mode 100644
> index 00000000000..b89da03eec0
> --- /dev/null
> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
> @@ -0,0 +1,28 @@
> +.*: file format .*
> +
> +Disassembly of section .text:
> +
> +00001000 <bar>:
> + 1000: 4770 bx lr
> +
> +00001002 <baz>:
> + 1002: 4770 bx lr
> +
> +Disassembly of section .foo:
> +
> +02001020 <_start>:
> + 2001020: f000 f802 bl 2001028 <__bar_veneer>
> + 2001024: f000 f806 bl 2001034 <__baz_veneer>
> +
> +02001028 <__bar_veneer>:
> + 2001028: f241 0c01 movw ip, #4097 @ 0x1001
> + 200102c: f2c0 0c00 movt ip, #0
> + 2001030: 4760 bx ip
> + 2001032: bf00 nop
> +
> +02001034 <__baz_veneer>:
> + 2001034: f241 0c03 movw ip, #4099 @ 0x1003
> + 2001038: f2c0 0c00 movt ip, #0
> + 200103c: 4760 bx ip
> + 200103e: bf00 nop
> + \.\.\.
> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
> new file mode 100644
> index 00000000000..ee3d6c10b7b
> --- /dev/null
> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
> @@ -0,0 +1,25 @@
> +@ Test to ensure that a purecode Thumb2 call exceeding 4Mb generates a stub.
> +
> + .global _start
> + .syntax unified
> + .arch armv7-m
> + .thumb
> + .thumb_func
> +
> +@ We will place the section .text at 0x1000.
> +
> + .section .text, "0x20000006"
> + .type bar, %function
> + .type baz, %function
> +bar:
> + bx lr
> +
> +baz:
> + bx lr
> +
> +@ We will place the section .foo at 0x02001014.
> +
> + .section .foo, "0x20000006"
> +_start:
> + bl bar
> + bl baz
> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
> index 451832678e9..00a070eed5b 100644
> --- a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
> @@ -16,7 +16,7 @@ Disassembly of section .foo:
> 2001028: f241 0c01 movw ip, #4097 @ 0x1001
> 200102c: f2c0 0c00 movt ip, #0
> 2001030: 4760 bx ip
> - 2001032: 0000 movs r0, r0
> + 2001032: bf00 nop
> 2001034: 0000 movs r0, r0
> \.\.\.
>
On 08.01.2025 11:58, Richard Earnshaw (lists) wrote:
> On 18/12/2024 09:29, Torbjörn SVENSSON wrote:
>> Changes since v1:
>>
>> - Simplified new test case to fix issue reported by Linaro CI.
>>
>> Ok for trunk?
>
> I wonder if this is fall-out from a recent change Jan made to suppress some default alignments?
I don't think I made such a change. Maybe what you're referring to is the
change to alter _file position_ alignment, but that should be unrelated
here.
Jan
On 2025-01-08 11:58, Richard Earnshaw (lists) wrote:
> On 18/12/2024 09:29, Torbjörn SVENSSON wrote:
>> Changes since v1:
>>
>> - Simplified new test case to fix issue reported by Linaro CI.
>>
>> Ok for trunk?
>
> I wonder if this is fall-out from a recent change Jan made to suppress some default alignments?
>
> My feeling is that this patch is possibly fragile - it will work, provided that the initial address at which we start adding stubs is 4-byte aligned, but will fail if that does not hold true.
The solution that is provided here is more or less a followup on what was done for aarch64 in https://patches.linaro.org/project/binutils/patch/CABXYE2UOkCbiFQTJ7kijLUTMy25unww4w2Q_xSMf6icLS7xxog@mail.gmail.com/.
Kind regards,
Torbjörn
>
> R.
>
>>
>> --
>>
>> When pure-code option is activated. The linker creates for M-profile architecures
>> a 2-bytes branch instruction. This causes the section alignment to be set to 2-byte
>> alignment instead of 4-byte alignment. This is a problem for long branch stub
>> without pure-code section as it contains a 32-bit address as data, which is expected
>> to be 4-byte aligned. Hence creating a long branch stub for PureCode section followed
>> by a long branch stub will result in a misalignment for the 32-bit address.
>>
>> An easy fix is to add a nop instruction after the branch to keep the section alignment
>> to 4 bytes.
>>
>> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
>> Co-authored-by: Guillaume VACHERIAS <guillaume.vacherias@st.com>
>> ---
>> bfd/elf32-arm.c | 1 +
>> ld/testsuite/ld-arm/arm-elf.exp | 3 ++
>> ...rcall-thumb2-purecode-consecutive-veneer.d | 28 +++++++++++++++++++
>> ...rcall-thumb2-purecode-consecutive-veneer.s | 25 +++++++++++++++++
>> ld/testsuite/ld-arm/farcall-thumb2-purecode.d | 2 +-
>> 5 files changed, 58 insertions(+), 1 deletion(-)
>> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>
>> diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
>> index a4c23216c68..b22ca579493 100644
>> --- a/bfd/elf32-arm.c
>> +++ b/bfd/elf32-arm.c
>> @@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
>> THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
>> THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
>> THUMB16_INSN (0x4760), /* bx ip */
>> + THUMB16_INSN (0xbf00), /* nop */
>> };
>>
>> /* V4T Thumb -> Thumb long branch stub. Using the stack is not
>> diff --git a/ld/testsuite/ld-arm/arm-elf.exp b/ld/testsuite/ld-arm/arm-elf.exp
>> index 7fa4520fb15..0f8beb3236c 100644
>> --- a/ld/testsuite/ld-arm/arm-elf.exp
>> +++ b/ld/testsuite/ld-arm/arm-elf.exp
>> @@ -604,6 +604,9 @@ set armeabitests_nonacl {
>> {"Thumb2 purecode farcall" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode.s}
>> {{objdump -d farcall-thumb2-purecode.d}}
>> "farcall-thumb2-purecode"}
>> + {"Thumb2 purecode farcall consecutive veneer" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode-consecutive-veneer.s}
>> + {{objdump -d farcall-thumb2-purecode-consecutive-veneer.d}}
>> + "farcall-thumb2-purecode-consecutive-veneer"}
>>
>> {"Thumb-ARM farcall" "-Ttext 0x1c01010 --section-start .foo=0x2001014" "" "-W" {farcall-thumb-arm.s}
>> {{objdump -d farcall-thumb-arm.d}}
>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>> new file mode 100644
>> index 00000000000..b89da03eec0
>> --- /dev/null
>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>> @@ -0,0 +1,28 @@
>> +.*: file format .*
>> +
>> +Disassembly of section .text:
>> +
>> +00001000 <bar>:
>> + 1000: 4770 bx lr
>> +
>> +00001002 <baz>:
>> + 1002: 4770 bx lr
>> +
>> +Disassembly of section .foo:
>> +
>> +02001020 <_start>:
>> + 2001020: f000 f802 bl 2001028 <__bar_veneer>
>> + 2001024: f000 f806 bl 2001034 <__baz_veneer>
>> +
>> +02001028 <__bar_veneer>:
>> + 2001028: f241 0c01 movw ip, #4097 @ 0x1001
>> + 200102c: f2c0 0c00 movt ip, #0
>> + 2001030: 4760 bx ip
>> + 2001032: bf00 nop
>> +
>> +02001034 <__baz_veneer>:
>> + 2001034: f241 0c03 movw ip, #4099 @ 0x1003
>> + 2001038: f2c0 0c00 movt ip, #0
>> + 200103c: 4760 bx ip
>> + 200103e: bf00 nop
>> + \.\.\.
>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>> new file mode 100644
>> index 00000000000..ee3d6c10b7b
>> --- /dev/null
>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>> @@ -0,0 +1,25 @@
>> +@ Test to ensure that a purecode Thumb2 call exceeding 4Mb generates a stub.
>> +
>> + .global _start
>> + .syntax unified
>> + .arch armv7-m
>> + .thumb
>> + .thumb_func
>> +
>> +@ We will place the section .text at 0x1000.
>> +
>> + .section .text, "0x20000006"
>> + .type bar, %function
>> + .type baz, %function
>> +bar:
>> + bx lr
>> +
>> +baz:
>> + bx lr
>> +
>> +@ We will place the section .foo at 0x02001014.
>> +
>> + .section .foo, "0x20000006"
>> +_start:
>> + bl bar
>> + bl baz
>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>> index 451832678e9..00a070eed5b 100644
>> --- a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>> @@ -16,7 +16,7 @@ Disassembly of section .foo:
>> 2001028: f241 0c01 movw ip, #4097 @ 0x1001
>> 200102c: f2c0 0c00 movt ip, #0
>> 2001030: 4760 bx ip
>> - 2001032: 0000 movs r0, r0
>> + 2001032: bf00 nop
>> 2001034: 0000 movs r0, r0
>> \.\.\.
>>
>
On 08/01/2025 12:25, Torbjorn SVENSSON wrote:
>
>
> On 2025-01-08 11:58, Richard Earnshaw (lists) wrote:
>> On 18/12/2024 09:29, Torbjörn SVENSSON wrote:
>>> Changes since v1:
>>>
>>> - Simplified new test case to fix issue reported by Linaro CI.
>>>
>>> Ok for trunk?
>>
>> I wonder if this is fall-out from a recent change Jan made to suppress some default alignments?
>>
>> My feeling is that this patch is possibly fragile - it will work, provided that the initial address at which we start adding stubs is 4-byte aligned, but will fail if that does not hold true.
>
> The solution that is provided here is more or less a followup on what was done for aarch64 in https://patches.linaro.org/project/binutils/patch/CABXYE2UOkCbiFQTJ7kijLUTMy25unww4w2Q_xSMf6icLS7xxog@mail.gmail.com/.
>
Thanks for the additional info. I guess what I'm trying to establish is whether or not this has always been broken or whether this is needed because of some other change that has been made (similar to what happened in the aarch64 case).
R.
> Kind regards,
> Torbjörn
>
>>
>> R.
>>
>>>
>>> --
>>>
>>> When pure-code option is activated. The linker creates for M-profile architecures
>>> a 2-bytes branch instruction. This causes the section alignment to be set to 2-byte
>>> alignment instead of 4-byte alignment. This is a problem for long branch stub
>>> without pure-code section as it contains a 32-bit address as data, which is expected
>>> to be 4-byte aligned. Hence creating a long branch stub for PureCode section followed
>>> by a long branch stub will result in a misalignment for the 32-bit address.
>>>
>>> An easy fix is to add a nop instruction after the branch to keep the section alignment
>>> to 4 bytes.
>>>
>>> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
>>> Co-authored-by: Guillaume VACHERIAS <guillaume.vacherias@st.com>
>>> ---
>>> bfd/elf32-arm.c | 1 +
>>> ld/testsuite/ld-arm/arm-elf.exp | 3 ++
>>> ...rcall-thumb2-purecode-consecutive-veneer.d | 28 +++++++++++++++++++
>>> ...rcall-thumb2-purecode-consecutive-veneer.s | 25 +++++++++++++++++
>>> ld/testsuite/ld-arm/farcall-thumb2-purecode.d | 2 +-
>>> 5 files changed, 58 insertions(+), 1 deletion(-)
>>> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>>> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>>
>>> diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
>>> index a4c23216c68..b22ca579493 100644
>>> --- a/bfd/elf32-arm.c
>>> +++ b/bfd/elf32-arm.c
>>> @@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
>>> THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
>>> THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
>>> THUMB16_INSN (0x4760), /* bx ip */
>>> + THUMB16_INSN (0xbf00), /* nop */
>>> };
>>> /* V4T Thumb -> Thumb long branch stub. Using the stack is not
>>> diff --git a/ld/testsuite/ld-arm/arm-elf.exp b/ld/testsuite/ld-arm/arm-elf.exp
>>> index 7fa4520fb15..0f8beb3236c 100644
>>> --- a/ld/testsuite/ld-arm/arm-elf.exp
>>> +++ b/ld/testsuite/ld-arm/arm-elf.exp
>>> @@ -604,6 +604,9 @@ set armeabitests_nonacl {
>>> {"Thumb2 purecode farcall" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode.s}
>>> {{objdump -d farcall-thumb2-purecode.d}}
>>> "farcall-thumb2-purecode"}
>>> + {"Thumb2 purecode farcall consecutive veneer" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode-consecutive-veneer.s}
>>> + {{objdump -d farcall-thumb2-purecode-consecutive-veneer.d}}
>>> + "farcall-thumb2-purecode-consecutive-veneer"}
>>> {"Thumb-ARM farcall" "-Ttext 0x1c01010 --section-start .foo=0x2001014" "" "-W" {farcall-thumb-arm.s}
>>> {{objdump -d farcall-thumb-arm.d}}
>>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>>> new file mode 100644
>>> index 00000000000..b89da03eec0
>>> --- /dev/null
>>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>>> @@ -0,0 +1,28 @@
>>> +.*: file format .*
>>> +
>>> +Disassembly of section .text:
>>> +
>>> +00001000 <bar>:
>>> + 1000: 4770 bx lr
>>> +
>>> +00001002 <baz>:
>>> + 1002: 4770 bx lr
>>> +
>>> +Disassembly of section .foo:
>>> +
>>> +02001020 <_start>:
>>> + 2001020: f000 f802 bl 2001028 <__bar_veneer>
>>> + 2001024: f000 f806 bl 2001034 <__baz_veneer>
>>> +
>>> +02001028 <__bar_veneer>:
>>> + 2001028: f241 0c01 movw ip, #4097 @ 0x1001
>>> + 200102c: f2c0 0c00 movt ip, #0
>>> + 2001030: 4760 bx ip
>>> + 2001032: bf00 nop
>>> +
>>> +02001034 <__baz_veneer>:
>>> + 2001034: f241 0c03 movw ip, #4099 @ 0x1003
>>> + 2001038: f2c0 0c00 movt ip, #0
>>> + 200103c: 4760 bx ip
>>> + 200103e: bf00 nop
>>> + \.\.\.
>>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>> new file mode 100644
>>> index 00000000000..ee3d6c10b7b
>>> --- /dev/null
>>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>> @@ -0,0 +1,25 @@
>>> +@ Test to ensure that a purecode Thumb2 call exceeding 4Mb generates a stub.
>>> +
>>> + .global _start
>>> + .syntax unified
>>> + .arch armv7-m
>>> + .thumb
>>> + .thumb_func
>>> +
>>> +@ We will place the section .text at 0x1000.
>>> +
>>> + .section .text, "0x20000006"
>>> + .type bar, %function
>>> + .type baz, %function
>>> +bar:
>>> + bx lr
>>> +
>>> +baz:
>>> + bx lr
>>> +
>>> +@ We will place the section .foo at 0x02001014.
>>> +
>>> + .section .foo, "0x20000006"
>>> +_start:
>>> + bl bar
>>> + bl baz
>>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>>> index 451832678e9..00a070eed5b 100644
>>> --- a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>>> @@ -16,7 +16,7 @@ Disassembly of section .foo:
>>> 2001028: f241 0c01 movw ip, #4097 @ 0x1001
>>> 200102c: f2c0 0c00 movt ip, #0
>>> 2001030: 4760 bx ip
>>> - 2001032: 0000 movs r0, r0
>>> + 2001032: bf00 nop
>>> 2001034: 0000 movs r0, r0
>>> \.\.\.
>>>
>>
>
On 2025-01-08 13:49, Richard Earnshaw (lists) wrote:
> On 08/01/2025 12:25, Torbjorn SVENSSON wrote:
>>
>>
>> On 2025-01-08 11:58, Richard Earnshaw (lists) wrote:
>>> On 18/12/2024 09:29, Torbjörn SVENSSON wrote:
>>>> Changes since v1:
>>>>
>>>> - Simplified new test case to fix issue reported by Linaro CI.
>>>>
>>>> Ok for trunk?
>>>
>>> I wonder if this is fall-out from a recent change Jan made to suppress some default alignments?
>>>
>>> My feeling is that this patch is possibly fragile - it will work, provided that the initial address at which we start adding stubs is 4-byte aligned, but will fail if that does not hold true.
>>
>> The solution that is provided here is more or less a followup on what was done for aarch64 in https://patches.linaro.org/project/binutils/patch/CABXYE2UOkCbiFQTJ7kijLUTMy25unww4w2Q_xSMf6icLS7xxog@mail.gmail.com/.
>>
>
> Thanks for the additional info. I guess what I'm trying to establish was whether or not this has always been broken or whether this is needed because of some other change that as been made (similar to what happened in the aarch64 case).
Our investigation has concluded that this never worked when both a pure-code veneer and a non-pure-code veneer were generated and the non-pure-code veneer was placed last.
All the other veneers in bfd/elf32-arm.c have either 32-bit instructions or an even number of 16-bit instructions. It's only elf32_arm_stub_long_branch_thumb2_only_pure that had 2 32-bit instructions and 1 16-bit instruction => only one veneer that could cause an unaligned start of a function placed after it in the resulting binary.
According to git, the oldest ld that may have this problem should be 2.28. I have 2.30 handy and it contains the problem.
Kind regards,
Torbjörn and Guillaume
>
> R.
>
>> Kind regards,
>> Torbjörn
>>
>>>
>>> R.
>>>
>>>>
>>>> --
>>>>
>>>> When pure-code option is activated. The linker creates for M-profile architecures
>>>> a 2-bytes branch instruction. This causes the section alignment to be set to 2-byte
>>>> alignment instead of 4-byte alignment. This is a problem for long branch stub
>>>> without pure-code section as it contains a 32-bit address as data, which is expected
>>>> to be 4-byte aligned. Hence creating a long branch stub for PureCode section followed
>>>> by a long branch stub will result in a misalignment for the 32-bit address.
>>>>
>>>> An easy fix is to add a nop instruction after the branch to keep the section alignment
>>>> to 4 bytes.
>>>>
>>>> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
>>>> Co-authored-by: Guillaume VACHERIAS <guillaume.vacherias@st.com>
>>>> ---
>>>> bfd/elf32-arm.c | 1 +
>>>> ld/testsuite/ld-arm/arm-elf.exp | 3 ++
>>>> ...rcall-thumb2-purecode-consecutive-veneer.d | 28 +++++++++++++++++++
>>>> ...rcall-thumb2-purecode-consecutive-veneer.s | 25 +++++++++++++++++
>>>> ld/testsuite/ld-arm/farcall-thumb2-purecode.d | 2 +-
>>>> 5 files changed, 58 insertions(+), 1 deletion(-)
>>>> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>>>> create mode 100644 ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>>>
>>>> diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
>>>> index a4c23216c68..b22ca579493 100644
>>>> --- a/bfd/elf32-arm.c
>>>> +++ b/bfd/elf32-arm.c
>>>> @@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
>>>> THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
>>>> THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
>>>> THUMB16_INSN (0x4760), /* bx ip */
>>>> + THUMB16_INSN (0xbf00), /* nop */
>>>> };
>>>> /* V4T Thumb -> Thumb long branch stub. Using the stack is not
>>>> diff --git a/ld/testsuite/ld-arm/arm-elf.exp b/ld/testsuite/ld-arm/arm-elf.exp
>>>> index 7fa4520fb15..0f8beb3236c 100644
>>>> --- a/ld/testsuite/ld-arm/arm-elf.exp
>>>> +++ b/ld/testsuite/ld-arm/arm-elf.exp
>>>> @@ -604,6 +604,9 @@ set armeabitests_nonacl {
>>>> {"Thumb2 purecode farcall" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode.s}
>>>> {{objdump -d farcall-thumb2-purecode.d}}
>>>> "farcall-thumb2-purecode"}
>>>> + {"Thumb2 purecode farcall consecutive veneer" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode-consecutive-veneer.s}
>>>> + {{objdump -d farcall-thumb2-purecode-consecutive-veneer.d}}
>>>> + "farcall-thumb2-purecode-consecutive-veneer"}
>>>> {"Thumb-ARM farcall" "-Ttext 0x1c01010 --section-start .foo=0x2001014" "" "-W" {farcall-thumb-arm.s}
>>>> {{objdump -d farcall-thumb-arm.d}}
>>>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>>>> new file mode 100644
>>>> index 00000000000..b89da03eec0
>>>> --- /dev/null
>>>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.d
>>>> @@ -0,0 +1,28 @@
>>>> +.*: file format .*
>>>> +
>>>> +Disassembly of section .text:
>>>> +
>>>> +00001000 <bar>:
>>>> + 1000: 4770 bx lr
>>>> +
>>>> +00001002 <baz>:
>>>> + 1002: 4770 bx lr
>>>> +
>>>> +Disassembly of section .foo:
>>>> +
>>>> +02001020 <_start>:
>>>> + 2001020: f000 f802 bl 2001028 <__bar_veneer>
>>>> + 2001024: f000 f806 bl 2001034 <__baz_veneer>
>>>> +
>>>> +02001028 <__bar_veneer>:
>>>> + 2001028: f241 0c01 movw ip, #4097 @ 0x1001
>>>> + 200102c: f2c0 0c00 movt ip, #0
>>>> + 2001030: 4760 bx ip
>>>> + 2001032: bf00 nop
>>>> +
>>>> +02001034 <__baz_veneer>:
>>>> + 2001034: f241 0c03 movw ip, #4099 @ 0x1003
>>>> + 2001038: f2c0 0c00 movt ip, #0
>>>> + 200103c: 4760 bx ip
>>>> + 200103e: bf00 nop
>>>> + \.\.\.
>>>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>>> new file mode 100644
>>>> index 00000000000..ee3d6c10b7b
>>>> --- /dev/null
>>>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode-consecutive-veneer.s
>>>> @@ -0,0 +1,25 @@
>>>> +@ Test to ensure that a purecode Thumb2 call exceeding 4Mb generates a stub.
>>>> +
>>>> + .global _start
>>>> + .syntax unified
>>>> + .arch armv7-m
>>>> + .thumb
>>>> + .thumb_func
>>>> +
>>>> +@ We will place the section .text at 0x1000.
>>>> +
>>>> + .section .text, "0x20000006"
>>>> + .type bar, %function
>>>> + .type baz, %function
>>>> +bar:
>>>> + bx lr
>>>> +
>>>> +baz:
>>>> + bx lr
>>>> +
>>>> +@ We will place the section .foo at 0x02001014.
>>>> +
>>>> + .section .foo, "0x20000006"
>>>> +_start:
>>>> + bl bar
>>>> + bl baz
>>>> diff --git a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>>>> index 451832678e9..00a070eed5b 100644
>>>> --- a/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>>>> +++ b/ld/testsuite/ld-arm/farcall-thumb2-purecode.d
>>>> @@ -16,7 +16,7 @@ Disassembly of section .foo:
>>>> 2001028: f241 0c01 movw ip, #4097 @ 0x1001
>>>> 200102c: f2c0 0c00 movt ip, #0
>>>> 2001030: 4760 bx ip
>>>> - 2001032: 0000 movs r0, r0
>>>> + 2001032: bf00 nop
>>>> 2001034: 0000 movs r0, r0
>>>> \.\.\.
>>>>
>>>
>>
>
@@ -2630,6 +2630,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
THUMB32_MOVW (0xf2400c00), /* mov.w ip, R_ARM_MOVW_ABS_NC */
THUMB32_MOVT (0xf2c00c00), /* movt ip, R_ARM_MOVT_ABS << 16 */
THUMB16_INSN (0x4760), /* bx ip */
+ THUMB16_INSN (0xbf00), /* nop */
};
/* V4T Thumb -> Thumb long branch stub. Using the stack is not
@@ -604,6 +604,9 @@ set armeabitests_nonacl {
{"Thumb2 purecode farcall" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode.s}
{{objdump -d farcall-thumb2-purecode.d}}
"farcall-thumb2-purecode"}
+ {"Thumb2 purecode farcall consecutive veneer" "-Ttext 0x1000 --section-start .foo=0x2001020" "" "" {farcall-thumb2-purecode-consecutive-veneer.s}
+ {{objdump -d farcall-thumb2-purecode-consecutive-veneer.d}}
+ "farcall-thumb2-purecode-consecutive-veneer"}
{"Thumb-ARM farcall" "-Ttext 0x1c01010 --section-start .foo=0x2001014" "" "-W" {farcall-thumb-arm.s}
{{objdump -d farcall-thumb-arm.d}}
new file mode 100644
@@ -0,0 +1,28 @@
+.*: file format .*
+
+Disassembly of section .text:
+
+00001000 <bar>:
+ 1000: 4770 bx lr
+
+00001002 <baz>:
+ 1002: 4770 bx lr
+
+Disassembly of section .foo:
+
+02001020 <_start>:
+ 2001020: f000 f802 bl 2001028 <__bar_veneer>
+ 2001024: f000 f806 bl 2001034 <__baz_veneer>
+
+02001028 <__bar_veneer>:
+ 2001028: f241 0c01 movw ip, #4097 @ 0x1001
+ 200102c: f2c0 0c00 movt ip, #0
+ 2001030: 4760 bx ip
+ 2001032: bf00 nop
+
+02001034 <__baz_veneer>:
+ 2001034: f241 0c03 movw ip, #4099 @ 0x1003
+ 2001038: f2c0 0c00 movt ip, #0
+ 200103c: 4760 bx ip
+ 200103e: bf00 nop
+ \.\.\.
new file mode 100644
@@ -0,0 +1,25 @@
+@ Test to ensure that a purecode Thumb2 call exceeding 4Mb generates a stub.
+
+ .global _start
+ .syntax unified
+ .arch armv7-m
+ .thumb
+ .thumb_func
+
+@ We will place the section .text at 0x1000.
+
+ .section .text, "0x20000006"
+ .type bar, %function
+ .type baz, %function
+bar:
+ bx lr
+
+baz:
+ bx lr
+
+@ We will place the section .foo at 0x02001020.
+
+ .section .foo, "0x20000006"
+_start:
+ bl bar
+ bl baz
@@ -16,7 +16,7 @@ Disassembly of section .foo:
2001028: f241 0c01 movw ip, #4097 @ 0x1001
200102c: f2c0 0c00 movt ip, #0
2001030: 4760 bx ip
- 2001032: 0000 movs r0, r0
+ 2001032: bf00 nop
2001034: 0000 movs r0, r0
\.\.\.