Optimize incoming integer argument promotion

Message ID 20241101223201.384907-1-hjl.tools@gmail.com
State New
Headers
Series Optimize incoming integer argument promotion |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

H.J. Lu Nov. 1, 2024, 10:32 p.m. UTC
  For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
true, all integer arguments smaller than int are passed as int:

[hjl@gnu-tgl-3 pr14907]$ cat x.c
extern int baz (char c1);

int
foo (char c1)
{
  return baz (c1);
}
[hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
[hjl@gnu-tgl-3 pr14907]$ cat x.s
	.file	"x.c"
	.text
	.p2align 4
	.globl	foo
	.type	foo, @function
foo:
.LFB0:
	.cfi_startproc
	movsbl	4(%esp), %eax
	movl	%eax, 4(%esp)
	jmp	baz
	.cfi_endproc
.LFE0:
	.size	foo, .-foo
	.ident	"GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
	.section	.note.GNU-stack,"",@progbits
[hjl@gnu-tgl-3 pr14907]$

But integer promotion:

	movsbl	4(%esp), %eax
	movl	%eax, 4(%esp)

isn't necessary if incoming arguments and outgoing arguments are the
same.  Use unpromoted incoming integer arguments as outgoing arguments
if incoming integer arguments are the same as outgoing arguments to
avoid unnecessary integer promotion.

gcc/

	PR middle-end/14907
	* calls.cc: Include "ssa.h", "tree-ssa-live.h" and
	"tree-outof-ssa.h".
	(arg_data): Add unpromoted_int_parm_rtx.
	(precompute_register_parameters): Use unpromoted_int_parm_rtx
	as argument value if available.
	(get_unpromoted_int_parm_rtx_from_ssa_name): New function.
	(get_unpromoted_int_parm_rtx): Likewise.
	(initialize_argument_information): Add an argument for function
	parameter types.  Set unpromoted_int_parm_rtx if integer function
	arguments are promoted to int.  Change mode, reg and tail_call_reg
	to the same mode as unpromoted_int_parm_rtx.
	(expand_call): Pass type_arg_types to
	initialize_argument_information.
	(store_one_arg): Use unpromoted_int_parm_rtx as argument value
	if available.

gcc/testsuite/

	PR middle-end/14907
	* gcc.target/i386/pr14907-1.c: New test.
	* gcc.target/i386/pr14907-2.c: Likewise.
	* gcc.target/i386/pr14907-3.c: Likewise.
	* gcc.target/i386/pr14907-4.c: Likewise.
	* gcc.target/i386/pr14907-5.c: Likewise.
	* gcc.target/i386/pr14907-6.c: Likewise.
	* gcc.target/i386/pr14907-7.c: Likewise.
	* gcc.target/i386/pr14907-8.c: Likewise.
	* gcc.target/i386/pr14907-9.c: Likewise.
	* gcc.target/i386/pr14907-10.c: Likewise.
	* gcc.target/i386/pr14907-11.c: Likewise.
	* gcc.target/i386/pr14907-12.c: Likewise.
	* gcc.target/i386/pr14907-13.c: Likewise.
	* gcc.target/i386/pr14907-14.c: Likewise.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
---
 gcc/calls.cc                               | 142 +++++++++++++++++++--
 gcc/testsuite/gcc.target/i386/pr14907-1.c  |  21 +++
 gcc/testsuite/gcc.target/i386/pr14907-10.c |  23 ++++
 gcc/testsuite/gcc.target/i386/pr14907-11.c |  12 ++
 gcc/testsuite/gcc.target/i386/pr14907-12.c |  17 +++
 gcc/testsuite/gcc.target/i386/pr14907-13.c |  12 ++
 gcc/testsuite/gcc.target/i386/pr14907-14.c |  17 +++
 gcc/testsuite/gcc.target/i386/pr14907-2.c  |  21 +++
 gcc/testsuite/gcc.target/i386/pr14907-3.c  |  21 +++
 gcc/testsuite/gcc.target/i386/pr14907-4.c  |  21 +++
 gcc/testsuite/gcc.target/i386/pr14907-5.c  |  21 +++
 gcc/testsuite/gcc.target/i386/pr14907-6.c  |  21 +++
 gcc/testsuite/gcc.target/i386/pr14907-7.c  |  22 ++++
 gcc/testsuite/gcc.target/i386/pr14907-8.c  |  23 ++++
 gcc/testsuite/gcc.target/i386/pr14907-9.c  |  22 ++++
 15 files changed, 408 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr14907-9.c
  

Comments

Jeff Law Nov. 5, 2024, 12:06 a.m. UTC | #1
On 11/1/24 4:32 PM, H.J. Lu wrote:
> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> true, all integer arguments smaller than int are passed as int:
> 
> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> extern int baz (char c1);
> 
> int
> foo (char c1)
> {
>    return baz (c1);
> }
> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> 	.file	"x.c"
> 	.text
> 	.p2align 4
> 	.globl	foo
> 	.type	foo, @function
> foo:
> .LFB0:
> 	.cfi_startproc
> 	movsbl	4(%esp), %eax
> 	movl	%eax, 4(%esp)
> 	jmp	baz
> 	.cfi_endproc
> .LFE0:
> 	.size	foo, .-foo
> 	.ident	"GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> 	.section	.note.GNU-stack,"",@progbits
> [hjl@gnu-tgl-3 pr14907]$
> 
> But integer promotion:
> 
> 	movsbl	4(%esp), %eax
> 	movl	%eax, 4(%esp)
> 
> isn't necessary if incoming arguments and outgoing arguments are the
> same.  Use unpromoted incoming integer arguments as outgoing arguments
> if incoming integer arguments are the same as outgoing arguments to
> avoid unnecessary integer promotion.
Is there a particular reason x86 can't use the same mechanisms that 
other targets used to expose how arguments are promoted and ultimately 
optimize away unnecessary promotions?

jeff
  
H.J. Lu Nov. 5, 2024, 12:42 a.m. UTC | #2
On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > true, all integer arguments smaller than int are passed as int:
> >
> > [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > extern int baz (char c1);
> >
> > int
> > foo (char c1)
> > {
> >    return baz (c1);
> > }
> > [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > [hjl@gnu-tgl-3 pr14907]$ cat x.s
> >       .file   "x.c"
> >       .text
> >       .p2align 4
> >       .globl  foo
> >       .type   foo, @function
> > foo:
> > .LFB0:
> >       .cfi_startproc
> >       movsbl  4(%esp), %eax
> >       movl    %eax, 4(%esp)
> >       jmp     baz
> >       .cfi_endproc
> > .LFE0:
> >       .size   foo, .-foo
> >       .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> >       .section        .note.GNU-stack,"",@progbits
> > [hjl@gnu-tgl-3 pr14907]$
> >
> > But integer promotion:
> >
> >       movsbl  4(%esp), %eax
> >       movl    %eax, 4(%esp)
> >
> > isn't necessary if incoming arguments and outgoing arguments are the
> > same.  Use unpromoted incoming integer arguments as outgoing arguments
> > if incoming integer arguments are the same as outgoing arguments to
> > avoid unnecessary integer promotion.
> Is there a particular reason x86 can't use the same mechanisms that

Other targets define TARGET_PROMOTE_PROTOTYPES to return false
to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
to return false will break LLVM which assumes that incoming char/short
arguments on x86 are always extended to int.   The following targets

arc/arc.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
cris/cris.h:   defining TARGET_PROMOTE_PROTOTYPES that always returns true would
epiphany/epiphany.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
fr30/fr30.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
ft32/ft32.cc:#define TARGET_PROMOTE_PROTOTYPES       hook_bool_const_tree_true
i386/i386.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
ia64/ia64.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
iq2000/iq2000.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
lm32/lm32.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
m32r/m32r.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
m68k/m68k.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
mcore/mcore.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
mn10300/mn10300.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
moxie/moxie.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
nios2/nios2.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
pa/pa.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
stormy16/stormy16.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
v850/v850.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
vax/vax.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
xtensa/xtensa.cc:#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

suffer the same issue as x86.

> other targets used to expose how arguments are promoted and ultimately
> optimize away unnecessary promotions?
>
> jeff
  
Jeff Law Nov. 5, 2024, 12:48 a.m. UTC | #3
On 11/4/24 5:42 PM, H.J. Lu wrote:
> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>>
>>
>>
>> On 11/1/24 4:32 PM, H.J. Lu wrote:
>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
>>> true, all integer arguments smaller than int are passed as int:
>>>
>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
>>> extern int baz (char c1);
>>>
>>> int
>>> foo (char c1)
>>> {
>>>     return baz (c1);
>>> }
>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
>>>        .file   "x.c"
>>>        .text
>>>        .p2align 4
>>>        .globl  foo
>>>        .type   foo, @function
>>> foo:
>>> .LFB0:
>>>        .cfi_startproc
>>>        movsbl  4(%esp), %eax
>>>        movl    %eax, 4(%esp)
>>>        jmp     baz
>>>        .cfi_endproc
>>> .LFE0:
>>>        .size   foo, .-foo
>>>        .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
>>>        .section        .note.GNU-stack,"",@progbits
>>> [hjl@gnu-tgl-3 pr14907]$
>>>
>>> But integer promotion:
>>>
>>>        movsbl  4(%esp), %eax
>>>        movl    %eax, 4(%esp)
>>>
>>> isn't necessary if incoming arguments and outgoing arguments are the
>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
>>> if incoming integer arguments are the same as outgoing arguments to
>>> avoid unnecessary integer promotion.
>> Is there a particular reason x86 can't use the same mechanisms that
> 
> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> to return false will break LLVM which assumes that incoming char/short
> arguments on x86 are always extended to int.   The following targets
Then my suggestion would be to cover this in REE somehow.  We started 
looking at that a couple years ago and set it aside.   But the basic 
idea was to expose the ABI guarantees to REE, then let REE do its thing.

Jeff
  
H.J. Lu Nov. 5, 2024, 2:52 a.m. UTC | #4
On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >>
> >>
> >>
> >> On 11/1/24 4:32 PM, H.J. Lu wrote:
> >>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> >>> true, all integer arguments smaller than int are passed as int:
> >>>
> >>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> >>> extern int baz (char c1);
> >>>
> >>> int
> >>> foo (char c1)
> >>> {
> >>>     return baz (c1);
> >>> }
> >>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> >>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> >>>        .file   "x.c"
> >>>        .text
> >>>        .p2align 4
> >>>        .globl  foo
> >>>        .type   foo, @function
> >>> foo:
> >>> .LFB0:
> >>>        .cfi_startproc
> >>>        movsbl  4(%esp), %eax
> >>>        movl    %eax, 4(%esp)
> >>>        jmp     baz
> >>>        .cfi_endproc
> >>> .LFE0:
> >>>        .size   foo, .-foo
> >>>        .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> >>>        .section        .note.GNU-stack,"",@progbits
> >>> [hjl@gnu-tgl-3 pr14907]$
> >>>
> >>> But integer promotion:
> >>>
> >>>        movsbl  4(%esp), %eax
> >>>        movl    %eax, 4(%esp)
> >>>
> >>> isn't necessary if incoming arguments and outgoing arguments are the
> >>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> >>> if incoming integer arguments are the same as outgoing arguments to
> >>> avoid unnecessary integer promotion.
> >> Is there a particular reason x86 can't use the same mechanisms that
> >
> > Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > to return false will break LLVM which assumes that incoming char/short
> > arguments on x86 are always extended to int.   The following targets
> Then my suggestion would be to cover this in REE somehow.  We started
> looking at that a couple years ago and set it aside.   But the basic
> idea was to expose the ABI guarantees to REE, then let REE do its thing.
>
> Jeff
>

For

extern int baz (char c1);

int
foo (char c1)
{
  return baz (c1);
}

on i386, we get these

(insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
        (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
                    (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
190 {extendqisi2}
     (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
        (nil)))
(insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
                (const_int 4 [0x4])) [0  S4 A32])
        (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
     (nil))
(call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
        (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
<function_decl 0x7f27347aae00
baz>) [0 baz S1 A8])
            (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
     (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
<function_decl 0x7f273
47aae00 baz>)
        (nil))
    (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
        (nil)))

before REE.  How should REE work to elimate

(insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
        (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
                    (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
190 {extendqisi2}
     (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
        (nil)))
(insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
                (const_int 4 [0x4])) [0  S4 A32])
        (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
     (nil))
  
Jeff Law Nov. 5, 2024, 2:57 a.m. UTC | #5
On 11/4/24 7:52 PM, H.J. Lu wrote:
> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>>
>>
>>
>> On 11/4/24 5:42 PM, H.J. Lu wrote:
>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>>>>
>>>>
>>>>
>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
>>>>> true, all integer arguments smaller than int are passed as int:
>>>>>
>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
>>>>> extern int baz (char c1);
>>>>>
>>>>> int
>>>>> foo (char c1)
>>>>> {
>>>>>      return baz (c1);
>>>>> }
>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
>>>>>         .file   "x.c"
>>>>>         .text
>>>>>         .p2align 4
>>>>>         .globl  foo
>>>>>         .type   foo, @function
>>>>> foo:
>>>>> .LFB0:
>>>>>         .cfi_startproc
>>>>>         movsbl  4(%esp), %eax
>>>>>         movl    %eax, 4(%esp)
>>>>>         jmp     baz
>>>>>         .cfi_endproc
>>>>> .LFE0:
>>>>>         .size   foo, .-foo
>>>>>         .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
>>>>>         .section        .note.GNU-stack,"",@progbits
>>>>> [hjl@gnu-tgl-3 pr14907]$
>>>>>
>>>>> But integer promotion:
>>>>>
>>>>>         movsbl  4(%esp), %eax
>>>>>         movl    %eax, 4(%esp)
>>>>>
>>>>> isn't necessary if incoming arguments and outgoing arguments are the
>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
>>>>> if incoming integer arguments are the same as outgoing arguments to
>>>>> avoid unnecessary integer promotion.
>>>> Is there a particular reason x86 can't use the same mechanisms that
>>>
>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
>>> to return false will break LLVM which assumes that incoming char/short
>>> arguments on x86 are always extended to int.   The following targets
>> Then my suggestion would be to cover this in REE somehow.  We started
>> looking at that a couple years ago and set it aside.   But the basic
>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
>>
>> Jeff
>>
> 
> For
> 
> extern int baz (char c1);
> 
> int
> foo (char c1)
> {
>    return baz (c1);
> }
> 
> on i386, we get these
> 
> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
>          (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
>                      (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> 190 {extendqisi2}
>       (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
>          (nil)))
> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
>                  (const_int 4 [0x4])) [0  S4 A32])
>          (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
>       (nil))
> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
>          (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> <function_decl 0x7f27347aae00
> baz>) [0 baz S1 A8])
>              (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
>       (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> <function_decl 0x7f273
> 47aae00 baz>)
>          (nil))
>      (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
>          (nil)))
> 
> before REE.  How should REE work to elimate
> 
> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
>          (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
>                      (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> 190 {extendqisi2}
>       (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
>          (nil)))
> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
>                  (const_int 4 [0x4])) [0  S4 A32])
>          (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
>       (nil))
You'll have to write code to describe the ABI how the values in question 
are already extended to REE.  It's not going to "just work", you'll have 
to do some development.  I'm not inclined to ACK the expansion patch at 
this time given we've got multiple ways to handle extension removal.


jeff
  
H.J. Lu Nov. 5, 2024, 3:13 a.m. UTC | #6
On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >>
> >>
> >>
> >> On 11/4/24 5:42 PM, H.J. Lu wrote:
> >>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> >>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> >>>>> true, all integer arguments smaller than int are passed as int:
> >>>>>
> >>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> >>>>> extern int baz (char c1);
> >>>>>
> >>>>> int
> >>>>> foo (char c1)
> >>>>> {
> >>>>>      return baz (c1);
> >>>>> }
> >>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> >>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> >>>>>         .file   "x.c"
> >>>>>         .text
> >>>>>         .p2align 4
> >>>>>         .globl  foo
> >>>>>         .type   foo, @function
> >>>>> foo:
> >>>>> .LFB0:
> >>>>>         .cfi_startproc
> >>>>>         movsbl  4(%esp), %eax
> >>>>>         movl    %eax, 4(%esp)
> >>>>>         jmp     baz
> >>>>>         .cfi_endproc
> >>>>> .LFE0:
> >>>>>         .size   foo, .-foo
> >>>>>         .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> >>>>>         .section        .note.GNU-stack,"",@progbits
> >>>>> [hjl@gnu-tgl-3 pr14907]$
> >>>>>
> >>>>> But integer promotion:
> >>>>>
> >>>>>         movsbl  4(%esp), %eax
> >>>>>         movl    %eax, 4(%esp)
> >>>>>
> >>>>> isn't necessary if incoming arguments and outgoing arguments are the
> >>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> >>>>> if incoming integer arguments are the same as outgoing arguments to
> >>>>> avoid unnecessary integer promotion.
> >>>> Is there a particular reason x86 can't use the same mechanisms that
> >>>
> >>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> >>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> >>> to return false will break LLVM which assumes that incoming char/short
> >>> arguments on x86 are always extended to int.   The following targets
> >> Then my suggestion would be to cover this in REE somehow.  We started
> >> looking at that a couple years ago and set it aside.   But the basic
> >> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> >>
> >> Jeff
> >>
> >
> > For
> >
> > extern int baz (char c1);
> >
> > int
> > foo (char c1)
> > {
> >    return baz (c1);
> > }
> >
> > on i386, we get these
> >
> > (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> >          (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> >                      (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > 190 {extendqisi2}
> >       (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> >          (nil)))
> > (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> >                  (const_int 4 [0x4])) [0  S4 A32])
> >          (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> >       (nil))
> > (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> >          (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > <function_decl 0x7f27347aae00
> > baz>) [0 baz S1 A8])
> >              (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> >       (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > <function_decl 0x7f273
> > 47aae00 baz>)
> >          (nil))
> >      (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> >          (nil)))
> >
> > before REE.  How should REE work to elimate
> >
> > (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> >          (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> >                      (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > 190 {extendqisi2}
> >       (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> >          (nil)))
> > (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> >                  (const_int 4 [0x4])) [0  S4 A32])
> >          (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> >       (nil))
> You'll have to write code to describe the ABI how the values in question
> are already extended to REE.  It's not going to "just work", you'll have
> to do some development.  I'm not inclined to ACK the expansion patch at
> this time given we've got multiple ways to handle extension removal.
>

For char and short parameters, x86 ABI leaves the upper bits in
32 bit fields undefined.  If the outgoing arguments are the same
as the incoming arguments, there is no need to extend outgoing
arguments.   Also ABI info isn't available in REE.  I am not sure if
REE is the best fit here.
  
Jeff Law Nov. 5, 2024, 4:22 a.m. UTC | #7
On 11/4/24 8:13 PM, H.J. Lu wrote:
> On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>>
>>
>>
>> On 11/4/24 7:52 PM, H.J. Lu wrote:
>>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>>>>
>>>>
>>>>
>>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
>>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
>>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
>>>>>>> true, all integer arguments smaller than int are passed as int:
>>>>>>>
>>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
>>>>>>> extern int baz (char c1);
>>>>>>>
>>>>>>> int
>>>>>>> foo (char c1)
>>>>>>> {
>>>>>>>       return baz (c1);
>>>>>>> }
>>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
>>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
>>>>>>>          .file   "x.c"
>>>>>>>          .text
>>>>>>>          .p2align 4
>>>>>>>          .globl  foo
>>>>>>>          .type   foo, @function
>>>>>>> foo:
>>>>>>> .LFB0:
>>>>>>>          .cfi_startproc
>>>>>>>          movsbl  4(%esp), %eax
>>>>>>>          movl    %eax, 4(%esp)
>>>>>>>          jmp     baz
>>>>>>>          .cfi_endproc
>>>>>>> .LFE0:
>>>>>>>          .size   foo, .-foo
>>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
>>>>>>>          .section        .note.GNU-stack,"",@progbits
>>>>>>> [hjl@gnu-tgl-3 pr14907]$
>>>>>>>
>>>>>>> But integer promotion:
>>>>>>>
>>>>>>>          movsbl  4(%esp), %eax
>>>>>>>          movl    %eax, 4(%esp)
>>>>>>>
>>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
>>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
>>>>>>> if incoming integer arguments are the same as outgoing arguments to
>>>>>>> avoid unnecessary integer promotion.
>>>>>> Is there a particular reason x86 can't use the same mechanisms that
>>>>>
>>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
>>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
>>>>> to return false will break LLVM which assumes that incoming char/short
>>>>> arguments on x86 are always extended to int.   The following targets
>>>> Then my suggestion would be to cover this in REE somehow.  We started
>>>> looking at that a couple years ago and set it aside.   But the basic
>>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
>>>>
>>>> Jeff
>>>>
>>>
>>> For
>>>
>>> extern int baz (char c1);
>>>
>>> int
>>> foo (char c1)
>>> {
>>>     return baz (c1);
>>> }
>>>
>>> on i386, we get these
>>>
>>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
>>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
>>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
>>> 190 {extendqisi2}
>>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
>>>           (nil)))
>>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
>>>                   (const_int 4 [0x4])) [0  S4 A32])
>>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
>>>        (nil))
>>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
>>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
>>> <function_decl 0x7f27347aae00
>>> baz>) [0 baz S1 A8])
>>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
>>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
>>> <function_decl 0x7f273
>>> 47aae00 baz>)
>>>           (nil))
>>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
>>>           (nil)))
>>>
>>> before REE.  How should REE work to elimate
>>>
>>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
>>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
>>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
>>> 190 {extendqisi2}
>>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
>>>           (nil)))
>>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
>>>                   (const_int 4 [0x4])) [0  S4 A32])
>>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
>>>        (nil))
>> You'll have to write code to describe the ABI how the values in question
>> are already extended to REE.  It's not going to "just work", you'll have
>> to do some development.  I'm not inclined to ACK the expansion patch at
>> this time given we've got multiple ways to handle extension removal.
>>
> 
> For char and short parameters, x86 ABI leaves the upper bits in
> 32 bit fields undefined.  If the outgoing arguments are the same
> as the incoming arguments, there is no need to extend outgoing
> arguments.   Also ABI info isn't available in REE.  I am not sure if
> REE is the best fit here.
The whole point is to make it available and utilize it.

jeff
  
Richard Biener Nov. 5, 2024, 9:09 a.m. UTC | #8
On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
> On 11/4/24 8:13 PM, H.J. Lu wrote:
> > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >>
> >>
> >>
> >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> >>>>>>> true, all integer arguments smaller than int are passed as int:
> >>>>>>>
> >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> >>>>>>> extern int baz (char c1);
> >>>>>>>
> >>>>>>> int
> >>>>>>> foo (char c1)
> >>>>>>> {
> >>>>>>>       return baz (c1);
> >>>>>>> }
> >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> >>>>>>>          .file   "x.c"
> >>>>>>>          .text
> >>>>>>>          .p2align 4
> >>>>>>>          .globl  foo
> >>>>>>>          .type   foo, @function
> >>>>>>> foo:
> >>>>>>> .LFB0:
> >>>>>>>          .cfi_startproc
> >>>>>>>          movsbl  4(%esp), %eax
> >>>>>>>          movl    %eax, 4(%esp)
> >>>>>>>          jmp     baz
> >>>>>>>          .cfi_endproc
> >>>>>>> .LFE0:
> >>>>>>>          .size   foo, .-foo
> >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> >>>>>>>          .section        .note.GNU-stack,"",@progbits
> >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> >>>>>>>
> >>>>>>> But integer promotion:
> >>>>>>>
> >>>>>>>          movsbl  4(%esp), %eax
> >>>>>>>          movl    %eax, 4(%esp)
> >>>>>>>
> >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> >>>>>>> avoid unnecessary integer promotion.
> >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> >>>>>
> >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> >>>>> to return false will break LLVM which assumes that incoming char/short
> >>>>> arguments on x86 are always extended to int.   The following targets
> >>>> Then my suggestion would be to cover this in REE somehow.  We started
> >>>> looking at that a couple years ago and set it aside.   But the basic
> >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> >>>>
> >>>> Jeff
> >>>>
> >>>
> >>> For
> >>>
> >>> extern int baz (char c1);
> >>>
> >>> int
> >>> foo (char c1)
> >>> {
> >>>     return baz (c1);
> >>> }
> >>>
> >>> on i386, we get these
> >>>
> >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> >>> 190 {extendqisi2}
> >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> >>>           (nil)))
> >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> >>>                   (const_int 4 [0x4])) [0  S4 A32])
> >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> >>>        (nil))
> >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> >>> <function_decl 0x7f27347aae00
> >>> baz>) [0 baz S1 A8])
> >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> >>> <function_decl 0x7f273
> >>> 47aae00 baz>)
> >>>           (nil))
> >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> >>>           (nil)))
> >>>
> >>> before REE.  How should REE work to elimate
> >>>
> >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> >>> 190 {extendqisi2}
> >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> >>>           (nil)))
> >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> >>>                   (const_int 4 [0x4])) [0  S4 A32])
> >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> >>>        (nil))
> >> You'll have to write code to describe the ABI how the values in question
> >> are already extended to REE.  It's not going to "just work", you'll have
> >> to do some development.  I'm not inclined to ACK the expansion patch at
> >> this time given we've got multiple ways to handle extension removal.
> >>
> >
> > For char and short parameters, x86 ABI leaves the upper bits in
> > 32 bit fields undefined.  If the outgoing arguments are the same
> > as the incoming arguments, there is no need to extend outgoing
> > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > REE is the best fit here.
> The whole point is to make it available and utilize it.

I think most of the complication is due to the promotion being performed
on GIMPLE but GIMPLE not taking the incoming argument as promoted.

On my wishlist is to defer "applying" promote_prototypes to call RTL
expansion and keeping GIMPLE "type correct", aka _not_ promoting
arguments in the C/C++/Ada frontends.  I'll note that not all frontends
perform this promotion (sic!), and thus HJs pattern matching of the
sibling call site is required for correctness even!

I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
should reflect the incoming promotion state - using the target hook in
RTL expansion is "wrong" (see above - not applied consistently), but
DECL_ARG_TYPE should tell you where it was applied and where not.
With LTO and calling C from fortran this optimization might go wrong
(fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).

I'd say we should either

 a) drop targetm.promote_prototypes (and hope nobody uses it to
     implement an ABI requirement)
 b) apply targetm.promote_prototypes during RTL expansion

So the patch as-is isn't good I think (if you don't succeed with
Fortran due to lack of small integer types you can also try D
or Modula for the LTO wrong-code testcase).

ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
as said.

Richard.

> jeff
  
Richard Biener Nov. 5, 2024, 9:27 a.m. UTC | #9
On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> >
> >
> >
> > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > >>
> > >>
> > >>
> > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > >>>>
> > >>>>
> > >>>>
> > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > >>>>>>
> > >>>>>>
> > >>>>>>
> > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > >>>>>>>
> > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > >>>>>>> extern int baz (char c1);
> > >>>>>>>
> > >>>>>>> int
> > >>>>>>> foo (char c1)
> > >>>>>>> {
> > >>>>>>>       return baz (c1);
> > >>>>>>> }
> > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > >>>>>>>          .file   "x.c"
> > >>>>>>>          .text
> > >>>>>>>          .p2align 4
> > >>>>>>>          .globl  foo
> > >>>>>>>          .type   foo, @function
> > >>>>>>> foo:
> > >>>>>>> .LFB0:
> > >>>>>>>          .cfi_startproc
> > >>>>>>>          movsbl  4(%esp), %eax
> > >>>>>>>          movl    %eax, 4(%esp)
> > >>>>>>>          jmp     baz
> > >>>>>>>          .cfi_endproc
> > >>>>>>> .LFE0:
> > >>>>>>>          .size   foo, .-foo
> > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > >>>>>>>
> > >>>>>>> But integer promotion:
> > >>>>>>>
> > >>>>>>>          movsbl  4(%esp), %eax
> > >>>>>>>          movl    %eax, 4(%esp)
> > >>>>>>>
> > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > >>>>>>> avoid unnecessary integer promotion.
> > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > >>>>>
> > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > >>>>> to return false will break LLVM which assumes that incoming char/short
> > >>>>> arguments on x86 are always extended to int.   The following targets
> > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > >>>> looking at that a couple years ago and set it aside.   But the basic
> > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > >>>>
> > >>>> Jeff
> > >>>>
> > >>>
> > >>> For
> > >>>
> > >>> extern int baz (char c1);
> > >>>
> > >>> int
> > >>> foo (char c1)
> > >>> {
> > >>>     return baz (c1);
> > >>> }
> > >>>
> > >>> on i386, we get these
> > >>>
> > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > >>> 190 {extendqisi2}
> > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > >>>           (nil)))
> > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > >>>        (nil))
> > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > >>> <function_decl 0x7f27347aae00
> > >>> baz>) [0 baz S1 A8])
> > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > >>> <function_decl 0x7f273
> > >>> 47aae00 baz>)
> > >>>           (nil))
> > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > >>>           (nil)))
> > >>>
> > >>> before REE.  How should REE work to elimate
> > >>>
> > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > >>> 190 {extendqisi2}
> > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > >>>           (nil)))
> > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > >>>        (nil))
> > >> You'll have to write code to describe the ABI how the values in question
> > >> are already extended to REE.  It's not going to "just work", you'll have
> > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > >> this time given we've got multiple ways to handle extension removal.
> > >>
> > >
> > > For char and short parameters, x86 ABI leaves the upper bits in
> > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > as the incoming arguments, there is no need to extend outgoing
> > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > REE is the best fit here.
> > The whole point is to make it available and utilize it.
>
> I think most of the complication is due to the promotion being performed
> on GIMPLE but GIMPLE not taking the incoming argument as promoted.
>
> On my wishlist is to defer "applying" promote_prototypes to call RTL
> expansion and keeping GIMPLE "type correct", aka _not_ promoting
> arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> perform this promotion (sic!), and thus HJs pattern matching of the
> sibling call site is required for correctness even!
>
> I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> should reflect the incoming promotion state - using the target hook in
> RTL expansion is "wrong" (see above - not applied consistently), but
> DECL_ARG_TYPE should tell you where it was applied and where not.
> With LTO and calling C from fortran this optimization might go wrong
> (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).
>
> I'd say we should either
>
>  a) drop targetm.promote_prototypes (and hope nobody uses it to
>      implement an ABI requirement)
>  b) apply targetm.promote_prototypes during RTL expansion
>
> So the patch as-is isn't good I think (if you don't succeed with
> Fortran due to lack of small integer types you can also try D
> or Modula for the LTO wrong-code testcase).
>
> ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> as said.

program test
    use iso_c_binding, only: c_short
    interface
      subroutine foo(a) bind(c)
        import c_short
        integer(kind=c_short), intent(in), value :: a
      end subroutine foo
    end interface
    integer(kind=c_short) a(5);
    call foo (a(3))
end

shows

;; foo (_7);

(insn 13 12 14 (set (reg:HI 102)
        (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
                (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
A16])) "t.f90":10:19 -1
     (nil))

(insn 14 13 15 (set (reg:HI 5 di)
        (reg:HI 102)) "t.f90":10:19 -1
     (nil))

(call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
<function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
        (const_int 0 [0])) "t.f90":10:19 -1
     (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
<function_decl 0x7f7fcc430800 foo>)
        (nil))
    (expr_list:HI (use (reg:HI 5 di))
        (nil)))

and with -Os generates

        movw    10(%rsp), %di
        call    foo

but a C TU would assume 'foo' receives promoted arguments.

Richard.



> Richard.
>
> > jeff
  
H.J. Lu Nov. 5, 2024, 9:49 p.m. UTC | #10
On Tue, Nov 5, 2024 at 5:27 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > >
> > >
> > >
> > > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > >>
> > > >>
> > > >>
> > > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > >>>>
> > > >>>>
> > > >>>>
> > > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > >>>>>>
> > > >>>>>>
> > > >>>>>>
> > > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > > >>>>>>>
> > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > > >>>>>>> extern int baz (char c1);
> > > >>>>>>>
> > > >>>>>>> int
> > > >>>>>>> foo (char c1)
> > > >>>>>>> {
> > > >>>>>>>       return baz (c1);
> > > >>>>>>> }
> > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > > >>>>>>>          .file   "x.c"
> > > >>>>>>>          .text
> > > >>>>>>>          .p2align 4
> > > >>>>>>>          .globl  foo
> > > >>>>>>>          .type   foo, @function
> > > >>>>>>> foo:
> > > >>>>>>> .LFB0:
> > > >>>>>>>          .cfi_startproc
> > > >>>>>>>          movsbl  4(%esp), %eax
> > > >>>>>>>          movl    %eax, 4(%esp)
> > > >>>>>>>          jmp     baz
> > > >>>>>>>          .cfi_endproc
> > > >>>>>>> .LFE0:
> > > >>>>>>>          .size   foo, .-foo
> > > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > > >>>>>>>
> > > >>>>>>> But integer promotion:
> > > >>>>>>>
> > > >>>>>>>          movsbl  4(%esp), %eax
> > > >>>>>>>          movl    %eax, 4(%esp)
> > > >>>>>>>
> > > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > > >>>>>>> avoid unnecessary integer promotion.
> > > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > > >>>>>
> > > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > > >>>>> to return false will break LLVM which assumes that incoming char/short
> > > >>>>> arguments on x86 are always extended to int.   The following targets
> > > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > > >>>> looking at that a couple years ago and set it aside.   But the basic
> > > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > > >>>>
> > > >>>> Jeff
> > > >>>>
> > > >>>
> > > >>> For
> > > >>>
> > > >>> extern int baz (char c1);
> > > >>>
> > > >>> int
> > > >>> foo (char c1)
> > > >>> {
> > > >>>     return baz (c1);
> > > >>> }
> > > >>>
> > > >>> on i386, we get these
> > > >>>
> > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > >>> 190 {extendqisi2}
> > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > >>>           (nil)))
> > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > >>>        (nil))
> > > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > > >>> <function_decl 0x7f27347aae00
> > > >>> baz>) [0 baz S1 A8])
> > > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > > >>> <function_decl 0x7f273
> > > >>> 47aae00 baz>)
> > > >>>           (nil))
> > > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > > >>>           (nil)))
> > > >>>
> > > >>> before REE.  How should REE work to elimate
> > > >>>
> > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > >>> 190 {extendqisi2}
> > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > >>>           (nil)))
> > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > >>>        (nil))
> > > >> You'll have to write code to describe the ABI how the values in question
> > > >> are already extended to REE.  It's not going to "just work", you'll have
> > > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > > >> this time given we've got multiple ways to handle extension removal.
> > > >>
> > > >
> > > > For char and short parameters, x86 ABI leaves the upper bits in
> > > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > > as the incoming arguments, there is no need to extend outgoing
> > > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > > REE is the best fit here.
> > > The whole point is to make it available and utilize it.
> >
> > I think most of the complication is due to the promotion being performed
> > on GIMPLE but GIMPLE not taking the incoming argument as promoted.
> >
> > On my wishlist is to defer "applying" promote_prototypes to call RTL
> > expansion and keeping GIMPLE "type correct", aka _not_ promoting
> > arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> > perform this promotion (sic!), and thus HJs pattern matching of the
> > sibling call site is required for correctness even!
> >
> > I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> > should reflect the incoming promotion state - using the target hook in
> > RTL expansion is "wrong" (see above - not applied consistently), but
> > DECL_ARG_TYPE should tell you where it was applied and where not.
> > With LTO and calling C from fortran this optimization might go wrong
> > (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).

My patch only uses DECL_ARG_TYPE to compare against the incoming
argument to check if the incoming argument is used as outgoing argument
unchanged.  My patch doesn't change any functionality whether the caller
does promotion or not since GCC always assumes the upper bits are
undefined.  My patch keeps the incoming argument unchanged only
when there is no change in source code.

> >
> > I'd say we should either
> >
> >  a) drop targetm.promote_prototypes (and hope nobody uses it to
> >      implement an ABI requirement)
> >  b) apply targetm.promote_prototypes during RTL expansion
> >
> > So the patch as-is isn't good I think (if you don't succeed with
> > Fortran due to lack of small integer types you can also try D
> > or Modula for the LTO wrong-code testcase).
> >
> > ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> > as said.
>
> program test
>     use iso_c_binding, only: c_short
>     interface
>       subroutine foo(a) bind(c)
>         import c_short
>         integer(kind=c_short), intent(in), value :: a
>       end subroutine foo
>     end interface
>     integer(kind=c_short) a(5);
>     call foo (a(3))
> end
>
> shows
>
> ;; foo (_7);
>
> (insn 13 12 14 (set (reg:HI 102)
>         (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
>                 (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
> A16])) "t.f90":10:19 -1
>      (nil))
>
> (insn 14 13 15 (set (reg:HI 5 di)
>         (reg:HI 102)) "t.f90":10:19 -1
>      (nil))
>
> (call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
> <function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
>         (const_int 0 [0])) "t.f90":10:19 -1
>      (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
> <function_decl 0x7f7fcc430800 foo>)
>         (nil))
>     (expr_list:HI (use (reg:HI 5 di))
>         (nil)))
>
> and with -Os generates
>
>         movw    10(%rsp), %di
>         call    foo
>
> but a C TU would assume 'foo' receives promoted arguments.

GCC follows x86 psABI and assumes the upper bits are undefined.
But LLVM assumes the upper bits are extended:

$ cat x1.c
extern int baz (int);

int
foo (char c1)
{
  return baz (c1) + 1;
}
$ gcc -S -O2 x1.c
$ cat x1.s
.file "x1.c"
.text
.p2align 4
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
movsbl %dil, %edi
call baz
addq $8, %rsp
.cfi_def_cfa_offset 8
addl $1, %eax
ret
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
.section .note.GNU-stack,"",@progbits
$ clang -S -O2 x1.c
$ cat x1.s
.text
.file "x1.c"
.globl foo                             # -- Begin function foo
.p2align 4, 0x90
.type foo,@function
foo:                                    # @foo
.cfi_startproc
# %bb.0:
pushq %rax
.cfi_def_cfa_offset 16
callq baz
incl %eax
popq %rcx
.cfi_def_cfa_offset 8
retq
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
.cfi_endproc
                                        # -- End function
.ident "clang version 19.1.0 (Fedora 19.1.0-1.fc41)"
.section ".note.GNU-stack","",@progbits
.addrsig
$
  
Richard Biener Nov. 6, 2024, 8:29 a.m. UTC | #11
On Tue, Nov 5, 2024 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Nov 5, 2024 at 5:27 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > >
> > > >
> > > >
> > > > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > >>
> > > > >>
> > > > >>
> > > > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > > > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > >>>>
> > > > >>>>
> > > > >>>>
> > > > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > > > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > >>>>>>
> > > > >>>>>>
> > > > >>>>>>
> > > > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > > > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > > > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > > > >>>>>>>
> > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > > > >>>>>>> extern int baz (char c1);
> > > > >>>>>>>
> > > > >>>>>>> int
> > > > >>>>>>> foo (char c1)
> > > > >>>>>>> {
> > > > >>>>>>>       return baz (c1);
> > > > >>>>>>> }
> > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > > > >>>>>>>          .file   "x.c"
> > > > >>>>>>>          .text
> > > > >>>>>>>          .p2align 4
> > > > >>>>>>>          .globl  foo
> > > > >>>>>>>          .type   foo, @function
> > > > >>>>>>> foo:
> > > > >>>>>>> .LFB0:
> > > > >>>>>>>          .cfi_startproc
> > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > >>>>>>>          jmp     baz
> > > > >>>>>>>          .cfi_endproc
> > > > >>>>>>> .LFE0:
> > > > >>>>>>>          .size   foo, .-foo
> > > > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > > > >>>>>>>
> > > > >>>>>>> But integer promotion:
> > > > >>>>>>>
> > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > >>>>>>>
> > > > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > > > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > > > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > > > >>>>>>> avoid unnecessary integer promotion.
> > > > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > > > >>>>>
> > > > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > > > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > > > >>>>> to return false will break LLVM which assumes that incoming char/short
> > > > >>>>> arguments on x86 are always extended to int.   The following targets
> > > > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > > > >>>> looking at that a couple years ago and set it aside.   But the basic
> > > > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > > > >>>>
> > > > >>>> Jeff
> > > > >>>>
> > > > >>>
> > > > >>> For
> > > > >>>
> > > > >>> extern int baz (char c1);
> > > > >>>
> > > > >>> int
> > > > >>> foo (char c1)
> > > > >>> {
> > > > >>>     return baz (c1);
> > > > >>> }
> > > > >>>
> > > > >>> on i386, we get these
> > > > >>>
> > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > >>> 190 {extendqisi2}
> > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > >>>           (nil)))
> > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > >>>        (nil))
> > > > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > > > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > > > >>> <function_decl 0x7f27347aae00
> > > > >>> baz>) [0 baz S1 A8])
> > > > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > > > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > > > >>> <function_decl 0x7f273
> > > > >>> 47aae00 baz>)
> > > > >>>           (nil))
> > > > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > > > >>>           (nil)))
> > > > >>>
> > > > >>> before REE.  How should REE work to elimate
> > > > >>>
> > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > >>> 190 {extendqisi2}
> > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > >>>           (nil)))
> > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > >>>        (nil))
> > > > >> You'll have to write code to describe the ABI how the values in question
> > > > >> are already extended to REE.  It's not going to "just work", you'll have
> > > > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > > > >> this time given we've got multiple ways to handle extension removal.
> > > > >>
> > > > >
> > > > > For char and short parameters, x86 ABI leaves the upper bits in
> > > > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > > > as the incoming arguments, there is no need to extend outgoing
> > > > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > > > REE is the best fit here.
> > > > The whole point is to make it available and utilize it.
> > >
> > > I think most of the complication is due to the promotion being performed
> > > on GIMPLE but GIMPLE not taking the incoming argument as promoted.
> > >
> > > On my wishlist is to defer "applying" promote_prototypes to call RTL
> > > expansion and keeping GIMPLE "type correct", aka _not_ promoting
> > > arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> > > perform this promotion (sic!), and thus HJs pattern matching of the
> > > sibling call site is required for correctness even!
> > >
> > > I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> > > should reflect the incoming promotion state - using the target hook in
> > > RTL expansion is "wrong" (see above - not applied consistently), but
> > > DECL_ARG_TYPE should tell you where it was applied and where not.
> > > With LTO and calling C from fortran this optimization might go wrong
> > > (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).
>
> My patch only uses DECL_ARG_TYPE to compare against the incoming
> argument to check if the incoming argument is used as outgoing argument
> unchanged.  My patch doesn't change any functionality whether the caller
> does promotion or not since GCC always assumes the upper bits are
> undefined.  My patch keeps the incoming argument unchanged only
> when there is no change in source code.
>
> > >
> > > I'd say we should either
> > >
> > >  a) drop targetm.promote_prototypes (and hope nobody uses it to
> > >      implement an ABI requirement)
> > >  b) apply targetm.promote_prototypes during RTL expansion
> > >
> > > So the patch as-is isn't good I think (if you don't succeed with
> > > Fortran due to lack of small integer types you can also try D
> > > or Modula for the LTO wrong-code testcase).
> > >
> > > ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> > > as said.
> >
> > program test
> >     use iso_c_binding, only: c_short
> >     interface
> >       subroutine foo(a) bind(c)
> >         import c_short
> >         integer(kind=c_short), intent(in), value :: a
> >       end subroutine foo
> >     end interface
> >     integer(kind=c_short) a(5);
> >     call foo (a(3))
> > end
> >
> > shows
> >
> > ;; foo (_7);
> >
> > (insn 13 12 14 (set (reg:HI 102)
> >         (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
> >                 (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
> > A16])) "t.f90":10:19 -1
> >      (nil))
> >
> > (insn 14 13 15 (set (reg:HI 5 di)
> >         (reg:HI 102)) "t.f90":10:19 -1
> >      (nil))
> >
> > (call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
> > <function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
> >         (const_int 0 [0])) "t.f90":10:19 -1
> >      (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
> > <function_decl 0x7f7fcc430800 foo>)
> >         (nil))
> >     (expr_list:HI (use (reg:HI 5 di))
> >         (nil)))
> >
> > and with -Os generates
> >
> >         movw    10(%rsp), %di
> >         call    foo
> >
> > but a C TU would assume 'foo' receives promoted arguments.
>
> GCC follows x86 psABI and assumes the upper bits are undefined.
> But LLVM assumes the upper bits are extended:

I'm saying your patch increases the likelyhood we run into problems with
out internal inconsistency of handling promote_prototypes - it adds to the
number of places we wrongly assume all callers honor it when the
callee was compiled in a way to honor the target hook.

So IMO the patch is wrong (in the sense of wrong-code), and as given
promote_prototypes is only useful for TU local binding functions.

I showed a way to rectify this by applying promote_prototypes at
call RTL expansion time (instead of or in addition to frontends).  Iff we
want to keep doing this "optimization" - given we perform this optimization
in combine it got effectively part of the de-factor x86 ABI as we assume
our callers perform the promotion, no?

Richard.

> $ cat x1.c
> extern int baz (int);
>
> int
> foo (char c1)
> {
>   return baz (c1) + 1;
> }
> $ gcc -S -O2 x1.c
> $ cat x1.s
> .file "x1.c"
> .text
> .p2align 4
> .globl foo
> .type foo, @function
> foo:
> .LFB0:
> .cfi_startproc
> subq $8, %rsp
> .cfi_def_cfa_offset 16
> movsbl %dil, %edi
> call baz
> addq $8, %rsp
> .cfi_def_cfa_offset 8
> addl $1, %eax
> ret
> .cfi_endproc
> .LFE0:
> .size foo, .-foo
> .ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> .section .note.GNU-stack,"",@progbits
> $ clang -S -O2 x1.c
> $ cat x1.s
> .text
> .file "x1.c"
> .globl foo                             # -- Begin function foo
> .p2align 4, 0x90
> .type foo,@function
> foo:                                    # @foo
> .cfi_startproc
> # %bb.0:
> pushq %rax
> .cfi_def_cfa_offset 16
> callq baz
> incl %eax
> popq %rcx
> .cfi_def_cfa_offset 8
> retq
> .Lfunc_end0:
> .size foo, .Lfunc_end0-foo
> .cfi_endproc
>                                         # -- End function
> .ident "clang version 19.1.0 (Fedora 19.1.0-1.fc41)"
> .section ".note.GNU-stack","",@progbits
> .addrsig
> $
>
> --
> H.J.
  
H.J. Lu Nov. 6, 2024, 9:52 a.m. UTC | #12
On Wed, Nov 6, 2024 at 4:29 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Nov 5, 2024 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Nov 5, 2024 at 5:27 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > >
> > > > >
> > > > >
> > > > > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > > > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > >>
> > > > > >>
> > > > > >>
> > > > > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > > > > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > >>>>
> > > > > >>>>
> > > > > >>>>
> > > > > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > > > > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > >>>>>>
> > > > > >>>>>>
> > > > > >>>>>>
> > > > > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > > > > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > > > > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > > > > >>>>>>>
> > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > > > > >>>>>>> extern int baz (char c1);
> > > > > >>>>>>>
> > > > > >>>>>>> int
> > > > > >>>>>>> foo (char c1)
> > > > > >>>>>>> {
> > > > > >>>>>>>       return baz (c1);
> > > > > >>>>>>> }
> > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > > > > >>>>>>>          .file   "x.c"
> > > > > >>>>>>>          .text
> > > > > >>>>>>>          .p2align 4
> > > > > >>>>>>>          .globl  foo
> > > > > >>>>>>>          .type   foo, @function
> > > > > >>>>>>> foo:
> > > > > >>>>>>> .LFB0:
> > > > > >>>>>>>          .cfi_startproc
> > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > >>>>>>>          jmp     baz
> > > > > >>>>>>>          .cfi_endproc
> > > > > >>>>>>> .LFE0:
> > > > > >>>>>>>          .size   foo, .-foo
> > > > > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > > > > >>>>>>>
> > > > > >>>>>>> But integer promotion:
> > > > > >>>>>>>
> > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > >>>>>>>
> > > > > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > > > > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > > > > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > > > > >>>>>>> avoid unnecessary integer promotion.
> > > > > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > > > > >>>>>
> > > > > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > > > > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > > > > >>>>> to return false will break LLVM which assumes that incoming char/short
> > > > > >>>>> arguments on x86 are always extended to int.   The following targets
> > > > > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > > > > >>>> looking at that a couple years ago and set it aside.   But the basic
> > > > > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > > > > >>>>
> > > > > >>>> Jeff
> > > > > >>>>
> > > > > >>>
> > > > > >>> For
> > > > > >>>
> > > > > >>> extern int baz (char c1);
> > > > > >>>
> > > > > >>> int
> > > > > >>> foo (char c1)
> > > > > >>> {
> > > > > >>>     return baz (c1);
> > > > > >>> }
> > > > > >>>
> > > > > >>> on i386, we get these
> > > > > >>>
> > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > >>> 190 {extendqisi2}
> > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > >>>           (nil)))
> > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > >>>        (nil))
> > > > > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > > > > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > > > > >>> <function_decl 0x7f27347aae00
> > > > > >>> baz>) [0 baz S1 A8])
> > > > > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > > > > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > > > > >>> <function_decl 0x7f273
> > > > > >>> 47aae00 baz>)
> > > > > >>>           (nil))
> > > > > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > > > > >>>           (nil)))
> > > > > >>>
> > > > > >>> before REE.  How should REE work to elimate
> > > > > >>>
> > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > >>> 190 {extendqisi2}
> > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > >>>           (nil)))
> > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > >>>        (nil))
> > > > > >> You'll have to write code to describe the ABI how the values in question
> > > > > >> are already extended to REE.  It's not going to "just work", you'll have
> > > > > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > > > > >> this time given we've got multiple ways to handle extension removal.
> > > > > >>
> > > > > >
> > > > > > For char and short parameters, x86 ABI leaves the upper bits in
> > > > > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > > > > as the incoming arguments, there is no need to extend outgoing
> > > > > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > > > > REE is the best fit here.
> > > > > The whole point is to make it available and utilize it.
> > > >
> > > > I think most of the complication is due to the promotion being performed
> > > > on GIMPLE but GIMPLE not taking the incoming argument as promoted.
> > > >
> > > > On my wishlist is to defer "applying" promote_prototypes to call RTL
> > > > expansion and keeping GIMPLE "type correct", aka _not_ promoting
> > > > arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> > > > perform this promotion (sic!), and thus HJs pattern matching of the
> > > > sibling call site is required for correctness even!
> > > >
> > > > I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> > > > should reflect the incoming promotion state - using the target hook in
> > > > RTL expansion is "wrong" (see above - not applied consistently), but
> > > > DECL_ARG_TYPE should tell you where it was applied and where not.
> > > > With LTO and calling C from fortran this optimization might go wrong
> > > > (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).
> >
> > My patch only uses DECL_ARG_TYPE to compare against the incoming
> > argument to check if the incoming argument is used as outgoing argument
> > unchanged.  My patch doesn't change any functionality whether the caller
> > does promotion or not since GCC always assumes the upper bits are
> > undefined.  My patch keeps the incoming argument unchanged only
> > when there is no change in source code.
> >
> > > >
> > > > I'd say we should either
> > > >
> > > >  a) drop targetm.promote_prototypes (and hope nobody uses it to
> > > >      implement an ABI requirement)
> > > >  b) apply targetm.promote_prototypes during RTL expansion
> > > >
> > > > So the patch as-is isn't good I think (if you don't succeed with
> > > > Fortran due to lack of small integer types you can also try D
> > > > or Modula for the LTO wrong-code testcase).
> > > >
> > > > ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> > > > as said.
> > >
> > > program test
> > >     use iso_c_binding, only: c_short
> > >     interface
> > >       subroutine foo(a) bind(c)
> > >         import c_short
> > >         integer(kind=c_short), intent(in), value :: a
> > >       end subroutine foo
> > >     end interface
> > >     integer(kind=c_short) a(5);
> > >     call foo (a(3))
> > > end
> > >
> > > shows
> > >
> > > ;; foo (_7);
> > >
> > > (insn 13 12 14 (set (reg:HI 102)
> > >         (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
> > >                 (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
> > > A16])) "t.f90":10:19 -1
> > >      (nil))
> > >
> > > (insn 14 13 15 (set (reg:HI 5 di)
> > >         (reg:HI 102)) "t.f90":10:19 -1
> > >      (nil))
> > >
> > > (call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
> > > <function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
> > >         (const_int 0 [0])) "t.f90":10:19 -1
> > >      (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
> > > <function_decl 0x7f7fcc430800 foo>)
> > >         (nil))
> > >     (expr_list:HI (use (reg:HI 5 di))
> > >         (nil)))
> > >
> > > and with -Os generates
> > >
> > >         movw    10(%rsp), %di
> > >         call    foo
> > >
> > > but a C TU would assume 'foo' receives promoted arguments.
> >
> > GCC follows x86 psABI and assumes the upper bits are undefined.
> > But LLVM assumes the upper bits are extended:
>
> I'm saying your patch increases the likelyhood we run into problems with
> out internal inconsistency of handling promote_prototypes - it adds to the
> number of places we wrongly assume all callers honor it when the
> callee was compiled in a way to honor the target hook.
>
> So IMO the patch is wrong (in the sense of wrong-code), and as given
> promote_prototypes is only useful for TU local binding functions.

TARGET_PROMOTE_PROTOTYPES isn't defined for psABI purpose.
x86 psABI doesn't require it.   GCC uses only the lower bits of incoming
arguments.   But it isn't the GCC's job to promote incoming arguments
and copy them to outgoing arguments since not usages of such functions
are compiled by GCC.

> I showed a way to rectify this by applying promote_prototypes at
> call RTL expansion time (instead of or in addition to frontends).  Iff we
> want to keep doing this "optimization" - given we perform this optimization
> in combine it got effectively part of the de-factor x86 ABI as we assume
> our callers perform the promotion, no?
>
> Richard.
>
> > $ cat x1.c
> > extern int baz (int);
> >
> > int
> > foo (char c1)
> > {
> >   return baz (c1) + 1;
> > }
> > $ gcc -S -O2 x1.c
> > $ cat x1.s
> > .file "x1.c"
> > .text
> > .p2align 4
> > .globl foo
> > .type foo, @function
> > foo:
> > .LFB0:
> > .cfi_startproc
> > subq $8, %rsp
> > .cfi_def_cfa_offset 16
> > movsbl %dil, %edi
> > call baz
> > addq $8, %rsp
> > .cfi_def_cfa_offset 8
> > addl $1, %eax
> > ret
> > .cfi_endproc
> > .LFE0:
> > .size foo, .-foo
> > .ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > .section .note.GNU-stack,"",@progbits
> > $ clang -S -O2 x1.c
> > $ cat x1.s
> > .text
> > .file "x1.c"
> > .globl foo                             # -- Begin function foo
> > .p2align 4, 0x90
> > .type foo,@function
> > foo:                                    # @foo
> > .cfi_startproc
> > # %bb.0:
> > pushq %rax
> > .cfi_def_cfa_offset 16
> > callq baz
> > incl %eax
> > popq %rcx
> > .cfi_def_cfa_offset 8
> > retq
> > .Lfunc_end0:
> > .size foo, .Lfunc_end0-foo
> > .cfi_endproc
> >                                         # -- End function
> > .ident "clang version 19.1.0 (Fedora 19.1.0-1.fc41)"
> > .section ".note.GNU-stack","",@progbits
> > .addrsig
> > $
> >
> > --
> > H.J.
  
Richard Biener Nov. 6, 2024, 10:01 a.m. UTC | #13
On Wed, Nov 6, 2024 at 10:52 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Nov 6, 2024 at 4:29 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Nov 5, 2024 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Nov 5, 2024 at 5:27 PM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
> > > > <richard.guenther@gmail.com> wrote:
> > > > >
> > > > > On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > > > > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > >>
> > > > > > >>
> > > > > > >>
> > > > > > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > > > > > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > >>>>
> > > > > > >>>>
> > > > > > >>>>
> > > > > > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > > > > > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > >>>>>>
> > > > > > >>>>>>
> > > > > > >>>>>>
> > > > > > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > > > > > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > > > > > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > > > > > >>>>>>>
> > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > > > > > >>>>>>> extern int baz (char c1);
> > > > > > >>>>>>>
> > > > > > >>>>>>> int
> > > > > > >>>>>>> foo (char c1)
> > > > > > >>>>>>> {
> > > > > > >>>>>>>       return baz (c1);
> > > > > > >>>>>>> }
> > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > > > > > >>>>>>>          .file   "x.c"
> > > > > > >>>>>>>          .text
> > > > > > >>>>>>>          .p2align 4
> > > > > > >>>>>>>          .globl  foo
> > > > > > >>>>>>>          .type   foo, @function
> > > > > > >>>>>>> foo:
> > > > > > >>>>>>> .LFB0:
> > > > > > >>>>>>>          .cfi_startproc
> > > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > > >>>>>>>          jmp     baz
> > > > > > >>>>>>>          .cfi_endproc
> > > > > > >>>>>>> .LFE0:
> > > > > > >>>>>>>          .size   foo, .-foo
> > > > > > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > > > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > > > > > >>>>>>>
> > > > > > >>>>>>> But integer promotion:
> > > > > > >>>>>>>
> > > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > > >>>>>>>
> > > > > > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > > > > > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > > > > > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > > > > > >>>>>>> avoid unnecessary integer promotion.
> > > > > > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > > > > > >>>>>
> > > > > > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > > > > > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > > > > > >>>>> to return false will break LLVM which assumes that incoming char/short
> > > > > > >>>>> arguments on x86 are always extended to int.   The following targets
> > > > > > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > > > > > >>>> looking at that a couple years ago and set it aside.   But the basic
> > > > > > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > > > > > >>>>
> > > > > > >>>> Jeff
> > > > > > >>>>
> > > > > > >>>
> > > > > > >>> For
> > > > > > >>>
> > > > > > >>> extern int baz (char c1);
> > > > > > >>>
> > > > > > >>> int
> > > > > > >>> foo (char c1)
> > > > > > >>> {
> > > > > > >>>     return baz (c1);
> > > > > > >>> }
> > > > > > >>>
> > > > > > >>> on i386, we get these
> > > > > > >>>
> > > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > > >>> 190 {extendqisi2}
> > > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > > >>>           (nil)))
> > > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > > >>>        (nil))
> > > > > > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > > > > > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > > > > > >>> <function_decl 0x7f27347aae00
> > > > > > >>> baz>) [0 baz S1 A8])
> > > > > > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > > > > > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > > > > > >>> <function_decl 0x7f273
> > > > > > >>> 47aae00 baz>)
> > > > > > >>>           (nil))
> > > > > > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > > > > > >>>           (nil)))
> > > > > > >>>
> > > > > > >>> before REE.  How should REE work to elimate
> > > > > > >>>
> > > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > > >>> 190 {extendqisi2}
> > > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > > >>>           (nil)))
> > > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > > >>>        (nil))
> > > > > > >> You'll have to write code to describe the ABI how the values in question
> > > > > > >> are already extended to REE.  It's not going to "just work", you'll have
> > > > > > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > > > > > >> this time given we've got multiple ways to handle extension removal.
> > > > > > >>
> > > > > > >
> > > > > > > For char and short parameters, x86 ABI leaves the upper bits in
> > > > > > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > > > > > as the incoming arguments, there is no need to extend outgoing
> > > > > > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > > > > > REE is the best fit here.
> > > > > > The whole point is to make it available and utilize it.
> > > > >
> > > > > I think most of the complication is due to the promotion being performed
> > > > > on GIMPLE but GIMPLE not taking the incoming argument as promoted.
> > > > >
> > > > > On my wishlist is to defer "applying" promote_prototypes to call RTL
> > > > > expansion and keeping GIMPLE "type correct", aka _not_ promoting
> > > > > arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> > > > > perform this promotion (sic!), and thus HJs pattern matching of the
> > > > > sibling call site is required for correctness even!
> > > > >
> > > > > I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> > > > > should reflect the incoming promotion state - using the target hook in
> > > > > RTL expansion is "wrong" (see above - not applied consistently), but
> > > > > DECL_ARG_TYPE should tell you where it was applied and where not.
> > > > > With LTO and calling C from fortran this optimization might go wrong
> > > > > (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).
> > >
> > > My patch only uses DECL_ARG_TYPE to compare against the incoming
> > > argument to check if the incoming argument is used as outgoing argument
> > > unchanged.  My patch doesn't change any functionality whether the caller
> > > does promotion or not since GCC always assumes the upper bits are
> > > undefined.  My patch keeps the incoming argument unchanged only
> > > when there is no change in source code.
> > >
> > > > >
> > > > > I'd say we should either
> > > > >
> > > > >  a) drop targetm.promote_prototypes (and hope nobody uses it to
> > > > >      implement an ABI requirement)
> > > > >  b) apply targetm.promote_prototypes during RTL expansion
> > > > >
> > > > > So the patch as-is isn't good I think (if you don't succeed with
> > > > > Fortran due to lack of small integer types you can also try D
> > > > > or Modula for the LTO wrong-code testcase).
> > > > >
> > > > > ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> > > > > as said.
> > > >
> > > > program test
> > > >     use iso_c_binding, only: c_short
> > > >     interface
> > > >       subroutine foo(a) bind(c)
> > > >         import c_short
> > > >         integer(kind=c_short), intent(in), value :: a
> > > >       end subroutine foo
> > > >     end interface
> > > >     integer(kind=c_short) a(5);
> > > >     call foo (a(3))
> > > > end
> > > >
> > > > shows
> > > >
> > > > ;; foo (_7);
> > > >
> > > > (insn 13 12 14 (set (reg:HI 102)
> > > >         (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
> > > >                 (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
> > > > A16])) "t.f90":10:19 -1
> > > >      (nil))
> > > >
> > > > (insn 14 13 15 (set (reg:HI 5 di)
> > > >         (reg:HI 102)) "t.f90":10:19 -1
> > > >      (nil))
> > > >
> > > > (call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
> > > > <function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
> > > >         (const_int 0 [0])) "t.f90":10:19 -1
> > > >      (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
> > > > <function_decl 0x7f7fcc430800 foo>)
> > > >         (nil))
> > > >     (expr_list:HI (use (reg:HI 5 di))
> > > >         (nil)))
> > > >
> > > > and with -Os generates
> > > >
> > > >         movw    10(%rsp), %di
> > > >         call    foo
> > > >
> > > > but a C TU would assume 'foo' receives promoted arguments.
> > >
> > > GCC follows x86 psABI and assumes the upper bits are undefined.
> > > But LLVM assumes the upper bits are extended:
> >
> > I'm saying your patch increases the likelyhood we run into problems with
> > out internal inconsistency of handling promote_prototypes - it adds to the
> > number of places we wrongly assume all callers honor it when the
> > callee was compiled in a way to honor the target hook.
> >
> > So IMO the patch is wrong (in the sense of wrong-code), and as given
> > promote_prototypes is only useful for TU local binding functions.
>
> TARGET_PROMOTE_PROTOTYPES isn't defined for psABI purpose.
> x86 psABI doesn't require it.   GCC uses only the lower bits of incoming
> arguments.   But it isn't the GCC's job to promote incoming arguments
> and copy them to outgoing arguments since not usages of such functions
> are compiled by GCC.

But if you now elide the promotion for the call to foo in

static void foo (char c) { ... }
void bar (char c) { foo (c); }

then the existing combine optimization breaks since when all calls are
from the local TU (as for foo()) we assume that the promotion happened
but after your optimization it now depends on the callers of bar which
we do not control.

Richard.

> > I showed a way to rectify this by applying promote_prototypes at
> > call RTL expansion time (instead of or in addition to frontends).  Iff we
> > want to keep doing this "optimization" - given we perform this optimization
> > in combine it got effectively part of the de-factor x86 ABI as we assume
> > our callers perform the promotion, no?
> >
> > Richard.
> >
> > > $ cat x1.c
> > > extern int baz (int);
> > >
> > > int
> > > foo (char c1)
> > > {
> > >   return baz (c1) + 1;
> > > }
> > > $ gcc -S -O2 x1.c
> > > $ cat x1.s
> > > .file "x1.c"
> > > .text
> > > .p2align 4
> > > .globl foo
> > > .type foo, @function
> > > foo:
> > > .LFB0:
> > > .cfi_startproc
> > > subq $8, %rsp
> > > .cfi_def_cfa_offset 16
> > > movsbl %dil, %edi
> > > call baz
> > > addq $8, %rsp
> > > .cfi_def_cfa_offset 8
> > > addl $1, %eax
> > > ret
> > > .cfi_endproc
> > > .LFE0:
> > > .size foo, .-foo
> > > .ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > .section .note.GNU-stack,"",@progbits
> > > $ clang -S -O2 x1.c
> > > $ cat x1.s
> > > .text
> > > .file "x1.c"
> > > .globl foo                             # -- Begin function foo
> > > .p2align 4, 0x90
> > > .type foo,@function
> > > foo:                                    # @foo
> > > .cfi_startproc
> > > # %bb.0:
> > > pushq %rax
> > > .cfi_def_cfa_offset 16
> > > callq baz
> > > incl %eax
> > > popq %rcx
> > > .cfi_def_cfa_offset 8
> > > retq
> > > .Lfunc_end0:
> > > .size foo, .Lfunc_end0-foo
> > > .cfi_endproc
> > >                                         # -- End function
> > > .ident "clang version 19.1.0 (Fedora 19.1.0-1.fc41)"
> > > .section ".note.GNU-stack","",@progbits
> > > .addrsig
> > > $
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
  
H.J. Lu Nov. 7, 2024, 4:49 a.m. UTC | #14
On Wed, Nov 6, 2024 at 6:01 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Wed, Nov 6, 2024 at 10:52 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, Nov 6, 2024 at 4:29 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Tue, Nov 5, 2024 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Tue, Nov 5, 2024 at 5:27 PM Richard Biener
> > > > <richard.guenther@gmail.com> wrote:
> > > > >
> > > > > On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
> > > > > <richard.guenther@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > > > > > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > >>
> > > > > > > >>
> > > > > > > >>
> > > > > > > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > > > > > > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > >>>>
> > > > > > > >>>>
> > > > > > > >>>>
> > > > > > > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > > > > > > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > >>>>>>
> > > > > > > >>>>>>
> > > > > > > >>>>>>
> > > > > > > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > > > > > > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > > > > > > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > > > > > > >>>>>>> extern int baz (char c1);
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> int
> > > > > > > >>>>>>> foo (char c1)
> > > > > > > >>>>>>> {
> > > > > > > >>>>>>>       return baz (c1);
> > > > > > > >>>>>>> }
> > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > > > > > > >>>>>>>          .file   "x.c"
> > > > > > > >>>>>>>          .text
> > > > > > > >>>>>>>          .p2align 4
> > > > > > > >>>>>>>          .globl  foo
> > > > > > > >>>>>>>          .type   foo, @function
> > > > > > > >>>>>>> foo:
> > > > > > > >>>>>>> .LFB0:
> > > > > > > >>>>>>>          .cfi_startproc
> > > > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > > > >>>>>>>          jmp     baz
> > > > > > > >>>>>>>          .cfi_endproc
> > > > > > > >>>>>>> .LFE0:
> > > > > > > >>>>>>>          .size   foo, .-foo
> > > > > > > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > > > > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> But integer promotion:
> > > > > > > >>>>>>>
> > > > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > > > >>>>>>>
> > > > > > > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > > > > > > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > > > > > > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > > > > > > >>>>>>> avoid unnecessary integer promotion.
> > > > > > > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > > > > > > >>>>>
> > > > > > > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > > > > > > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > > > > > > >>>>> to return false will break LLVM which assumes that incoming char/short
> > > > > > > >>>>> arguments on x86 are always extended to int.   The following targets
> > > > > > > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > > > > > > >>>> looking at that a couple years ago and set it aside.   But the basic
> > > > > > > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > > > > > > >>>>
> > > > > > > >>>> Jeff
> > > > > > > >>>>
> > > > > > > >>>
> > > > > > > >>> For
> > > > > > > >>>
> > > > > > > >>> extern int baz (char c1);
> > > > > > > >>>
> > > > > > > >>> int
> > > > > > > >>> foo (char c1)
> > > > > > > >>> {
> > > > > > > >>>     return baz (c1);
> > > > > > > >>> }
> > > > > > > >>>
> > > > > > > >>> on i386, we get these
> > > > > > > >>>
> > > > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > > > >>> 190 {extendqisi2}
> > > > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > > > >>>           (nil)))
> > > > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > > > >>>        (nil))
> > > > > > > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > > > > > > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > > > > > > >>> <function_decl 0x7f27347aae00
> > > > > > > >>> baz>) [0 baz S1 A8])
> > > > > > > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > > > > > > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > > > > > > >>> <function_decl 0x7f273
> > > > > > > >>> 47aae00 baz>)
> > > > > > > >>>           (nil))
> > > > > > > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > > > > > > >>>           (nil)))
> > > > > > > >>>
> > > > > > > >>> before REE.  How should REE work to elimate
> > > > > > > >>>
> > > > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > > > >>> 190 {extendqisi2}
> > > > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > > > >>>           (nil)))
> > > > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > > > >>>        (nil))
> > > > > > > >> You'll have to write code to describe the ABI how the values in question
> > > > > > > >> are already extended to REE.  It's not going to "just work", you'll have
> > > > > > > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > > > > > > >> this time given we've got multiple ways to handle extension removal.
> > > > > > > >>
> > > > > > > >
> > > > > > > > For char and short parameters, x86 ABI leaves the upper bits in
> > > > > > > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > > > > > > as the incoming arguments, there is no need to extend outgoing
> > > > > > > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > > > > > > REE is the best fit here.
> > > > > > > The whole point is to make it available and utilize it.
> > > > > >
> > > > > > I think most of the complication is due to the promotion being performed
> > > > > > on GIMPLE but GIMPLE not taking the incoming argument as promoted.
> > > > > >
> > > > > > On my wishlist is to defer "applying" promote_prototypes to call RTL
> > > > > > expansion and keeping GIMPLE "type correct", aka _not_ promoting
> > > > > > arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> > > > > > perform this promotion (sic!), and thus HJs pattern matching of the
> > > > > > sibling call site is required for correctness even!
> > > > > >
> > > > > > I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> > > > > > should reflect the incoming promotion state - using the target hook in
> > > > > > RTL expansion is "wrong" (see above - not applied consistently), but
> > > > > > DECL_ARG_TYPE should tell you where it was applied and where not.
> > > > > > With LTO and calling C from fortran this optimization might go wrong
> > > > > > (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).
> > > >
> > > > My patch only uses DECL_ARG_TYPE to compare against the incoming
> > > > argument to check if the incoming argument is used as outgoing argument
> > > > unchanged.  My patch doesn't change any functionality whether the caller
> > > > does promotion or not since GCC always assumes the upper bits are
> > > > undefined.  My patch keeps the incoming argument unchanged only
> > > > when there is no change in source code.
> > > >
> > > > > >
> > > > > > I'd say we should either
> > > > > >
> > > > > >  a) drop targetm.promote_prototypes (and hope nobody uses it to
> > > > > >      implement an ABI requirement)
> > > > > >  b) apply targetm.promote_prototypes during RTL expansion
> > > > > >
> > > > > > So the patch as-is isn't good I think (if you don't succeed with
> > > > > > Fortran due to lack of small integer types you can also try D
> > > > > > or Modula for the LTO wrong-code testcase).
> > > > > >
> > > > > > ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> > > > > > as said.
> > > > >
> > > > > program test
> > > > >     use iso_c_binding, only: c_short
> > > > >     interface
> > > > >       subroutine foo(a) bind(c)
> > > > >         import c_short
> > > > >         integer(kind=c_short), intent(in), value :: a
> > > > >       end subroutine foo
> > > > >     end interface
> > > > >     integer(kind=c_short) a(5);
> > > > >     call foo (a(3))
> > > > > end
> > > > >
> > > > > shows
> > > > >
> > > > > ;; foo (_7);
> > > > >
> > > > > (insn 13 12 14 (set (reg:HI 102)
> > > > >         (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
> > > > >                 (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
> > > > > A16])) "t.f90":10:19 -1
> > > > >      (nil))
> > > > >
> > > > > (insn 14 13 15 (set (reg:HI 5 di)
> > > > >         (reg:HI 102)) "t.f90":10:19 -1
> > > > >      (nil))
> > > > >
> > > > > (call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
> > > > > <function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
> > > > >         (const_int 0 [0])) "t.f90":10:19 -1
> > > > >      (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
> > > > > <function_decl 0x7f7fcc430800 foo>)
> > > > >         (nil))
> > > > >     (expr_list:HI (use (reg:HI 5 di))
> > > > >         (nil)))
> > > > >
> > > > > and with -Os generates
> > > > >
> > > > >         movw    10(%rsp), %di
> > > > >         call    foo
> > > > >
> > > > > but a C TU would assume 'foo' receives promoted arguments.
> > > >
> > > > GCC follows x86 psABI and assumes the upper bits are undefined.
> > > > But LLVM assumes the upper bits are extended:
> > >
> > > I'm saying your patch increases the likelyhood we run into problems with
> > > out internal inconsistency of handling promote_prototypes - it adds to the
> > > number of places we wrongly assume all callers honor it when the
> > > callee was compiled in a way to honor the target hook.
> > >
> > > So IMO the patch is wrong (in the sense of wrong-code), and as given
> > > promote_prototypes is only useful for TU local binding functions.
> >
> > TARGET_PROMOTE_PROTOTYPES isn't defined for psABI purpose.
> > x86 psABI doesn't require it.   GCC uses only the lower bits of incoming
> > arguments.   But it isn't the GCC's job to promote incoming arguments
> > and copy them to outgoing arguments since not usages of such functions
> > are compiled by GCC.
>
> But if you now elide the promotion for the call to foo in
>
> static void foo (char c) { ... }

Only calling local functions, like foo, is an issue.  There is no
issue if foo is global.   Optimization for calling global functions
is OK.  Am I correct?

> void bar (char c) { foo (c); }
>
> then the existing combine optimization breaks since when all calls are
> from the local TU (as for foo()) we assume that the promotion happened
> but after your optimization it now depends on the callers of bar which
> we do not control.
>
> Richard.
>
> > > I showed a way to rectify this by applying promote_prototypes at
> > > call RTL expansion time (instead of or in addition to frontends).  Iff we
> > > want to keep doing this "optimization" - given we perform this optimization
> > > in combine it got effectively part of the de-factor x86 ABI as we assume
> > > our callers perform the promotion, no?
> > >
> > > Richard.
> > >
> > > > $ cat x1.c
> > > > extern int baz (int);
> > > >
> > > > int
> > > > foo (char c1)
> > > > {
> > > >   return baz (c1) + 1;
> > > > }
> > > > $ gcc -S -O2 x1.c
> > > > $ cat x1.s
> > > > .file "x1.c"
> > > > .text
> > > > .p2align 4
> > > > .globl foo
> > > > .type foo, @function
> > > > foo:
> > > > .LFB0:
> > > > .cfi_startproc
> > > > subq $8, %rsp
> > > > .cfi_def_cfa_offset 16
> > > > movsbl %dil, %edi
> > > > call baz
> > > > addq $8, %rsp
> > > > .cfi_def_cfa_offset 8
> > > > addl $1, %eax
> > > > ret
> > > > .cfi_endproc
> > > > .LFE0:
> > > > .size foo, .-foo
> > > > .ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > .section .note.GNU-stack,"",@progbits
> > > > $ clang -S -O2 x1.c
> > > > $ cat x1.s
> > > > .text
> > > > .file "x1.c"
> > > > .globl foo                             # -- Begin function foo
> > > > .p2align 4, 0x90
> > > > .type foo,@function
> > > > foo:                                    # @foo
> > > > .cfi_startproc
> > > > # %bb.0:
> > > > pushq %rax
> > > > .cfi_def_cfa_offset 16
> > > > callq baz
> > > > incl %eax
> > > > popq %rcx
> > > > .cfi_def_cfa_offset 8
> > > > retq
> > > > .Lfunc_end0:
> > > > .size foo, .Lfunc_end0-foo
> > > > .cfi_endproc
> > > >                                         # -- End function
> > > > .ident "clang version 19.1.0 (Fedora 19.1.0-1.fc41)"
> > > > .section ".note.GNU-stack","",@progbits
> > > > .addrsig
> > > > $
> > > >
> > > > --
> > > > H.J.
> >
> >
> >
> > --
> > H.J.
  
Richard Biener Nov. 7, 2024, 12:16 p.m. UTC | #15
On Thu, Nov 7, 2024 at 5:50 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Nov 6, 2024 at 6:01 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Wed, Nov 6, 2024 at 10:52 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Wed, Nov 6, 2024 at 4:29 PM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Tue, Nov 5, 2024 at 10:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Tue, Nov 5, 2024 at 5:27 PM Richard Biener
> > > > > <richard.guenther@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, Nov 5, 2024 at 10:09 AM Richard Biener
> > > > > > <richard.guenther@gmail.com> wrote:
> > > > > > >
> > > > > > > On Tue, Nov 5, 2024 at 5:23 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > On 11/4/24 8:13 PM, H.J. Lu wrote:
> > > > > > > > > On Tue, Nov 5, 2024 at 10:57 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >>
> > > > > > > > >> On 11/4/24 7:52 PM, H.J. Lu wrote:
> > > > > > > > >>> On Tue, Nov 5, 2024 at 8:48 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > > >>>>
> > > > > > > > >>>>
> > > > > > > > >>>>
> > > > > > > > >>>> On 11/4/24 5:42 PM, H.J. Lu wrote:
> > > > > > > > >>>>> On Tue, Nov 5, 2024 at 8:07 AM Jeff Law <jeffreyalaw@gmail.com> wrote:
> > > > > > > > >>>>>>
> > > > > > > > >>>>>>
> > > > > > > > >>>>>>
> > > > > > > > >>>>>> On 11/1/24 4:32 PM, H.J. Lu wrote:
> > > > > > > > >>>>>>> For targets, like x86, which define TARGET_PROMOTE_PROTOTYPES to return
> > > > > > > > >>>>>>> true, all integer arguments smaller than int are passed as int:
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.c
> > > > > > > > >>>>>>> extern int baz (char c1);
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> int
> > > > > > > > >>>>>>> foo (char c1)
> > > > > > > > >>>>>>> {
> > > > > > > > >>>>>>>       return baz (c1);
> > > > > > > > >>>>>>> }
> > > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ gcc -S -O2 -m32 x.c
> > > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$ cat x.s
> > > > > > > > >>>>>>>          .file   "x.c"
> > > > > > > > >>>>>>>          .text
> > > > > > > > >>>>>>>          .p2align 4
> > > > > > > > >>>>>>>          .globl  foo
> > > > > > > > >>>>>>>          .type   foo, @function
> > > > > > > > >>>>>>> foo:
> > > > > > > > >>>>>>> .LFB0:
> > > > > > > > >>>>>>>          .cfi_startproc
> > > > > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > > > > >>>>>>>          jmp     baz
> > > > > > > > >>>>>>>          .cfi_endproc
> > > > > > > > >>>>>>> .LFE0:
> > > > > > > > >>>>>>>          .size   foo, .-foo
> > > > > > > > >>>>>>>          .ident  "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > > > > > >>>>>>>          .section        .note.GNU-stack,"",@progbits
> > > > > > > > >>>>>>> [hjl@gnu-tgl-3 pr14907]$
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> But integer promotion:
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>>          movsbl  4(%esp), %eax
> > > > > > > > >>>>>>>          movl    %eax, 4(%esp)
> > > > > > > > >>>>>>>
> > > > > > > > >>>>>>> isn't necessary if incoming arguments and outgoing arguments are the
> > > > > > > > >>>>>>> same.  Use unpromoted incoming integer arguments as outgoing arguments
> > > > > > > > >>>>>>> if incoming integer arguments are the same as outgoing arguments to
> > > > > > > > >>>>>>> avoid unnecessary integer promotion.
> > > > > > > > >>>>>> Is there a particular reason x86 can't use the same mechanisms that
> > > > > > > > >>>>>
> > > > > > > > >>>>> Other targets define TARGET_PROMOTE_PROTOTYPES to return false
> > > > > > > > >>>>> to avoid this issue.   Changing x86 TARGET_PROMOTE_PROTOTYPES
> > > > > > > > >>>>> to return false will break LLVM which assumes that incoming char/short
> > > > > > > > >>>>> arguments on x86 are always extended to int.   The following targets
> > > > > > > > >>>> Then my suggestion would be to cover this in REE somehow.  We started
> > > > > > > > >>>> looking at that a couple years ago and set it aside.   But the basic
> > > > > > > > >>>> idea was to expose the ABI guarantees to REE, then let REE do its thing.
> > > > > > > > >>>>
> > > > > > > > >>>> Jeff
> > > > > > > > >>>>
> > > > > > > > >>>
> > > > > > > > >>> For
> > > > > > > > >>>
> > > > > > > > >>> extern int baz (char c1);
> > > > > > > > >>>
> > > > > > > > >>> int
> > > > > > > > >>> foo (char c1)
> > > > > > > > >>> {
> > > > > > > > >>>     return baz (c1);
> > > > > > > > >>> }
> > > > > > > > >>>
> > > > > > > > >>> on i386, we get these
> > > > > > > > >>>
> > > > > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > > > > >>> 190 {extendqisi2}
> > > > > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > > > > >>>           (nil)))
> > > > > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > > > > >>>        (nil))
> > > > > > > > >>> (call_insn/j 9 8 10 2 (set (reg:SI 0 ax)
> > > > > > > > >>>           (call (mem:QI (symbol_ref:SI ("baz") [flags 0x41]
> > > > > > > > >>> <function_decl 0x7f27347aae00
> > > > > > > > >>> baz>) [0 baz S1 A8])
> > > > > > > > >>>               (const_int 4 [0x4]))) "x.c":6:10 1420 {*sibcall_value}
> > > > > > > > >>>        (expr_list:REG_CALL_DECL (symbol_ref:SI ("baz") [flags 0x41]
> > > > > > > > >>> <function_decl 0x7f273
> > > > > > > > >>> 47aae00 baz>)
> > > > > > > > >>>           (nil))
> > > > > > > > >>>       (expr_list:SI (use (mem:SI (reg/f:SI 16 argp) [0  S4 A32]))
> > > > > > > > >>>           (nil)))
> > > > > > > > >>>
> > > > > > > > >>> before REE.  How should REE work to elimate
> > > > > > > > >>>
> > > > > > > > >>> (insn 7 4 8 2 (set (reg:SI 0 ax [orig:102 c1 ] [102])
> > > > > > > > >>>           (sign_extend:SI (mem/c:QI (plus:SI (reg/f:SI 7 sp)
> > > > > > > > >>>                       (const_int 4 [0x4])) [0 c1+0 S1 A32]))) "x.c":6:10
> > > > > > > > >>> 190 {extendqisi2}
> > > > > > > > >>>        (expr_list:REG_EQUIV (mem:SI (reg/f:SI 16 argp) [0  S4 A32])
> > > > > > > > >>>           (nil)))
> > > > > > > > >>> (insn 8 7 9 2 (set (mem:SI (plus:SI (reg/f:SI 7 sp)
> > > > > > > > >>>                   (const_int 4 [0x4])) [0  S4 A32])
> > > > > > > > >>>           (reg:SI 0 ax [orig:102 c1 ] [102])) "x.c":6:10 95 {*movsi_internal}
> > > > > > > > >>>        (nil))
> > > > > > > > >> You'll have to write code to describe the ABI how the values in question
> > > > > > > > >> are already extended to REE.  It's not going to "just work", you'll have
> > > > > > > > >> to do some development.  I'm not inclined to ACK the expansion patch at
> > > > > > > > >> this time given we've got multiple ways to handle extension removal.
> > > > > > > > >>
> > > > > > > > >
> > > > > > > > > For char and short parameters, x86 ABI leaves the upper bits in
> > > > > > > > > 32 bit fields undefined.  If the outgoing arguments are the same
> > > > > > > > > as the incoming arguments, there is no need to extend outgoing
> > > > > > > > > arguments.   Also ABI info isn't available in REE.  I am not sure if
> > > > > > > > > REE is the best fit here.
> > > > > > > > The whole point is to make it available and utilize it.
> > > > > > >
> > > > > > > I think most of the complication is due to the promotion being performed
> > > > > > > on GIMPLE but GIMPLE not taking the incoming argument as promoted.
> > > > > > >
> > > > > > > On my wishlist is to defer "applying" promote_prototypes to call RTL
> > > > > > > expansion and keeping GIMPLE "type correct", aka _not_ promoting
> > > > > > > arguments in the C/C++/Ada frontends.  I'll note that not all frontends
> > > > > > > perform this promotion (sic!), and thus HJs pattern matching of the
> > > > > > > sibling call site is required for correctness even!
> > > > > > >
> > > > > > > I'll also note that there is DECL_ARG_TYPE on PARM_DECLs which
> > > > > > > should reflect the incoming promotion state - using the target hook in
> > > > > > > RTL expansion is "wrong" (see above - not applied consistently), but
> > > > > > > DECL_ARG_TYPE should tell you where it was applied and where not.
> > > > > > > With LTO and calling C from fortran this optimization might go wrong
> > > > > > > (fortran doesn't promote, C sets DECL_ARG_TYPE to promoted type).
> > > > >
> > > > > My patch only uses DECL_ARG_TYPE to compare against the incoming
> > > > > argument to check if the incoming argument is used as outgoing argument
> > > > > unchanged.  My patch doesn't change any functionality whether the caller
> > > > > does promotion or not since GCC always assumes the upper bits are
> > > > > undefined.  My patch keeps the incoming argument unchanged only
> > > > > when there is no change in source code.
> > > > >
> > > > > > >
> > > > > > > I'd say we should either
> > > > > > >
> > > > > > >  a) drop targetm.promote_prototypes (and hope nobody uses it to
> > > > > > >      implement an ABI requirement)
> > > > > > >  b) apply targetm.promote_prototypes during RTL expansion
> > > > > > >
> > > > > > > So the patch as-is isn't good I think (if you don't succeed with
> > > > > > > Fortran due to lack of small integer types you can also try D
> > > > > > > or Modula for the LTO wrong-code testcase).
> > > > > > >
> > > > > > > ISTR we do rely on DECL_ARG_TYPE in combine but wrongly so
> > > > > > > as said.
> > > > > >
> > > > > > program test
> > > > > >     use iso_c_binding, only: c_short
> > > > > >     interface
> > > > > >       subroutine foo(a) bind(c)
> > > > > >         import c_short
> > > > > >         integer(kind=c_short), intent(in), value :: a
> > > > > >       end subroutine foo
> > > > > >     end interface
> > > > > >     integer(kind=c_short) a(5);
> > > > > >     call foo (a(3))
> > > > > > end
> > > > > >
> > > > > > shows
> > > > > >
> > > > > > ;; foo (_7);
> > > > > >
> > > > > > (insn 13 12 14 (set (reg:HI 102)
> > > > > >         (mem/c:HI (plus:DI (reg/f:DI 93 virtual-stack-vars)
> > > > > >                 (const_int -6 [0xfffffffffffffffa])) [1 a[2]+0 S2
> > > > > > A16])) "t.f90":10:19 -1
> > > > > >      (nil))
> > > > > >
> > > > > > (insn 14 13 15 (set (reg:HI 5 di)
> > > > > >         (reg:HI 102)) "t.f90":10:19 -1
> > > > > >      (nil))
> > > > > >
> > > > > > (call_insn 15 14 0 (call (mem:QI (symbol_ref:DI ("foo") [flags 0x41]
> > > > > > <function_decl 0x7f7fcc430800 foo>) [0 foo S1 A8])
> > > > > >         (const_int 0 [0])) "t.f90":10:19 -1
> > > > > >      (expr_list:REG_CALL_DECL (symbol_ref:DI ("foo") [flags 0x41]
> > > > > > <function_decl 0x7f7fcc430800 foo>)
> > > > > >         (nil))
> > > > > >     (expr_list:HI (use (reg:HI 5 di))
> > > > > >         (nil)))
> > > > > >
> > > > > > and with -Os generates
> > > > > >
> > > > > >         movw    10(%rsp), %di
> > > > > >         call    foo
> > > > > >
> > > > > > but a C TU would assume 'foo' receives promoted arguments.
> > > > >
> > > > > GCC follows x86 psABI and assumes the upper bits are undefined.
> > > > > But LLVM assumes the upper bits are extended:
> > > >
> > > > I'm saying your patch increases the likelyhood we run into problems with
> > > > out internal inconsistency of handling promote_prototypes - it adds to the
> > > > number of places we wrongly assume all callers honor it when the
> > > > callee was compiled in a way to honor the target hook.
> > > >
> > > > So IMO the patch is wrong (in the sense of wrong-code), and as given
> > > > promote_prototypes is only useful for TU local binding functions.
> > >
> > > TARGET_PROMOTE_PROTOTYPES isn't defined for psABI purpose.
> > > x86 psABI doesn't require it.   GCC uses only the lower bits of incoming
> > > arguments.   But it isn't the GCC's job to promote incoming arguments
> > > and copy them to outgoing arguments since not usages of such functions
> > > are compiled by GCC.
> >
> > But if you now elide the promotion for the call to foo in
> >
> > static void foo (char c) { ... }
>
> Only calling local functions, like foo, is an issue.  There is no
> issue if foo is global.   Optimization for calling global functions
> is OK.  Am I correct?

I think it's OK to elide the promotion for calls to global functions,
but as you can't rely on the incoming promotion, this would eventually
just undo promote_prototypes.

Like when you LTO the Fortran TU I gave with the C implementation the
Fortran FE will not have applied promote_prototypes to the call of the
C implementation but the C implementation will have DECL_ARG_TYPE
in a way to suggest it has.  And both functions likely appear to be local
due to using LTO.

So whether it's safe to elide promote_prototypes depends on whether
somebody else (in this case combine) would still rely on it being
performed (I think combine is wrong today, at least when LTO is involved
and thus it's "all callers visible" argument falls down).

Richard.

>
> > void bar (char c) { foo (c); }
> >
> > then the existing combine optimization breaks since when all calls are
> > from the local TU (as for foo()) we assume that the promotion happened
> > but after your optimization it now depends on the callers of bar which
> > we do not control.
> >
> > Richard.
> >
> > > > I showed a way to rectify this by applying promote_prototypes at
> > > > call RTL expansion time (instead of or in addition to frontends).  Iff we
> > > > want to keep doing this "optimization" - given we perform this optimization
> > > > in combine it got effectively part of the de-factor x86 ABI as we assume
> > > > our callers perform the promotion, no?
> > > >
> > > > Richard.
> > > >
> > > > > $ cat x1.c
> > > > > extern int baz (int);
> > > > >
> > > > > int
> > > > > foo (char c1)
> > > > > {
> > > > >   return baz (c1) + 1;
> > > > > }
> > > > > $ gcc -S -O2 x1.c
> > > > > $ cat x1.s
> > > > > .file "x1.c"
> > > > > .text
> > > > > .p2align 4
> > > > > .globl foo
> > > > > .type foo, @function
> > > > > foo:
> > > > > .LFB0:
> > > > > .cfi_startproc
> > > > > subq $8, %rsp
> > > > > .cfi_def_cfa_offset 16
> > > > > movsbl %dil, %edi
> > > > > call baz
> > > > > addq $8, %rsp
> > > > > .cfi_def_cfa_offset 8
> > > > > addl $1, %eax
> > > > > ret
> > > > > .cfi_endproc
> > > > > .LFE0:
> > > > > .size foo, .-foo
> > > > > .ident "GCC: (GNU) 14.2.1 20240912 (Red Hat 14.2.1-3)"
> > > > > .section .note.GNU-stack,"",@progbits
> > > > > $ clang -S -O2 x1.c
> > > > > $ cat x1.s
> > > > > .text
> > > > > .file "x1.c"
> > > > > .globl foo                             # -- Begin function foo
> > > > > .p2align 4, 0x90
> > > > > .type foo,@function
> > > > > foo:                                    # @foo
> > > > > .cfi_startproc
> > > > > # %bb.0:
> > > > > pushq %rax
> > > > > .cfi_def_cfa_offset 16
> > > > > callq baz
> > > > > incl %eax
> > > > > popq %rcx
> > > > > .cfi_def_cfa_offset 8
> > > > > retq
> > > > > .Lfunc_end0:
> > > > > .size foo, .Lfunc_end0-foo
> > > > > .cfi_endproc
> > > > >                                         # -- End function
> > > > > .ident "clang version 19.1.0 (Fedora 19.1.0-1.fc41)"
> > > > > .section ".note.GNU-stack","",@progbits
> > > > > .addrsig
> > > > > $
> > > > >
> > > > > --
> > > > > H.J.
> > >
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
  

Patch

diff --git a/gcc/calls.cc b/gcc/calls.cc
index f67067acad4..d731936ef01 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -62,6 +62,9 @@  along with GCC; see the file COPYING3.  If not see
 #include "value-query.h"
 #include "tree-pretty-print.h"
 #include "tree-eh.h"
+#include "ssa.h"
+#include "tree-ssa-live.h"
+#include "tree-outof-ssa.h"
 
 /* Like PREFERRED_STACK_BOUNDARY but in units of bytes, not bits.  */
 #define STACK_BYTES (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
@@ -76,6 +79,10 @@  struct arg_data
   machine_mode mode;
   /* Current RTL value for argument, or 0 if it isn't precomputed.  */
   rtx value;
+  /* Unpromoted incoming integer argument RTL value as outgoing argument.
+     or 0 if outgoing argument isn't a promoted integer incoming argument.
+   */
+  rtx unpromoted_int_parm_rtx;
   /* Initially-compute RTL value for argument; only for const functions.  */
   rtx initial_value;
   /* Register to pass this argument in, 0 if passed on stack, or an
@@ -1020,7 +1027,10 @@  precompute_register_parameters (int num_actuals, struct arg_data *args,
 	if (args[i].value == 0)
 	  {
 	    push_temp_slots ();
-	    args[i].value = expand_normal (args[i].tree_value);
+	    if (args[i].unpromoted_int_parm_rtx)
+	      args[i].value = args[i].unpromoted_int_parm_rtx;
+	    else
+	      args[i].value = expand_normal (args[i].tree_value);
 	    preserve_temp_slots (args[i].value);
 	    pop_temp_slots ();
 	  }
@@ -1281,6 +1291,63 @@  maybe_complain_about_tail_call (tree call_expr, const char *reason)
   CALL_EXPR_MUST_TAIL_CALL (call_expr) = 0;
 }
 
+/* Return unpromoted integer function argument rtx if SSA NAME ARG is a
+   promoted integer function parameter and the unpromoted integer function
+   argument mode is the same as the mode of the outgoing argument type
+   ARGTYPE.  Otherwise, return nullptr.  */
+
+static rtx
+get_unpromoted_int_parm_rtx_from_ssa_name (tree argtype, tree arg)
+{
+  tree var = SSA_NAME_VAR (arg);
+  if (TREE_CODE (var) != PARM_DECL)
+    return nullptr;
+  rtx op = get_rtx_for_ssa_name (arg);
+  tree type = DECL_ARG_TYPE (var);
+  machine_mode mode = GET_MODE (op);
+  if (mode == TYPE_MODE (type) || mode != TYPE_MODE (argtype))
+    return nullptr;
+  return op;
+}
+
+/* Return unpromoted integer function argument rtx if ARG is a promoted
+   integer function parameter and the unpromoted integer function argument
+   mode is the same as the mode of the outgoing argument type ARGTYPE.
+   Otherwise, return nullptr.  */
+
+static rtx
+get_unpromoted_int_parm_rtx (tree argtype, tree arg)
+{
+  tree type = TREE_TYPE (arg);
+  machine_mode mode = TYPE_MODE (type);
+  if (GET_MODE_CLASS (mode) != MODE_INT)
+    return nullptr;
+
+  if (TREE_CODE (arg) != SSA_NAME)
+    return nullptr;
+
+  if (SSA_NAME_IS_DEFAULT_DEF (arg))
+    return get_unpromoted_int_parm_rtx_from_ssa_name (argtype, arg);
+  else
+    {
+      gimple *stmt = get_gimple_for_ssa_name (arg);
+      if (stmt == nullptr)
+	return nullptr;
+
+      gassign *g = as_a<gassign *> (stmt);
+      if (gimple_assign_rhs_code (g) == NOP_EXPR)
+	{
+	  arg = gimple_assign_rhs1 (g);
+	  if (TREE_CODE (arg) == SSA_NAME
+	      && SSA_NAME_IS_DEFAULT_DEF (arg))
+	    return get_unpromoted_int_parm_rtx_from_ssa_name (argtype,
+							      arg);
+	}
+    }
+
+  return nullptr;
+}
+
 /* Fill in ARGS_SIZE and ARGS array based on the parameters found in
    CALL_EXPR EXP.
 
@@ -1326,7 +1393,8 @@  initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
 				 rtx *old_stack_level,
 				 poly_int64 *old_pending_adj,
 				 bool *must_preallocate, int *ecf_flags,
-				 bool *may_tailcall, bool call_from_thunk_p)
+				 bool *may_tailcall, bool call_from_thunk_p,
+				 tree type_arg_types)
 {
   CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far);
   location_t loc = EXPR_LOCATION (exp);
@@ -1375,6 +1443,35 @@  initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
       }
   }
 
+  /* Initialize unpromoted_int_parm_rtx if integer function arguments
+     are promoted to int.  */
+  tree func_type = fndecl ? TREE_TYPE (fndecl) : fntype;
+  if (type_arg_types
+      && num_actuals
+      && targetm.calls.promote_prototypes (func_type))
+    {
+      int j = num_actuals - 1;
+      if (struct_value_addr_value
+	  && args[j].tree_value == struct_value_addr_value)
+	j--;
+      for (tree chain = type_arg_types;
+	   chain;
+	   chain = TREE_CHAIN (chain))
+	{
+	  tree argtype = TREE_VALUE (chain);
+	  if (argtype == void_type_node)
+	    break;
+	  if (args[j].tree_value
+	      && TREE_CODE (args[j].tree_value) != ERROR_MARK)
+	    args[j].unpromoted_int_parm_rtx
+	      = get_unpromoted_int_parm_rtx (argtype,
+					     args[j].tree_value);
+	  if (j == 0)
+	    break;
+	  j--;
+	}
+    }
+
   /* I counts args in order (to be) pushed; ARGPOS counts in order written.  */
   for (argpos = 0; argpos < num_actuals; i--, argpos++)
     {
@@ -1546,6 +1643,31 @@  initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
       else
 	args[i].tail_call_reg = args[i].reg;
 
+      if (args[i].unpromoted_int_parm_rtx)
+	{
+	  /* If unpromoted_int_parm_rtx is available, change reg and
+	     tail_call_reg to the same mode as unpromoted_int_parm_rtx
+	     to match the mode of unpromoted_int_parm_rtx.  Otherwise,
+	     change unpromoted_int_parm_rtx to nullptr.  */
+	  if ((!args[i].reg
+	       || HARD_REGISTER_P (args[i].reg))
+	      && (!args[i].tail_call_reg
+		  || HARD_REGISTER_P (args[i].tail_call_reg)))
+	    {
+	      /* Use mode of unpromoted_int_parm_rtx.  */
+	      args[i].mode = GET_MODE (args[i].unpromoted_int_parm_rtx);
+	      if (args[i].reg)
+		args[i].reg
+		  = gen_rtx_REG (args[i].mode, REGNO (args[i].reg));
+	      if (args[i].tail_call_reg)
+		args[i].tail_call_reg
+		  = gen_rtx_REG (args[i].mode,
+				 REGNO (args[i].tail_call_reg));
+	    }
+	  else
+	    args[i].unpromoted_int_parm_rtx = nullptr;
+	}
+
       if (args[i].reg)
 	args[i].partial = targetm.calls.arg_partial_bytes (args_so_far, arg);
 
@@ -3031,7 +3153,8 @@  expand_call (tree exp, rtx target, int ignore)
 				   args_so_far, reg_parm_stack_space,
 				   &old_stack_level, &old_pending_adj,
 				   &must_preallocate, &flags,
-				   &try_tail_call, CALL_FROM_THUNK_P (exp));
+				   &try_tail_call, CALL_FROM_THUNK_P (exp),
+				   type_arg_types);
 
   if (args_size.var)
     must_preallocate = true;
@@ -5075,11 +5198,14 @@  store_one_arg (struct arg_data *arg, rtx argblock, int flags,
       if (arg->pass_on_stack)
 	stack_arg_under_construction++;
 
-      arg->value = expand_expr (pval,
-				(partial
-				 || TYPE_MODE (TREE_TYPE (pval)) != arg->mode)
-				? NULL_RTX : arg->stack,
-				VOIDmode, EXPAND_STACK_PARM);
+      if (arg->unpromoted_int_parm_rtx)
+	arg->value = arg->unpromoted_int_parm_rtx;
+      else
+	arg->value = expand_expr (pval,
+				  (partial
+				   || TYPE_MODE (TREE_TYPE (pval)) != arg->mode)
+				  ? NULL_RTX : arg->stack,
+				  VOIDmode, EXPAND_STACK_PARM);
 
       /* If we are promoting object (or for any other reason) the mode
 	 doesn't agree, convert the mode.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-1.c b/gcc/testsuite/gcc.target/i386/pr14907-1.c
new file mode 100644
index 00000000000..231819ed675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-1.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86*	.cfi_startproc
+x86*	jmp	baz
+x86*	.cfi_endproc
+x86*...
+*/
+
+extern int baz (char);
+
+int
+foo (char c1)
+{
+  return baz (c1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-10.c b/gcc/testsuite/gcc.target/i386/pr14907-10.c
new file mode 100644
index 00000000000..099c4dc81d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-10.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "ia32*" "" "" { target { { *-*-linux* *-*-gnu* } && ia32 } } {^\t?\.} } } */
+
+/*
+ia32*foo:
+ia32*.LFB0:
+ia32*	.cfi_startproc
+ia32*	movsbl	4\(%esp\), %eax
+ia32*	movl	%eax, 4\(%esp\)
+ia32*	jmp	baz
+ia32*	.cfi_endproc
+ia32*...
+*/
+
+extern int baz (short);
+
+int
+foo (char c1)
+{
+  return baz (c1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-11.c b/gcc/testsuite/gcc.target/i386/pr14907-11.c
new file mode 100644
index 00000000000..12ac165c298
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-11.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern int baz (char, char);
+
+int
+foo (char c1, char c2)
+{
+  return baz (c1, c2) + 1;
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-12.c b/gcc/testsuite/gcc.target/i386/pr14907-12.c
new file mode 100644
index 00000000000..6cda72ef3a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-12.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s
+{
+  char c[20];
+};
+
+extern struct s baz (char, char);
+
+struct s
+foo (char c1, char c2)
+{
+  return baz (c1, c2);
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-13.c b/gcc/testsuite/gcc.target/i386/pr14907-13.c
new file mode 100644
index 00000000000..b4130fdcb57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-13.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern int baz (char, char, ...);
+
+int
+foo (char c1, char c2)
+{
+  return baz (c1, c2, 0, 0, 0, 1);
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-14.c b/gcc/testsuite/gcc.target/i386/pr14907-14.c
new file mode 100644
index 00000000000..9b8d7a7607d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-14.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s
+{
+  char c[20];
+};
+
+extern struct s baz (char, char, ...);
+
+struct s
+foo (char c1, char c2)
+{
+  return baz (c1, c2, 0, 1);
+}
+
+/* { dg-final { scan-assembler-not "movsbl" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-2.c b/gcc/testsuite/gcc.target/i386/pr14907-2.c
new file mode 100644
index 00000000000..5da7b029279
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-2.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86*	.cfi_startproc
+x86*	jmp	baz
+x86*	.cfi_endproc
+x86*...
+*/
+
+extern int baz (int, int, int, int, int, int, char, char);
+
+int
+foo (int a1, int a2, int a3, int a4, int a5, int a6, char c1, char c2)
+{
+  return baz (a1, a2, a3, a4, a5, a6, c1, c2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-3.c b/gcc/testsuite/gcc.target/i386/pr14907-3.c
new file mode 100644
index 00000000000..a8fb13f28f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-3.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*c1:
+x86*.LFB0:
+x86*	.cfi_startproc
+x86*	jmp	c2
+x86*	.cfi_endproc
+x86*...
+*/
+
+extern char c2 (char);
+
+char
+c1 (char c)
+{
+  return c2 (c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-4.c b/gcc/testsuite/gcc.target/i386/pr14907-4.c
new file mode 100644
index 00000000000..b5fb92fefcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-4.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86*	.cfi_startproc
+x86*	jmp	baz
+x86*	.cfi_endproc
+x86*...
+*/
+
+extern int baz (short);
+
+int
+foo (short c1)
+{
+  return baz (c1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-5.c b/gcc/testsuite/gcc.target/i386/pr14907-5.c
new file mode 100644
index 00000000000..d9abb5c8cfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-5.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*foo:
+x86*.LFB0:
+x86*	.cfi_startproc
+x86*	jmp	baz
+x86*	.cfi_endproc
+x86*...
+*/
+
+extern int baz (int, int, int, int, int, int, short, short);
+
+int
+foo (int a1, int a2, int a3, int a4, int a5, int a6, short c1, short c2)
+{
+  return baz (a1, a2, a3, a4, a5, a6, c1, c2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-6.c b/gcc/testsuite/gcc.target/i386/pr14907-6.c
new file mode 100644
index 00000000000..b6d0183656a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-6.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x86*" "" "" { target *-*-linux* *-*-gnu* } {^\t?\.} } } */
+
+/*
+x86*c1:
+x86*.LFB0:
+x86*	.cfi_startproc
+x86*	jmp	c2
+x86*	.cfi_endproc
+x86*...
+*/
+
+extern short c2 (short);
+
+short
+c1 (short c)
+{
+  return c2 (c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-7.c b/gcc/testsuite/gcc.target/i386/pr14907-7.c
new file mode 100644
index 00000000000..e328e6e6f7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-7.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x64*" "" "" { target { { *-*-linux* *-*-gnu* } &&  { ! ia32 } } } {^\t?\.} } } */
+
+/*
+x64*foo:
+x64*.LFB0:
+x64*	.cfi_startproc
+x64*	movsbl	%dil, %edi
+x64*	jmp	baz
+x64*	.cfi_endproc
+x64*...
+*/
+
+extern int baz (int);
+
+int
+foo (char c1)
+{
+  return baz (c1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-8.c b/gcc/testsuite/gcc.target/i386/pr14907-8.c
new file mode 100644
index 00000000000..7d2611398c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-8.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "ia32*" "" "" { target { { *-*-linux* *-*-gnu* } && ia32 } } {^\t?\.} } } */
+
+/*
+ia32*foo:
+ia32*.LFB0:
+ia32*	.cfi_startproc
+ia32*	movsbl	4\(%esp\), %eax
+ia32*	movl	%eax, 4\(%esp\)
+ia32*	jmp	baz
+ia32*	.cfi_endproc
+ia32*...
+*/
+
+extern int baz (int);
+
+int
+foo (char c1)
+{
+  return baz (c1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr14907-9.c b/gcc/testsuite/gcc.target/i386/pr14907-9.c
new file mode 100644
index 00000000000..7b653e08303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr14907-9.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -g0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "x64*" "" "" { target { { *-*-linux* *-*-gnu* } &&  { ! ia32 } } } {^\t?\.} } } */
+
+/*
+x64*foo:
+x64*.LFB0:
+x64*	.cfi_startproc
+x64*	movsbl	%dil, %edi
+x64*	jmp	baz
+x64*	.cfi_endproc
+x64*...
+*/
+
+extern int baz (short);
+
+int
+foo (char c1)
+{
+  return baz (c1);
+}