Relax ix86_hardreg_mov_ok after split1.
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
warning
|
Patch is already merged
|
Commit Message
ix86_hardreg_mov_ok is added by r11-5066-gbe39636d9f68c4
> The solution proposed here is to have the x86 backend/recog prevent
> early RTL passes composing instructions (that set likely_spilled hard
> registers) that they (combine) can't simplify, until after reload.
> We allow sets from pseudo registers, immediate constants and memory
> accesses, but anything more complicated is performed via a temporary
> pseudo. Not only does this simplify things for the register allocator,
> but any remaining register-to-register moves are easily cleaned up
> by the late optimization passes after reload, such as peephole2 and
> cprop_hardreg.
The restriction is mainly for rtl optimization passes before pass_combine.
But split1 splits
```
(insn 17 13 18 2 (set (reg/i:V4SI 20 xmm0)
(vec_merge:V4SI (const_vector:V4SI [
(const_int -1 [0xffffffffffffffff]) repeated x4
])
(const_vector:V4SI [
(const_int 0 [0]) repeated x4
])
(unspec:QI [
(reg:V4SF 106)
(reg:V4SF 102)
(const_int 0 [0])
] UNSPEC_PCMP))) "/app/example.cpp":20:1 2929 {*avx_cmpv4sf3_1}
(expr_list:REG_DEAD (reg:V4SF 102)
(expr_list:REG_DEAD (reg:V4SF 106)
(nil))))
```
into:
```
(insn 23 13 24 2 (set (reg:V4SF 107)
(unspec:V4SF [
(reg:V4SF 106)
(reg:V4SF 102)
(const_int 0 [0])
] UNSPEC_PCMP)) "/app/example.cpp":20:1 -1
(nil))
(insn 24 23 18 2 (set (reg/i:V4SI 20 xmm0)
(subreg:V4SI (reg:V4SF 107) 0)) "/app/example.cpp":20:1 -1
(nil))
```
There're many splitters generating MOV insn with SUBREG and would have
same problem.
Instead of changing those splitters one by one, the patch relaxes
ix86_hard_mov_ok to allow mov subreg to hard register after
split1. ix86_pre_reload_split () is used to replace
!reload_completed && !lra_in_progress.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
* config/i386/i386.cc (ix86_hardreg_mov_ok): Relax mov subreg
to hard register after split1.
gcc/testsuite/ChangeLog:
* g++.target/i386/pr115982.C: New test.
---
gcc/config/i386/i386.cc | 5 ++---
gcc/testsuite/g++.target/i386/pr115982.C | 11 +++++++++++
2 files changed, 13 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr115982.C
Comments
On Tue, Jul 23, 2024 at 3:08 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> ix86_hardreg_mov_ok is added by r11-5066-gbe39636d9f68c4
>
> > The solution proposed here is to have the x86 backend/recog prevent
> > early RTL passes composing instructions (that set likely_spilled hard
> > registers) that they (combine) can't simplify, until after reload.
> > We allow sets from pseudo registers, immediate constants and memory
> > accesses, but anything more complicated is performed via a temporary
> > pseudo. Not only does this simplify things for the register allocator,
> > but any remaining register-to-register moves are easily cleaned up
> > by the late optimization passes after reload, such as peephole2 and
> > cprop_hardreg.
>
> The restriction is mainly for rtl optimization passes before pass_combine.
>
> But split1 splits
>
> ```
> (insn 17 13 18 2 (set (reg/i:V4SI 20 xmm0)
> (vec_merge:V4SI (const_vector:V4SI [
> (const_int -1 [0xffffffffffffffff]) repeated x4
> ])
> (const_vector:V4SI [
> (const_int 0 [0]) repeated x4
> ])
> (unspec:QI [
> (reg:V4SF 106)
> (reg:V4SF 102)
> (const_int 0 [0])
> ] UNSPEC_PCMP))) "/app/example.cpp":20:1 2929 {*avx_cmpv4sf3_1}
> (expr_list:REG_DEAD (reg:V4SF 102)
> (expr_list:REG_DEAD (reg:V4SF 106)
> (nil))))
> ```
>
> into:
> ```
> (insn 23 13 24 2 (set (reg:V4SF 107)
> (unspec:V4SF [
> (reg:V4SF 106)
> (reg:V4SF 102)
> (const_int 0 [0])
> ] UNSPEC_PCMP)) "/app/example.cpp":20:1 -1
> (nil))
> (insn 24 23 18 2 (set (reg/i:V4SI 20 xmm0)
> (subreg:V4SI (reg:V4SF 107) 0)) "/app/example.cpp":20:1 -1
> (nil))
> ```
>
> There're many splitters generating MOV insn with SUBREG and would have
> same problem.
> Instead of changing those splitters one by one, the patch relaxes
> ix86_hard_mov_ok to allow mov subreg to hard register after
> split1. ix86_pre_reload_split () is used to replace
> !reload_completed && !lra_in_progress.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.cc (ix86_hardreg_mov_ok): Relax mov subreg
> to hard register after split1.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/i386/pr115982.C: New test.
LGTM, but please watch out for fallout.
Thanks,
Uros.
> ---
> gcc/config/i386/i386.cc | 5 ++---
> gcc/testsuite/g++.target/i386/pr115982.C | 11 +++++++++++
> 2 files changed, 13 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr115982.C
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 9c2ebe74fc9..77c441893b4 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -20212,7 +20212,7 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
> }
>
> /* Return true if a set of DST by the expression SRC should be allowed.
> - This prevents complex sets of likely_spilled hard regs before reload. */
> + This prevents complex sets of likely_spilled hard regs before split1. */
>
> bool
> ix86_hardreg_mov_ok (rtx dst, rtx src)
> @@ -20224,8 +20224,7 @@ ix86_hardreg_mov_ok (rtx dst, rtx src)
> ? standard_sse_constant_p (src, GET_MODE (dst))
> : x86_64_immediate_operand (src, GET_MODE (dst)))
> && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
> - && !reload_completed
> - && !lra_in_progress)
> + && ix86_pre_reload_split ())
> return false;
> return true;
> }
> diff --git a/gcc/testsuite/g++.target/i386/pr115982.C b/gcc/testsuite/g++.target/i386/pr115982.C
> new file mode 100644
> index 00000000000..4b91618405d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr115982.C
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512vl -O2" } */
> +
> +typedef float VF __attribute__((__vector_size__(16)));
> +typedef int VI __attribute__((__vector_size__(16)));
> +
> +VI
> +foo (VF x)
> +{
> + return !x;
> +}
> --
> 2.31.1
>
@@ -20212,7 +20212,7 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
}
/* Return true if a set of DST by the expression SRC should be allowed.
- This prevents complex sets of likely_spilled hard regs before reload. */
+ This prevents complex sets of likely_spilled hard regs before split1. */
bool
ix86_hardreg_mov_ok (rtx dst, rtx src)
@@ -20224,8 +20224,7 @@ ix86_hardreg_mov_ok (rtx dst, rtx src)
? standard_sse_constant_p (src, GET_MODE (dst))
: x86_64_immediate_operand (src, GET_MODE (dst)))
&& ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
- && !reload_completed
- && !lra_in_progress)
+ && ix86_pre_reload_split ())
return false;
return true;
}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+
+typedef float VF __attribute__((__vector_size__(16)));
+typedef int VI __attribute__((__vector_size__(16)));
+
+VI
+foo (VF x)
+{
+ return !x;
+}