[pushed] aarch64: Fix bogus cnot optimisation [PR114603]
Checks
Commit Message
aarch64-sve.md had a pattern that combined:
cmpeq pb.T, pa/z, zc.T, #0
mov zd.T, pb/z, #1
into:
cnot zd.T, pa/m, zc.T
But this is only valid if pa.T is a ptrue. In other cases, the
original would set inactive elements of zd.T to 0, whereas the
combined form would copy elements from zc.T.
This isn't a regression on a known testcase. However, it's a nasty
wrong code bug that could conceivably trigger for autovec code (although
I've not been able to construct a reproducer so far). That fix is also
quite localised to the buggy operation. I'd therefore prefer to push
the fix now rather than wait for GCC 15.
Tested on aarch64-linux-gnu & pushed. I'll backport to branches if
there is no fallout.
Richard
gcc/
PR target/114603
* config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
with...
(@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
a ptrue.
(*cnot<mode>): Require operand 1 to be a ptrue.
* config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
with a ptrue. Represent other _x operations as fully-defined _m
operations.
gcc/testsuite/
PR target/114603
* gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
---
.../aarch64/aarch64-sve-builtins-base.cc | 25 ++++++++++++-------
gcc/config/aarch64/aarch64-sve.md | 22 ++++++++--------
.../aarch64/sve/acle/general/cnot_1.c | 23 +++++++++++++++++
3 files changed, 50 insertions(+), 20 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
Comments
On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> aarch64-sve.md had a pattern that combined:
>
> cmpeq pb.T, pa/z, zc.T, #0
> mov zd.T, pb/z, #1
>
> into:
>
> cnot zd.T, pa/m, zc.T
>
> But this is only valid if pa.T is a ptrue. In other cases, the
> original would set inactive elements of zd.T to 0, whereas the
> combined form would copy elements from zc.T.
>
> This isn't a regression on a known testcase. However, it's a nasty
> wrong code bug that could conceivably trigger for autovec code (although
> I've not been able to construct a reproducer so far). That fix is also
> quite localised to the buggy operation. I'd therefore prefer to push
> the fix now rather than wait for GCC 15.
wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt
from the regression-only fixing. In practice every such bug will be a
regression,
in this case to when the combining pattern was introduced (unless that was
with the version with the initial introduction of the port of course).
Richard.
> Tested on aarch64-linux-gnu & pushed. I'll backport to branches if
> there is no fallout.
>
> Richard
>
> gcc/
> PR target/114603
> * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
> with...
> (@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
> a ptrue.
> (*cnot<mode>): Require operand 1 to be a ptrue.
> * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
> Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
> with a ptrue. Represent other _x operations as fully-defined _m
> operations.
>
> gcc/testsuite/
> PR target/114603
> * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
> ---
> .../aarch64/aarch64-sve-builtins-base.cc | 25 ++++++++++++-------
> gcc/config/aarch64/aarch64-sve.md | 22 ++++++++--------
> .../aarch64/sve/acle/general/cnot_1.c | 23 +++++++++++++++++
> 3 files changed, 50 insertions(+), 20 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 257ca5bf6ad..5be2315a3c6 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -517,15 +517,22 @@ public:
> expand (function_expander &e) const override
> {
> machine_mode mode = e.vector_mode (0);
> - if (e.pred == PRED_x)
> - {
> - /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
> - a ptrue hint. */
> - e.add_ptrue_hint (0, e.gp_mode (0));
> - return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
> - }
> -
> - return e.use_cond_insn (code_for_cond_cnot (mode), 0);
> + machine_mode pred_mode = e.gp_mode (0);
> + /* The underlying _x pattern is effectively:
> +
> + dst = src == 0 ? 1 : 0
> +
> + rather than an UNSPEC_PRED_X. Using this form allows autovec
> + constructs to be matched by combine, but it means that the
> + predicate on the src == 0 comparison must be all-true.
> +
> + For simplicity, represent other _x operations as fully-defined _m
> + operations rather than using a separate bespoke pattern. */
> + if (e.pred == PRED_x
> + && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
> + return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
> + return e.use_cond_insn (code_for_cond_cnot (mode),
> + e.pred == PRED_x ? 1 : 0);
> }
> };
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index eca8623e587..0434358122d 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -3363,24 +3363,24 @@ (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
> ;; - CNOT
> ;; -------------------------------------------------------------------------
>
> -;; Predicated logical inverse.
> -(define_expand "@aarch64_pred_cnot<mode>"
> +;; Logical inverse, predicated with a ptrue.
> +(define_expand "@aarch64_ptrue_cnot<mode>"
> [(set (match_operand:SVE_FULL_I 0 "register_operand")
> (unspec:SVE_FULL_I
> [(unspec:<VPRED>
> [(match_operand:<VPRED> 1 "register_operand")
> - (match_operand:SI 2 "aarch64_sve_ptrue_flag")
> + (const_int SVE_KNOWN_PTRUE)
> (eq:<VPRED>
> - (match_operand:SVE_FULL_I 3 "register_operand")
> - (match_dup 4))]
> + (match_operand:SVE_FULL_I 2 "register_operand")
> + (match_dup 3))]
> UNSPEC_PRED_Z)
> - (match_dup 5)
> - (match_dup 4)]
> + (match_dup 4)
> + (match_dup 3)]
> UNSPEC_SEL))]
> "TARGET_SVE"
> {
> - operands[4] = CONST0_RTX (<MODE>mode);
> - operands[5] = CONST1_RTX (<MODE>mode);
> + operands[3] = CONST0_RTX (<MODE>mode);
> + operands[4] = CONST1_RTX (<MODE>mode);
> }
> )
>
> @@ -3389,7 +3389,7 @@ (define_insn "*cnot<mode>"
> (unspec:SVE_I
> [(unspec:<VPRED>
> [(match_operand:<VPRED> 1 "register_operand")
> - (match_operand:SI 5 "aarch64_sve_ptrue_flag")
> + (const_int SVE_KNOWN_PTRUE)
> (eq:<VPRED>
> (match_operand:SVE_I 2 "register_operand")
> (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
> @@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_<mode>"
> GET_MODE (operands[2]));
> return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
> }
> -)
> \ No newline at end of file
> +)
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
> new file mode 100644
> index 00000000000..b1a489f0cf0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
> @@ -0,0 +1,23 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> +** foo:
> +** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0
> +** mov z0\.s, \1/z, #1
> +** ret
> +*/
> +svint32_t foo(svbool_t pg, svint32_t y)
> +{
> + return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> --
> 2.25.1
>
Richard Biener <richard.guenther@gmail.com> writes:
> On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford
>> This isn't a regression on a known testcase. However, it's a nasty
>> wrong code bug that could conceivably trigger for autovec code (although
>> I've not been able to construct a reproducer so far). That fix is also
>> quite localised to the buggy operation. I'd therefore prefer to push
>> the fix now rather than wait for GCC 15.
>
> wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt
> from the regression-only fixing. In practice every such bug will be a
> regression,
> in this case to when the combining pattern was introduced (unless that was
> with the version with the initial introduction of the port of course).
Ah, thanks, hadn't realised that. Makes sense though.
It's good news of a sort since unfortunately I've another SVE wrong-code
fix in the works...
Richard
@@ -517,15 +517,22 @@ public:
expand (function_expander &e) const override
{
machine_mode mode = e.vector_mode (0);
- if (e.pred == PRED_x)
- {
- /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
- a ptrue hint. */
- e.add_ptrue_hint (0, e.gp_mode (0));
- return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
- }
-
- return e.use_cond_insn (code_for_cond_cnot (mode), 0);
+ machine_mode pred_mode = e.gp_mode (0);
+ /* The underlying _x pattern is effectively:
+
+ dst = src == 0 ? 1 : 0
+
+ rather than an UNSPEC_PRED_X. Using this form allows autovec
+ constructs to be matched by combine, but it means that the
+ predicate on the src == 0 comparison must be all-true.
+
+ For simplicity, represent other _x operations as fully-defined _m
+ operations rather than using a separate bespoke pattern. */
+ if (e.pred == PRED_x
+ && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
+ return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
+ return e.use_cond_insn (code_for_cond_cnot (mode),
+ e.pred == PRED_x ? 1 : 0);
}
};
@@ -3363,24 +3363,24 @@ (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
;; - CNOT
;; -------------------------------------------------------------------------
-;; Predicated logical inverse.
-(define_expand "@aarch64_pred_cnot<mode>"
+;; Logical inverse, predicated with a ptrue.
+(define_expand "@aarch64_ptrue_cnot<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (const_int SVE_KNOWN_PTRUE)
(eq:<VPRED>
- (match_operand:SVE_FULL_I 3 "register_operand")
- (match_dup 4))]
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_dup 3))]
UNSPEC_PRED_Z)
- (match_dup 5)
- (match_dup 4)]
+ (match_dup 4)
+ (match_dup 3)]
UNSPEC_SEL))]
"TARGET_SVE"
{
- operands[4] = CONST0_RTX (<MODE>mode);
- operands[5] = CONST1_RTX (<MODE>mode);
+ operands[3] = CONST0_RTX (<MODE>mode);
+ operands[4] = CONST1_RTX (<MODE>mode);
}
)
@@ -3389,7 +3389,7 @@ (define_insn "*cnot<mode>"
(unspec:SVE_I
[(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (const_int SVE_KNOWN_PTRUE)
(eq:<VPRED>
(match_operand:SVE_I 2 "register_operand")
(match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
@@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_<mode>"
GET_MODE (operands[2]));
return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
}
-)
\ No newline at end of file
+)
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** foo:
+** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0
+** mov z0\.s, \1/z, #1
+** ret
+*/
+svint32_t foo(svbool_t pg, svint32_t y)
+{
+ return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
+}
+
+#ifdef __cplusplus
+}
+#endif