[v3,09/15] arm: Fix vcond_mask expander for MVE (PR target/100757)
Commit Message
The problem in this PR is that we call VPSEL with a mask of vector
type instead of HImode. This happens because operand 3 in vcond_mask
is the pre-computed vector comparison and has vector type.
This patch fixes it by implementing TARGET_VECTORIZE_GET_MASK_MODE,
returning the appropriate VxBI mode when targeting MVE. In turn, this
implies implementing vec_cmp<mode><MVE_vpred>,
vec_cmpu<mode><MVE_vpred> and vcond_mask_<mode><MVE_vpred>, and we can
move vec_cmp<mode><v_cmp_result>, vec_cmpu<mode><mode> and
vcond_mask_<mode><v_cmp_result> back to neon.md since they are not
used by MVE anymore. The new *<MVE_vpred> patterns listed above are
implemented in mve.md since they are only valid for MVE. However this
may make maintenance/comparison more painful than having all of them
in vec-common.md.
In the process, we can get rid of the recently added vcond_mve
parameter of arm_expand_vector_compare.
Compared to neon.md's vcond_mask_<mode><v_cmp_result> before my "arm:
Auto-vectorization for MVE: vcmp" patch (r12-834), it keeps the VDQWH
iterator added in r12-835 (to have V4HF/V8HF support), as well as the
(!<Is_float_mode> || flag_unsafe_math_optimizations) condition which
was not present before r12-834 although SF modes were enabled by VDQW
(I think this was a bug).
Using TARGET_VECTORIZE_GET_MASK_MODE has the advantage that we no
longer need to generate vpsel with vectors of 0 and 1: the masks are
now merged via scalar 'ands' instructions operating on 16-bit masks
after converting the boolean vectors.
In addition, this patch fixes a problem in arm_expand_vcond() where
the result would be a vector of 0 or 1 instead of operand 1 or 2.
Since we want to skip gcc.dg/signbit-2.c for MVE, we also add a new
arm_mve effective target.
Reducing the number of iterations in pr100757-3.c from 32 to 8, we
generate the code below:
float a[32];
float fn1(int d) {
float c = 4.0f;
for (int b = 0; b < 8; b++)
if (a[b] != 2.0f)
c = 5.0f;
return c;
}
fn1:
ldr r3, .L3+48
vldr.64 d4, .L3 // q2=(2.0,2.0,2.0,2.0)
vldr.64 d5, .L3+8
vldrw.32 q0, [r3] // q0=a(0..3)
adds r3, r3, #16
vcmp.f32 eq, q0, q2 // cmp a(0..3) == (2.0,2.0,2.0,2.0)
vldrw.32 q1, [r3] // q1=a(4..7)
vmrs r3, P0
vcmp.f32 eq, q1, q2 // cmp a(4..7) == (2.0,2.0,2.0,2.0)
vmrs r2, P0 @ movhi
ands r3, r3, r2 // r3=select(a(0..3)) & select(a(4..7))
vldr.64 d4, .L3+16 // q2=(5.0,5.0,5.0,5.0)
vldr.64 d5, .L3+24
vmsr P0, r3
vldr.64 d6, .L3+32 // q3=(4.0,4.0,4.0,4.0)
vldr.64 d7, .L3+40
vpsel q3, q3, q2 // q3=vcond_mask(4.0,5.0)
vmov.32 r2, q3[1] // keep the scalar max
vmov.32 r0, q3[3]
vmov.32 r3, q3[2]
vmov.f32 s11, s12
vmov s15, r2
vmov s14, r3
vmaxnm.f32 s15, s11, s15
vmaxnm.f32 s15, s15, s14
vmov s14, r0
vmaxnm.f32 s15, s15, s14
vmov r0, s15
bx lr
.L4:
.align 3
.L3:
.word 1073741824 // 2.0f
.word 1073741824
.word 1073741824
.word 1073741824
.word 1084227584 // 5.0f
.word 1084227584
.word 1084227584
.word 1084227584
.word 1082130432 // 4.0f
.word 1082130432
.word 1082130432
.word 1082130432
2022-01-13 Christophe Lyon <christophe.lyon@foss.st.com>
PR target/100757
gcc/
* config/arm/arm-protos.h (arm_get_mask_mode): New prototype.
(arm_expand_vector_compare): Update prototype.
* config/arm/arm.c (TARGET_VECTORIZE_GET_MASK_MODE): New.
(arm_vector_mode_supported_p): Add support for VxBI modes.
(arm_expand_vector_compare): Remove useless generation of vpsel.
(arm_expand_vcond): Fix select operands.
(arm_get_mask_mode): New.
* config/arm/mve.md (vec_cmp<mode><MVE_vpred>): New.
(vec_cmpu<mode><MVE_vpred>): New.
(vcond_mask_<mode><MVE_vpred>): New.
* config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>)
(vec_cmpu<mode><mode>, vcond_mask_<mode><v_cmp_result>): Move to ...
* config/arm/neon.md (vec_cmp<mode><v_cmp_result>)
(vec_cmpu<mode><mode>, vcond_mask_<mode><v_cmp_result>): ... here
and disable for MVE.
* doc/sourcebuild.texi (arm_mve): Document new effective-target.
gcc/testsuite/
* gcc.dg/signbit-2.c: Skip when targeting ARM/MVE.
* lib/target-supports.exp (check_effective_target_arm_mve): New.
Comments
Hi Christophe,
> -----Original Message-----
> From: Gcc-patches <gcc-patches-
> bounces+kyrylo.tkachov=arm.com@gcc.gnu.org> On Behalf Of Christophe
> Lyon via Gcc-patches
> Sent: Thursday, January 13, 2022 2:56 PM
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v3 09/15] arm: Fix vcond_mask expander for MVE (PR
> target/100757)
>
> The problem in this PR is that we call VPSEL with a mask of vector
> type instead of HImode. This happens because operand 3 in vcond_mask
> is the pre-computed vector comparison and has vector type.
>
> This patch fixes it by implementing TARGET_VECTORIZE_GET_MASK_MODE,
> returning the appropriate VxBI mode when targeting MVE. In turn, this
> implies implementing vec_cmp<mode><MVE_vpred>,
> vec_cmpu<mode><MVE_vpred> and vcond_mask_<mode><MVE_vpred>,
> and we can
> move vec_cmp<mode><v_cmp_result>, vec_cmpu<mode><mode> and
> vcond_mask_<mode><v_cmp_result> back to neon.md since they are not
> used by MVE anymore. The new *<MVE_vpred> patterns listed above are
> implemented in mve.md since they are only valid for MVE. However this
> may make maintenance/comparison more painful than having all of them
> in vec-common.md.
>
> In the process, we can get rid of the recently added vcond_mve
> parameter of arm_expand_vector_compare.
>
> Compared to neon.md's vcond_mask_<mode><v_cmp_result> before my
> "arm:
> Auto-vectorization for MVE: vcmp" patch (r12-834), it keeps the VDQWH
> iterator added in r12-835 (to have V4HF/V8HF support), as well as the
> (!<Is_float_mode> || flag_unsafe_math_optimizations) condition which
> was not present before r12-834 although SF modes were enabled by VDQW
> (I think this was a bug).
>
> Using TARGET_VECTORIZE_GET_MASK_MODE has the advantage that we no
> longer need to generate vpsel with vectors of 0 and 1: the masks are
> now merged via scalar 'ands' instructions operating on 16-bit masks
> after converting the boolean vectors.
>
> In addition, this patch fixes a problem in arm_expand_vcond() where
> the result would be a vector of 0 or 1 instead of operand 1 or 2.
>
> Since we want to skip gcc.dg/signbit-2.c for MVE, we also add a new
> arm_mve effective target.
>
> Reducing the number of iterations in pr100757-3.c from 32 to 8, we
> generate the code below:
>
> float a[32];
> float fn1(int d) {
> float c = 4.0f;
> for (int b = 0; b < 8; b++)
> if (a[b] != 2.0f)
> c = 5.0f;
> return c;
> }
>
> fn1:
> ldr r3, .L3+48
> vldr.64 d4, .L3 // q2=(2.0,2.0,2.0,2.0)
> vldr.64 d5, .L3+8
> vldrw.32 q0, [r3] // q0=a(0..3)
> adds r3, r3, #16
> vcmp.f32 eq, q0, q2 // cmp a(0..3) == (2.0,2.0,2.0,2.0)
> vldrw.32 q1, [r3] // q1=a(4..7)
> vmrs r3, P0
> vcmp.f32 eq, q1, q2 // cmp a(4..7) == (2.0,2.0,2.0,2.0)
> vmrs r2, P0 @ movhi
> ands r3, r3, r2 // r3=select(a(0..3]) & select(a(4..7))
> vldr.64 d4, .L3+16 // q2=(5.0,5.0,5.0,5.0)
> vldr.64 d5, .L3+24
> vmsr P0, r3
> vldr.64 d6, .L3+32 // q3=(4.0,4.0,4.0,4.0)
> vldr.64 d7, .L3+40
> vpsel q3, q3, q2 // q3=vcond_mask(4.0,5.0)
> vmov.32 r2, q3[1] // keep the scalar max
> vmov.32 r0, q3[3]
> vmov.32 r3, q3[2]
> vmov.f32 s11, s12
> vmov s15, r2
> vmov s14, r3
> vmaxnm.f32 s15, s11, s15
> vmaxnm.f32 s15, s15, s14
> vmov s14, r0
> vmaxnm.f32 s15, s15, s14
> vmov r0, s15
> bx lr
> .L4:
> .align 3
> .L3:
> .word 1073741824 // 2.0f
> .word 1073741824
> .word 1073741824
> .word 1073741824
> .word 1084227584 // 5.0f
> .word 1084227584
> .word 1084227584
> .word 1084227584
> .word 1082130432 // 4.0f
> .word 1082130432
> .word 1082130432
> .word 1082130432
>
> 2022-01-13 Christophe Lyon <christophe.lyon@foss.st.com>
>
> PR target/100757
> gcc/
> * config/arm/arm-protos.h (arm_get_mask_mode): New prototype.
> (arm_expand_vector_compare): Update prototype.
> * config/arm/arm.c (TARGET_VECTORIZE_GET_MASK_MODE): New.
> (arm_vector_mode_supported_p): Add support for VxBI modes.
> (arm_expand_vector_compare): Remove useless generation of vpsel.
> (arm_expand_vcond): Fix select operands.
> (arm_get_mask_mode): New.
> * config/arm/mve.md (vec_cmp<mode><MVE_vpred>): New.
> (vec_cmpu<mode><MVE_vpred>): New.
> (vcond_mask_<mode><MVE_vpred>): New.
> * config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>)
> (vec_cmpu<mode><mode, vcond_mask_<mode><v_cmp_result>):
> Move to ...
> * config/arm/neon.md (vec_cmp<mode><v_cmp_result>)
> (vec_cmpu<mode><mode, vcond_mask_<mode><v_cmp_result>): ...
> here
> and disable for MVE.
> * doc/sourcebuild.texi (arm_mve): Document new effective-target.
>
> gcc/testsuite/
> * gcc.dg/signbit-2.c: Skip when targeting ARM/MVE.
> * lib/target-supports.exp (check_effective_target_arm_mve): New.
>
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index b978adf2038..a84613104b1 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -202,6 +202,7 @@ extern void arm_init_cumulative_args
> (CUMULATIVE_ARGS *, tree, rtx, tree);
> extern bool arm_pad_reg_upward (machine_mode, tree, int);
> #endif
> extern int arm_apply_result_size (void);
> +extern opt_machine_mode arm_get_mask_mode (machine_mode mode);
>
> #endif /* RTX_CODE */
>
> @@ -378,7 +379,7 @@ extern void arm_emit_coreregs_64bit_shift (enum
> rtx_code, rtx, rtx, rtx, rtx,
> extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
> extern bool arm_valid_symbolic_address_p (rtx);
> extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
> -extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool,
> bool);
> +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
> #endif /* RTX_CODE */
>
> extern bool arm_gen_setmem (rtx *);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index fa18c7bd3fe..7d56fa71806 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -829,6 +829,10 @@ static const struct attribute_spec
> arm_attribute_table[] =
>
> #undef TARGET_MD_ASM_ADJUST
> #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
> +
> +#undef TARGET_VECTORIZE_GET_MASK_MODE
> +#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
> +
>
>
>
> /* Obstack for minipool constant handling. */
> static struct obstack minipool_obstack;
> @@ -29234,7 +29238,8 @@ arm_vector_mode_supported_p
> (machine_mode mode)
>
> if (TARGET_HAVE_MVE
> && (mode == V2DImode || mode == V4SImode || mode == V8HImode
> - || mode == V16QImode))
> + || mode == V16QImode
> + || mode == V16BImode || mode == V8BImode || mode ==
> V4BImode))
> return true;
>
> if (TARGET_HAVE_MVE_FLOAT
> @@ -31033,7 +31038,7 @@ arm_split_atomic_op (enum rtx_code code, rtx
> old_out, rtx new_out, rtx mem,
> }
>
>
>
> /* Return the mode for the MVE vector of predicates corresponding to
> MODE. */
> -machine_mode
> +opt_machine_mode
> arm_mode_to_pred_mode (machine_mode mode)
> {
> switch (GET_MODE_NUNITS (mode))
> @@ -31042,7 +31047,7 @@ arm_mode_to_pred_mode (machine_mode
> mode)
> case 8: return V8BImode;
> case 4: return V4BImode;
> }
> - gcc_unreachable ();
> + return opt_machine_mode ();
> }
>
> /* Expand code to compare vectors OP0 and OP1 using condition CODE.
> @@ -31050,16 +31055,12 @@ arm_mode_to_pred_mode (machine_mode
> mode)
> and return true if TARGET contains the inverse. If !CAN_INVERT,
> always store the result in TARGET, never its inverse.
>
> - If VCOND_MVE, do not emit the vpsel instruction here, let
> arm_expand_vcond do
> - it with the right destination type to avoid emiting two vpsel, one here and
> - one in arm_expand_vcond.
> -
> Note that the handling of floating-point comparisons is not
> IEEE compliant. */
>
> bool
> arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
> - bool can_invert, bool vcond_mve)
> + bool can_invert)
> {
> machine_mode cmp_result_mode = GET_MODE (target);
> machine_mode cmp_mode = GET_MODE (op0);
> @@ -31088,7 +31089,7 @@ arm_expand_vector_compare (rtx target,
> rtx_code code, rtx op0, rtx op1,
> and then store its inverse in TARGET. This avoids reusing
> TARGET (which for integer NE could be one of the inputs). */
> rtx tmp = gen_reg_rtx (cmp_result_mode);
> - if (arm_expand_vector_compare (tmp, code, op0, op1, true,
> vcond_mve))
> + if (arm_expand_vector_compare (tmp, code, op0, op1, true))
> gcc_unreachable ();
> emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode,
> tmp)));
> return false;
> @@ -31124,36 +31125,22 @@ arm_expand_vector_compare (rtx target,
> rtx_code code, rtx op0, rtx op1,
> case NE:
> if (TARGET_HAVE_MVE)
> {
> - rtx vpr_p0;
> - if (vcond_mve)
> - vpr_p0 = target;
> - else
> - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
> -
> switch (GET_MODE_CLASS (cmp_mode))
> {
> case MODE_VECTOR_INT:
> - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0,
> force_reg (cmp_mode, op1)));
> + emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
> + op0, force_reg (cmp_mode, op1)));
> break;
> case MODE_VECTOR_FLOAT:
> if (TARGET_HAVE_MVE_FLOAT)
> - emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0,
> op0, force_reg (cmp_mode, op1)));
> + emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
> + op0, force_reg (cmp_mode, op1)));
> else
> gcc_unreachable ();
> break;
> default:
> gcc_unreachable ();
> }
> -
> - /* If we are not expanding a vcond, build the result here. */
> - if (!vcond_mve)
> - {
> - rtx zero = gen_reg_rtx (cmp_result_mode);
> - rtx one = gen_reg_rtx (cmp_result_mode);
> - emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
> - emit_move_insn (one, CONST1_RTX (cmp_result_mode));
> - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode,
> target, one, zero, vpr_p0));
> - }
> }
> else
> emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
> @@ -31165,23 +31152,8 @@ arm_expand_vector_compare (rtx target,
> rtx_code code, rtx op0, rtx op1,
> case GEU:
> case GTU:
> if (TARGET_HAVE_MVE)
> - {
> - rtx vpr_p0;
> - if (vcond_mve)
> - vpr_p0 = target;
> - else
> - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
> -
> - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0,
> force_reg (cmp_mode, op1)));
> - if (!vcond_mve)
> - {
> - rtx zero = gen_reg_rtx (cmp_result_mode);
> - rtx one = gen_reg_rtx (cmp_result_mode);
> - emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
> - emit_move_insn (one, CONST1_RTX (cmp_result_mode));
> - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode,
> target, one, zero, vpr_p0));
> - }
> - }
> + emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
> + op0, force_reg (cmp_mode, op1)));
> else
> emit_insn (gen_neon_vc (code, cmp_mode, target,
> op0, force_reg (cmp_mode, op1)));
> @@ -31192,23 +31164,8 @@ arm_expand_vector_compare (rtx target,
> rtx_code code, rtx op0, rtx op1,
> case LEU:
> case LTU:
> if (TARGET_HAVE_MVE)
> - {
> - rtx vpr_p0;
> - if (vcond_mve)
> - vpr_p0 = target;
> - else
> - vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
> -
> - emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode,
> vpr_p0, force_reg (cmp_mode, op1), op0));
> - if (!vcond_mve)
> - {
> - rtx zero = gen_reg_rtx (cmp_result_mode);
> - rtx one = gen_reg_rtx (cmp_result_mode);
> - emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
> - emit_move_insn (one, CONST1_RTX (cmp_result_mode));
> - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode,
> target, one, zero, vpr_p0));
> - }
> - }
> + emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode,
> target,
> + force_reg (cmp_mode, op1), op0));
> else
> emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
> target, force_reg (cmp_mode, op1), op0));
> @@ -31223,8 +31180,8 @@ arm_expand_vector_compare (rtx target,
> rtx_code code, rtx op0, rtx op1,
> rtx gt_res = gen_reg_rtx (cmp_result_mode);
> rtx alt_res = gen_reg_rtx (cmp_result_mode);
> rtx_code alt_code = (code == LTGT ? LT : LE);
> - if (arm_expand_vector_compare (gt_res, GT, op0, op1, true,
> vcond_mve)
> - || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true,
> vcond_mve))
> + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
> + || arm_expand_vector_compare (alt_res, alt_code, op0, op1,
> true))
> gcc_unreachable ();
> emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
> gt_res, alt_res)));
> @@ -31244,19 +31201,15 @@ arm_expand_vcond (rtx *operands,
> machine_mode cmp_result_mode)
> {
> /* When expanding for MVE, we do not want to emit a (useless) vpsel in
> arm_expand_vector_compare, and another one here. */
> - bool vcond_mve=false;
> rtx mask;
>
> if (TARGET_HAVE_MVE)
> - {
> - vcond_mve=true;
> - mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode));
> - }
> + mask = gen_reg_rtx (arm_mode_to_pred_mode
> (cmp_result_mode).require ());
> else
> mask = gen_reg_rtx (cmp_result_mode);
>
> bool inverted = arm_expand_vector_compare (mask, GET_CODE
> (operands[3]),
> - operands[4], operands[5], true,
> vcond_mve);
> + operands[4], operands[5], true);
> if (inverted)
> std::swap (operands[1], operands[2]);
> if (TARGET_NEON)
> @@ -31264,20 +31217,20 @@ arm_expand_vcond (rtx *operands,
> machine_mode cmp_result_mode)
> mask, operands[1], operands[2]));
> else
> {
> - machine_mode cmp_mode = GET_MODE (operands[4]);
> - rtx vpr_p0 = mask;
> - rtx zero = gen_reg_rtx (cmp_mode);
> - rtx one = gen_reg_rtx (cmp_mode);
> - emit_move_insn (zero, CONST0_RTX (cmp_mode));
> - emit_move_insn (one, CONST1_RTX (cmp_mode));
> + machine_mode cmp_mode = GET_MODE (operands[0]);
> +
> switch (GET_MODE_CLASS (cmp_mode))
> {
> case MODE_VECTOR_INT:
> - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode,
> operands[0], one, zero, vpr_p0));
> + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
> + operands[1], operands[2], mask));
> break;
> case MODE_VECTOR_FLOAT:
> if (TARGET_HAVE_MVE_FLOAT)
> - emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one,
> zero, vpr_p0));
> + emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
> + operands[1], operands[2], mask));
> + else
> + gcc_unreachable ();
> break;
> default:
> gcc_unreachable ();
> @@ -34187,4 +34140,15 @@ arm_mode_base_reg_class (machine_mode
> mode)
>
> struct gcc_target targetm = TARGET_INITIALIZER;
>
> +/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
> +
> +opt_machine_mode
> +arm_get_mask_mode (machine_mode mode)
> +{
> + if (TARGET_HAVE_MVE)
> + return arm_mode_to_pred_mode (mode);
> +
> + return default_get_mask_mode (mode);
> +}
> +
> #include "gt-arm.h"
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 983aa10e652..35564e870bc 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -10527,3 +10527,57 @@ (define_expand "mov<mode>"
> operands[1] = force_reg (<MODE>mode, operands[1]);
> }
> )
> +
> +;; Expanders for vec_cmp and vcond
> +
> +(define_expand "vec_cmp<mode><MVE_vpred>"
> + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand")
> + (match_operator:<MVE_VPRED> 1 "comparison_operator"
> + [(match_operand:MVE_VLD_ST 2 "s_register_operand")
> + (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
> + "TARGET_HAVE_MVE
> + && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> +{
> + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> + operands[2], operands[3], false);
> + DONE;
> +})
> +
> +(define_expand "vec_cmpu<mode><MVE_vpred>"
> + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand")
> + (match_operator:<MVE_VPRED> 1 "comparison_operator"
> + [(match_operand:MVE_2 2 "s_register_operand")
> + (match_operand:MVE_2 3 "reg_or_zero_operand")]))]
> + "TARGET_HAVE_MVE"
> +{
> + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> + operands[2], operands[3], false);
> + DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode><MVE_vpred>"
> + [(set (match_operand:MVE_VLD_ST 0 "s_register_operand")
> + (if_then_else:MVE_VLD_ST
> + (match_operand:<MVE_VPRED> 3 "s_register_operand")
> + (match_operand:MVE_VLD_ST 1 "s_register_operand")
> + (match_operand:MVE_VLD_ST 2 "s_register_operand")))]
> + "TARGET_HAVE_MVE"
> +{
> + switch (GET_MODE_CLASS (<MODE>mode))
> + {
> + case MODE_VECTOR_INT:
> + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
> + operands[1], operands[2], operands[3]));
> + break;
> + case MODE_VECTOR_FLOAT:
> + if (TARGET_HAVE_MVE_FLOAT)
> + emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0],
> + operands[1], operands[2], operands[3]));
> + else
> + gcc_unreachable ();
I think this logic is a bit too complicated. The vpselq_f pattern is already guarded on TARGET_HAVE_MVE_FLOAT so the compiler will ICE if it gets generated without MVE float.
So there's no need for this "if (TARGET_HAVE_MVE_FLOAT)" and gcc_unreachable ().
Ok with that change.
Thanks,
Kyrill
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + DONE;
> +})
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index e06c8245672..20e9f11ec81 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -1394,6 +1394,45 @@ (define_insn "*us_sub<mode>_neon"
> [(set_attr "type" "neon_qsub<q>")]
> )
>
> +(define_expand "vec_cmp<mode><v_cmp_result>"
> + [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
> + (match_operator:<V_cmp_result> 1 "comparison_operator"
> + [(match_operand:VDQWH 2 "s_register_operand")
> + (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
> + "TARGET_NEON
> + && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> +{
> + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> + operands[2], operands[3], false);
> + DONE;
> +})
> +
> +(define_expand "vec_cmpu<mode><mode>"
> + [(set (match_operand:VDQIW 0 "s_register_operand")
> + (match_operator:VDQIW 1 "comparison_operator"
> + [(match_operand:VDQIW 2 "s_register_operand")
> + (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
> + "TARGET_NEON"
> +{
> + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> + operands[2], operands[3], false);
> + DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode><v_cmp_result>"
> + [(set (match_operand:VDQWH 0 "s_register_operand")
> + (if_then_else:VDQWH
> + (match_operand:<V_cmp_result> 3 "s_register_operand")
> + (match_operand:VDQWH 1 "s_register_operand")
> + (match_operand:VDQWH 2 "s_register_operand")))]
> + "TARGET_NEON
> + && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> +{
> + emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3],
> operands[1],
> + operands[2]));
> + DONE;
> +})
> +
> ;; Patterns for builtins.
>
> ; good for plain vadd, vaddq.
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> common.md
> index cef358e44f5..20586973ed9 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -363,33 +363,6 @@ (define_expand "vlshr<mode>3"
> }
> })
>
> -(define_expand "vec_cmp<mode><v_cmp_result>"
> - [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
> - (match_operator:<V_cmp_result> 1 "comparison_operator"
> - [(match_operand:VDQWH 2 "s_register_operand")
> - (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
> - "ARM_HAVE_<MODE>_ARITH
> - && !TARGET_REALLY_IWMMXT
> - && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> -{
> - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> - operands[2], operands[3], false, false);
> - DONE;
> -})
> -
> -(define_expand "vec_cmpu<mode><mode>"
> - [(set (match_operand:VDQIW 0 "s_register_operand")
> - (match_operator:VDQIW 1 "comparison_operator"
> - [(match_operand:VDQIW 2 "s_register_operand")
> - (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
> - "ARM_HAVE_<MODE>_ARITH
> - && !TARGET_REALLY_IWMMXT"
> -{
> - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
> - operands[2], operands[3], false, false);
> - DONE;
> -})
> -
> ;; Conditional instructions. These are comparisons with conditional moves
> for
> ;; vectors. They perform the assignment:
> ;;
> @@ -461,31 +434,6 @@ (define_expand "vcondu<mode><v_cmp_result>"
> DONE;
> })
>
> -(define_expand "vcond_mask_<mode><v_cmp_result>"
> - [(set (match_operand:VDQWH 0 "s_register_operand")
> - (if_then_else:VDQWH
> - (match_operand:<V_cmp_result> 3 "s_register_operand")
> - (match_operand:VDQWH 1 "s_register_operand")
> - (match_operand:VDQWH 2 "s_register_operand")))]
> - "ARM_HAVE_<MODE>_ARITH
> - && !TARGET_REALLY_IWMMXT
> - && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
> -{
> - if (TARGET_NEON)
> - {
> - emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3],
> - operands[1], operands[2]));
> - }
> - else if (TARGET_HAVE_MVE)
> - {
> - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
> - operands[1], operands[2], operands[3]));
> - }
> - else
> - gcc_unreachable ();
> - DONE;
> -})
> -
> (define_expand "vec_load_lanesoi<mode>"
> [(set (match_operand:OI 0 "s_register_operand")
> (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 6095a35cd45..8d369935396 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2236,6 +2236,10 @@ ARM target supports the @code{-mfloat-
> abi=softfp} option.
> @anchor{arm_hard_ok}
> ARM target supports the @code{-mfloat-abi=hard} option.
>
> +@item arm_mve
> +@anchor{arm_mve}
> +ARM target supports generating MVE instructions.
> +
> @item arm_v8_1_lob_ok
> @anchor{arm_v8_1_lob_ok}
> ARM Target supports executing the Armv8.1-M Mainline Low Overhead Loop
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
> index b609f67dc9f..2f2dc448286 100644
> --- a/gcc/testsuite/gcc.dg/signbit-2.c
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -4,6 +4,7 @@
> /* This test does not work when the truth type does not match vector type.
> */
> /* { dg-additional-options "-mno-avx512f" { target { i?86-*-* x86_64-*-* } } }
> */
> /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */
> +/* { dg-skip-if "no fallback for MVE" { arm_mve } } */
>
> #include <stdint.h>
>
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-
> supports.exp
> index 0fe1e1e077a..8dac516ec12 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5234,6 +5234,18 @@ proc check_effective_target_arm_hard_ok { } {
> } "-mfloat-abi=hard"]
> }
>
> +# Return 1 if this is an ARM target supporting MVE.
> +proc check_effective_target_arm_mve { } {
> + if { ![istarget arm*-*-*] } {
> + return 0
> + }
> + return [check_no_compiler_messages arm_mve assembly {
> + #if !defined (__ARM_FEATURE_MVE)
> + #error FOO
> + #endif
> + }]
> +}
> +
> # Return 1 if the target supports ARMv8.1-M MVE with floating point
> # instructions, 0 otherwise. The test is valid for ARM.
> # Record the command line options needed.
> --
> 2.25.1
@@ -202,6 +202,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
extern bool arm_pad_reg_upward (machine_mode, tree, int);
#endif
extern int arm_apply_result_size (void);
+extern opt_machine_mode arm_get_mask_mode (machine_mode mode);
#endif /* RTX_CODE */
@@ -378,7 +379,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
-extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool, bool);
+extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
#endif /* RTX_CODE */
extern bool arm_gen_setmem (rtx *);
@@ -829,6 +829,10 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
+#undef TARGET_VECTORIZE_GET_MASK_MODE
+#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
+
/* Obstack for minipool constant handling. */
static struct obstack minipool_obstack;
@@ -29234,7 +29238,8 @@ arm_vector_mode_supported_p (machine_mode mode)
if (TARGET_HAVE_MVE
&& (mode == V2DImode || mode == V4SImode || mode == V8HImode
- || mode == V16QImode))
+ || mode == V16QImode
+ || mode == V16BImode || mode == V8BImode || mode == V4BImode))
return true;
if (TARGET_HAVE_MVE_FLOAT
@@ -31033,7 +31038,7 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
}
/* Return the mode for the MVE vector of predicates corresponding to MODE. */
-machine_mode
+opt_machine_mode
arm_mode_to_pred_mode (machine_mode mode)
{
switch (GET_MODE_NUNITS (mode))
@@ -31042,7 +31047,7 @@ arm_mode_to_pred_mode (machine_mode mode)
case 8: return V8BImode;
case 4: return V4BImode;
}
- gcc_unreachable ();
+ return opt_machine_mode ();
}
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
@@ -31050,16 +31055,12 @@ arm_mode_to_pred_mode (machine_mode mode)
and return true if TARGET contains the inverse. If !CAN_INVERT,
always store the result in TARGET, never its inverse.
- If VCOND_MVE, do not emit the vpsel instruction here, let arm_expand_vcond do
- it with the right destination type to avoid emiting two vpsel, one here and
- one in arm_expand_vcond.
-
Note that the handling of floating-point comparisons is not
IEEE compliant. */
bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
- bool can_invert, bool vcond_mve)
+ bool can_invert)
{
machine_mode cmp_result_mode = GET_MODE (target);
machine_mode cmp_mode = GET_MODE (op0);
@@ -31088,7 +31089,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
and then store its inverse in TARGET. This avoids reusing
TARGET (which for integer NE could be one of the inputs). */
rtx tmp = gen_reg_rtx (cmp_result_mode);
- if (arm_expand_vector_compare (tmp, code, op0, op1, true, vcond_mve))
+ if (arm_expand_vector_compare (tmp, code, op0, op1, true))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
return false;
@@ -31124,36 +31125,22 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
case NE:
if (TARGET_HAVE_MVE)
{
- rtx vpr_p0;
- if (vcond_mve)
- vpr_p0 = target;
- else
- vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
-
switch (GET_MODE_CLASS (cmp_mode))
{
case MODE_VECTOR_INT:
- emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
break;
case MODE_VECTOR_FLOAT:
if (TARGET_HAVE_MVE_FLOAT)
- emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
else
gcc_unreachable ();
break;
default:
gcc_unreachable ();
}
-
- /* If we are not expanding a vcond, build the result here. */
- if (!vcond_mve)
- {
- rtx zero = gen_reg_rtx (cmp_result_mode);
- rtx one = gen_reg_rtx (cmp_result_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
- emit_move_insn (one, CONST1_RTX (cmp_result_mode));
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
- }
}
else
emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
@@ -31165,23 +31152,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
case GEU:
case GTU:
if (TARGET_HAVE_MVE)
- {
- rtx vpr_p0;
- if (vcond_mve)
- vpr_p0 = target;
- else
- vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
-
- emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
- if (!vcond_mve)
- {
- rtx zero = gen_reg_rtx (cmp_result_mode);
- rtx one = gen_reg_rtx (cmp_result_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
- emit_move_insn (one, CONST1_RTX (cmp_result_mode));
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
- }
- }
+ emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
else
emit_insn (gen_neon_vc (code, cmp_mode, target,
op0, force_reg (cmp_mode, op1)));
@@ -31192,23 +31164,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
case LEU:
case LTU:
if (TARGET_HAVE_MVE)
- {
- rtx vpr_p0;
- if (vcond_mve)
- vpr_p0 = target;
- else
- vpr_p0 = gen_reg_rtx (arm_mode_to_pred_mode (cmp_mode));
-
- emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
- if (!vcond_mve)
- {
- rtx zero = gen_reg_rtx (cmp_result_mode);
- rtx one = gen_reg_rtx (cmp_result_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
- emit_move_insn (one, CONST1_RTX (cmp_result_mode));
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
- }
- }
+ emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
+ force_reg (cmp_mode, op1), op0));
else
emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
target, force_reg (cmp_mode, op1), op0));
@@ -31223,8 +31180,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
rtx gt_res = gen_reg_rtx (cmp_result_mode);
rtx alt_res = gen_reg_rtx (cmp_result_mode);
rtx_code alt_code = (code == LTGT ? LT : LE);
- if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve)
- || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve))
+ if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
+ || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
gt_res, alt_res)));
@@ -31244,19 +31201,13 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
- /* When expanding for MVE, we do not want to emit a (useless) vpsel in
- arm_expand_vector_compare, and another one here. */
- bool vcond_mve=false;
rtx mask;
if (TARGET_HAVE_MVE)
- {
- vcond_mve=true;
- mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode));
- }
+ mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
else
mask = gen_reg_rtx (cmp_result_mode);
bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
- operands[4], operands[5], true, vcond_mve);
+ operands[4], operands[5], true);
if (inverted)
std::swap (operands[1], operands[2]);
if (TARGET_NEON)
@@ -31264,20 +31217,20 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
mask, operands[1], operands[2]));
else
{
- machine_mode cmp_mode = GET_MODE (operands[4]);
- rtx vpr_p0 = mask;
- rtx zero = gen_reg_rtx (cmp_mode);
- rtx one = gen_reg_rtx (cmp_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_mode));
- emit_move_insn (one, CONST1_RTX (cmp_mode));
+ machine_mode cmp_mode = GET_MODE (operands[0]);
+
switch (GET_MODE_CLASS (cmp_mode))
{
case MODE_VECTOR_INT:
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0));
+ emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
+ operands[1], operands[2], mask));
break;
case MODE_VECTOR_FLOAT:
if (TARGET_HAVE_MVE_FLOAT)
- emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0));
+ emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
+ operands[1], operands[2], mask));
+ else
+ gcc_unreachable ();
break;
default:
gcc_unreachable ();
@@ -34187,4 +34140,15 @@ arm_mode_base_reg_class (machine_mode mode)
struct gcc_target targetm = TARGET_INITIALIZER;
+/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
+
+opt_machine_mode
+arm_get_mask_mode (machine_mode mode)
+{
+ if (TARGET_HAVE_MVE)
+ return arm_mode_to_pred_mode (mode);
+
+ return default_get_mask_mode (mode);
+}
+
#include "gt-arm.h"
@@ -10527,3 +10527,57 @@ (define_expand "mov<mode>"
operands[1] = force_reg (<MODE>mode, operands[1]);
}
)
+
+;; Expanders for vec_cmp and vcond
+
+(define_expand "vec_cmp<mode><MVE_vpred>"
+ [(set (match_operand:<MVE_VPRED> 0 "s_register_operand")
+ (match_operator:<MVE_VPRED> 1 "comparison_operator"
+ [(match_operand:MVE_VLD_ST 2 "s_register_operand")
+ (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))]
+ "TARGET_HAVE_MVE
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><MVE_vpred>"
+ [(set (match_operand:<MVE_VPRED> 0 "s_register_operand")
+ (match_operator:<MVE_VPRED> 1 "comparison_operator"
+ [(match_operand:MVE_2 2 "s_register_operand")
+ (match_operand:MVE_2 3 "reg_or_zero_operand")]))]
+ "TARGET_HAVE_MVE"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_expand "vcond_mask_<mode><MVE_vpred>"
+ [(set (match_operand:MVE_VLD_ST 0 "s_register_operand")
+ (if_then_else:MVE_VLD_ST
+ (match_operand:<MVE_VPRED> 3 "s_register_operand")
+ (match_operand:MVE_VLD_ST 1 "s_register_operand")
+ (match_operand:MVE_VLD_ST 2 "s_register_operand")))]
+ "TARGET_HAVE_MVE"
+{
+ switch (GET_MODE_CLASS (<MODE>mode))
+ {
+ case MODE_VECTOR_INT:
+ emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
+ operands[1], operands[2], operands[3]));
+ break;
+ case MODE_VECTOR_FLOAT:
+ if (TARGET_HAVE_MVE_FLOAT)
+ emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0],
+ operands[1], operands[2], operands[3]));
+ else
+ gcc_unreachable ();
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
@@ -1394,6 +1394,45 @@ (define_insn "*us_sub<mode>_neon"
[(set_attr "type" "neon_qsub<q>")]
)
+(define_expand "vec_cmp<mode><v_cmp_result>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+ (match_operator:<V_cmp_result> 1 "comparison_operator"
+ [(match_operand:VDQWH 2 "s_register_operand")
+ (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
+ "TARGET_NEON
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand")
+ (match_operator:VDQIW 1 "comparison_operator"
+ [(match_operand:VDQIW 2 "s_register_operand")
+ (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+ "TARGET_NEON"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+ [(set (match_operand:VDQWH 0 "s_register_operand")
+ (if_then_else:VDQWH
+ (match_operand:<V_cmp_result> 3 "s_register_operand")
+ (match_operand:VDQWH 1 "s_register_operand")
+ (match_operand:VDQWH 2 "s_register_operand")))]
+ "TARGET_NEON
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
+ operands[2]));
+ DONE;
+})
+
;; Patterns for builtins.
; good for plain vadd, vaddq.
@@ -363,33 +363,6 @@ (define_expand "vlshr<mode>3"
}
})
-(define_expand "vec_cmp<mode><v_cmp_result>"
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
- (match_operator:<V_cmp_result> 1 "comparison_operator"
- [(match_operand:VDQWH 2 "s_register_operand")
- (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
- "ARM_HAVE_<MODE>_ARITH
- && !TARGET_REALLY_IWMMXT
- && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
- arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false, false);
- DONE;
-})
-
-(define_expand "vec_cmpu<mode><mode>"
- [(set (match_operand:VDQIW 0 "s_register_operand")
- (match_operator:VDQIW 1 "comparison_operator"
- [(match_operand:VDQIW 2 "s_register_operand")
- (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
- "ARM_HAVE_<MODE>_ARITH
- && !TARGET_REALLY_IWMMXT"
-{
- arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false, false);
- DONE;
-})
-
;; Conditional instructions. These are comparisons with conditional moves for
;; vectors. They perform the assignment:
;;
@@ -461,31 +434,6 @@ (define_expand "vcondu<mode><v_cmp_result>"
DONE;
})
-(define_expand "vcond_mask_<mode><v_cmp_result>"
- [(set (match_operand:VDQWH 0 "s_register_operand")
- (if_then_else:VDQWH
- (match_operand:<V_cmp_result> 3 "s_register_operand")
- (match_operand:VDQWH 1 "s_register_operand")
- (match_operand:VDQWH 2 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH
- && !TARGET_REALLY_IWMMXT
- && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
- if (TARGET_NEON)
- {
- emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3],
- operands[1], operands[2]));
- }
- else if (TARGET_HAVE_MVE)
- {
- emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
- operands[1], operands[2], operands[3]));
- }
- else
- gcc_unreachable ();
- DONE;
-})
-
(define_expand "vec_load_lanesoi<mode>"
[(set (match_operand:OI 0 "s_register_operand")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand")
@@ -2236,6 +2236,10 @@ ARM target supports the @code{-mfloat-abi=softfp} option.
@anchor{arm_hard_ok}
ARM target supports the @code{-mfloat-abi=hard} option.
+@item arm_mve
+@anchor{arm_mve}
+ARM target supports generating MVE instructions.
+
@item arm_v8_1_lob_ok
@anchor{arm_v8_1_lob_ok}
ARM Target supports executing the Armv8.1-M Mainline Low Overhead Loop
@@ -4,6 +4,7 @@
/* This test does not work when the truth type does not match vector type. */
/* { dg-additional-options "-mno-avx512f" { target { i?86-*-* x86_64-*-* } } } */
/* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */
+/* { dg-skip-if "no fallback for MVE" { arm_mve } } */
#include <stdint.h>
@@ -5234,6 +5234,18 @@ proc check_effective_target_arm_hard_ok { } {
} "-mfloat-abi=hard"]
}
+# Return 1 if this is an ARM target supporting MVE.
+proc check_effective_target_arm_mve { } {
+ if { ![istarget arm*-*-*] } {
+ return 0
+ }
+ return [check_no_compiler_messages arm_mve assembly {
+ #if !defined (__ARM_FEATURE_MVE)
+ #error FOO
+ #endif
+ }]
+}
+
# Return 1 if the target supports ARMv8.1-M MVE with floating point
# instructions, 0 otherwise. The test is valid for ARM.
# Record the command line options needed.