Mode iterator V_HW enables V1TI for target VXE, which means that
vec_cmpv1tiv1ti becomes available.  This leads to an ICE because there
is no corresponding insn.
Fix this by emulating the comparisons and by enabling mode V1TI
unconditionally for V_HW.  For the sake of symmetry, also add TI mode to
V_HW, since TF mode is already included.  As a consequence, the consumers
of V_HW, i.e. vec_{splat,slb,sld,sldw,sldb,srdb,srab,srb,test_mask_int,test_mask},
also become available for 128-bit integers.
This fixes gcc.c-torture/execute/pr105613.c and gcc.dg/pr106063.c.
gcc/ChangeLog:
* config/s390/vector.md (V_HW): Enable V1TI unconditionally and
add TI.
(VI_HW_T): New mode iterator.
(vec_cmpu<VIT_HW:mode><VIT_HW:mode>): Add 128-bit integer
variants.
(*vec_cmpeq<mode><mode>_nocc_emu): Emulate operation.
(*vec_cmpgt<mode><mode>_nocc_emu): Emulate operation.
(*vec_cmpgtu<mode><mode>_nocc_emu): Emulate operation.
(*vec_sel0<mode>): Use mode iterator VT instead of V.
gcc/testsuite/ChangeLog:
* gcc.target/s390/vector/vec-cmp-emu-1.c: New test.
* gcc.target/s390/vector/vec-cmp-emu-2.c: New test.
* gcc.target/s390/vector/vec-cmp-emu-3.c: New test.
---
Bootstrapped and regtested on s390. Ok for mainline and GCC 14?
gcc/config/s390/vector.md | 113 ++++++++++++++++--
.../gcc.target/s390/vector/vec-cmp-emu-1.c | 35 ++++++
.../gcc.target/s390/vector/vec-cmp-emu-2.c | 18 +++
.../gcc.target/s390/vector/vec-cmp-emu-3.c | 17 +++
4 files changed, 171 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-1.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-2.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cmp-emu-3.c
@@ -30,7 +30,7 @@
; V_HW2 is for having two iterators expanding independently e.g. vcond.
; It's similar to V_HW, but not fully identical: V1TI is not included, because
; there are no 128-bit compares.
-(define_mode_iterator V_HW [V16QI V8HI V4SI V2DI (V1TI "TARGET_VXE") V2DF
+(define_mode_iterator V_HW [V16QI V8HI V4SI V2DI V1TI TI V2DF
(V4SF "TARGET_VXE") (V1TF "TARGET_VXE")
(TF "TARGET_VXE")])
(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")
@@ -50,6 +50,7 @@
(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
(define_mode_iterator VI_HW_HS [V8HI V4SI])
(define_mode_iterator VI_HW_QH [V16QI V8HI])
+; 128-bit integer modes (vector and scalar) whose comparisons are emulated
+; by the *vec_cmp{eq,gt,gtu}<mode><mode>_nocc_emu patterns.
+(define_mode_iterator VI_HW_T [V1TI TI])
; Directly supported vector modes with a certain number of elements
(define_mode_iterator V_HW_2 [V2DI V2DF])
@@ -151,7 +152,7 @@
(V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI")
(V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
(V1DI "V1DI") (V2DI "V2DI")
- (V1TI "V1TI")
+ (V1TI "V1TI") (TI "V1TI")
(V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI")
(V1DF "V1DI") (V2DF "V2DI")
(V1TF "V1TI") (TF "V1TI")])
@@ -160,7 +161,7 @@
(V1HI "v1hi") (V2HI "v2hi") (V4HI "v4hi") (V8HI "v8hi")
(V1SI "v1si") (V2SI "v2si") (V4SI "v4si")
(V1DI "v1di") (V2DI "v2di")
- (V1TI "v1ti")
+ (V1TI "v1ti") (TI "v1ti")
(V1SF "v1si") (V2SF "v2si") (V4SF "v4si")
(V1DF "v1di") (V2DF "v2di")
(V1TF "v1ti") (TF "v1ti")])
@@ -1956,11 +1957,11 @@
DONE;
})
-(define_expand "vec_cmpu<VI_HW:mode><VI_HW:mode>"
- [(set (match_operand:VI_HW 0 "register_operand" "")
- (match_operator:VI_HW 1 ""
- [(match_operand:VI_HW 2 "register_operand" "")
- (match_operand:VI_HW 3 "register_operand" "")]))]
+(define_expand "vec_cmpu<VIT_HW:mode><VIT_HW:mode>"
+ [(set (match_operand:VIT_HW 0 "register_operand" "")
+ (match_operator:VIT_HW 1 ""
+ [(match_operand:VIT_HW 2 "register_operand" "")
+ (match_operand:VIT_HW 3 "register_operand" "")]))]
"TARGET_VX"
{
s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]);
@@ -1975,6 +1976,94 @@
"vc<VICMP_HW_OP:insn_cmp_op><VI:bhfgq>\t%v2,%v0,%v1"
[(set_attr "op_type" "VRR")])
+; Emulate a 128-bit integer equality compare with 64-bit element operations.
+; Both operands are viewed as V2DI.  The split
+;   1. compares the two doublewords element-wise for equality (operand 3),
+;   2. swaps the doublewords of that result (operand 4),
+;   3. ANDs both, so that each doubleword holds the conjunction of the
+;      high and the low element compare,
+;   4. reinterprets the V2DI result in the original 128-bit mode.
+; The split needs fresh pseudos and is therefore only done while
+; can_create_pseudo_p () holds.
+(define_insn_and_split "*vec_cmpeq<mode><mode>_nocc_emu"
+ [(set (match_operand:VI_HW_T 0 "register_operand" "=v")
+ (eq:VI_HW_T (match_operand:VI_HW_T 1 "register_operand" "v")
+ (match_operand:VI_HW_T 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 3)
+ (eq:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (vec_select:V2DI (match_dup 3) (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 3)
+ (and:V2DI (match_dup 3) (match_dup 4)))
+ (set (match_dup 0)
+ (subreg:<MODE> (match_dup 3) 0))]
+{
+ operands[1] = simplify_gen_subreg (V2DImode, operands[1], <MODE>mode, 0);
+ operands[2] = simplify_gen_subreg (V2DImode, operands[2], <MODE>mode, 0);
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = gen_reg_rtx (V2DImode);
+})
+
+; Emulate a signed 128-bit integer greater-than compare with 64-bit element
+; operations, using
+;   x > y  <=>  hi(x) >s hi(y) || (hi(x) == hi(y) && lo(x) >u lo(y))
+; Both operands are viewed as V2DI, where element 0 is the high and
+; element 1 the low doubleword.  Steps of the split:
+;   op3 = { hi >s, lo >s }
+;   op4 = { hi ==, lo == }
+;   op5 = { hi >u, lo >u }, then swapped to { lo >u, hi >u }
+;   op4 = op4 & op5        = { hi == && lo >u, ... }
+;   op4 = op3 | op4        = { hi >s || (hi == && lo >u), ... }
+; Only element 0 of op4 holds the 128-bit result, so element 0 is the one
+; that has to be replicated into both doublewords before the value is
+; reinterpreted in the original 128-bit mode.
+(define_insn_and_split "*vec_cmpgt<mode><mode>_nocc_emu"
+ [(set (match_operand:VI_HW_T 0 "register_operand" "=v")
+ (gt:VI_HW_T (match_operand:VI_HW_T 1 "register_operand" "v")
+ (match_operand:VI_HW_T 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 3)
+ (gt:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (eq:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 5)
+ (gtu:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 5)
+ (vec_select:V2DI (match_dup 5) (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 4)
+ (and:V2DI (match_dup 4) (match_dup 5)))
+ (set (match_dup 4)
+ (ior:V2DI (match_dup 3) (match_dup 4)))
+ (set (match_dup 4)
+ (vec_duplicate:V2DI
+ (vec_select:DI
+ (match_dup 4)
+ ; Element 0 carries the result; element 1 is only a by-product.
+ (parallel [(const_int 0)]))))
+ (set (match_dup 0)
+ (subreg:<MODE> (match_dup 4) 0))]
+{
+ operands[1] = simplify_gen_subreg (V2DImode, operands[1], <MODE>mode, 0);
+ operands[2] = simplify_gen_subreg (V2DImode, operands[2], <MODE>mode, 0);
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = gen_reg_rtx (V2DImode);
+ operands[5] = gen_reg_rtx (V2DImode);
+})
+
+; Emulate an unsigned 128-bit integer greater-than compare with 64-bit
+; element operations, using
+;   x >u y  <=>  hi(x) >u hi(y) || (hi(x) == hi(y) && lo(x) >u lo(y))
+; Both operands are viewed as V2DI, where element 0 is the high and
+; element 1 the low doubleword.  Steps of the split:
+;   op3 = { hi >u, lo >u }
+;   op4 = { hi ==, lo == }
+;   op5 = op3 with doublewords swapped = { lo >u, hi >u }
+;   op4 = op4 & op5        = { hi == && lo >u, ... }
+;   op4 = op3 | op4        = { hi >u || (hi == && lo >u), ... }
+; Only element 0 of op4 holds the 128-bit result, so element 0 is the one
+; that has to be replicated into both doublewords before the value is
+; reinterpreted in the original 128-bit mode.
+(define_insn_and_split "*vec_cmpgtu<mode><mode>_nocc_emu"
+ [(set (match_operand:VI_HW_T 0 "register_operand" "=v")
+ (gtu:VI_HW_T (match_operand:VI_HW_T 1 "register_operand" "v")
+ (match_operand:VI_HW_T 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 3)
+ (gtu:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (eq:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 5)
+ (vec_select:V2DI (match_dup 3) (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 4)
+ (and:V2DI (match_dup 4) (match_dup 5)))
+ (set (match_dup 4)
+ (ior:V2DI (match_dup 3) (match_dup 4)))
+ (set (match_dup 4)
+ (vec_duplicate:V2DI
+ (vec_select:DI
+ (match_dup 4)
+ ; Element 0 carries the result; element 1 is only a by-product.
+ (parallel [(const_int 0)]))))
+ (set (match_dup 0)
+ (subreg:<MODE> (match_dup 4) 0))]
+{
+ operands[1] = simplify_gen_subreg (V2DImode, operands[1], <MODE>mode, 0);
+ operands[2] = simplify_gen_subreg (V2DImode, operands[2], <MODE>mode, 0);
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = gen_reg_rtx (V2DImode);
+ operands[5] = gen_reg_rtx (V2DImode);
+})
+
;;
;; Floating point compares
@@ -2311,12 +2400,12 @@
; op0 = op3 == 0 ? op1 : op2
+; Note the operand order in the output template: %2 precedes %1.
+; NOTE(review): VT is defined outside this hunk; presumably it is V plus the
+; 128-bit scalar modes, so that selects on TI values are matched as well --
+; confirm against the iterator definition.
(define_insn "*vec_sel0<mode>"
- [(set (match_operand:V 0 "register_operand" "=v")
- (if_then_else:V
+ [(set (match_operand:VT 0 "register_operand" "=v")
+ (if_then_else:VT
(eq (match_operand:<TOINTVEC> 3 "register_operand" "v")
(match_operand:<TOINTVEC> 4 "const0_operand" ""))
- (match_operand:V 1 "register_operand" "v")
- (match_operand:V 2 "register_operand" "v")))]
+ (match_operand:VT 1 "register_operand" "v")
+ (match_operand:VT 2 "register_operand" "v")))]
"TARGET_VX"
"vsel\t%v0,%2,%1,%3"
[(set_attr "op_type" "VRR")])
new file mode 100644
@@ -0,0 +1,35 @@
+/* Verify that an equality compare of 128-bit integer vectors (signed and
+   unsigned) is emulated on z13 by the sequence vceqg, vpdi, vn.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mzarch -march=z13" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef __attribute__ ((vector_size (16))) signed __int128 v1ti;
+typedef __attribute__ ((vector_size (16))) unsigned __int128 uv1ti;
+
+/*
+** eq:
+** vceqg (%v[0-9]+),%v[0-9]+,%v[0-9]+
+** vpdi (%v[0-9]+),\1,\1,4
+** vn %v24,(\1,\2|\2,\1)
+** br %r14
+*/
+
+v1ti
+eq (v1ti x, v1ti y)
+{
+ return x == y;
+}
+
+/*
+** ueq:
+** vceqg (%v[0-9]+),%v[0-9]+,%v[0-9]+
+** vpdi (%v[0-9]+),\1,\1,4
+** vn %v24,(\1,\2|\2,\1)
+** br %r14
+*/
+
+uv1ti
+ueq (uv1ti x, uv1ti y)
+{
+ return x == y;
+}
new file mode 100644
@@ -0,0 +1,18 @@
+/* Verify that a signed greater-than compare of 128-bit integer vectors is
+   emulated on z13: exactly one each of vchg, vchlg, vceqg, vpdi, vn, vo and
+   vrepg is expected in the generated assembly.  */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler-times {\tvchg\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvchlg\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvceqg\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvo\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvrepg\t} 1 } } */
+
+typedef __attribute__ ((vector_size (16))) __int128 v1ti;
+
+v1ti
+gt (v1ti x, v1ti y)
+{
+ return x > y;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* Verify that an unsigned greater-than compare of 128-bit integer vectors is
+   emulated on z13: exactly one each of vchlg, vceqg, vpdi, vn, vo and vrepg
+   is expected in the generated assembly.  */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-final { scan-assembler-times {\tvchlg\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvceqg\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvn\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvo\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvrepg\t} 1 } } */
+
+typedef __attribute__ ((vector_size (16))) unsigned __int128 uv1ti;
+
+uv1ti
+gt (uv1ti x, uv1ti y)
+{
+ return x > y;
+}