@@ -253,10 +253,11 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
#define TYPES_b_integer(S, D) \
S (s8), TYPES_b_unsigned (S, D)
-/* _s8
+/* _mf8
+ _s8
_u8. */
#define TYPES_b_data(S, D) \
- TYPES_b_integer (S, D)
+ S (mf8), TYPES_b_integer (S, D)
/* _s8 _s16
_u8 _u16. */
@@ -539,16 +540,18 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
D (u8, s32), \
D (u16, s64)
-/* { _bf16 } { _bf16 }
+/* { _mf8 _bf16 } { _mf8 _bf16 }
{ _f16 _f32 _f64 } { _f16 _f32 _f64 }
{ _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
{ _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }. */
#define TYPES_reinterpret1(D, A) \
+ D (A, mf8), \
D (A, bf16), \
D (A, f16), D (A, f32), D (A, f64), \
D (A, s8), D (A, s16), D (A, s32), D (A, s64), \
D (A, u8), D (A, u16), D (A, u32), D (A, u64)
#define TYPES_reinterpret(S, D) \
+ TYPES_reinterpret1 (D, mf8), \
TYPES_reinterpret1 (D, bf16), \
TYPES_reinterpret1 (D, f16), \
TYPES_reinterpret1 (D, f32), \
@@ -81,6 +81,7 @@ DEF_SVE_MODE (vnum, none, none, vectors)
DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node)
DEF_SVE_TYPE (svcount_t, 11, __SVCount_t, boolean_type_node)
+DEF_SVE_TYPE (svmfloat8_t, 13, __SVMfloat8_t, aarch64_mfp8_type_node)
DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, bfloat16_type_node)
DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node)
DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node)
@@ -107,6 +108,8 @@ DEF_SVE_TYPE_SUFFIX (c8, svcount_t, count, 8, VNx16BImode)
DEF_SVE_TYPE_SUFFIX (c16, svcount_t, count, 16, VNx16BImode)
DEF_SVE_TYPE_SUFFIX (c32, svcount_t, count, 32, VNx16BImode)
DEF_SVE_TYPE_SUFFIX (c64, svcount_t, count, 64, VNx16BImode)
+DEF_SVE_NEON_TYPE_SUFFIX (mf8, svmfloat8_t, mfloat, 8, VNx16QImode,
+ Mfloat8x8_t, Mfloat8x16_t)
DEF_SVE_NEON_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode,
Bfloat16x4_t, Bfloat16x8_t)
DEF_SVE_NEON_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode,
@@ -173,6 +173,7 @@ enum type_class_index
TYPE_bfloat,
TYPE_count,
TYPE_float,
+ TYPE_mfloat,
TYPE_signed,
TYPE_unsigned,
NUM_TYPE_CLASSES
@@ -16,6 +16,7 @@ void f11(svfloat32_t) {}
void f12(svfloat64_t) {}
void f13(svbfloat16_t) {}
void f14(svcount_t) {}
+void f15(svmfloat8_t) {}
/* { dg-final { scan-assembler "_Z2f1u10__SVBool_t:" } } */
/* { dg-final { scan-assembler "_Z2f2u10__SVInt8_t:" } } */
@@ -31,3 +32,4 @@ void f14(svcount_t) {}
/* { dg-final { scan-assembler "_Z3f12u13__SVFloat64_t:" } } */
/* { dg-final { scan-assembler "_Z3f13u14__SVBfloat16_t:" } } */
/* { dg-final { scan-assembler "_Z3f14u11__SVCount_t:" } } */
+/* { dg-final { scan-assembler "_Z3f15u13__SVMfloat8_t:" } } */
@@ -14,6 +14,7 @@ void f11(__SVFloat32_t) {}
void f12(__SVFloat64_t) {}
void f13(__SVBfloat16_t) {}
void f14(__SVCount_t) {}
+void f15(__SVMfloat8_t) {}
/* { dg-final { scan-assembler "_Z2f1u10__SVBool_t:" } } */
/* { dg-final { scan-assembler "_Z2f2u10__SVInt8_t:" } } */
@@ -29,3 +30,4 @@ void f14(__SVCount_t) {}
/* { dg-final { scan-assembler "_Z3f12u13__SVFloat64_t:" } } */
/* { dg-final { scan-assembler "_Z3f13u14__SVBfloat16_t:" } } */
/* { dg-final { scan-assembler "_Z3f14u11__SVCount_t:" } } */
+/* { dg-final { scan-assembler "_Z3f15u13__SVMfloat8_t:" } } */
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** clasta_mf8_tied1:
+** clasta z0\.b, p0, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (clasta_mf8_tied1, svmfloat8_t,
+ z0 = svclasta_mf8 (p0, z0, z1),
+ z0 = svclasta (p0, z0, z1))
+
+/*
+** clasta_mf8_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** clasta z0\.b, p0, z0\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (clasta_mf8_tied2, svmfloat8_t,
+ z0 = svclasta_mf8 (p0, z1, z0),
+ z0 = svclasta (p0, z1, z0))
+
+/*
+** clasta_mf8_untied:
+** movprfx z0, z1
+** clasta z0\.b, p0, z0\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (clasta_mf8_untied, svmfloat8_t,
+ z0 = svclasta_mf8 (p0, z1, z2),
+ z0 = svclasta (p0, z1, z2))
+
+/*
+** clasta_x0_mf8:
+** clasta b0, p0, b0, z2\.b
+** ret
+*/
+TEST_FOLD_LEFT_X (clasta_x0_mf8, mfloat8_t, svmfloat8_t,
+ x0 = svclasta_n_mf8 (p0, x0, z0),
+ x0 = svclasta (p0, x0, z0))
+
+/*
+** clasta_x1_mf8:
+** clasta b1, p0, b1, z2\.b
+** dup b0, v1\.b\[0\]
+** ret
+*/
+TEST_FOLD_LEFT_X (clasta_x1_mf8, mfloat8_t, svmfloat8_t,
+ x0 = svclasta_n_mf8 (p0, x1, z0),
+ x0 = svclasta (p0, x1, z0))
new file mode 100644
@@ -0,0 +1,52 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** clastb_mf8_tied1:
+** clastb z0\.b, p0, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (clastb_mf8_tied1, svmfloat8_t,
+ z0 = svclastb_mf8 (p0, z0, z1),
+ z0 = svclastb (p0, z0, z1))
+
+/*
+** clastb_mf8_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** clastb z0\.b, p0, z0\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (clastb_mf8_tied2, svmfloat8_t,
+ z0 = svclastb_mf8 (p0, z1, z0),
+ z0 = svclastb (p0, z1, z0))
+
+/*
+** clastb_mf8_untied:
+** movprfx z0, z1
+** clastb z0\.b, p0, z0\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (clastb_mf8_untied, svmfloat8_t,
+ z0 = svclastb_mf8 (p0, z1, z2),
+ z0 = svclastb (p0, z1, z2))
+
+/*
+** clastb_x0_mf8:
+** clastb b0, p0, b0, z2\.b
+** ret
+*/
+TEST_FOLD_LEFT_X (clastb_x0_mf8, mfloat8_t, svmfloat8_t,
+ x0 = svclastb_n_mf8 (p0, x0, z0),
+ x0 = svclastb (p0, x0, z0))
+
+/*
+** clastb_x1_mf8:
+** clastb b1, p0, b1, z2\.b
+** dup b0, v1\.b\[0\]
+** ret
+*/
+TEST_FOLD_LEFT_X (clastb_x1_mf8, mfloat8_t, svmfloat8_t,
+ x0 = svclastb_n_mf8 (p0, x1, z0),
+ x0 = svclastb (p0, x1, z0))
@@ -62,6 +62,21 @@ TEST_CREATE (create2_u16, svuint16x2_t, svuint16_t,
z0 = svcreate2_u16 (z6, z5),
z0 = svcreate2 (z6, z5))
+/*
+** create2_mf8:
+** (
+** mov z0\.d, z4\.d
+** mov z1\.d, z5\.d
+** |
+** mov z1\.d, z5\.d
+** mov z0\.d, z4\.d
+** )
+** ret
+*/
+TEST_CREATE (create2_mf8, svmfloat8x2_t, svmfloat8_t,
+ z0 = svcreate2_mf8 (z4, z5),
+ z0 = svcreate2 (z4, z5))
+
/*
** create2_bf16:
** (
@@ -46,6 +46,17 @@ TEST_CREATE (create3_u16, svuint16x3_t, svuint16_t,
z0 = svcreate3_u16 (z6, z5, z4),
z0 = svcreate3 (z6, z5, z4))
+/*
+** create3_mf8:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_CREATE (create3_mf8, svmfloat8x3_t, svmfloat8_t,
+ z0 = svcreate3_mf8 (z4, z5, z6),
+ z0 = svcreate3 (z4, z5, z6))
+
/*
** create3_bf16:
** mov [^\n]+
@@ -50,6 +50,18 @@ TEST_CREATE (create4_u16, svuint16x4_t, svuint16_t,
z0 = svcreate4_u16 (z6, z5, z4, z7),
z0 = svcreate4 (z6, z5, z4, z7))
+/*
+** create4_mf8:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_CREATE (create4_mf8, svmfloat8x4_t, svmfloat8_t,
+ z0 = svcreate4_mf8 (z4, z5, z6, z7),
+ z0 = svcreate4 (z4, z5, z6, z7))
+
/*
** create4_bf16:
** mov [^\n]+
new file mode 100644
@@ -0,0 +1,124 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** dup_lane_w0_mf8_tied1:
+** mov (z[0-9]+\.b), w0
+** tbl z0\.b, {z0\.b}, \1
+** ret
+*/
+TEST_UNIFORM_ZX (dup_lane_w0_mf8_tied1, svmfloat8_t, uint8_t,
+ z0 = svdup_lane_mf8 (z0, x0),
+ z0 = svdup_lane (z0, x0))
+
+/*
+** dup_lane_w0_mf8_untied:
+** mov (z[0-9]+\.b), w0
+** tbl z0\.b, {z1\.b}, \1
+** ret
+*/
+TEST_UNIFORM_ZX (dup_lane_w0_mf8_untied, svmfloat8_t, uint8_t,
+ z0 = svdup_lane_mf8 (z1, x0),
+ z0 = svdup_lane (z1, x0))
+
+/*
+** dup_lane_0_mf8_tied1:
+** dup z0\.b, z0\.b\[0\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_0_mf8_tied1, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 0),
+ z0 = svdup_lane (z0, 0))
+
+/*
+** dup_lane_0_mf8_untied:
+** dup z0\.b, z1\.b\[0\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_0_mf8_untied, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z1, 0),
+ z0 = svdup_lane (z1, 0))
+
+/*
+** dup_lane_7_mf8:
+** dup z0\.b, z0\.b\[7\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_7_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 7),
+ z0 = svdup_lane (z0, 7))
+
+/*
+** dup_lane_8_mf8:
+** dup z0\.b, z0\.b\[8\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_8_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 8),
+ z0 = svdup_lane (z0, 8))
+
+/*
+** dup_lane_15_mf8:
+** dup z0\.b, z0\.b\[15\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_15_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 15),
+ z0 = svdup_lane (z0, 15))
+
+/*
+** dup_lane_16_mf8:
+** dup z0\.b, z0\.b\[16\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_16_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 16),
+ z0 = svdup_lane (z0, 16))
+
+/*
+** dup_lane_31_mf8:
+** dup z0\.b, z0\.b\[31\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_31_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 31),
+ z0 = svdup_lane (z0, 31))
+
+/*
+** dup_lane_32_mf8:
+** dup z0\.b, z0\.b\[32\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_32_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 32),
+ z0 = svdup_lane (z0, 32))
+
+/*
+** dup_lane_63_mf8:
+** dup z0\.b, z0\.b\[63\]
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_63_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 63),
+ z0 = svdup_lane (z0, 63))
+
+/*
+** dup_lane_64_mf8:
+** mov (z[0-9]+\.b), #64
+** tbl z0\.b, {z0\.b}, \1
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_64_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 64),
+ z0 = svdup_lane (z0, 64))
+
+/*
+** dup_lane_255_mf8:
+** mov (z[0-9]+\.b), #-1
+** tbl z0\.b, {z0\.b}, \1
+** ret
+*/
+TEST_UNIFORM_Z (dup_lane_255_mf8, svmfloat8_t,
+ z0 = svdup_lane_mf8 (z0, 255),
+ z0 = svdup_lane (z0, 255))
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** dup_w0_mf8:
+** mov z0\.b, b4
+** ret
+*/
+TEST_UNIFORM_ZX (dup_w0_mf8, svmfloat8_t, mfloat8_t,
+ z0 = svdup_n_mf8 (x0),
+ z0 = svdup_mf8 (x0))
+
+/*
+** dup_w0_mf8_m:
+** movprfx z0, z1
+** mov z0\.b, p0/m, b4
+** ret
+*/
+TEST_UNIFORM_ZX (dup_w0_mf8_m, svmfloat8_t, mfloat8_t,
+ z0 = svdup_n_mf8_m (z1, p0, x0),
+ z0 = svdup_mf8_m (z1, p0, x0))
+
+/*
+** dup_w0_mf8_x:
+** mov z0\.b, b4
+** ret
+*/
+TEST_UNIFORM_ZX (dup_w0_mf8_x, svmfloat8_t, mfloat8_t,
+ z0 = svdup_n_mf8_x (p0, x0),
+ z0 = svdup_mf8_x (p0, x0))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** dup_neonq_mf8_z0:
+** dup z0.q, z4.q\[0\]
+** ret
+*/
+TEST_DUP_NEONQ (dup_neonq_mf8_z0, mfloat8x16_t, svmfloat8_t,
+ z0 = svdup_neonq_mf8 (z4),
+ z0 = svdup_neonq (z4))
+
+/*
+** dup_neonq_mf8_z4:
+** dup z4.q, z4.q\[0\]
+** ret
+*/
+TEST_DUP_NEONQ (dup_neonq_mf8_z4, mfloat8x16_t, svmfloat8_t,
+ z4_res = svdup_neonq_mf8 (z4),
+ z4_res = svdup_neonq (z4))
+
+/*
+** dup_neonq_mf8_z5:
+** dup z5.q, z4.q\[0\]
+** ret
+*/
+TEST_DUP_NEONQ (dup_neonq_mf8_z5, mfloat8x16_t, svmfloat8_t,
+ z5_res = svdup_neonq_mf8 (z4),
+ z5_res = svdup_neonq (z4))
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** dupq_lane_0_mf8_tied:
+** dup z0\.q, z0\.q\[0\]
+** ret
+*/
+TEST_UNIFORM_Z (dupq_lane_0_mf8_tied, svmfloat8_t,
+ z0 = svdupq_lane_mf8 (z0, 0),
+ z0 = svdupq_lane (z0, 0))
+
+/*
+** dupq_lane_0_mf8_untied:
+** dup z0\.q, z1\.q\[0\]
+** ret
+*/
+TEST_UNIFORM_Z (dupq_lane_0_mf8_untied, svmfloat8_t,
+ z0 = svdupq_lane_mf8 (z1, 0),
+ z0 = svdupq_lane (z1, 0))
+
+/*
+** dupq_lane_1_mf8:
+** dup z0\.q, z0\.q\[1\]
+** ret
+*/
+TEST_UNIFORM_Z (dupq_lane_1_mf8, svmfloat8_t,
+ z0 = svdupq_lane_mf8 (z0, 1),
+ z0 = svdupq_lane (z0, 1))
+
+/*
+** dupq_lane_2_mf8:
+** dup z0\.q, z0\.q\[2\]
+** ret
+*/
+TEST_UNIFORM_Z (dupq_lane_2_mf8, svmfloat8_t,
+ z0 = svdupq_lane_mf8 (z0, 2),
+ z0 = svdupq_lane (z0, 2))
+
+/*
+** dupq_lane_3_mf8:
+** dup z0\.q, z0\.q\[3\]
+** ret
+*/
+TEST_UNIFORM_Z (dupq_lane_3_mf8, svmfloat8_t,
+ z0 = svdupq_lane_mf8 (z0, 3),
+ z0 = svdupq_lane (z0, 3))
new file mode 100644
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ext_0_mf8_tied1:
+** ext z0\.b, z0\.b, z1\.b, #0
+** ret
+*/
+TEST_UNIFORM_Z (ext_0_mf8_tied1, svmfloat8_t,
+ z0 = svext_mf8 (z0, z1, 0),
+ z0 = svext (z0, z1, 0))
+
+/*
+** ext_0_mf8_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** ext z0\.b, z0\.b, \1\.b, #0
+** ret
+*/
+TEST_UNIFORM_Z (ext_0_mf8_tied2, svmfloat8_t,
+ z0 = svext_mf8 (z1, z0, 0),
+ z0 = svext (z1, z0, 0))
+
+/*
+** ext_0_mf8_untied:
+** movprfx z0, z1
+** ext z0\.b, z0\.b, z2\.b, #0
+** ret
+*/
+TEST_UNIFORM_Z (ext_0_mf8_untied, svmfloat8_t,
+ z0 = svext_mf8 (z1, z2, 0),
+ z0 = svext (z1, z2, 0))
+
+/*
+** ext_1_mf8:
+** movprfx z0, z1
+** ext z0\.b, z0\.b, z2\.b, #1
+** ret
+*/
+TEST_UNIFORM_Z (ext_1_mf8, svmfloat8_t,
+ z0 = svext_mf8 (z1, z2, 1),
+ z0 = svext (z1, z2, 1))
+
+/*
+** ext_2_mf8:
+** movprfx z0, z1
+** ext z0\.b, z0\.b, z2\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (ext_2_mf8, svmfloat8_t,
+ z0 = svext_mf8 (z1, z2, 2),
+ z0 = svext (z1, z2, 2))
+
+/*
+** ext_3_mf8:
+** movprfx z0, z1
+** ext z0\.b, z0\.b, z2\.b, #3
+** ret
+*/
+TEST_UNIFORM_Z (ext_3_mf8, svmfloat8_t,
+ z0 = svext_mf8 (z1, z2, 3),
+ z0 = svext (z1, z2, 3))
+
+/*
+** ext_255_mf8:
+** movprfx z0, z1
+** ext z0\.b, z0\.b, z2\.b, #255
+** ret
+*/
+TEST_UNIFORM_Z (ext_255_mf8, svmfloat8_t,
+ z0 = svext_mf8 (z1, z2, 255),
+ z0 = svext (z1, z2, 255))
new file mode 100644
@@ -0,0 +1,55 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get2_mf8_z0_0:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_GET (get2_mf8_z0_0, svmfloat8x2_t, svmfloat8_t,
+ z0 = svget2_mf8 (z4, 0),
+ z0 = svget2 (z4, 0))
+
+/*
+** get2_mf8_z0_1:
+** mov z0\.d, z5\.d
+** ret
+*/
+TEST_GET (get2_mf8_z0_1, svmfloat8x2_t, svmfloat8_t,
+ z0 = svget2_mf8 (z4, 1),
+ z0 = svget2 (z4, 1))
+
+/*
+** get2_mf8_z4_0:
+** ret
+*/
+TEST_GET (get2_mf8_z4_0, svmfloat8x2_t, svmfloat8_t,
+ z4_res = svget2_mf8 (z4, 0),
+ z4_res = svget2 (z4, 0))
+
+/*
+** get2_mf8_z4_1:
+** mov z4\.d, z5\.d
+** ret
+*/
+TEST_GET (get2_mf8_z4_1, svmfloat8x2_t, svmfloat8_t,
+ z4_res = svget2_mf8 (z4, 1),
+ z4_res = svget2 (z4, 1))
+
+/*
+** get2_mf8_z5_0:
+** mov z5\.d, z4\.d
+** ret
+*/
+TEST_GET (get2_mf8_z5_0, svmfloat8x2_t, svmfloat8_t,
+ z5_res = svget2_mf8 (z4, 0),
+ z5_res = svget2 (z4, 0))
+
+/*
+** get2_mf8_z5_1:
+** ret
+*/
+TEST_GET (get2_mf8_z5_1, svmfloat8x2_t, svmfloat8_t,
+ z5_res = svget2_mf8 (z4, 1),
+ z5_res = svget2 (z4, 1))
new file mode 100644
@@ -0,0 +1,108 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get3_mf8_z0_0:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_GET (get3_mf8_z0_0, svmfloat8x3_t, svmfloat8_t,
+ z0 = svget3_mf8 (z4, 0),
+ z0 = svget3 (z4, 0))
+
+/*
+** get3_mf8_z0_1:
+** mov z0\.d, z5\.d
+** ret
+*/
+TEST_GET (get3_mf8_z0_1, svmfloat8x3_t, svmfloat8_t,
+ z0 = svget3_mf8 (z4, 1),
+ z0 = svget3 (z4, 1))
+
+/*
+** get3_mf8_z0_2:
+** mov z0\.d, z6\.d
+** ret
+*/
+TEST_GET (get3_mf8_z0_2, svmfloat8x3_t, svmfloat8_t,
+ z0 = svget3_mf8 (z4, 2),
+ z0 = svget3 (z4, 2))
+
+/*
+** get3_mf8_z4_0:
+** ret
+*/
+TEST_GET (get3_mf8_z4_0, svmfloat8x3_t, svmfloat8_t,
+ z4_res = svget3_mf8 (z4, 0),
+ z4_res = svget3 (z4, 0))
+
+/*
+** get3_mf8_z4_1:
+** mov z4\.d, z5\.d
+** ret
+*/
+TEST_GET (get3_mf8_z4_1, svmfloat8x3_t, svmfloat8_t,
+ z4_res = svget3_mf8 (z4, 1),
+ z4_res = svget3 (z4, 1))
+
+/*
+** get3_mf8_z4_2:
+** mov z4\.d, z6\.d
+** ret
+*/
+TEST_GET (get3_mf8_z4_2, svmfloat8x3_t, svmfloat8_t,
+ z4_res = svget3_mf8 (z4, 2),
+ z4_res = svget3 (z4, 2))
+
+/*
+** get3_mf8_z5_0:
+** mov z5\.d, z4\.d
+** ret
+*/
+TEST_GET (get3_mf8_z5_0, svmfloat8x3_t, svmfloat8_t,
+ z5_res = svget3_mf8 (z4, 0),
+ z5_res = svget3 (z4, 0))
+
+/*
+** get3_mf8_z5_1:
+** ret
+*/
+TEST_GET (get3_mf8_z5_1, svmfloat8x3_t, svmfloat8_t,
+ z5_res = svget3_mf8 (z4, 1),
+ z5_res = svget3 (z4, 1))
+
+/*
+** get3_mf8_z5_2:
+** mov z5\.d, z6\.d
+** ret
+*/
+TEST_GET (get3_mf8_z5_2, svmfloat8x3_t, svmfloat8_t,
+ z5_res = svget3_mf8 (z4, 2),
+ z5_res = svget3 (z4, 2))
+
+/*
+** get3_mf8_z6_0:
+** mov z6\.d, z4\.d
+** ret
+*/
+TEST_GET (get3_mf8_z6_0, svmfloat8x3_t, svmfloat8_t,
+ z6_res = svget3_mf8 (z4, 0),
+ z6_res = svget3 (z4, 0))
+
+/*
+** get3_mf8_z6_1:
+** mov z6\.d, z5\.d
+** ret
+*/
+TEST_GET (get3_mf8_z6_1, svmfloat8x3_t, svmfloat8_t,
+ z6_res = svget3_mf8 (z4, 1),
+ z6_res = svget3 (z4, 1))
+
+/*
+** get3_mf8_z6_2:
+** ret
+*/
+TEST_GET (get3_mf8_z6_2, svmfloat8x3_t, svmfloat8_t,
+ z6_res = svget3_mf8 (z4, 2),
+ z6_res = svget3 (z4, 2))
new file mode 100644
@@ -0,0 +1,179 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get4_mf8_z0_0:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_GET (get4_mf8_z0_0, svmfloat8x4_t, svmfloat8_t,
+ z0 = svget4_mf8 (z4, 0),
+ z0 = svget4 (z4, 0))
+
+/*
+** get4_mf8_z0_1:
+** mov z0\.d, z5\.d
+** ret
+*/
+TEST_GET (get4_mf8_z0_1, svmfloat8x4_t, svmfloat8_t,
+ z0 = svget4_mf8 (z4, 1),
+ z0 = svget4 (z4, 1))
+
+/*
+** get4_mf8_z0_2:
+** mov z0\.d, z6\.d
+** ret
+*/
+TEST_GET (get4_mf8_z0_2, svmfloat8x4_t, svmfloat8_t,
+ z0 = svget4_mf8 (z4, 2),
+ z0 = svget4 (z4, 2))
+
+/*
+** get4_mf8_z0_3:
+** mov z0\.d, z7\.d
+** ret
+*/
+TEST_GET (get4_mf8_z0_3, svmfloat8x4_t, svmfloat8_t,
+ z0 = svget4_mf8 (z4, 3),
+ z0 = svget4 (z4, 3))
+
+/*
+** get4_mf8_z4_0:
+** ret
+*/
+TEST_GET (get4_mf8_z4_0, svmfloat8x4_t, svmfloat8_t,
+ z4_res = svget4_mf8 (z4, 0),
+ z4_res = svget4 (z4, 0))
+
+/*
+** get4_mf8_z4_1:
+** mov z4\.d, z5\.d
+** ret
+*/
+TEST_GET (get4_mf8_z4_1, svmfloat8x4_t, svmfloat8_t,
+ z4_res = svget4_mf8 (z4, 1),
+ z4_res = svget4 (z4, 1))
+
+/*
+** get4_mf8_z4_2:
+** mov z4\.d, z6\.d
+** ret
+*/
+TEST_GET (get4_mf8_z4_2, svmfloat8x4_t, svmfloat8_t,
+ z4_res = svget4_mf8 (z4, 2),
+ z4_res = svget4 (z4, 2))
+
+/*
+** get4_mf8_z4_3:
+** mov z4\.d, z7\.d
+** ret
+*/
+TEST_GET (get4_mf8_z4_3, svmfloat8x4_t, svmfloat8_t,
+ z4_res = svget4_mf8 (z4, 3),
+ z4_res = svget4 (z4, 3))
+
+/*
+** get4_mf8_z5_0:
+** mov z5\.d, z4\.d
+** ret
+*/
+TEST_GET (get4_mf8_z5_0, svmfloat8x4_t, svmfloat8_t,
+ z5_res = svget4_mf8 (z4, 0),
+ z5_res = svget4 (z4, 0))
+
+/*
+** get4_mf8_z5_1:
+** ret
+*/
+TEST_GET (get4_mf8_z5_1, svmfloat8x4_t, svmfloat8_t,
+ z5_res = svget4_mf8 (z4, 1),
+ z5_res = svget4 (z4, 1))
+
+/*
+** get4_mf8_z5_2:
+** mov z5\.d, z6\.d
+** ret
+*/
+TEST_GET (get4_mf8_z5_2, svmfloat8x4_t, svmfloat8_t,
+ z5_res = svget4_mf8 (z4, 2),
+ z5_res = svget4 (z4, 2))
+
+/*
+** get4_mf8_z5_3:
+** mov z5\.d, z7\.d
+** ret
+*/
+TEST_GET (get4_mf8_z5_3, svmfloat8x4_t, svmfloat8_t,
+ z5_res = svget4_mf8 (z4, 3),
+ z5_res = svget4 (z4, 3))
+
+/*
+** get4_mf8_z6_0:
+** mov z6\.d, z4\.d
+** ret
+*/
+TEST_GET (get4_mf8_z6_0, svmfloat8x4_t, svmfloat8_t,
+ z6_res = svget4_mf8 (z4, 0),
+ z6_res = svget4 (z4, 0))
+
+/*
+** get4_mf8_z6_1:
+** mov z6\.d, z5\.d
+** ret
+*/
+TEST_GET (get4_mf8_z6_1, svmfloat8x4_t, svmfloat8_t,
+ z6_res = svget4_mf8 (z4, 1),
+ z6_res = svget4 (z4, 1))
+
+/*
+** get4_mf8_z6_2:
+** ret
+*/
+TEST_GET (get4_mf8_z6_2, svmfloat8x4_t, svmfloat8_t,
+ z6_res = svget4_mf8 (z4, 2),
+ z6_res = svget4 (z4, 2))
+
+/*
+** get4_mf8_z6_3:
+** mov z6\.d, z7\.d
+** ret
+*/
+TEST_GET (get4_mf8_z6_3, svmfloat8x4_t, svmfloat8_t,
+ z6_res = svget4_mf8 (z4, 3),
+ z6_res = svget4 (z4, 3))
+
+/*
+** get4_mf8_z7_0:
+** mov z7\.d, z4\.d
+** ret
+*/
+TEST_GET (get4_mf8_z7_0, svmfloat8x4_t, svmfloat8_t,
+ z7_res = svget4_mf8 (z4, 0),
+ z7_res = svget4 (z4, 0))
+
+/*
+** get4_mf8_z7_1:
+** mov z7\.d, z5\.d
+** ret
+*/
+TEST_GET (get4_mf8_z7_1, svmfloat8x4_t, svmfloat8_t,
+ z7_res = svget4_mf8 (z4, 1),
+ z7_res = svget4 (z4, 1))
+
+/*
+** get4_mf8_z7_2:
+** mov z7\.d, z6\.d
+** ret
+*/
+TEST_GET (get4_mf8_z7_2, svmfloat8x4_t, svmfloat8_t,
+ z7_res = svget4_mf8 (z4, 2),
+ z7_res = svget4 (z4, 2))
+
+/*
+** get4_mf8_z7_3:
+** ret
+*/
+TEST_GET (get4_mf8_z7_3, svmfloat8x4_t, svmfloat8_t,
+ z7_res = svget4_mf8 (z4, 3),
+ z7_res = svget4 (z4, 3))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get_neonq_mf8_z0:
+** mov v0.16b, v4.16b
+** ret
+*/
+TEST_GET (get_neonq_mf8_z0, svmfloat8_t, mfloat8x16_t,
+ z0 = svget_neonq_mf8 (z4),
+ z0 = svget_neonq (z4))
+
+/*
+** get_neonq_mf8_z4:
+** ret
+*/
+TEST_GET (get_neonq_mf8_z4, svmfloat8_t, mfloat8x16_t,
+ z4_res = svget_neonq_mf8 (z4),
+ z4_res = svget_neonq (z4))
+
+/*
+** get_neonq_mf8_z5:
+** (
+** mov z5.d, z4.d
+** |
+** mov v5.16b, v4.16b
+** )
+** ret
+*/
+TEST_GET (get_neonq_mf8_z5, svmfloat8_t, mfloat8x16_t,
+ z5_res = svget_neonq_mf8 (z4),
+ z5_res = svget_neonq (z4))
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** insr_w0_mf8_tied1:
+** insr z0\.b, b4
+** ret
+*/
+TEST_UNIFORM_ZX (insr_w0_mf8_tied1, svmfloat8_t, mfloat8_t,
+ z0 = svinsr_n_mf8 (z0, x0),
+ z0 = svinsr (z0, x0))
+
+/*
+** insr_w0_mf8_untied:
+** movprfx z0, z1
+** insr z0\.b, b4
+** ret
+*/
+TEST_UNIFORM_ZX (insr_w0_mf8_untied, svmfloat8_t, mfloat8_t,
+ z0 = svinsr_n_mf8 (z1, x0),
+ z0 = svinsr (z1, x0))
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** lasta_x0_mf8:
+** lasta b0, p0, z0\.b
+** ret
+*/
+TEST_REDUCTION_X (lasta_x0_mf8, mfloat8_t, svmfloat8_t,
+ x0 = svlasta_mf8 (p0, z0),
+ x0 = svlasta (p0, z0))
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** lastb_x0_mf8:
+** lastb b0, p0, z0\.b
+** ret
+*/
+TEST_REDUCTION_X (lastb_x0_mf8, mfloat8_t, svmfloat8_t,
+ x0 = svlastb_mf8 (p0, z0),
+ x0 = svlastb (p0, z0))
new file mode 100644
@@ -0,0 +1,162 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1_mf8_base:
+** ld1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_base, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0),
+ z0 = svld1 (p0, x0))
+
+/*
+** ld1_mf8_index:
+** ld1b z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_index, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 + x1),
+ z0 = svld1 (p0, x0 + x1))
+
+/*
+** ld1_mf8_1:
+** ld1b z0\.b, p0/z, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 + svcntb ()),
+ z0 = svld1 (p0, x0 + svcntb ()))
+
+/*
+** ld1_mf8_7:
+** ld1b z0\.b, p0/z, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_7, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 + svcntb () * 7),
+ z0 = svld1 (p0, x0 + svcntb () * 7))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_8:
+** incb x0, all, mul #8
+** ld1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 + svcntb () * 8),
+ z0 = svld1 (p0, x0 + svcntb () * 8))
+
+/*
+** ld1_mf8_m1:
+** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 - svcntb ()),
+ z0 = svld1 (p0, x0 - svcntb ()))
+
+/*
+** ld1_mf8_m8:
+** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 - svcntb () * 8),
+ z0 = svld1 (p0, x0 - svcntb () * 8))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_mf8_m9:
+** decb x0, all, mul #9
+** ld1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1_mf8_m9, svmfloat8_t, mfloat8_t,
+ z0 = svld1_mf8 (p0, x0 - svcntb () * 9),
+ z0 = svld1 (p0, x0 - svcntb () * 9))
+
+/*
+** ld1_vnum_mf8_0:
+** ld1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_0, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, 0),
+ z0 = svld1_vnum (p0, x0, 0))
+
+/*
+** ld1_vnum_mf8_1:
+** ld1b z0\.b, p0/z, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, 1),
+ z0 = svld1_vnum (p0, x0, 1))
+
+/*
+** ld1_vnum_mf8_7:
+** ld1b z0\.b, p0/z, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_7, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, 7),
+ z0 = svld1_vnum (p0, x0, 7))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_8:
+** incb x0, all, mul #8
+** ld1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, 8),
+ z0 = svld1_vnum (p0, x0, 8))
+
+/*
+** ld1_vnum_mf8_m1:
+** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, -1),
+ z0 = svld1_vnum (p0, x0, -1))
+
+/*
+** ld1_vnum_mf8_m8:
+** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, -8),
+ z0 = svld1_vnum (p0, x0, -8))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_mf8_m9:
+** decb x0, all, mul #9
+** ld1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_m9, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, -9),
+ z0 = svld1_vnum (p0, x0, -9))
+
+/*
+** ld1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b z0\.b, p0/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b z0\.b, p0/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD (ld1_vnum_mf8_x1, svmfloat8_t, mfloat8_t,
+ z0 = svld1_vnum_mf8 (p0, x0, x1),
+ z0 = svld1_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_mf8_base:
+** ld1rob z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_base, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_mf8_index:
+** ld1rob z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_index, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_mf8_1:
+** add (x[0-9]+), x0, #?1
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_mf8_16:
+** add (x[0-9]+), x0, #?16
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_16, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 + 16),
+ z0 = svld1ro (p0, x0 + 16))
+
+/*
+** ld1ro_mf8_256:
+** add (x[0-9]+), x0, #?256
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_256, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 + 256),
+ z0 = svld1ro (p0, x0 + 256))
+
+/*
+** ld1ro_mf8_m1:
+** sub (x[0-9]+), x0, #?1
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_mf8_m16:
+** sub (x[0-9]+), x0, #?16
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_m16, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 - 16),
+ z0 = svld1ro (p0, x0 - 16))
+
+/*
+** ld1ro_mf8_m288:
+** sub (x[0-9]+), x0, #?288
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_m288, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 - 288),
+ z0 = svld1ro (p0, x0 - 288))
+
+/*
+** ld1ro_mf8_32:
+** ld1rob z0\.b, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_32, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 + 32),
+ z0 = svld1ro (p0, x0 + 32))
+
+/*
+** ld1ro_mf8_224:
+** ld1rob z0\.b, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_224, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 + 224),
+ z0 = svld1ro (p0, x0 + 224))
+
+/*
+** ld1ro_mf8_m32:
+** ld1rob z0\.b, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_m32, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 - 32),
+ z0 = svld1ro (p0, x0 - 32))
+
+/*
+** ld1ro_mf8_m256:
+** ld1rob z0\.b, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_mf8_m256, svmfloat8_t, mfloat8_t,
+ z0 = svld1ro_mf8 (p0, x0 - 256),
+ z0 = svld1ro (p0, x0 - 256))
+
new file mode 100644
@@ -0,0 +1,137 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1rq_mf8_base:
+** ld1rqb z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_base, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0),
+ z0 = svld1rq (p0, x0))
+
+/*
+** ld1rq_mf8_index:
+** ld1rqb z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_index, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + x1),
+ z0 = svld1rq (p0, x0 + x1))
+
+/*
+** ld1rq_mf8_1:
+** add (x[0-9]+), x0, #?1
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + 1),
+ z0 = svld1rq (p0, x0 + 1))
+
+/*
+** ld1rq_mf8_8:
+** add (x[0-9]+), x0, #?8
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + 8),
+ z0 = svld1rq (p0, x0 + 8))
+
+/*
+** ld1rq_mf8_15:
+** add (x[0-9]+), x0, #?15
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_15, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + 15),
+ z0 = svld1rq (p0, x0 + 15))
+
+/*
+** ld1rq_mf8_16:
+** ld1rqb z0\.b, p0/z, \[x0, #?16\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_16, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + 16),
+ z0 = svld1rq (p0, x0 + 16))
+
+/*
+** ld1rq_mf8_112:
+** ld1rqb z0\.b, p0/z, \[x0, #?112\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_112, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + 112),
+ z0 = svld1rq (p0, x0 + 112))
+
+/*
+** ld1rq_mf8_128:
+** add (x[0-9]+), x0, #?128
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_128, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 + 128),
+ z0 = svld1rq (p0, x0 + 128))
+
+/*
+** ld1rq_mf8_m1:
+** sub (x[0-9]+), x0, #?1
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 - 1),
+ z0 = svld1rq (p0, x0 - 1))
+
+/*
+** ld1rq_mf8_m8:
+** sub (x[0-9]+), x0, #?8
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 - 8),
+ z0 = svld1rq (p0, x0 - 8))
+
+/*
+** ld1rq_mf8_m15:
+** sub (x[0-9]+), x0, #?15
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_m15, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 - 15),
+ z0 = svld1rq (p0, x0 - 15))
+
+/*
+** ld1rq_mf8_m16:
+** ld1rqb z0\.b, p0/z, \[x0, #?-16\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_m16, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 - 16),
+ z0 = svld1rq (p0, x0 - 16))
+
+/*
+** ld1rq_mf8_m128:
+** ld1rqb z0\.b, p0/z, \[x0, #?-128\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_m128, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 - 128),
+ z0 = svld1rq (p0, x0 - 128))
+
+/*
+** ld1rq_mf8_m144:
+** sub (x[0-9]+), x0, #?144
+** ld1rqb z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1rq_mf8_m144, svmfloat8_t, mfloat8_t,
+ z0 = svld1rq_mf8 (p0, x0 - 144),
+ z0 = svld1rq (p0, x0 - 144))
new file mode 100644
@@ -0,0 +1,204 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld2_mf8_base:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_base, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0),
+ z0 = svld2 (p0, x0))
+
+/*
+** ld2_mf8_index:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_index, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 + x1),
+ z0 = svld2 (p0, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld2_mf8_1:
+** incb x0
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 + svcntb ()),
+ z0 = svld2 (p0, x0 + svcntb ()))
+
+/*
+** ld2_mf8_2:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 + svcntb () * 2),
+ z0 = svld2 (p0, x0 + svcntb () * 2))
+
+/*
+** ld2_mf8_14:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 + svcntb () * 14),
+ z0 = svld2 (p0, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld2_mf8_16:
+** incb x0, all, mul #16
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 + svcntb () * 16),
+ z0 = svld2 (p0, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld2_mf8_m1:
+** decb x0
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 - svcntb ()),
+ z0 = svld2 (p0, x0 - svcntb ()))
+
+/*
+** ld2_mf8_m2:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 - svcntb () * 2),
+ z0 = svld2 (p0, x0 - svcntb () * 2))
+
+/*
+** ld2_mf8_m16:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 - svcntb () * 16),
+ z0 = svld2 (p0, x0 - svcntb () * 16))
+
+/*
+** ld2_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld2_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_mf8 (p0, x0 - svcntb () * 18),
+ z0 = svld2 (p0, x0 - svcntb () * 18))
+
+/*
+** ld2_vnum_mf8_0:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, 0),
+ z0 = svld2_vnum (p0, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld2_vnum_mf8_1:
+** incb x0
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, 1),
+ z0 = svld2_vnum (p0, x0, 1))
+
+/*
+** ld2_vnum_mf8_2:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, 2),
+ z0 = svld2_vnum (p0, x0, 2))
+
+/*
+** ld2_vnum_mf8_14:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, 14),
+ z0 = svld2_vnum (p0, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld2_vnum_mf8_16:
+** incb x0, all, mul #16
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, 16),
+ z0 = svld2_vnum (p0, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld2_vnum_mf8_m1:
+** decb x0
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, -1),
+ z0 = svld2_vnum (p0, x0, -1))
+
+/*
+** ld2_vnum_mf8_m2:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, -2),
+ z0 = svld2_vnum (p0, x0, -2))
+
+/*
+** ld2_vnum_mf8_m16:
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, -16),
+ z0 = svld2_vnum (p0, x0, -16))
+
+/*
+** ld2_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, -18),
+ z0 = svld2_vnum (p0, x0, -18))
+
+/*
+** ld2_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD (ld2_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ z0 = svld2_vnum_mf8 (p0, x0, x1),
+ z0 = svld2_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,246 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld3_mf8_base:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_base, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0),
+ z0 = svld3 (p0, x0))
+
+/*
+** ld3_mf8_index:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_index, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 + x1),
+ z0 = svld3 (p0, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_mf8_1:
+** incb x0
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_1, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 + svcntb ()),
+ z0 = svld3 (p0, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_mf8_2:
+** incb x0, all, mul #2
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_2, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 + svcntb () * 2),
+ z0 = svld3 (p0, x0 + svcntb () * 2))
+
+/*
+** ld3_mf8_3:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_3, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 + svcntb () * 3),
+ z0 = svld3 (p0, x0 + svcntb () * 3))
+
+/*
+** ld3_mf8_21:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_21, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 + svcntb () * 21),
+ z0 = svld3 (p0, x0 + svcntb () * 21))
+
+/*
+** ld3_mf8_24:
+** addvl (x[0-9]+), x0, #24
+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_24, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 + svcntb () * 24),
+ z0 = svld3 (p0, x0 + svcntb () * 24))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_mf8_m1:
+** decb x0
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_m1, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 - svcntb ()),
+ z0 = svld3 (p0, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_mf8_m2:
+** decb x0, all, mul #2
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_m2, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 - svcntb () * 2),
+ z0 = svld3 (p0, x0 - svcntb () * 2))
+
+/*
+** ld3_mf8_m3:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_m3, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 - svcntb () * 3),
+ z0 = svld3 (p0, x0 - svcntb () * 3))
+
+/*
+** ld3_mf8_m24:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_m24, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 - svcntb () * 24),
+ z0 = svld3 (p0, x0 - svcntb () * 24))
+
+/*
+** ld3_mf8_m27:
+** addvl (x[0-9]+), x0, #-27
+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld3_mf8_m27, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_mf8 (p0, x0 - svcntb () * 27),
+ z0 = svld3 (p0, x0 - svcntb () * 27))
+
+/*
+** ld3_vnum_mf8_0:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_0, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, 0),
+ z0 = svld3_vnum (p0, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_vnum_mf8_1:
+** incb x0
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_1, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, 1),
+ z0 = svld3_vnum (p0, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_vnum_mf8_2:
+** incb x0, all, mul #2
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_2, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, 2),
+ z0 = svld3_vnum (p0, x0, 2))
+
+/*
+** ld3_vnum_mf8_3:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_3, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, 3),
+ z0 = svld3_vnum (p0, x0, 3))
+
+/*
+** ld3_vnum_mf8_21:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_21, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, 21),
+ z0 = svld3_vnum (p0, x0, 21))
+
+/*
+** ld3_vnum_mf8_24:
+** addvl (x[0-9]+), x0, #24
+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_24, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, 24),
+ z0 = svld3_vnum (p0, x0, 24))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_vnum_mf8_m1:
+** decb x0
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_m1, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, -1),
+ z0 = svld3_vnum (p0, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld3_vnum_mf8_m2:
+** decb x0, all, mul #2
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_m2, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, -2),
+ z0 = svld3_vnum (p0, x0, -2))
+
+/*
+** ld3_vnum_mf8_m3:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_m3, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, -3),
+ z0 = svld3_vnum (p0, x0, -3))
+
+/*
+** ld3_vnum_mf8_m24:
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_m24, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, -24),
+ z0 = svld3_vnum (p0, x0, -24))
+
+/*
+** ld3_vnum_mf8_m27:
+** addvl (x[0-9]+), x0, #-27
+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_m27, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, -27),
+ z0 = svld3_vnum (p0, x0, -27))
+
+/*
+** ld3_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld3b {z0\.b - z2\.b}, p0/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD (ld3_vnum_mf8_x1, svmfloat8x3_t, mfloat8_t,
+ z0 = svld3_vnum_mf8 (p0, x0, x1),
+ z0 = svld3_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,290 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld4_mf8_base:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_base, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0),
+ z0 = svld4 (p0, x0))
+
+/*
+** ld4_mf8_index:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_index, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + x1),
+ z0 = svld4 (p0, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_mf8_1:
+** incb x0
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + svcntb ()),
+ z0 = svld4 (p0, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_mf8_2:
+** incb x0, all, mul #2
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + svcntb () * 2),
+ z0 = svld4 (p0, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_mf8_3:
+** incb x0, all, mul #3
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + svcntb () * 3),
+ z0 = svld4 (p0, x0 + svcntb () * 3))
+
+/*
+** ld4_mf8_4:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + svcntb () * 4),
+ z0 = svld4 (p0, x0 + svcntb () * 4))
+
+/*
+** ld4_mf8_28:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + svcntb () * 28),
+ z0 = svld4 (p0, x0 + svcntb () * 28))
+
+/*
+** ld4_mf8_32:
+** [^{]*
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 + svcntb () * 32),
+ z0 = svld4 (p0, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_mf8_m1:
+** decb x0
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 - svcntb ()),
+ z0 = svld4 (p0, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_mf8_m2:
+** decb x0, all, mul #2
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 - svcntb () * 2),
+ z0 = svld4 (p0, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_mf8_m3:
+** decb x0, all, mul #3
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 - svcntb () * 3),
+ z0 = svld4 (p0, x0 - svcntb () * 3))
+
+/*
+** ld4_mf8_m4:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 - svcntb () * 4),
+ z0 = svld4 (p0, x0 - svcntb () * 4))
+
+/*
+** ld4_mf8_m32:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 - svcntb () * 32),
+ z0 = svld4 (p0, x0 - svcntb () * 32))
+
+/*
+** ld4_mf8_m36:
+** [^{]*
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD (ld4_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_mf8 (p0, x0 - svcntb () * 36),
+ z0 = svld4 (p0, x0 - svcntb () * 36))
+
+/*
+** ld4_vnum_mf8_0:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 0),
+ z0 = svld4_vnum (p0, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_vnum_mf8_1:
+** incb x0
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 1),
+ z0 = svld4_vnum (p0, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_vnum_mf8_2:
+** incb x0, all, mul #2
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 2),
+ z0 = svld4_vnum (p0, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_vnum_mf8_3:
+** incb x0, all, mul #3
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 3),
+ z0 = svld4_vnum (p0, x0, 3))
+
+/*
+** ld4_vnum_mf8_4:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 4),
+ z0 = svld4_vnum (p0, x0, 4))
+
+/*
+** ld4_vnum_mf8_28:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 28),
+ z0 = svld4_vnum (p0, x0, 28))
+
+/*
+** ld4_vnum_mf8_32:
+** [^{]*
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, 32),
+ z0 = svld4_vnum (p0, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_vnum_mf8_m1:
+** decb x0
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, -1),
+ z0 = svld4_vnum (p0, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_vnum_mf8_m2:
+** decb x0, all, mul #2
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, -2),
+ z0 = svld4_vnum (p0, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld4_vnum_mf8_m3:
+** decb x0, all, mul #3
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, -3),
+ z0 = svld4_vnum (p0, x0, -3))
+
+/*
+** ld4_vnum_mf8_m4:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, -4),
+ z0 = svld4_vnum (p0, x0, -4))
+
+/*
+** ld4_vnum_mf8_m32:
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, -32),
+ z0 = svld4_vnum (p0, x0, -32))
+
+/*
+** ld4_vnum_mf8_m36:
+** [^{]*
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, -36),
+ z0 = svld4_vnum (p0, x0, -36))
+
+/*
+** ld4_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld4b {z0\.b - z3\.b}, p0/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD (ld4_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ z0 = svld4_vnum_mf8 (p0, x0, x1),
+ z0 = svld4_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ldff1_mf8_base:
+** ldff1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldff1_mf8_base, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_mf8 (p0, x0),
+ z0 = svldff1 (p0, x0))
+
+/*
+** ldff1_mf8_index:
+** ldff1b z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ldff1_mf8_index, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_mf8 (p0, x0 + x1),
+ z0 = svldff1 (p0, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldff1_mf8_1:
+** incb x0
+** ldff1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldff1_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_mf8 (p0, x0 + svcntb ()),
+ z0 = svldff1 (p0, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldff1_mf8_m1:
+** decb x0
+** ldff1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldff1_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_mf8 (p0, x0 - svcntb ()),
+ z0 = svldff1 (p0, x0 - svcntb ()))
+
+/*
+** ldff1_vnum_mf8_0:
+** ldff1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldff1_vnum_mf8_0, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_vnum_mf8 (p0, x0, 0),
+ z0 = svldff1_vnum (p0, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldff1_vnum_mf8_1:
+** incb x0
+** ldff1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldff1_vnum_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_vnum_mf8 (p0, x0, 1),
+ z0 = svldff1_vnum (p0, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldff1_vnum_mf8_m1:
+** decb x0
+** ldff1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldff1_vnum_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_vnum_mf8 (p0, x0, -1),
+ z0 = svldff1_vnum (p0, x0, -1))
+
+/*
+** ldff1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldff1b z0\.b, p0/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldff1b z0\.b, p0/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD (ldff1_vnum_mf8_x1, svmfloat8_t, mfloat8_t,
+ z0 = svldff1_vnum_mf8 (p0, x0, x1),
+ z0 = svldff1_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,155 @@
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ldnf1_mf8_base:
+** ldnf1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_base, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0),
+ z0 = svldnf1 (p0, x0))
+
+/*
+** ldnf1_mf8_index:
+** add (x[0-9]+), x0, x1
+** ldnf1b z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_index, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 + x1),
+ z0 = svldnf1 (p0, x0 + x1))
+
+/*
+** ldnf1_mf8_1:
+** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 + svcntb ()),
+ z0 = svldnf1 (p0, x0 + svcntb ()))
+
+/*
+** ldnf1_mf8_7:
+** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_7, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 + svcntb () * 7),
+ z0 = svldnf1 (p0, x0 + svcntb () * 7))
+
+/*
+** ldnf1_mf8_8:
+** incb x0, all, mul #8
+** ldnf1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 + svcntb () * 8),
+ z0 = svldnf1 (p0, x0 + svcntb () * 8))
+
+/*
+** ldnf1_mf8_m1:
+** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 - svcntb ()),
+ z0 = svldnf1 (p0, x0 - svcntb ()))
+
+/*
+** ldnf1_mf8_m8:
+** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 - svcntb () * 8),
+ z0 = svldnf1 (p0, x0 - svcntb () * 8))
+
+/*
+** ldnf1_mf8_m9:
+** decb x0, all, mul #9
+** ldnf1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnf1_mf8_m9, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_mf8 (p0, x0 - svcntb () * 9),
+ z0 = svldnf1 (p0, x0 - svcntb () * 9))
+
+/*
+** ldnf1_vnum_mf8_0:
+** ldnf1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_0, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, 0),
+ z0 = svldnf1_vnum (p0, x0, 0))
+
+/*
+** ldnf1_vnum_mf8_1:
+** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, 1),
+ z0 = svldnf1_vnum (p0, x0, 1))
+
+/*
+** ldnf1_vnum_mf8_7:
+** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_7, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, 7),
+ z0 = svldnf1_vnum (p0, x0, 7))
+
+/*
+** ldnf1_vnum_mf8_8:
+** incb x0, all, mul #8
+** ldnf1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, 8),
+ z0 = svldnf1_vnum (p0, x0, 8))
+
+/*
+** ldnf1_vnum_mf8_m1:
+** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, -1),
+ z0 = svldnf1_vnum (p0, x0, -1))
+
+/*
+** ldnf1_vnum_mf8_m8:
+** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, -8),
+ z0 = svldnf1_vnum (p0, x0, -8))
+
+/*
+** ldnf1_vnum_mf8_m9:
+** decb x0, all, mul #9
+** ldnf1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_m9, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, -9),
+ z0 = svldnf1_vnum (p0, x0, -9))
+
+/*
+** ldnf1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnf1b z0\.b, p0/z, \[\2\]
+** ret
+*/
+TEST_LOAD (ldnf1_vnum_mf8_x1, svmfloat8_t, mfloat8_t,
+ z0 = svldnf1_vnum_mf8 (p0, x0, x1),
+ z0 = svldnf1_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,162 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** ldnt1_mf8_base:
+** ldnt1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_base, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0),
+ z0 = svldnt1 (p0, x0))
+
+/*
+** ldnt1_mf8_index:
+** ldnt1b z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_index, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 + x1),
+ z0 = svldnt1 (p0, x0 + x1))
+
+/*
+** ldnt1_mf8_1:
+** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 + svcntb ()),
+ z0 = svldnt1 (p0, x0 + svcntb ()))
+
+/*
+** ldnt1_mf8_7:
+** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_7, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 + svcntb () * 7),
+ z0 = svldnt1 (p0, x0 + svcntb () * 7))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_8:
+** incb x0, all, mul #8
+** ldnt1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 + svcntb () * 8),
+ z0 = svldnt1 (p0, x0 + svcntb () * 8))
+
+/*
+** ldnt1_mf8_m1:
+** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 - svcntb ()),
+ z0 = svldnt1 (p0, x0 - svcntb ()))
+
+/*
+** ldnt1_mf8_m8:
+** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 - svcntb () * 8),
+ z0 = svldnt1 (p0, x0 - svcntb () * 8))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_mf8_m9:
+** decb x0, all, mul #9
+** ldnt1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnt1_mf8_m9, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_mf8 (p0, x0 - svcntb () * 9),
+ z0 = svldnt1 (p0, x0 - svcntb () * 9))
+
+/*
+** ldnt1_vnum_mf8_0:
+** ldnt1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_0, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, 0),
+ z0 = svldnt1_vnum (p0, x0, 0))
+
+/*
+** ldnt1_vnum_mf8_1:
+** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_1, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, 1),
+ z0 = svldnt1_vnum (p0, x0, 1))
+
+/*
+** ldnt1_vnum_mf8_7:
+** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_7, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, 7),
+ z0 = svldnt1_vnum (p0, x0, 7))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_8:
+** incb x0, all, mul #8
+** ldnt1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_8, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, 8),
+ z0 = svldnt1_vnum (p0, x0, 8))
+
+/*
+** ldnt1_vnum_mf8_m1:
+** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_m1, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, -1),
+ z0 = svldnt1_vnum (p0, x0, -1))
+
+/*
+** ldnt1_vnum_mf8_m8:
+** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_m8, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, -8),
+ z0 = svldnt1_vnum (p0, x0, -8))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_mf8_m9:
+** decb x0, all, mul #9
+** ldnt1b z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_m9, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, -9),
+ z0 = svldnt1_vnum (p0, x0, -9))
+
+/*
+** ldnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b z0\.b, p0/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b z0\.b, p0/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD (ldnt1_vnum_mf8_x1, svmfloat8_t, mfloat8_t,
+ z0 = svldnt1_vnum_mf8 (p0, x0, x1),
+ z0 = svldnt1_vnum (p0, x0, x1))
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** len_x0_mf8:
+** cntb x0
+** ret
+*/
+TEST_REDUCTION_X (len_x0_mf8, uint64_t, svmfloat8_t,
+ x0 = svlen_mf8 (z0),
+ x0 = svlen (z0))
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_bf16_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_bf16_mf8_tied1, svbfloat16_t, svmfloat8_t,
+ z0_res = svreinterpret_bf16_mf8 (z0),
+ z0_res = svreinterpret_bf16 (z0))
+
+/*
+** reinterpret_bf16_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_bf16_mf8_untied, svbfloat16_t, svmfloat8_t,
+ z0 = svreinterpret_bf16_mf8 (z4),
+ z0 = svreinterpret_bf16 (z4))
+
/*
** reinterpret_bf16_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_f16_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_f16_mf8_tied1, svfloat16_t, svmfloat8_t,
+ z0_res = svreinterpret_f16_mf8 (z0),
+ z0_res = svreinterpret_f16 (z0))
+
+/*
+** reinterpret_f16_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_f16_mf8_untied, svfloat16_t, svmfloat8_t,
+ z0 = svreinterpret_f16_mf8 (z4),
+ z0 = svreinterpret_f16 (z4))
+
/*
** reinterpret_f16_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_f32_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_f32_mf8_tied1, svfloat32_t, svmfloat8_t,
+ z0_res = svreinterpret_f32_mf8 (z0),
+ z0_res = svreinterpret_f32 (z0))
+
+/*
+** reinterpret_f32_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_f32_mf8_untied, svfloat32_t, svmfloat8_t,
+ z0 = svreinterpret_f32_mf8 (z4),
+ z0 = svreinterpret_f32 (z4))
+
/*
** reinterpret_f32_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_f64_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_f64_mf8_tied1, svfloat64_t, svmfloat8_t,
+ z0_res = svreinterpret_f64_mf8 (z0),
+ z0_res = svreinterpret_f64 (z0))
+
+/*
+** reinterpret_f64_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_f64_mf8_untied, svfloat64_t, svmfloat8_t,
+ z0 = svreinterpret_f64_mf8 (z4),
+ z0 = svreinterpret_f64 (z4))
+
/*
** reinterpret_f64_bf16_tied1:
** ret
new file mode 100644
@@ -0,0 +1,297 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** reinterpret_mf8_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_mf8_tied1, svmfloat8_t, svmfloat8_t,
+ z0_res = svreinterpret_mf8_mf8 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_mf8_untied, svmfloat8_t, svmfloat8_t,
+ z0 = svreinterpret_mf8_mf8 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_bf16_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_bf16_tied1, svmfloat8_t, svbfloat16_t,
+ z0_res = svreinterpret_mf8_bf16 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_bf16_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_bf16_untied, svmfloat8_t, svbfloat16_t,
+ z0 = svreinterpret_mf8_bf16 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_f16_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_f16_tied1, svmfloat8_t, svfloat16_t,
+ z0_res = svreinterpret_mf8_f16 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_f16_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_f16_untied, svmfloat8_t, svfloat16_t,
+ z0 = svreinterpret_mf8_f16 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_f32_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_f32_tied1, svmfloat8_t, svfloat32_t,
+ z0_res = svreinterpret_mf8_f32 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_f32_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_f32_untied, svmfloat8_t, svfloat32_t,
+ z0 = svreinterpret_mf8_f32 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_f64_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_f64_tied1, svmfloat8_t, svfloat64_t,
+ z0_res = svreinterpret_mf8_f64 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_f64_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_f64_untied, svmfloat8_t, svfloat64_t,
+ z0 = svreinterpret_mf8_f64 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_s8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_s8_tied1, svmfloat8_t, svint8_t,
+ z0_res = svreinterpret_mf8_s8 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_s8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_s8_untied, svmfloat8_t, svint8_t,
+ z0 = svreinterpret_mf8_s8 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_s16_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_s16_tied1, svmfloat8_t, svint16_t,
+ z0_res = svreinterpret_mf8_s16 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_s16_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_s16_untied, svmfloat8_t, svint16_t,
+ z0 = svreinterpret_mf8_s16 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_s32_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_s32_tied1, svmfloat8_t, svint32_t,
+ z0_res = svreinterpret_mf8_s32 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_s32_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_s32_untied, svmfloat8_t, svint32_t,
+ z0 = svreinterpret_mf8_s32 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_s64_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_s64_tied1, svmfloat8_t, svint64_t,
+ z0_res = svreinterpret_mf8_s64 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_s64_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_s64_untied, svmfloat8_t, svint64_t,
+ z0 = svreinterpret_mf8_s64 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_u8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_u8_tied1, svmfloat8_t, svuint8_t,
+ z0_res = svreinterpret_mf8_u8 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_u8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_u8_untied, svmfloat8_t, svuint8_t,
+ z0 = svreinterpret_mf8_u8 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_u16_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_u16_tied1, svmfloat8_t, svuint16_t,
+ z0_res = svreinterpret_mf8_u16 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_u16_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_u16_untied, svmfloat8_t, svuint16_t,
+ z0 = svreinterpret_mf8_u16 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_u32_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_u32_tied1, svmfloat8_t, svuint32_t,
+ z0_res = svreinterpret_mf8_u32 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_u32_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_u32_untied, svmfloat8_t, svuint32_t,
+ z0 = svreinterpret_mf8_u32 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_u64_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_u64_tied1, svmfloat8_t, svuint64_t,
+ z0_res = svreinterpret_mf8_u64 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_u64_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_mf8_u64_untied, svmfloat8_t, svuint64_t,
+ z0 = svreinterpret_mf8_u64 (z4),
+ z0 = svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_bf16_x2_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_bf16_x2_tied1, svmfloat8x2_t, svbfloat16x2_t,
+ z0_res = svreinterpret_mf8_bf16_x2 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_f32_x2_untied:
+** (
+** mov z0\.d, z4\.d
+** mov z1\.d, z5\.d
+** |
+** mov z1\.d, z5\.d
+** mov z0\.d, z4\.d
+** )
+** ret
+*/
+TEST_DUAL_XN (reinterpret_mf8_f32_x2_untied, svmfloat8x2_t, svfloat32x2_t, z0,
+ svreinterpret_mf8_f32_x2 (z4),
+ svreinterpret_mf8 (z4))
+
+/*
+** reinterpret_mf8_mf8_x3_untied:
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_mf8_mf8_x3_untied, svmfloat8x3_t, svmfloat8x3_t, z18,
+ svreinterpret_mf8_mf8_x3 (z23),
+ svreinterpret_mf8 (z23))
+
+/*
+** reinterpret_mf8_s64_x3_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_s64_x3_tied1, svmfloat8x3_t, svint64x3_t,
+ z0_res = svreinterpret_mf8_s64_x3 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_u8_x3_untied:
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_mf8_u8_x3_untied, svmfloat8x3_t, svuint8x3_t, z18,
+ svreinterpret_mf8_u8_x3 (z23),
+ svreinterpret_mf8 (z23))
+
+/*
+** reinterpret_mf8_u32_x4_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_mf8_u32_x4_tied1, svmfloat8x4_t, svuint32x4_t,
+ z0_res = svreinterpret_mf8_u32_x4 (z0),
+ z0_res = svreinterpret_mf8 (z0))
+
+/*
+** reinterpret_mf8_f64_x4_untied:
+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d
+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d
+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d
+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_mf8_f64_x4_untied, svmfloat8x4_t, svfloat64x4_t, z28,
+ svreinterpret_mf8_f64_x4 (z4),
+ svreinterpret_mf8 (z4))
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_s16_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_s16_mf8_tied1, svint16_t, svmfloat8_t,
+ z0_res = svreinterpret_s16_mf8 (z0),
+ z0_res = svreinterpret_s16 (z0))
+
+/*
+** reinterpret_s16_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_s16_mf8_untied, svint16_t, svmfloat8_t,
+ z0 = svreinterpret_s16_mf8 (z4),
+ z0 = svreinterpret_s16 (z4))
+
/*
** reinterpret_s16_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_s32_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_s32_mf8_tied1, svint32_t, svmfloat8_t,
+ z0_res = svreinterpret_s32_mf8 (z0),
+ z0_res = svreinterpret_s32 (z0))
+
+/*
+** reinterpret_s32_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_s32_mf8_untied, svint32_t, svmfloat8_t,
+ z0 = svreinterpret_s32_mf8 (z4),
+ z0 = svreinterpret_s32 (z4))
+
/*
** reinterpret_s32_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_s64_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_s64_mf8_tied1, svint64_t, svmfloat8_t,
+ z0_res = svreinterpret_s64_mf8 (z0),
+ z0_res = svreinterpret_s64 (z0))
+
+/*
+** reinterpret_s64_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_s64_mf8_untied, svint64_t, svmfloat8_t,
+ z0 = svreinterpret_s64_mf8 (z4),
+ z0 = svreinterpret_s64 (z4))
+
/*
** reinterpret_s64_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_s8_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_s8_mf8_tied1, svint8_t, svmfloat8_t,
+ z0_res = svreinterpret_s8_mf8 (z0),
+ z0_res = svreinterpret_s8 (z0))
+
+/*
+** reinterpret_s8_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_s8_mf8_untied, svint8_t, svmfloat8_t,
+ z0 = svreinterpret_s8_mf8 (z4),
+ z0 = svreinterpret_s8 (z4))
+
/*
** reinterpret_s8_bf16_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_u16_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_u16_mf8_tied1, svuint16_t, svmfloat8_t,
+ z0_res = svreinterpret_u16_mf8 (z0),
+ z0_res = svreinterpret_u16 (z0))
+
+/*
+** reinterpret_u16_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_u16_mf8_untied, svuint16_t, svmfloat8_t,
+ z0 = svreinterpret_u16_mf8 (z4),
+ z0 = svreinterpret_u16 (z4))
+
/*
** reinterpret_u16_bf16_tied1:
** ret
@@ -229,6 +246,17 @@ TEST_DUAL_XN (reinterpret_u16_f32_x2_untied, svuint16x2_t, svfloat32x2_t, z0,
svreinterpret_u16_f32_x2 (z4),
svreinterpret_u16 (z4))
+/*
+** reinterpret_u16_mf8_x3_untied:
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_u16_mf8_x3_untied, svuint16x3_t, svmfloat8x3_t, z18,
+ svreinterpret_u16_mf8_x3 (z23),
+ svreinterpret_u16 (z23))
+
/*
** reinterpret_u16_s64_x3_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_u32_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_u32_mf8_tied1, svuint32_t, svmfloat8_t,
+ z0_res = svreinterpret_u32_mf8 (z0),
+ z0_res = svreinterpret_u32 (z0))
+
+/*
+** reinterpret_u32_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_u32_mf8_untied, svuint32_t, svmfloat8_t,
+ z0 = svreinterpret_u32_mf8 (z4),
+ z0 = svreinterpret_u32 (z4))
+
/*
** reinterpret_u32_bf16_tied1:
** ret
@@ -229,6 +246,17 @@ TEST_DUAL_XN (reinterpret_u32_f32_x2_untied, svuint32x2_t, svfloat32x2_t, z0,
svreinterpret_u32_f32_x2 (z4),
svreinterpret_u32 (z4))
+/*
+** reinterpret_u32_mf8_x3_untied:
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_u32_mf8_x3_untied, svuint32x3_t, svmfloat8x3_t, z18,
+ svreinterpret_u32_mf8_x3 (z23),
+ svreinterpret_u32 (z23))
+
/*
** reinterpret_u32_s64_x3_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_u64_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_u64_mf8_tied1, svuint64_t, svmfloat8_t,
+ z0_res = svreinterpret_u64_mf8 (z0),
+ z0_res = svreinterpret_u64 (z0))
+
+/*
+** reinterpret_u64_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_u64_mf8_untied, svuint64_t, svmfloat8_t,
+ z0 = svreinterpret_u64_mf8 (z4),
+ z0 = svreinterpret_u64 (z4))
+
/*
** reinterpret_u64_bf16_tied1:
** ret
@@ -229,6 +246,17 @@ TEST_DUAL_XN (reinterpret_u64_f32_x2_untied, svuint64x2_t, svfloat32x2_t, z0,
svreinterpret_u64_f32_x2 (z4),
svreinterpret_u64 (z4))
+/*
+** reinterpret_u64_mf8_x3_untied:
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_u64_mf8_x3_untied, svuint64x3_t, svmfloat8x3_t, z18,
+ svreinterpret_u64_mf8_x3 (z23),
+ svreinterpret_u64 (z23))
+
/*
** reinterpret_u64_s64_x3_tied1:
** ret
@@ -2,6 +2,23 @@
#include "test_sve_acle.h"
+/*
+** reinterpret_u8_mf8_tied1:
+** ret
+*/
+TEST_DUAL_Z_REV (reinterpret_u8_mf8_tied1, svuint8_t, svmfloat8_t,
+ z0_res = svreinterpret_u8_mf8 (z0),
+ z0_res = svreinterpret_u8 (z0))
+
+/*
+** reinterpret_u8_mf8_untied:
+** mov z0\.d, z4\.d
+** ret
+*/
+TEST_DUAL_Z (reinterpret_u8_mf8_untied, svuint8_t, svmfloat8_t,
+ z0 = svreinterpret_u8_mf8 (z4),
+ z0 = svreinterpret_u8 (z4))
+
/*
** reinterpret_u8_bf16_tied1:
** ret
@@ -214,6 +231,17 @@ TEST_DUAL_Z_REV (reinterpret_u8_bf16_x2_tied1, svuint8x2_t, svbfloat16x2_t,
z0_res = svreinterpret_u8_bf16_x2 (z0),
z0_res = svreinterpret_u8 (z0))
+/*
+** reinterpret_u8_mf8_x3_untied:
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d
+** ret
+*/
+TEST_DUAL_XN (reinterpret_u8_mf8_x3_untied, svuint8x3_t, svmfloat8x3_t, z18,
+ svreinterpret_u8_mf8_x3 (z23),
+ svreinterpret_u8 (z23))
+
/*
** reinterpret_u8_f32_x2_untied:
** (
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** rev_mf8_tied1:
+** rev z0\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (rev_mf8_tied1, svmfloat8_t,
+ z0 = svrev_mf8 (z0),
+ z0 = svrev (z0))
+
+/*
+** rev_mf8_untied:
+** rev z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (rev_mf8_untied, svmfloat8_t,
+ z0 = svrev_mf8 (z1),
+ z0 = svrev (z1))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** sel_mf8_tied1:
+** sel z0\.b, p0, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (sel_mf8_tied1, svmfloat8_t,
+ z0 = svsel_mf8 (p0, z0, z1),
+ z0 = svsel (p0, z0, z1))
+
+/*
+** sel_mf8_tied2:
+** sel z0\.b, p0, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (sel_mf8_tied2, svmfloat8_t,
+ z0 = svsel_mf8 (p0, z1, z0),
+ z0 = svsel (p0, z1, z0))
+
+/*
+** sel_mf8_untied:
+** sel z0\.b, p0, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (sel_mf8_untied, svmfloat8_t,
+ z0 = svsel_mf8 (p0, z1, z2),
+ z0 = svsel (p0, z1, z2))
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set2_mf8_z24_0:
+** mov z25\.d, z5\.d
+** mov z24\.d, z0\.d
+** ret
+*/
+TEST_SET (set2_mf8_z24_0, svmfloat8x2_t, svmfloat8_t,
+ z24 = svset2_mf8 (z4, 0, z0),
+ z24 = svset2 (z4, 0, z0))
+
+/*
+** set2_mf8_z24_1:
+** mov z24\.d, z4\.d
+** mov z25\.d, z0\.d
+** ret
+*/
+TEST_SET (set2_mf8_z24_1, svmfloat8x2_t, svmfloat8_t,
+ z24 = svset2_mf8 (z4, 1, z0),
+ z24 = svset2 (z4, 1, z0))
+
+/*
+** set2_mf8_z4_0:
+** mov z4\.d, z0\.d
+** ret
+*/
+TEST_SET (set2_mf8_z4_0, svmfloat8x2_t, svmfloat8_t,
+ z4 = svset2_mf8 (z4, 0, z0),
+ z4 = svset2 (z4, 0, z0))
+
+/*
+** set2_mf8_z4_1:
+** mov z5\.d, z0\.d
+** ret
+*/
+TEST_SET (set2_mf8_z4_1, svmfloat8x2_t, svmfloat8_t,
+ z4 = svset2_mf8 (z4, 1, z0),
+ z4 = svset2 (z4, 1, z0))
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set3_mf8_z24_0:
+** mov z25\.d, z5\.d
+** mov z26\.d, z6\.d
+** mov z24\.d, z0\.d
+** ret
+*/
+TEST_SET (set3_mf8_z24_0, svmfloat8x3_t, svmfloat8_t,
+ z24 = svset3_mf8 (z4, 0, z0),
+ z24 = svset3 (z4, 0, z0))
+
+/*
+** set3_mf8_z24_1:
+** mov z24\.d, z4\.d
+** mov z26\.d, z6\.d
+** mov z25\.d, z0\.d
+** ret
+*/
+TEST_SET (set3_mf8_z24_1, svmfloat8x3_t, svmfloat8_t,
+ z24 = svset3_mf8 (z4, 1, z0),
+ z24 = svset3 (z4, 1, z0))
+
+/*
+** set3_mf8_z24_2:
+** mov z24\.d, z4\.d
+** mov z25\.d, z5\.d
+** mov z26\.d, z0\.d
+** ret
+*/
+TEST_SET (set3_mf8_z24_2, svmfloat8x3_t, svmfloat8_t,
+ z24 = svset3_mf8 (z4, 2, z0),
+ z24 = svset3 (z4, 2, z0))
+
+/*
+** set3_mf8_z4_0:
+** mov z4\.d, z0\.d
+** ret
+*/
+TEST_SET (set3_mf8_z4_0, svmfloat8x3_t, svmfloat8_t,
+ z4 = svset3_mf8 (z4, 0, z0),
+ z4 = svset3 (z4, 0, z0))
+
+/*
+** set3_mf8_z4_1:
+** mov z5\.d, z0\.d
+** ret
+*/
+TEST_SET (set3_mf8_z4_1, svmfloat8x3_t, svmfloat8_t,
+ z4 = svset3_mf8 (z4, 1, z0),
+ z4 = svset3 (z4, 1, z0))
+
+/*
+** set3_mf8_z4_2:
+** mov z6\.d, z0\.d
+** ret
+*/
+TEST_SET (set3_mf8_z4_2, svmfloat8x3_t, svmfloat8_t,
+ z4 = svset3_mf8 (z4, 2, z0),
+ z4 = svset3 (z4, 2, z0))
new file mode 100644
@@ -0,0 +1,87 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set4_mf8_z24_0:
+** mov z25\.d, z5\.d
+** mov z26\.d, z6\.d
+** mov z27\.d, z7\.d
+** mov z24\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z24_0, svmfloat8x4_t, svmfloat8_t,
+ z24 = svset4_mf8 (z4, 0, z0),
+ z24 = svset4 (z4, 0, z0))
+
+/*
+** set4_mf8_z24_1:
+** mov z24\.d, z4\.d
+** mov z26\.d, z6\.d
+** mov z27\.d, z7\.d
+** mov z25\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z24_1, svmfloat8x4_t, svmfloat8_t,
+ z24 = svset4_mf8 (z4, 1, z0),
+ z24 = svset4 (z4, 1, z0))
+
+/*
+** set4_mf8_z24_2:
+** mov z24\.d, z4\.d
+** mov z25\.d, z5\.d
+** mov z27\.d, z7\.d
+** mov z26\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z24_2, svmfloat8x4_t, svmfloat8_t,
+ z24 = svset4_mf8 (z4, 2, z0),
+ z24 = svset4 (z4, 2, z0))
+
+/*
+** set4_mf8_z24_3:
+** mov z24\.d, z4\.d
+** mov z25\.d, z5\.d
+** mov z26\.d, z6\.d
+** mov z27\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z24_3, svmfloat8x4_t, svmfloat8_t,
+ z24 = svset4_mf8 (z4, 3, z0),
+ z24 = svset4 (z4, 3, z0))
+
+/*
+** set4_mf8_z4_0:
+** mov z4\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z4_0, svmfloat8x4_t, svmfloat8_t,
+ z4 = svset4_mf8 (z4, 0, z0),
+ z4 = svset4 (z4, 0, z0))
+
+/*
+** set4_mf8_z4_1:
+** mov z5\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z4_1, svmfloat8x4_t, svmfloat8_t,
+ z4 = svset4_mf8 (z4, 1, z0),
+ z4 = svset4 (z4, 1, z0))
+
+/*
+** set4_mf8_z4_2:
+** mov z6\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z4_2, svmfloat8x4_t, svmfloat8_t,
+ z4 = svset4_mf8 (z4, 2, z0),
+ z4 = svset4 (z4, 2, z0))
+
+/*
+** set4_mf8_z4_3:
+** mov z7\.d, z0\.d
+** ret
+*/
+TEST_SET (set4_mf8_z4_3, svmfloat8x4_t, svmfloat8_t,
+ z4 = svset4_mf8 (z4, 3, z0),
+ z4 = svset4 (z4, 3, z0))
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set_neonq_mf8_z24:
+** ptrue (p[0-9]+)\.b, vl16
+** sel z24\.b, \1, z0\.b, z4\.b
+** ret
+*/
+TEST_SET_NEONQ (set_neonq_mf8_z24, svmfloat8_t, mfloat8x16_t,
+ z24 = svset_neonq_mf8 (z4, z0),
+ z24 = svset_neonq (z4, z0))
+
+/*
+** set_neonq_mf8_z4:
+** ptrue (p[0-9]+)\.b, vl16
+** sel z4\.b, \1, z0\.b, z4\.b
+** ret
+*/
+TEST_SET_NEONQ (set_neonq_mf8_z4, svmfloat8_t, mfloat8x16_t,
+ z4_res = svset_neonq_mf8 (z4, z0),
+ z4_res = svset_neonq (z4, z0))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** splice_mf8_tied1:
+** splice z0\.b, p0, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (splice_mf8_tied1, svmfloat8_t,
+ z0 = svsplice_mf8 (p0, z0, z1),
+ z0 = svsplice (p0, z0, z1))
+
+/*
+** splice_mf8_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** splice z0\.b, p0, z0\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (splice_mf8_tied2, svmfloat8_t,
+ z0 = svsplice_mf8 (p0, z1, z0),
+ z0 = svsplice (p0, z1, z0))
+
+/*
+** splice_mf8_untied:
+** movprfx z0, z1
+** splice z0\.b, p0, z0\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (splice_mf8_untied, svmfloat8_t,
+ z0 = svsplice_mf8 (p0, z1, z2),
+ z0 = svsplice (p0, z1, z2))
new file mode 100644
@@ -0,0 +1,162 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** st1_mf8_base:
+** st1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st1_mf8_base, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0, z0),
+ svst1 (p0, x0, z0))
+
+/*
+** st1_mf8_index:
+** st1b z0\.b, p0, \[x0, x1\]
+** ret
+*/
+TEST_STORE (st1_mf8_index, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 + x1, z0),
+ svst1 (p0, x0 + x1, z0))
+
+/*
+** st1_mf8_1:
+** st1b z0\.b, p0, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_STORE (st1_mf8_1, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 + svcntb (), z0),
+ svst1 (p0, x0 + svcntb (), z0))
+
+/*
+** st1_mf8_7:
+** st1b z0\.b, p0, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_STORE (st1_mf8_7, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 + svcntb () * 7, z0),
+ svst1 (p0, x0 + svcntb () * 7, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_8:
+** incb x0, all, mul #8
+** st1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st1_mf8_8, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 + svcntb () * 8, z0),
+ svst1 (p0, x0 + svcntb () * 8, z0))
+
+/*
+** st1_mf8_m1:
+** st1b z0\.b, p0, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_STORE (st1_mf8_m1, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 - svcntb (), z0),
+ svst1 (p0, x0 - svcntb (), z0))
+
+/*
+** st1_mf8_m8:
+** st1b z0\.b, p0, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_STORE (st1_mf8_m8, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 - svcntb () * 8, z0),
+ svst1 (p0, x0 - svcntb () * 8, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_mf8_m9:
+** decb x0, all, mul #9
+** st1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st1_mf8_m9, svmfloat8_t, mfloat8_t,
+ svst1_mf8 (p0, x0 - svcntb () * 9, z0),
+ svst1 (p0, x0 - svcntb () * 9, z0))
+
+/*
+** st1_vnum_mf8_0:
+** st1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_0, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, 0, z0),
+ svst1_vnum (p0, x0, 0, z0))
+
+/*
+** st1_vnum_mf8_1:
+** st1b z0\.b, p0, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_1, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, 1, z0),
+ svst1_vnum (p0, x0, 1, z0))
+
+/*
+** st1_vnum_mf8_7:
+** st1b z0\.b, p0, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_7, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, 7, z0),
+ svst1_vnum (p0, x0, 7, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_8:
+** incb x0, all, mul #8
+** st1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_8, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, 8, z0),
+ svst1_vnum (p0, x0, 8, z0))
+
+/*
+** st1_vnum_mf8_m1:
+** st1b z0\.b, p0, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_m1, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, -1, z0),
+ svst1_vnum (p0, x0, -1, z0))
+
+/*
+** st1_vnum_mf8_m8:
+** st1b z0\.b, p0, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_m8, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, -8, z0),
+ svst1_vnum (p0, x0, -8, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_mf8_m9:
+** decb x0, all, mul #9
+** st1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_m9, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, -9, z0),
+ svst1_vnum (p0, x0, -9, z0))
+
+/*
+** st1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b z0\.b, p0, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b z0\.b, p0, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE (st1_vnum_mf8_x1, svmfloat8_t, mfloat8_t,
+ svst1_vnum_mf8 (p0, x0, x1, z0),
+ svst1_vnum (p0, x0, x1, z0))
new file mode 100644
@@ -0,0 +1,204 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** st2_mf8_base:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_mf8_base, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0, z0),
+ svst2 (p0, x0, z0))
+
+/*
+** st2_mf8_index:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, x1\]
+** ret
+*/
+TEST_STORE (st2_mf8_index, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 + x1, z0),
+ svst2 (p0, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st2_mf8_1:
+** incb x0
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 + svcntb (), z0),
+ svst2 (p0, x0 + svcntb (), z0))
+
+/*
+** st2_mf8_2:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE (st2_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 + svcntb () * 2, z0),
+ svst2 (p0, x0 + svcntb () * 2, z0))
+
+/*
+** st2_mf8_14:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE (st2_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 + svcntb () * 14, z0),
+ svst2 (p0, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st2_mf8_16:
+** incb x0, all, mul #16
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 + svcntb () * 16, z0),
+ svst2 (p0, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st2_mf8_m1:
+** decb x0
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 - svcntb (), z0),
+ svst2 (p0, x0 - svcntb (), z0))
+
+/*
+** st2_mf8_m2:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE (st2_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 - svcntb () * 2, z0),
+ svst2 (p0, x0 - svcntb () * 2, z0))
+
+/*
+** st2_mf8_m16:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE (st2_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 - svcntb () * 16, z0),
+ svst2 (p0, x0 - svcntb () * 16, z0))
+
+/*
+** st2_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\]
+** ret
+*/
+TEST_STORE (st2_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svst2_mf8 (p0, x0 - svcntb () * 18, z0),
+ svst2 (p0, x0 - svcntb () * 18, z0))
+
+/*
+** st2_vnum_mf8_0:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, 0, z0),
+ svst2_vnum (p0, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st2_vnum_mf8_1:
+** incb x0
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, 1, z0),
+ svst2_vnum (p0, x0, 1, z0))
+
+/*
+** st2_vnum_mf8_2:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, 2, z0),
+ svst2_vnum (p0, x0, 2, z0))
+
+/*
+** st2_vnum_mf8_14:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, 14, z0),
+ svst2_vnum (p0, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st2_vnum_mf8_16:
+** incb x0, all, mul #16
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, 16, z0),
+ svst2_vnum (p0, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st2_vnum_mf8_m1:
+** decb x0
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, -1, z0),
+ svst2_vnum (p0, x0, -1, z0))
+
+/*
+** st2_vnum_mf8_m2:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, -2, z0),
+ svst2_vnum (p0, x0, -2, z0))
+
+/*
+** st2_vnum_mf8_m16:
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, -16, z0),
+ svst2_vnum (p0, x0, -16, z0))
+
+/*
+** st2_vnum_mf8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\]
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, -18, z0),
+ svst2_vnum (p0, x0, -18, z0))
+
+/*
+** st2_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE (st2_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,
+ svst2_vnum_mf8 (p0, x0, x1, z0),
+ svst2_vnum (p0, x0, x1, z0))
new file mode 100644
@@ -0,0 +1,246 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** st3_mf8_base:
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_mf8_base, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0, z0),
+ svst3 (p0, x0, z0))
+
+/*
+** st3_mf8_index:
+** st3b {z0\.b - z2\.b}, p0, \[x0, x1\]
+** ret
+*/
+TEST_STORE (st3_mf8_index, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 + x1, z0),
+ svst3 (p0, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_mf8_1:
+** incb x0
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_mf8_1, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 + svcntb (), z0),
+ svst3 (p0, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_mf8_2:
+** incb x0, all, mul #2
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_mf8_2, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 + svcntb () * 2, z0),
+ svst3 (p0, x0 + svcntb () * 2, z0))
+
+/*
+** st3_mf8_3:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\]
+** ret
+*/
+TEST_STORE (st3_mf8_3, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 + svcntb () * 3, z0),
+ svst3 (p0, x0 + svcntb () * 3, z0))
+
+/*
+** st3_mf8_21:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\]
+** ret
+*/
+TEST_STORE (st3_mf8_21, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 + svcntb () * 21, z0),
+ svst3 (p0, x0 + svcntb () * 21, z0))
+
+/*
+** st3_mf8_24:
+** addvl (x[0-9]+), x0, #24
+** st3b {z0\.b - z2\.b}, p0, \[\1\]
+** ret
+*/
+TEST_STORE (st3_mf8_24, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 + svcntb () * 24, z0),
+ svst3 (p0, x0 + svcntb () * 24, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_mf8_m1:
+** decb x0
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_mf8_m1, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 - svcntb (), z0),
+ svst3 (p0, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_mf8_m2:
+** decb x0, all, mul #2
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_mf8_m2, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 - svcntb () * 2, z0),
+ svst3 (p0, x0 - svcntb () * 2, z0))
+
+/*
+** st3_mf8_m3:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\]
+** ret
+*/
+TEST_STORE (st3_mf8_m3, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 - svcntb () * 3, z0),
+ svst3 (p0, x0 - svcntb () * 3, z0))
+
+/*
+** st3_mf8_m24:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\]
+** ret
+*/
+TEST_STORE (st3_mf8_m24, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 - svcntb () * 24, z0),
+ svst3 (p0, x0 - svcntb () * 24, z0))
+
+/*
+** st3_mf8_m27:
+** addvl (x[0-9]+), x0, #-27
+** st3b {z0\.b - z2\.b}, p0, \[\1\]
+** ret
+*/
+TEST_STORE (st3_mf8_m27, svmfloat8x3_t, mfloat8_t,
+ svst3_mf8 (p0, x0 - svcntb () * 27, z0),
+ svst3 (p0, x0 - svcntb () * 27, z0))
+
+/*
+** st3_vnum_mf8_0:
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_0, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, 0, z0),
+ svst3_vnum (p0, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_vnum_mf8_1:
+** incb x0
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_1, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, 1, z0),
+ svst3_vnum (p0, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_vnum_mf8_2:
+** incb x0, all, mul #2
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_2, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, 2, z0),
+ svst3_vnum (p0, x0, 2, z0))
+
+/*
+** st3_vnum_mf8_3:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_3, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, 3, z0),
+ svst3_vnum (p0, x0, 3, z0))
+
+/*
+** st3_vnum_mf8_21:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_21, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, 21, z0),
+ svst3_vnum (p0, x0, 21, z0))
+
+/*
+** st3_vnum_mf8_24:
+** addvl (x[0-9]+), x0, #24
+** st3b {z0\.b - z2\.b}, p0, \[\1\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_24, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, 24, z0),
+ svst3_vnum (p0, x0, 24, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_vnum_mf8_m1:
+** decb x0
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_m1, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, -1, z0),
+ svst3_vnum (p0, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st3_vnum_mf8_m2:
+** decb x0, all, mul #2
+** st3b {z0\.b - z2\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_m2, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, -2, z0),
+ svst3_vnum (p0, x0, -2, z0))
+
+/*
+** st3_vnum_mf8_m3:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_m3, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, -3, z0),
+ svst3_vnum (p0, x0, -3, z0))
+
+/*
+** st3_vnum_mf8_m24:
+** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_m24, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, -24, z0),
+ svst3_vnum (p0, x0, -24, z0))
+
+/*
+** st3_vnum_mf8_m27:
+** addvl (x[0-9]+), x0, #-27
+** st3b {z0\.b - z2\.b}, p0, \[\1\]
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_m27, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, -27, z0),
+ svst3_vnum (p0, x0, -27, z0))
+
+/*
+** st3_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st3b {z0\.b - z2\.b}, p0, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st3b {z0\.b - z2\.b}, p0, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE (st3_vnum_mf8_x1, svmfloat8x3_t, mfloat8_t,
+ svst3_vnum_mf8 (p0, x0, x1, z0),
+ svst3_vnum (p0, x0, x1, z0))
new file mode 100644
@@ -0,0 +1,290 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** st4_mf8_base:
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_base, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0, z0),
+ svst4 (p0, x0, z0))
+
+/*
+** st4_mf8_index:
+** st4b {z0\.b - z3\.b}, p0, \[x0, x1\]
+** ret
+*/
+TEST_STORE (st4_mf8_index, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + x1, z0),
+ svst4 (p0, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_mf8_1:
+** incb x0
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + svcntb (), z0),
+ svst4 (p0, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_mf8_2:
+** incb x0, all, mul #2
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + svcntb () * 2, z0),
+ svst4 (p0, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_mf8_3:
+** incb x0, all, mul #3
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + svcntb () * 3, z0),
+ svst4 (p0, x0 + svcntb () * 3, z0))
+
+/*
+** st4_mf8_4:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE (st4_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + svcntb () * 4, z0),
+ svst4 (p0, x0 + svcntb () * 4, z0))
+
+/*
+** st4_mf8_28:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE (st4_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + svcntb () * 28, z0),
+ svst4 (p0, x0 + svcntb () * 28, z0))
+
+/*
+** st4_mf8_32:
+** [^{]*
+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\]
+** ret
+*/
+TEST_STORE (st4_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 + svcntb () * 32, z0),
+ svst4 (p0, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_mf8_m1:
+** decb x0
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 - svcntb (), z0),
+ svst4 (p0, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_mf8_m2:
+** decb x0, all, mul #2
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 - svcntb () * 2, z0),
+ svst4 (p0, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_mf8_m3:
+** decb x0, all, mul #3
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 - svcntb () * 3, z0),
+ svst4 (p0, x0 - svcntb () * 3, z0))
+
+/*
+** st4_mf8_m4:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE (st4_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 - svcntb () * 4, z0),
+ svst4 (p0, x0 - svcntb () * 4, z0))
+
+/*
+** st4_mf8_m32:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE (st4_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 - svcntb () * 32, z0),
+ svst4 (p0, x0 - svcntb () * 32, z0))
+
+/*
+** st4_mf8_m36:
+** [^{]*
+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\]
+** ret
+*/
+TEST_STORE (st4_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svst4_mf8 (p0, x0 - svcntb () * 36, z0),
+ svst4 (p0, x0 - svcntb () * 36, z0))
+
+/*
+** st4_vnum_mf8_0:
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 0, z0),
+ svst4_vnum (p0, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_vnum_mf8_1:
+** incb x0
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 1, z0),
+ svst4_vnum (p0, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_vnum_mf8_2:
+** incb x0, all, mul #2
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 2, z0),
+ svst4_vnum (p0, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_vnum_mf8_3:
+** incb x0, all, mul #3
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 3, z0),
+ svst4_vnum (p0, x0, 3, z0))
+
+/*
+** st4_vnum_mf8_4:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 4, z0),
+ svst4_vnum (p0, x0, 4, z0))
+
+/*
+** st4_vnum_mf8_28:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 28, z0),
+ svst4_vnum (p0, x0, 28, z0))
+
+/*
+** st4_vnum_mf8_32:
+** [^{]*
+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, 32, z0),
+ svst4_vnum (p0, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_vnum_mf8_m1:
+** decb x0
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, -1, z0),
+ svst4_vnum (p0, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_vnum_mf8_m2:
+** decb x0, all, mul #2
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, -2, z0),
+ svst4_vnum (p0, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st4_vnum_mf8_m3:
+** decb x0, all, mul #3
+** st4b {z0\.b - z3\.b}, p0, \[x0\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, -3, z0),
+ svst4_vnum (p0, x0, -3, z0))
+
+/*
+** st4_vnum_mf8_m4:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, -4, z0),
+ svst4_vnum (p0, x0, -4, z0))
+
+/*
+** st4_vnum_mf8_m32:
+** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, -32, z0),
+ svst4_vnum (p0, x0, -32, z0))
+
+/*
+** st4_vnum_mf8_m36:
+** [^{]*
+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\]
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, -36, z0),
+ svst4_vnum (p0, x0, -36, z0))
+
+/*
+** st4_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st4b {z0\.b - z3\.b}, p0, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st4b {z0\.b - z3\.b}, p0, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE (st4_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,
+ svst4_vnum_mf8 (p0, x0, x1, z0),
+ svst4_vnum (p0, x0, x1, z0))
new file mode 100644
@@ -0,0 +1,162 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** stnt1_mf8_base:
+** stnt1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_base, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0, z0),
+ svstnt1 (p0, x0, z0))
+
+/*
+** stnt1_mf8_index:
+** stnt1b z0\.b, p0, \[x0, x1\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_index, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 + x1, z0),
+ svstnt1 (p0, x0 + x1, z0))
+
+/*
+** stnt1_mf8_1:
+** stnt1b z0\.b, p0, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_1, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 + svcntb (), z0),
+ svstnt1 (p0, x0 + svcntb (), z0))
+
+/*
+** stnt1_mf8_7:
+** stnt1b z0\.b, p0, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_7, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 + svcntb () * 7, z0),
+ svstnt1 (p0, x0 + svcntb () * 7, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_8:
+** incb x0, all, mul #8
+** stnt1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_8, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 + svcntb () * 8, z0),
+ svstnt1 (p0, x0 + svcntb () * 8, z0))
+
+/*
+** stnt1_mf8_m1:
+** stnt1b z0\.b, p0, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_m1, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 - svcntb (), z0),
+ svstnt1 (p0, x0 - svcntb (), z0))
+
+/*
+** stnt1_mf8_m8:
+** stnt1b z0\.b, p0, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_m8, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 - svcntb () * 8, z0),
+ svstnt1 (p0, x0 - svcntb () * 8, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_mf8_m9:
+** decb x0, all, mul #9
+** stnt1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (stnt1_mf8_m9, svmfloat8_t, mfloat8_t,
+ svstnt1_mf8 (p0, x0 - svcntb () * 9, z0),
+ svstnt1 (p0, x0 - svcntb () * 9, z0))
+
+/*
+** stnt1_vnum_mf8_0:
+** stnt1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_0, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, 0, z0),
+ svstnt1_vnum (p0, x0, 0, z0))
+
+/*
+** stnt1_vnum_mf8_1:
+** stnt1b z0\.b, p0, \[x0, #1, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_1, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, 1, z0),
+ svstnt1_vnum (p0, x0, 1, z0))
+
+/*
+** stnt1_vnum_mf8_7:
+** stnt1b z0\.b, p0, \[x0, #7, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_7, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, 7, z0),
+ svstnt1_vnum (p0, x0, 7, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_8:
+** incb x0, all, mul #8
+** stnt1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_8, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, 8, z0),
+ svstnt1_vnum (p0, x0, 8, z0))
+
+/*
+** stnt1_vnum_mf8_m1:
+** stnt1b z0\.b, p0, \[x0, #-1, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_m1, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, -1, z0),
+ svstnt1_vnum (p0, x0, -1, z0))
+
+/*
+** stnt1_vnum_mf8_m8:
+** stnt1b z0\.b, p0, \[x0, #-8, mul vl\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_m8, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, -8, z0),
+ svstnt1_vnum (p0, x0, -8, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_mf8_m9:
+** decb x0, all, mul #9
+** stnt1b z0\.b, p0, \[x0\]
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_m9, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, -9, z0),
+ svstnt1_vnum (p0, x0, -9, z0))
+
+/*
+** stnt1_vnum_mf8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b z0\.b, p0, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b z0\.b, p0, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE (stnt1_vnum_mf8_x1, svmfloat8_t, mfloat8_t,
+ svstnt1_vnum_mf8 (p0, x0, x1, z0),
+ svstnt1_vnum (p0, x0, x1, z0))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** tbl_mf8_tied1:
+** tbl z0\.b, {z0\.b}, z4\.b
+** ret
+*/
+TEST_DUAL_Z (tbl_mf8_tied1, svmfloat8_t, svuint8_t,
+ z0 = svtbl_mf8 (z0, z4),
+ z0 = svtbl (z0, z4))
+
+/*
+** tbl_mf8_tied2:
+** tbl z0\.b, {z4\.b}, z0\.b
+** ret
+*/
+TEST_DUAL_Z_REV (tbl_mf8_tied2, svmfloat8_t, svuint8_t,
+ z0_res = svtbl_mf8 (z4, z0),
+ z0_res = svtbl (z4, z0))
+
+/*
+** tbl_mf8_untied:
+** tbl z0\.b, {z1\.b}, z4\.b
+** ret
+*/
+TEST_DUAL_Z (tbl_mf8_untied, svmfloat8_t, svuint8_t,
+ z0 = svtbl_mf8 (z1, z4),
+ z0 = svtbl (z1, z4))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1_mf8_tied1:
+** trn1 z0\.b, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (trn1_mf8_tied1, svmfloat8_t,
+ z0 = svtrn1_mf8 (z0, z1),
+ z0 = svtrn1 (z0, z1))
+
+/*
+** trn1_mf8_tied2:
+** trn1 z0\.b, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (trn1_mf8_tied2, svmfloat8_t,
+ z0 = svtrn1_mf8 (z1, z0),
+ z0 = svtrn1 (z1, z0))
+
+/*
+** trn1_mf8_untied:
+** trn1 z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (trn1_mf8_untied, svmfloat8_t,
+ z0 = svtrn1_mf8 (z1, z2),
+ z0 = svtrn1 (z1, z2))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_mf8_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_mf8_tied1, svmfloat8_t,
+ z0 = svtrn1q_mf8 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_mf8_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_mf8_tied2, svmfloat8_t,
+ z0 = svtrn1q_mf8 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_mf8_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_mf8_untied, svmfloat8_t,
+ z0 = svtrn1q_mf8 (z1, z2),
+ z0 = svtrn1q (z1, z2))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2_mf8_tied1:
+** trn2 z0\.b, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (trn2_mf8_tied1, svmfloat8_t,
+ z0 = svtrn2_mf8 (z0, z1),
+ z0 = svtrn2 (z0, z1))
+
+/*
+** trn2_mf8_tied2:
+** trn2 z0\.b, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (trn2_mf8_tied2, svmfloat8_t,
+ z0 = svtrn2_mf8 (z1, z0),
+ z0 = svtrn2 (z1, z0))
+
+/*
+** trn2_mf8_untied:
+** trn2 z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (trn2_mf8_untied, svmfloat8_t,
+ z0 = svtrn2_mf8 (z1, z2),
+ z0 = svtrn2 (z1, z2))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_mf8_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_mf8_tied1, svmfloat8_t,
+ z0 = svtrn2q_mf8 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_mf8_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_mf8_tied2, svmfloat8_t,
+ z0 = svtrn2q_mf8 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_mf8_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_mf8_untied, svmfloat8_t,
+ z0 = svtrn2q_mf8 (z1, z2),
+ z0 = svtrn2q (z1, z2))
@@ -37,6 +37,13 @@ TEST_UNDEF (uint16, svuint16x2_t,
TEST_UNDEF (float16, svfloat16x2_t,
z0 = svundef2_f16 ())
+/*
+** mfloat8:
+** ret
+*/
+TEST_UNDEF (mfloat8, svmfloat8x2_t,
+ z0 = svundef2_mf8 ())
+
/*
** bfloat16:
** ret
@@ -37,6 +37,13 @@ TEST_UNDEF (uint16, svuint16x3_t,
TEST_UNDEF (float16, svfloat16x3_t,
z0 = svundef3_f16 ())
+/*
+** mfloat8:
+** ret
+*/
+TEST_UNDEF (mfloat8, svmfloat8x3_t,
+ z0 = svundef3_mf8 ())
+
/*
** bfloat16:
** ret
@@ -37,6 +37,13 @@ TEST_UNDEF (uint16, svuint16x4_t,
TEST_UNDEF (float16, svfloat16x4_t,
z0 = svundef4_f16 ())
+/*
+** mfloat8:
+** ret
+*/
+TEST_UNDEF (mfloat8, svmfloat8x4_t,
+ z0 = svundef4_mf8 ())
+
/*
** bfloat16:
** ret
@@ -37,6 +37,13 @@ TEST_UNDEF (uint16, svuint16_t,
TEST_UNDEF (float16, svfloat16_t,
z0 = svundef_f16 ())
+/*
+** mfloat8:
+** ret
+*/
+TEST_UNDEF (mfloat8, svmfloat8_t,
+ z0 = svundef_mf8 ())
+
/*
** bfloat16:
** ret
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1_mf8_tied1:
+** uzp1 z0\.b, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (uzp1_mf8_tied1, svmfloat8_t,
+ z0 = svuzp1_mf8 (z0, z1),
+ z0 = svuzp1 (z0, z1))
+
+/*
+** uzp1_mf8_tied2:
+** uzp1 z0\.b, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (uzp1_mf8_tied2, svmfloat8_t,
+ z0 = svuzp1_mf8 (z1, z0),
+ z0 = svuzp1 (z1, z0))
+
+/*
+** uzp1_mf8_untied:
+** uzp1 z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (uzp1_mf8_untied, svmfloat8_t,
+ z0 = svuzp1_mf8 (z1, z2),
+ z0 = svuzp1 (z1, z2))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_mf8_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_mf8_tied1, svmfloat8_t,
+ z0 = svuzp1q_mf8 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_mf8_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_mf8_tied2, svmfloat8_t,
+ z0 = svuzp1q_mf8 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_mf8_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_mf8_untied, svmfloat8_t,
+ z0 = svuzp1q_mf8 (z1, z2),
+ z0 = svuzp1q (z1, z2))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2_mf8_tied1:
+** uzp2 z0\.b, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (uzp2_mf8_tied1, svmfloat8_t,
+ z0 = svuzp2_mf8 (z0, z1),
+ z0 = svuzp2 (z0, z1))
+
+/*
+** uzp2_mf8_tied2:
+** uzp2 z0\.b, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (uzp2_mf8_tied2, svmfloat8_t,
+ z0 = svuzp2_mf8 (z1, z0),
+ z0 = svuzp2 (z1, z0))
+
+/*
+** uzp2_mf8_untied:
+** uzp2 z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (uzp2_mf8_untied, svmfloat8_t,
+ z0 = svuzp2_mf8 (z1, z2),
+ z0 = svuzp2 (z1, z2))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_mf8_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_mf8_tied1, svmfloat8_t,
+ z0 = svuzp2q_mf8 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_mf8_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_mf8_tied2, svmfloat8_t,
+ z0 = svuzp2q_mf8 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_mf8_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_mf8_untied, svmfloat8_t,
+ z0 = svuzp2q_mf8 (z1, z2),
+ z0 = svuzp2q (z1, z2))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1_mf8_tied1:
+** zip1 z0\.b, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (zip1_mf8_tied1, svmfloat8_t,
+ z0 = svzip1_mf8 (z0, z1),
+ z0 = svzip1 (z0, z1))
+
+/*
+** zip1_mf8_tied2:
+** zip1 z0\.b, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (zip1_mf8_tied2, svmfloat8_t,
+ z0 = svzip1_mf8 (z1, z0),
+ z0 = svzip1 (z1, z0))
+
+/*
+** zip1_mf8_untied:
+** zip1 z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (zip1_mf8_untied, svmfloat8_t,
+ z0 = svzip1_mf8 (z1, z2),
+ z0 = svzip1 (z1, z2))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_mf8_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_mf8_tied1, svmfloat8_t,
+ z0 = svzip1q_mf8 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_mf8_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_mf8_tied2, svmfloat8_t,
+ z0 = svzip1q_mf8 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_mf8_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_mf8_untied, svmfloat8_t,
+ z0 = svzip1q_mf8 (z1, z2),
+ z0 = svzip1q (z1, z2))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2_mf8_tied1:
+** zip2 z0\.b, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_Z (zip2_mf8_tied1, svmfloat8_t,
+ z0 = svzip2_mf8 (z0, z1),
+ z0 = svzip2 (z0, z1))
+
+/*
+** zip2_mf8_tied2:
+** zip2 z0\.b, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (zip2_mf8_tied2, svmfloat8_t,
+ z0 = svzip2_mf8 (z1, z0),
+ z0 = svzip2 (z1, z0))
+
+/*
+** zip2_mf8_untied:
+** zip2 z0\.b, z1\.b, z2\.b
+** ret
+*/
+TEST_UNIFORM_Z (zip2_mf8_untied, svmfloat8_t,
+ z0 = svzip2_mf8 (z1, z2),
+ z0 = svzip2 (z1, z2))
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_mf8_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_mf8_tied1, svmfloat8_t,
+ z0 = svzip2q_mf8 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_mf8_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_mf8_tied2, svmfloat8_t,
+ z0 = svzip2q_mf8 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_mf8_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_mf8_untied, svmfloat8_t,
+ z0 = svzip2q_mf8 (z1, z2),
+ z0 = svzip2q (z1, z2))
@@ -14,6 +14,7 @@ svuint8_t ret_u8 (void) { return svdup_u8 (0); }
svuint16_t ret_u16 (void) { return svdup_u16 (0); }
svuint32_t ret_u32 (void) { return svdup_u32 (0); }
svuint64_t ret_u64 (void) { return svdup_u64 (0); }
+svmfloat8_t ret_mf8 (void) { return svundef_mf8 (); }
svbfloat16_t ret_bf16 (void) { return svundef_bf16 (); }
svfloat16_t ret_f16 (void) { return svdup_f16 (0); }
svfloat32_t ret_f32 (void) { return svdup_f32 (0); }
@@ -27,6 +28,7 @@ svuint8x2_t ret_u8x2 (void) { return svundef2_u8 (); }
svuint16x2_t ret_u16x2 (void) { return svundef2_u16 (); }
svuint32x2_t ret_u32x2 (void) { return svundef2_u32 (); }
svuint64x2_t ret_u64x2 (void) { return svundef2_u64 (); }
+svmfloat8x2_t ret_mf8x2 (void) { return svundef2_mf8 (); }
svbfloat16x2_t ret_bf16x2 (void) { return svundef2_bf16 (); }
svfloat16x2_t ret_f16x2 (void) { return svundef2_f16 (); }
svfloat32x2_t ret_f32x2 (void) { return svundef2_f32 (); }
@@ -40,6 +42,7 @@ svuint8x3_t ret_u8x3 (void) { return svundef3_u8 (); }
svuint16x3_t ret_u16x3 (void) { return svundef3_u16 (); }
svuint32x3_t ret_u32x3 (void) { return svundef3_u32 (); }
svuint64x3_t ret_u64x3 (void) { return svundef3_u64 (); }
+svmfloat8x3_t ret_mf8x3 (void) { return svundef3_mf8 (); }
svbfloat16x3_t ret_bf16x3 (void) { return svundef3_bf16 (); }
svfloat16x3_t ret_f16x3 (void) { return svundef3_f16 (); }
svfloat32x3_t ret_f32x3 (void) { return svundef3_f32 (); }
@@ -53,6 +56,7 @@ svuint8x4_t ret_u8x4 (void) { return svundef4_u8 (); }
svuint16x4_t ret_u16x4 (void) { return svundef4_u16 (); }
svuint32x4_t ret_u32x4 (void) { return svundef4_u32 (); }
svuint64x4_t ret_u64x4 (void) { return svundef4_u64 (); }
+svmfloat8x4_t ret_mf8x4 (void) { return svundef4_mf8 (); }
svbfloat16x4_t ret_bf16x4 (void) { return svundef4_bf16 (); }
svfloat16x4_t ret_f16x4 (void) { return svundef4_f16 (); }
svfloat32x4_t ret_f32x4 (void) { return svundef4_f32 (); }
@@ -70,6 +74,7 @@ svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); }
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_mf8\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32\n} } } */
@@ -83,6 +88,7 @@ svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); }
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x2\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_mf8x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x2\n} } } */
@@ -97,6 +103,7 @@ svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); }
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x3\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_mf8x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x3\n} } } */
@@ -110,6 +117,7 @@ svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); }
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x4\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_mf8x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x4\n} } } */
@@ -14,6 +14,7 @@ void fn_u8 (svuint8_t x) {}
void fn_u16 (svuint16_t x) {}
void fn_u32 (svuint32_t x) {}
void fn_u64 (svuint64_t x) {}
+void fn_mf8 (svmfloat8_t x) {}
void fn_bf16 (svbfloat16_t x) {}
void fn_f16 (svfloat16_t x) {}
void fn_f32 (svfloat32_t x) {}
@@ -27,6 +28,7 @@ void fn_u8x2 (svuint8x2_t x) {}
void fn_u16x2 (svuint16x2_t x) {}
void fn_u32x2 (svuint32x2_t x) {}
void fn_u64x2 (svuint64x2_t x) {}
+void fn_mf8x2 (svmfloat8x2_t x) {}
void fn_bf16x2 (svbfloat16x2_t x) {}
void fn_f16x2 (svfloat16x2_t x) {}
void fn_f32x2 (svfloat32x2_t x) {}
@@ -40,6 +42,7 @@ void fn_u8x3 (svuint8x3_t x) {}
void fn_u16x3 (svuint16x3_t x) {}
void fn_u32x3 (svuint32x3_t x) {}
void fn_u64x3 (svuint64x3_t x) {}
+void fn_mf8x3 (svmfloat8x3_t x) {}
void fn_bf16x3 (svbfloat16x3_t x) {}
void fn_f16x3 (svfloat16x3_t x) {}
void fn_f32x3 (svfloat32x3_t x) {}
@@ -53,6 +56,7 @@ void fn_u8x4 (svuint8x4_t x) {}
void fn_u16x4 (svuint16x4_t x) {}
void fn_u32x4 (svuint32x4_t x) {}
void fn_u64x4 (svuint64x4_t x) {}
+void fn_mf8x4 (svmfloat8x4_t x) {}
void fn_bf16x4 (svbfloat16x4_t x) {}
void fn_f16x4 (svfloat16x4_t x) {}
void fn_f32x4 (svfloat32x4_t x) {}
@@ -70,6 +74,7 @@ void fn_f64x4 (svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */
@@ -83,6 +88,7 @@ void fn_f64x4 (svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */
@@ -96,6 +102,7 @@ void fn_f64x4 (svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */
@@ -109,6 +116,7 @@ void fn_f64x4 (svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x4\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x4\n} } } */
@@ -10,6 +10,7 @@ void fn_u8 (float d0, float d1, float d2, float d3, svuint8_t x) {}
void fn_u16 (float d0, float d1, float d2, float d3, svuint16_t x) {}
void fn_u32 (float d0, float d1, float d2, float d3, svuint32_t x) {}
void fn_u64 (float d0, float d1, float d2, float d3, svuint64_t x) {}
+void fn_mf8 (float d0, float d1, float d2, float d3, svmfloat8_t x) {}
void fn_bf16 (float d0, float d1, float d2, float d3, svbfloat16_t x) {}
void fn_f16 (float d0, float d1, float d2, float d3, svfloat16_t x) {}
void fn_f32 (float d0, float d1, float d2, float d3, svfloat32_t x) {}
@@ -23,6 +24,7 @@ void fn_u8x2 (float d0, float d1, float d2, float d3, svuint8x2_t x) {}
void fn_u16x2 (float d0, float d1, float d2, float d3, svuint16x2_t x) {}
void fn_u32x2 (float d0, float d1, float d2, float d3, svuint32x2_t x) {}
void fn_u64x2 (float d0, float d1, float d2, float d3, svuint64x2_t x) {}
+void fn_mf8x2 (float d0, float d1, float d2, float d3, svmfloat8x2_t x) {}
void fn_bf16x2 (float d0, float d1, float d2, float d3, svbfloat16x2_t x) {}
void fn_f16x2 (float d0, float d1, float d2, float d3, svfloat16x2_t x) {}
void fn_f32x2 (float d0, float d1, float d2, float d3, svfloat32x2_t x) {}
@@ -36,6 +38,7 @@ void fn_u8x3 (float d0, float d1, float d2, float d3, svuint8x3_t x) {}
void fn_u16x3 (float d0, float d1, float d2, float d3, svuint16x3_t x) {}
void fn_u32x3 (float d0, float d1, float d2, float d3, svuint32x3_t x) {}
void fn_u64x3 (float d0, float d1, float d2, float d3, svuint64x3_t x) {}
+void fn_mf8x3 (float d0, float d1, float d2, float d3, svmfloat8x3_t x) {}
void fn_bf16x3 (float d0, float d1, float d2, float d3, svbfloat16x3_t x) {}
void fn_f16x3 (float d0, float d1, float d2, float d3, svfloat16x3_t x) {}
void fn_f32x3 (float d0, float d1, float d2, float d3, svfloat32x3_t x) {}
@@ -49,6 +52,7 @@ void fn_u8x4 (float d0, float d1, float d2, float d3, svuint8x4_t x) {}
void fn_u16x4 (float d0, float d1, float d2, float d3, svuint16x4_t x) {}
void fn_u32x4 (float d0, float d1, float d2, float d3, svuint32x4_t x) {}
void fn_u64x4 (float d0, float d1, float d2, float d3, svuint64x4_t x) {}
+void fn_mf8x4 (float d0, float d1, float d2, float d3, svmfloat8x4_t x) {}
void fn_bf16x4 (float d0, float d1, float d2, float d3, svbfloat16x4_t x) {}
void fn_f16x4 (float d0, float d1, float d2, float d3, svfloat16x4_t x) {}
void fn_f32x4 (float d0, float d1, float d2, float d3, svfloat32x4_t x) {}
@@ -62,6 +66,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3, svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */
@@ -75,6 +80,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3, svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */
@@ -88,6 +94,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3, svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */
@@ -101,6 +108,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3, svfloat64x4_t x) {}
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x4\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x4\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x4\n} } } */
@@ -18,6 +18,8 @@ void fn_u32 (float d0, float d1, float d2, float d3,
float d4, svuint32_t x) {}
void fn_u64 (float d0, float d1, float d2, float d3,
float d4, svuint64_t x) {}
+void fn_mf8 (float d0, float d1, float d2, float d3,
+ float d4, svmfloat8_t x) {}
void fn_bf16 (float d0, float d1, float d2, float d3,
float d4, svbfloat16_t x) {}
void fn_f16 (float d0, float d1, float d2, float d3,
@@ -43,6 +45,8 @@ void fn_u32x2 (float d0, float d1, float d2, float d3,
float d4, svuint32x2_t x) {}
void fn_u64x2 (float d0, float d1, float d2, float d3,
float d4, svuint64x2_t x) {}
+void fn_mf8x2 (float d0, float d1, float d2, float d3,
+ float d4, svmfloat8x2_t x) {}
void fn_bf16x2 (float d0, float d1, float d2, float d3,
float d4, svbfloat16x2_t x) {}
void fn_f16x2 (float d0, float d1, float d2, float d3,
@@ -68,6 +72,8 @@ void fn_u32x3 (float d0, float d1, float d2, float d3,
float d4, svuint32x3_t x) {}
void fn_u64x3 (float d0, float d1, float d2, float d3,
float d4, svuint64x3_t x) {}
+void fn_mf8x3 (float d0, float d1, float d2, float d3,
+ float d4, svmfloat8x3_t x) {}
void fn_bf16x3 (float d0, float d1, float d2, float d3,
float d4, svbfloat16x3_t x) {}
void fn_f16x3 (float d0, float d1, float d2, float d3,
@@ -93,6 +99,8 @@ void fn_u32x4 (float d0, float d1, float d2, float d3,
float d4, svuint32x4_t x) {}
void fn_u64x4 (float d0, float d1, float d2, float d3,
float d4, svuint64x4_t x) {}
+void fn_mf8x4 (float d0, float d1, float d2, float d3,
+ float d4, svmfloat8x4_t x) {}
void fn_bf16x4 (float d0, float d1, float d2, float d3,
float d4, svbfloat16x4_t x) {}
void fn_f16x4 (float d0, float d1, float d2, float d3,
@@ -110,6 +118,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */
@@ -123,6 +132,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */
@@ -136,6 +146,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */
@@ -149,6 +160,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */
+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_mf8x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */
@@ -18,6 +18,8 @@ void fn_u32 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint32_t x) {}
void fn_u64 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint64_t x) {}
+void fn_mf8 (float d0, float d1, float d2, float d3,
+ float d4, float d5, svmfloat8_t x) {}
void fn_bf16 (float d0, float d1, float d2, float d3,
float d4, float d5, svbfloat16_t x) {}
void fn_f16 (float d0, float d1, float d2, float d3,
@@ -43,6 +45,8 @@ void fn_u32x2 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint32x2_t x) {}
void fn_u64x2 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint64x2_t x) {}
+void fn_mf8x2 (float d0, float d1, float d2, float d3,
+ float d4, float d5, svmfloat8x2_t x) {}
void fn_bf16x2 (float d0, float d1, float d2, float d3,
float d4, float d5, svbfloat16x2_t x) {}
void fn_f16x2 (float d0, float d1, float d2, float d3,
@@ -68,6 +72,8 @@ void fn_u32x3 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint32x3_t x) {}
void fn_u64x3 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint64x3_t x) {}
+void fn_mf8x3 (float d0, float d1, float d2, float d3,
+ float d4, float d5, svmfloat8x3_t x) {}
void fn_bf16x3 (float d0, float d1, float d2, float d3,
float d4, float d5, svbfloat16x3_t x) {}
void fn_f16x3 (float d0, float d1, float d2, float d3,
@@ -93,6 +99,8 @@ void fn_u32x4 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint32x4_t x) {}
void fn_u64x4 (float d0, float d1, float d2, float d3,
float d4, float d5, svuint64x4_t x) {}
+void fn_mf8x4 (float d0, float d1, float d2, float d3,
+ float d4, float d5, svmfloat8x4_t x) {}
void fn_bf16x4 (float d0, float d1, float d2, float d3,
float d4, float d5, svbfloat16x4_t x) {}
void fn_f16x4 (float d0, float d1, float d2, float d3,
@@ -110,6 +118,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */
@@ -123,6 +132,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */
@@ -136,6 +146,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x3\n} } } */
+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_mf8x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x3\n} } } */
@@ -149,6 +160,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */
+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_mf8x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */
@@ -18,6 +18,8 @@ void fn_u32 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint32_t x) {}
void fn_u64 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint64_t x) {}
+void fn_mf8 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, svmfloat8_t x) {}
void fn_bf16 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svbfloat16_t x) {}
void fn_f16 (float d0, float d1, float d2, float d3,
@@ -43,6 +45,8 @@ void fn_u32x2 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint32x2_t x) {}
void fn_u64x2 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint64x2_t x) {}
+void fn_mf8x2 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, svmfloat8x2_t x) {}
void fn_bf16x2 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svbfloat16x2_t x) {}
void fn_f16x2 (float d0, float d1, float d2, float d3,
@@ -68,6 +72,8 @@ void fn_u32x3 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint32x3_t x) {}
void fn_u64x3 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint64x3_t x) {}
+void fn_mf8x3 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, svmfloat8x3_t x) {}
void fn_bf16x3 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svbfloat16x3_t x) {}
void fn_f16x3 (float d0, float d1, float d2, float d3,
@@ -93,6 +99,8 @@ void fn_u32x4 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint32x4_t x) {}
void fn_u64x4 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svuint64x4_t x) {}
+void fn_mf8x4 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, svmfloat8x4_t x) {}
void fn_bf16x4 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, svbfloat16x4_t x) {}
void fn_f16x4 (float d0, float d1, float d2, float d3,
@@ -110,6 +118,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */
+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_mf8\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */
/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */
@@ -123,6 +132,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x2\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x2\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x2\n} } } */
+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_mf8x2\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x2\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x2\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x2\n} } } */
@@ -136,6 +146,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x3\n} } } */
+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_mf8x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x3\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x3\n} } } */
@@ -149,6 +160,7 @@ void fn_f64x4 (float d0, float d1, float d2, float d3,
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */
+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_mf8x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */
/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */
@@ -18,6 +18,8 @@ void fn_u32 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint32_t x) {}
void fn_u64 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint64_t x) {}
+void fn_mf8 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, float d7, svmfloat8_t x) {}
void fn_bf16 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svbfloat16_t x) {}
void fn_f16 (float d0, float d1, float d2, float d3,
@@ -43,6 +45,8 @@ void fn_u32x2 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint32x2_t x) {}
void fn_u64x2 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint64x2_t x) {}
+void fn_mf8x2 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, float d7, svmfloat8x2_t x) {}
void fn_bf16x2 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svbfloat16x2_t x) {}
void fn_f16x2 (float d0, float d1, float d2, float d3,
@@ -68,6 +72,8 @@ void fn_u32x3 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint32x3_t x) {}
void fn_u64x3 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint64x3_t x) {}
+void fn_mf8x3 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, float d7, svmfloat8x3_t x) {}
void fn_bf16x3 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svbfloat16x3_t x) {}
void fn_f16x3 (float d0, float d1, float d2, float d3,
@@ -93,6 +99,8 @@ void fn_u32x4 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint32x4_t x) {}
void fn_u64x4 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svuint64x4_t x) {}
+void fn_mf8x4 (float d0, float d1, float d2, float d3,
+ float d4, float d5, float d6, float d7, svmfloat8x4_t x) {}
void fn_bf16x4 (float d0, float d1, float d2, float d3,
float d4, float d5, float d6, float d7, svbfloat16x4_t x) {}
void fn_f16x4 (float d0, float d1, float d2, float d3,
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC aarch64 "arm_sve.h"
+
+/*
+** callee:
+** addvl sp, sp, #-1
+** str (p[4-7]), \[sp\]
+** ptrue \1\.b, all
+** (
+** ld1b (z[0-9]+\.b), \1/z, \[x1, #1, mul vl\]
+** ld1b (z[0-9]+\.b), \1/z, \[x1\]
+** st2b {\3 - \2}, p0, \[x0\]
+** |
+** ld1b (z[0-9]+\.b), \1/z, \[x1\]
+** ld1b (z[0-9]+\.b), \1/z, \[x1, #1, mul vl\]
+** st2b {\4 - \5}, p0, \[x0\]
+** )
+** st4b {z0\.b - z3\.b}, p1, \[x0\]
+** st3b {z4\.b - z6\.b}, p2, \[x0\]
+** st1b z7\.b, p3, \[x0\]
+** ldr \1, \[sp\]
+** addvl sp, sp, #1
+** ret
+*/
+void __attribute__((noipa))
+callee (void *x0, svmfloat8x4_t z0, svmfloat8x3_t z4, svmfloat8x2_t stack,
+ svmfloat8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3)
+{
+ svst2 (p0, x0, stack);
+ svst4 (p1, x0, z0);
+ svst3 (p2, x0, z4);
+ svst1_mf8 (p3, x0, z7);
+}
+
+void __attribute__((noipa))
+caller (void *x0)
+{
+ svbool_t pg;
+ pg = svptrue_b8 ();
+ callee (x0,
+ svld4_vnum_mf8 (pg, x0, -8),
+ svld3_vnum_mf8 (pg, x0, -3),
+ svld2_vnum_mf8 (pg, x0, 0),
+ svld1_vnum_mf8 (pg, x0, 2),
+ svptrue_pat_b8 (SV_VL1),
+ svptrue_pat_b16 (SV_VL2),
+ svptrue_pat_b32 (SV_VL3),
+ svptrue_pat_b64 (SV_VL4));
+}
+
+/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[(?:x1|sp)\]\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[(?:x1|sp), #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC aarch64 "arm_sve.h"
+
+/*
+** callee:
+** (
+** ldr (z[0-9]+), \[x1, #1, mul vl\]
+** ldr (z[0-9]+), \[x1\]
+** st2b {\2\.b - \1\.b}, p0, \[x0\]
+** |
+** ldr (z[0-9]+), \[x1\]
+** ldr (z[0-9]+), \[x1, #1, mul vl\]
+** st2b {\3\.b - \4\.b}, p0, \[x0\]
+** )
+** st4b {z0\.b - z3\.b}, p1, \[x0\]
+** st3b {z4\.b - z6\.b}, p2, \[x0\]
+** st1b z7\.b, p3, \[x0\]
+** ret
+*/
+void __attribute__((noipa))
+callee (void *x0, svmfloat8x4_t z0, svmfloat8x3_t z4, svmfloat8x2_t stack,
+ svmfloat8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3)
+{
+ svst2 (p0, x0, stack);
+ svst4 (p1, x0, z0);
+ svst3 (p2, x0, z4);
+ svst1_mf8 (p3, x0, z7);
+}
+
+void __attribute__((noipa))
+caller (void *x0)
+{
+ svbool_t pg;
+ pg = svptrue_b8 ();
+ callee (x0,
+ svld4_vnum_mf8 (pg, x0, -8),
+ svld3_vnum_mf8 (pg, x0, -3),
+ svld2_vnum_mf8 (pg, x0, 0),
+ svld1_vnum_mf8 (pg, x0, 2),
+ svptrue_pat_b8 (SV_VL1),
+ svptrue_pat_b16 (SV_VL2),
+ svptrue_pat_b32 (SV_VL3),
+ svptrue_pat_b64 (SV_VL4));
+}
+
+/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[(?:x1|sp)\]\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[(?:x1|sp), #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */
new file mode 100644
@@ -0,0 +1,71 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -fno-cprop-registers -fdisable-rtl-combine -g" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC aarch64 "arm_sve.h"
+
+/*
+** callee1:
+** ptrue p3\.b, all
+** ...
+** ld1b (z[0-9]+\.b), p3/z, \[x1, #3, mul vl\]
+** ...
+** st4b {z[0-9]+\.b - \1}, p0, \[x0\]
+** st2b {z3\.b - z4\.b}, p1, \[x0\]
+** st3b {z5\.b - z7\.b}, p2, \[x0\]
+** ret
+*/
+void __attribute__((noipa))
+callee1 (void *x0, svmfloat8x3_t z0, svmfloat8x2_t z3, svmfloat8x3_t z5,
+ svmfloat8x4_t stack1, svmfloat8_t stack2, svbool_t p0,
+ svbool_t p1, svbool_t p2)
+{
+ svst4_mf8 (p0, x0, stack1);
+ svst2_mf8 (p1, x0, z3);
+ svst3_mf8 (p2, x0, z5);
+}
+
+/*
+** callee2:
+** ptrue p3\.b, all
+** ld1b (z[0-9]+\.b), p3/z, \[x2\]
+** st1b \1, p0, \[x0\]
+** st2b {z3\.b - z4\.b}, p1, \[x0\]
+** st3b {z0\.b - z2\.b}, p2, \[x0\]
+** ret
+*/
+void __attribute__((noipa))
+callee2 (void *x0, svmfloat8x3_t z0, svmfloat8x2_t z3, svmfloat8x3_t z5,
+ svmfloat8x4_t stack1, svmfloat8_t stack2, svbool_t p0,
+ svbool_t p1, svbool_t p2)
+{
+ svst1_mf8 (p0, x0, stack2);
+ svst2_mf8 (p1, x0, z3);
+ svst3_mf8 (p2, x0, z0);
+}
+
+void __attribute__((noipa))
+caller (void *x0)
+{
+ svbool_t pg;
+ pg = svptrue_b8 ();
+ callee1 (x0,
+ svld3_vnum_mf8 (pg, x0, -9),
+ svld2_vnum_mf8 (pg, x0, -2),
+ svld3_vnum_mf8 (pg, x0, 0),
+ svld4_vnum_mf8 (pg, x0, 8),
+ svld1_vnum_mf8 (pg, x0, 5),
+ svptrue_pat_b8 (SV_VL1),
+ svptrue_pat_b16 (SV_VL2),
+ svptrue_pat_b32 (SV_VL3));
+}
+
+/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */
+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -fno-cprop-registers -fdisable-rtl-combine -g" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC aarch64 "arm_sve.h"
+
+/*
+** callee1:
+** ...
+** ldr (z[0-9]+), \[x1, #3, mul vl\]
+** ...
+** st4b {z[0-9]+\.b - \1\.b}, p0, \[x0\]
+** st2b {z3\.b - z4\.b}, p1, \[x0\]
+** st3b {z5\.b - z7\.b}, p2, \[x0\]
+** ret
+*/
+void __attribute__((noipa))
+callee1 (void *x0, svmfloat8x3_t z0, svmfloat8x2_t z3, svmfloat8x3_t z5,
+ svmfloat8x4_t stack1, svmfloat8_t stack2, svbool_t p0,
+ svbool_t p1, svbool_t p2)
+{
+ svst4_mf8 (p0, x0, stack1);
+ svst2_mf8 (p1, x0, z3);
+ svst3_mf8 (p2, x0, z5);
+}
+
+/*
+** callee2:
+** ptrue p3\.b, all
+** ld1b (z[0-9]+\.b), p3/z, \[x2\]
+** st1b \1, p0, \[x0\]
+** st2b {z3\.b - z4\.b}, p1, \[x0\]
+** st3b {z0\.b - z2\.b}, p2, \[x0\]
+** ret
+*/
+void __attribute__((noipa))
+callee2 (void *x0, svmfloat8x3_t z0, svmfloat8x2_t z3, svmfloat8x3_t z5,
+ svmfloat8x4_t stack1, svmfloat8_t stack2, svbool_t p0,
+ svbool_t p1, svbool_t p2)
+{
+ svst1_mf8 (p0, x0, stack2);
+ svst2_mf8 (p1, x0, z3);
+ svst3_mf8 (p2, x0, z0);
+}
+
+void __attribute__((noipa))
+caller (void *x0)
+{
+ svbool_t pg;
+ pg = svptrue_b8 ();
+ callee1 (x0,
+ svld3_vnum_mf8 (pg, x0, -9),
+ svld2_vnum_mf8 (pg, x0, -2),
+ svld3_vnum_mf8 (pg, x0, 0),
+ svld4_vnum_mf8 (pg, x0, 8),
+ svld1_vnum_mf8 (pg, x0, 5),
+ svptrue_pat_b8 (SV_VL1),
+ svptrue_pat_b16 (SV_VL2),
+ svptrue_pat_b32 (SV_VL3));
+}
+
+/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */
+/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */
+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */
+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */
@@ -2,6 +2,7 @@
#include <arm_sve.h>
+typedef mfloat8_t mfloat8x32_t __attribute__((vector_size (32)));
typedef bfloat16_t bfloat16x16_t __attribute__((vector_size (32)));
typedef float16_t float16x16_t __attribute__((vector_size (32)));
typedef float32_t float32x8_t __attribute__((vector_size (32)));
@@ -15,6 +16,7 @@ typedef uint16_t uint16x16_t __attribute__((vector_size (32)));
typedef uint32_t uint32x8_t __attribute__((vector_size (32)));
typedef uint64_t uint64x4_t __attribute__((vector_size (32)));
+void mfloat8_callee (mfloat8x32_t);
void bfloat16_callee (bfloat16x16_t);
void float16_callee (float16x16_t);
void float32_callee (float32x8_t);
@@ -28,6 +30,12 @@ void uint16_callee (uint16x16_t);
void uint32_callee (uint32x8_t);
void uint64_callee (uint64x4_t);
+void
+mfloat8_caller (mfloat8_t val)
+{
+ mfloat8_callee (svdup_mf8 (val));
+}
+
void
bfloat16_caller (bfloat16_t val)
{
@@ -100,8 +108,8 @@ uint64_caller (void)
uint64_callee (svindex_u64 (1, 4));
}
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0\]} 3 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0\]} 4 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0\]} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0\]} 3 } } */
-/* { dg-final { scan-assembler-times {\tadd\tx0, sp, #?16\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tadd\tx0, sp, #?16\n} 13 } } */
@@ -2,6 +2,7 @@
#include <arm_sve.h>
+typedef mfloat8_t mfloat8x32_t __attribute__((vector_size (32)));
typedef bfloat16_t bfloat16x16_t __attribute__((vector_size (32)));
typedef float16_t float16x16_t __attribute__((vector_size (32)));
typedef float32_t float32x8_t __attribute__((vector_size (32)));
@@ -15,6 +16,7 @@ typedef uint16_t uint16x16_t __attribute__((vector_size (32)));
typedef uint32_t uint32x8_t __attribute__((vector_size (32)));
typedef uint64_t uint64x4_t __attribute__((vector_size (32)));
+void mfloat8_callee (svmfloat8_t);
void bfloat16_callee (svbfloat16_t);
void float16_callee (svfloat16_t);
void float32_callee (svfloat32_t);
@@ -28,6 +30,12 @@ void uint16_callee (svuint16_t);
void uint32_callee (svuint32_t);
void uint64_callee (svuint64_t);
+void
+mfloat8_caller (mfloat8x32_t arg)
+{
+ mfloat8_callee (arg);
+}
+
void
bfloat16_caller (bfloat16x16_t arg)
{
@@ -100,7 +108,7 @@ uint64_caller (uint64x4_t arg)
uint64_callee (arg);
}
-/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]} 3 } } */
/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0\]} 4 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0\]} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0\]} 3 } } */
@@ -25,6 +25,14 @@ CALLEE (s8, __SVInt8_t)
*/
CALLEE (u8, __SVUint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, all
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, __SVMfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, all
@@ -115,7 +123,7 @@ CALLEE (f64, __SVFloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, __SVInt8_t)
*/
CALLER (u8, __SVUint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, __SVMfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, __SVFloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, __SVBfloat16_t)
+CALLER_NON_NUMERIC (bf16, __SVBfloat16_t)
/*
** caller_s32:
@@ -25,6 +25,14 @@ CALLEE (s8, __SVInt8_t)
*/
CALLEE (u8, __SVUint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl128
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, __SVMfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl128
@@ -115,7 +123,7 @@ CALLEE (f64, __SVFloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, __SVInt8_t)
*/
CALLER (u8, __SVUint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, __SVMfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, __SVFloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, __SVBfloat16_t)
+CALLER_NON_NUMERIC (bf16, __SVBfloat16_t)
/*
** caller_s32:
@@ -25,6 +25,14 @@ CALLEE (s8, __SVInt8_t)
*/
CALLEE (u8, __SVUint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl16
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, __SVMfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl16
@@ -115,7 +123,7 @@ CALLEE (f64, __SVFloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, __SVInt8_t)
*/
CALLER (u8, __SVUint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, __SVMfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, __SVFloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, __SVBfloat16_t)
+CALLER_NON_NUMERIC (bf16, __SVBfloat16_t)
/*
** caller_s32:
@@ -25,6 +25,14 @@ CALLEE (s8, __SVInt8_t)
*/
CALLEE (u8, __SVUint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl256
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, __SVMfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl256
@@ -115,7 +123,7 @@ CALLEE (f64, __SVFloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, __SVInt8_t)
*/
CALLER (u8, __SVUint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, __SVMfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, __SVFloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, __SVBfloat16_t)
+CALLER_NON_NUMERIC (bf16, __SVBfloat16_t)
/*
** caller_s32:
@@ -25,6 +25,14 @@ CALLEE (s8, __SVInt8_t)
*/
CALLEE (u8, __SVUint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl32
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, __SVMfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl32
@@ -115,7 +123,7 @@ CALLEE (f64, __SVFloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, __SVInt8_t)
*/
CALLER (u8, __SVUint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, __SVMfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, __SVFloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, __SVBfloat16_t)
+CALLER_NON_NUMERIC (bf16, __SVBfloat16_t)
/*
** caller_s32:
@@ -25,6 +25,14 @@ CALLEE (s8, __SVInt8_t)
*/
CALLEE (u8, __SVUint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl64
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, __SVMfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl64
@@ -115,7 +123,7 @@ CALLEE (f64, __SVFloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, __SVInt8_t)
*/
CALLER (u8, __SVUint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, __SVMfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, __SVFloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, __SVBfloat16_t)
+CALLER_NON_NUMERIC (bf16, __SVBfloat16_t)
/*
** caller_s32:
@@ -27,6 +27,14 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, all
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, all
@@ -115,7 +123,7 @@ CALLEE (f64, svfloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, svfloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, svbfloat16_t)
+CALLER_NON_NUMERIC (bf16, svbfloat16_t)
/*
** caller_s32:
@@ -27,6 +27,14 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl128
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl128
@@ -115,7 +123,7 @@ CALLEE (f64, svfloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, svfloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, svbfloat16_t)
+CALLER_NON_NUMERIC (bf16, svbfloat16_t)
/*
** caller_s32:
@@ -27,6 +27,14 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl16
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl16
@@ -115,7 +123,7 @@ CALLEE (f64, svfloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, svfloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, svbfloat16_t)
+CALLER_NON_NUMERIC (bf16, svbfloat16_t)
/*
** caller_s32:
@@ -27,6 +27,14 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl256
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl256
@@ -115,7 +123,7 @@ CALLEE (f64, svfloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, svfloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, svbfloat16_t)
+CALLER_NON_NUMERIC (bf16, svbfloat16_t)
/*
** caller_s32:
@@ -27,6 +27,14 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl32
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl32
@@ -115,7 +123,7 @@ CALLEE (f64, svfloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, svfloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, svbfloat16_t)
+CALLER_NON_NUMERIC (bf16, svbfloat16_t)
/*
** caller_s32:
@@ -27,6 +27,14 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl64
+** ld1b z0\.b, \1/z, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl64
@@ -115,7 +123,7 @@ CALLEE (f64, svfloat64_t)
return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \
}
-#define CALLER_BF16(SUFFIX, TYPE) \
+#define CALLER_NON_NUMERIC(SUFFIX, TYPE) \
typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \
__attribute__((noipa)) \
caller_##SUFFIX (TYPE *ptr1) \
@@ -147,6 +155,15 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+CALLER_NON_NUMERIC (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -189,7 +206,7 @@ CALLER (f16, svfloat16_t)
** ldp x29, x30, \[sp\], 16
** ret
*/
-CALLER_BF16 (bf16, svbfloat16_t)
+CALLER_NON_NUMERIC (bf16, svbfloat16_t)
/*
** caller_s32:
@@ -6,6 +6,7 @@
typedef int8_t svint8_t __attribute__ ((vector_size (32)));
typedef uint8_t svuint8_t __attribute__ ((vector_size (32)));
+typedef __mfp8 svmfloat8_t __attribute__ ((vector_size (32)));
typedef int16_t svint16_t __attribute__ ((vector_size (32)));
typedef uint16_t svuint16_t __attribute__ ((vector_size (32)));
@@ -53,6 +54,19 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** (
+** ld1 ({v.*}), \[x0\]
+** st1 \1, \[x8\]
+** |
+** ldp (q[0-9]+, q[0-9]+), \[x0\]
+** stp \2, \[x8\]
+** )
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** (
@@ -171,6 +185,16 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ldr b0, \[sp, 16\]
+** ldp x29, x30, \[sp\], 48
+** ret
+*/
+CALLER (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -6,6 +6,7 @@
typedef int8_t svint8_t __attribute__ ((vector_size (128)));
typedef uint8_t svuint8_t __attribute__ ((vector_size (128)));
+typedef __mfp8 svmfloat8_t __attribute__ ((vector_size (128)));
typedef int16_t svint16_t __attribute__ ((vector_size (128)));
typedef uint16_t svuint16_t __attribute__ ((vector_size (128)));
@@ -45,6 +46,15 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl128
+** ld1b (z[0-9]+)\.b, \1/z, \[x0\]
+** st1b \2\.b, \1, \[x8\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl128
@@ -166,6 +176,18 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\]
+** st1b \1, \2, \[[^]]*\]
+** ...
+** ret
+*/
+CALLER (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -6,6 +6,7 @@
typedef int8_t svint8_t __attribute__ ((vector_size (16)));
typedef uint8_t svuint8_t __attribute__ ((vector_size (16)));
+typedef __mfp8 svmfloat8_t __attribute__ ((vector_size (16)));
typedef int16_t svint16_t __attribute__ ((vector_size (16)));
typedef uint16_t svuint16_t __attribute__ ((vector_size (16)));
@@ -41,6 +42,13 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ldr q0, \[x0\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ldr q0, \[x0\]
@@ -140,6 +148,17 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ...
+** str q0, \[[^]]*\]
+** ...
+** ret
+*/
+CALLER (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -6,6 +6,7 @@
typedef int8_t svint8_t __attribute__ ((vector_size (256)));
typedef uint8_t svuint8_t __attribute__ ((vector_size (256)));
+typedef __mfp8 svmfloat8_t __attribute__ ((vector_size (256)));
typedef int16_t svint16_t __attribute__ ((vector_size (256)));
typedef uint16_t svuint16_t __attribute__ ((vector_size (256)));
@@ -45,6 +46,15 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl256
+** ld1b (z[0-9]+)\.b, \1/z, \[x0\]
+** st1b \2\.b, \1, \[x8\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl256
@@ -166,6 +176,18 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\]
+** st1b \1, \2, \[[^]]*\]
+** ...
+** ret
+*/
+CALLER (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -6,6 +6,7 @@
typedef int8_t svint8_t __attribute__ ((vector_size (32)));
typedef uint8_t svuint8_t __attribute__ ((vector_size (32)));
+typedef __mfp8 svmfloat8_t __attribute__ ((vector_size (32)));
typedef int16_t svint16_t __attribute__ ((vector_size (32)));
typedef uint16_t svuint16_t __attribute__ ((vector_size (32)));
@@ -45,6 +46,15 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl32
+** ld1b (z[0-9]+)\.b, \1/z, \[x0\]
+** st1b \2\.b, \1, \[x8\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl32
@@ -166,6 +176,18 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\]
+** st1b \1, \2, \[[^]]*\]
+** ...
+** ret
+*/
+CALLER (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -6,6 +6,7 @@
typedef int8_t svint8_t __attribute__ ((vector_size (64)));
typedef uint8_t svuint8_t __attribute__ ((vector_size (64)));
+typedef __mfp8 svmfloat8_t __attribute__ ((vector_size (64)));
typedef int16_t svint16_t __attribute__ ((vector_size (64)));
typedef uint16_t svuint16_t __attribute__ ((vector_size (64)));
@@ -45,6 +46,15 @@ CALLEE (s8, svint8_t)
*/
CALLEE (u8, svuint8_t)
+/*
+** callee_mf8:
+** ptrue (p[0-7])\.b, vl64
+** ld1b (z[0-9]+)\.b, \1/z, \[x0\]
+** st1b \2\.b, \1, \[x8\]
+** ret
+*/
+CALLEE (mf8, svmfloat8_t)
+
/*
** callee_s16:
** ptrue (p[0-7])\.b, vl64
@@ -166,6 +176,18 @@ CALLER (s8, svint8_t)
*/
CALLER (u8, svuint8_t)
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\]
+** st1b \1, \2, \[[^]]*\]
+** ...
+** ret
+*/
+CALLER (mf8, svmfloat8_t)
+
/*
** caller_s16:
** ...
@@ -60,6 +60,34 @@ caller_u8 (void)
return svtrn2 (svget2 (res, 1), svget2 (res, 0));
}
+/*
+** callee_mf8:
+** mov z0\.b, b2
+** mov z1\.b, b3
+** ret
+*/
+svmfloat8x2_t __attribute__((noipa))
+callee_mf8 (mfloat8_t h0, mfloat8_t h1, mfloat8_t h2, mfloat8_t h3)
+{
+ return svcreate2 (svdup_mf8 (h2), svdup_mf8 (h3));
+}
+
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** trn2 z0\.b, z1\.b, z0\.b
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+svmfloat8_t __attribute__((noipa))
+caller_mf8 (mfloat8_t h0, mfloat8_t h1, mfloat8_t h2, mfloat8_t h3)
+{
+ svmfloat8x2_t res;
+ res = callee_mf8 (h0, h1, h2, h3);
+ return svtrn2 (svget2 (res, 1), svget2 (res, 0));
+}
+
/*
** callee_s16:
** mov z0\.h, #1
@@ -66,6 +66,35 @@ caller_u8 (void)
svget3 (res, 0), svget3 (res, 1), svget3 (res, 2));
}
+/*
+** callee_mf8:
+** mov z0\.b, b0
+** mov z1\.b, b1
+** mov z2\.b, b2
+** ret
+*/
+svmfloat8x3_t __attribute__((noipa))
+callee_mf8 (mfloat8_t h0, mfloat8_t h1, mfloat8_t h2)
+{
+ return svcreate3 (svdup_mf8 (h0), svdup_mf8 (h1), svdup_mf8 (h2));
+}
+
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** trn2 z0\.b, z0\.b, z2\.b
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+svmfloat8_t __attribute__((noipa))
+caller_mf8 (mfloat8_t h0, mfloat8_t h1, mfloat8_t h2)
+{
+ svmfloat8x3_t res;
+ res = callee_mf8 (h0, h1, h2);
+ return svtrn2 (svget3 (res, 0), svget3 (res, 2));
+}
+
/*
** callee_s16:
** mov z0\.h, #1
@@ -74,6 +74,39 @@ caller_u8 (void)
svget4 (res, 3)));
}
+/*
+** callee_mf8:
+** mov z0\.b, b4
+** mov z1\.b, b5
+** mov z2\.b, b6
+** mov z3\.b, b7
+** ret
+*/
+svmfloat8x4_t __attribute__((noipa))
+callee_mf8 (mfloat8_t h0, mfloat8_t h1, mfloat8_t h2, mfloat8_t h3,
+ mfloat8_t h4, mfloat8_t h5, mfloat8_t h6, mfloat8_t h7)
+{
+ return svcreate4 (svdup_mf8 (h4), svdup_mf8 (h5),
+ svdup_mf8 (h6), svdup_mf8 (h7));
+}
+
+/*
+** caller_mf8:
+** ...
+** bl callee_mf8
+** trn2 z0\.b, z0\.b, z3\.b
+** ldp x29, x30, \[sp\], 16
+** ret
+*/
+svmfloat8_t __attribute__((noipa))
+caller_mf8 (mfloat8_t h0, mfloat8_t h1, mfloat8_t h2, mfloat8_t h3,
+ mfloat8_t h4, mfloat8_t h5, mfloat8_t h6, mfloat8_t h7)
+{
+ svmfloat8x4_t res;
+ res = callee_mf8 (h0, h1, h2, h3, h4, h5, h6, h7);
+ return svtrn2 (svget4 (res, 0), svget4 (res, 3));
+}
+
/*
** callee_s16:
** mov z0\.h, #1
new file mode 100644
@@ -0,0 +1,182 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-stack-clash-protection -fno-cprop-registers -fdisable-rtl-combine -g" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_sve.h>
+#include <stdarg.h>
+
+/*
+** callee_0:
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ...
+** st1b \1, \2, \[x0\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+callee_0 (mfloat8_t *ptr, ...)
+{
+ va_list va;
+ svmfloat8_t vec;
+
+ va_start (va, ptr);
+ vec = va_arg (va, svmfloat8_t);
+ va_end (va);
+ svst1 (svptrue_b8 (), ptr, vec);
+}
+
+/* FIXME: optimize the umov and mov pair. */
+/*
+** caller_0:
+** ...
+**	umov	(w[0-9]+), v0\.b\[0\]
+** ...
+** mov (z[0-9]+\.b), \1
+** ...
+** st1b \2, p[0-7], \[x1\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+caller_0 (mfloat8_t *ptr, mfloat8_t in)
+{
+ callee_0 (ptr, svdup_mf8 (in));
+}
+
+/*
+** callee_1:
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ...
+** st1b \1, p[0-7], \[x0\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+callee_1 (mfloat8_t *ptr, ...)
+{
+ va_list va;
+ svmfloat8_t vec;
+
+ va_start (va, ptr);
+ va_arg (va, int);
+ vec = va_arg (va, svmfloat8_t);
+ va_end (va);
+ svst1 (svptrue_b8 (), ptr, vec);
+}
+
+/* FIXME: optimize the umov and mov pair. */
+/*
+** caller_1:
+** ...
+**	umov	(w[0-9]+), v0\.b\[0\]
+** ...
+** mov (z[0-9]+\.b), \1
+** ...
+** st1b \2, p[0-7], \[x2\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+caller_1 (mfloat8_t *ptr, mfloat8_t in)
+{
+ callee_1 (ptr, 1, svdup_mf8 (in));
+}
+
+/*
+** callee_7:
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ...
+** st1b \1, p[0-7], \[x0\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+callee_7 (mfloat8_t *ptr, ...)
+{
+ va_list va;
+ svmfloat8_t vec;
+
+ va_start (va, ptr);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ vec = va_arg (va, svmfloat8_t);
+ va_end (va);
+ svst1 (svptrue_b8 (), ptr, vec);
+}
+
+/* FIXME: optimize the umov and mov pair. */
+/*
+** caller_7:
+** ...
+**	umov	(w[0-9]+), v0\.b\[0\]
+** ...
+** mov (z[0-9]+\.b), \1
+** ...
+** st1b \2, p[0-7], \[x7\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+caller_7 (mfloat8_t *ptr, mfloat8_t in)
+{
+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_mf8 (in));
+}
+
+/* FIXME: We should be able to get rid of the va_list object. */
+/*
+** callee_8:
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ...
+** st1b \3, \4, \[x0\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+callee_8 (mfloat8_t *ptr, ...)
+{
+ va_list va;
+ svmfloat8_t vec;
+
+ va_start (va, ptr);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ va_arg (va, int);
+ vec = va_arg (va, svmfloat8_t);
+ va_end (va);
+ svst1 (svptrue_b8 (), ptr, vec);
+}
+
+/* FIXME: optimize the umov and mov pair. */
+/*
+** caller_8:
+** ...
+**	umov	(w[0-9]+), v0\.b\[0\]
+** ...
+** mov (z[0-9]+\.b), \1
+** ...
+** st1b \2, p[0-7], \[(x[0-9]+)\]
+** ...
+** str \3, \[sp\]
+** ...
+** ret
+*/
+void __attribute__((noipa))
+caller_8 (mfloat8_t *ptr, mfloat8_t in)
+{
+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_mf8 (in));
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** tbl2_mf8_tied1:
+** tbl z0\.b, {z0\.b(?:, | - )z1\.b}, z4\.b
+** ret
+*/
+TEST_TBL2 (tbl2_mf8_tied1, svmfloat8x2_t, svmfloat8_t, svuint8_t,
+ z0_res = svtbl2_mf8 (z0, z4),
+ z0_res = svtbl2 (z0, z4))
+
+/*
+** tbl2_mf8_tied2:
+** tbl z0\.b, {z1\.b(?:, | - )z2\.b}, z0\.b
+** ret
+*/
+TEST_TBL2_REV (tbl2_mf8_tied2, svmfloat8x2_t, svmfloat8_t, svuint8_t,
+ z0_res = svtbl2_mf8 (z1, z0),
+ z0_res = svtbl2 (z1, z0))
+
+/*
+** tbl2_mf8_untied:
+** tbl z0\.b, {z2\.b(?:, | - )z3\.b}, z4\.b
+** ret
+*/
+TEST_TBL2 (tbl2_mf8_untied, svmfloat8x2_t, svmfloat8_t, svuint8_t,
+ z0_res = svtbl2_mf8 (z2, z4),
+ z0_res = svtbl2 (z2, z4))
+
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** tbx_mf8_tied1:
+** tbx z0\.b, z1\.b, z4\.b
+** ret
+*/
+TEST_DUAL_Z (tbx_mf8_tied1, svmfloat8_t, svuint8_t,
+ z0 = svtbx_mf8 (z0, z1, z4),
+ z0 = svtbx (z0, z1, z4))
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z (tbx_mf8_tied2, svmfloat8_t, svuint8_t,
+ z0 = svtbx_mf8 (z1, z0, z4),
+ z0 = svtbx (z1, z0, z4))
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z_REV (tbx_mf8_tied3, svmfloat8_t, svuint8_t,
+ z0_res = svtbx_mf8 (z4, z5, z0),
+ z0_res = svtbx (z4, z5, z0))
+
+/*
+** tbx_mf8_untied:
+** (
+** mov z0\.d, z1\.d
+** tbx z0\.b, z2\.b, z4\.b
+** |
+** tbx z1\.b, z2\.b, z4\.b
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (tbx_mf8_untied, svmfloat8_t, svuint8_t,
+ z0 = svtbx_mf8 (z1, z2, z4),
+ z0 = svtbx (z1, z2, z4))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** whilerw_rr_mf8:
+** whilerw p0\.b, x0, x1
+** ret
+*/
+TEST_COMPARE_S (whilerw_rr_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 (x0, x1),
+ p0 = svwhilerw (x0, x1))
+
+/*
+** whilerw_0r_mf8:
+** whilerw p0\.b, xzr, x1
+** ret
+*/
+TEST_COMPARE_S (whilerw_0r_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 ((const mfloat8_t *) 0, x1),
+ p0 = svwhilerw ((const mfloat8_t *) 0, x1))
+
+/*
+** whilerw_cr_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilerw p0\.b, \1, x1
+** ret
+*/
+TEST_COMPARE_S (whilerw_cr_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 ((const mfloat8_t *) 1073741824, x1),
+ p0 = svwhilerw ((const mfloat8_t *) 1073741824, x1))
+
+/*
+** whilerw_r0_mf8:
+** whilerw p0\.b, x0, xzr
+** ret
+*/
+TEST_COMPARE_S (whilerw_r0_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 (x0, (const mfloat8_t *) 0),
+ p0 = svwhilerw (x0, (const mfloat8_t *) 0))
+
+/*
+** whilerw_rc_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilerw p0\.b, x0, \1
+** ret
+*/
+TEST_COMPARE_S (whilerw_rc_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 (x0, (const mfloat8_t *) 1073741824),
+ p0 = svwhilerw (x0, (const mfloat8_t *) 1073741824))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** whilewr_rr_mf8:
+** whilewr p0\.b, x0, x1
+** ret
+*/
+TEST_COMPARE_S (whilewr_rr_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 (x0, x1),
+ p0 = svwhilewr (x0, x1))
+
+/*
+** whilewr_0r_mf8:
+** whilewr p0\.b, xzr, x1
+** ret
+*/
+TEST_COMPARE_S (whilewr_0r_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 ((const mfloat8_t *) 0, x1),
+ p0 = svwhilewr ((const mfloat8_t *) 0, x1))
+
+/*
+** whilewr_cr_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilewr p0\.b, \1, x1
+** ret
+*/
+TEST_COMPARE_S (whilewr_cr_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 ((const mfloat8_t *) 1073741824, x1),
+ p0 = svwhilewr ((const mfloat8_t *) 1073741824, x1))
+
+/*
+** whilewr_r0_mf8:
+** whilewr p0\.b, x0, xzr
+** ret
+*/
+TEST_COMPARE_S (whilewr_r0_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 (x0, (const mfloat8_t *) 0),
+ p0 = svwhilewr (x0, (const mfloat8_t *) 0))
+
+/*
+** whilewr_rc_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilewr p0\.b, x0, \1
+** ret
+*/
+TEST_COMPARE_S (whilewr_rc_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 (x0, (const mfloat8_t *) 1073741824),
+ p0 = svwhilewr (x0, (const mfloat8_t *) 1073741824))