aarch64: Add support for mfloat8x{8|16}_t types
Checks
Commit Message
I've tested this with a hacked in FP8 intrinsic. Is this patch ok for master,
or should it wait until we've implemented the intrinsics?
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.cc (MODE_d_mf8): New.
(MODE_q_mf8): New.
(QUAL_mf8): New.
(aarch64_lookup_simd_type_in_table): Match modal_float bit.
(aarch64_init_simd_builtin_types): Initialise FP8 simd types.
* config/aarch64/aarch64-builtins.h
(enum aarch64_type_qualifiers): Add qualifier_modal_float bit.
* config/aarch64/aarch64-simd-builtin-types.def:
Add Mfloat8x{8|16}_t types.
* config/aarch64/arm_neon.h: Add mfloat8x{8|16}_t typedefs.
Comments
Andrew Carlotti <andrew.carlotti@arm.com> writes:
> I've tested this with a hacked in FP8 intrinsic.
Looks good.
> Is this patch ok for master,
> or should it wait until we've implemented the intrinsics?
IMO it would be OK to apply...
> @@ -1190,6 +1194,10 @@ aarch64_init_simd_builtin_types (void)
> aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
> aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
>
> + /* Init FP8 element types. */
> + aarch64_simd_types[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
> + aarch64_simd_types[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;
> +
> for (i = 0; i < nelts; i++)
> {
> tree eltype = aarch64_simd_types[i].eltype;
> diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
> index 6111cd0d4fe1136feabb36a4077cf86d13b835e2..83b2da2e7dc0962c1e5957e25c8f6232c2148fe5 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
> @@ -52,3 +52,5 @@
> ENTRY (Float64x2_t, V2DF, none, 13)
> ENTRY (Bfloat16x4_t, V4BF, none, 14)
> ENTRY (Bfloat16x8_t, V8BF, none, 14)
> + ENTRY (Mfloat8x8_t, V8QI, modal_float, 13)
> + ENTRY (Mfloat8x16_t, V16QI, modal_float, 14)
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 0092314cf7586bf77c8272cff8156608f56eaedc..63f860886c3152e3de7f29833119f9f65b6774a2 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -73,6 +73,8 @@ typedef __Poly64_t poly64_t;
> typedef __Poly128_t poly128_t;
>
> typedef __mfp8 mfloat8_t;
> +typedef __Mfloat8x8_t mfloat8x8_t;
> +typedef __Mfloat8x16_t mfloat8x16_t;
>
> typedef __fp16 float16_t;
> typedef float float32_t;
...this part. But then we should be able to test that independently,
e.g. by adding it to gcc.target/aarch64/movv2x16qi.c & co.
I think...
> diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h
> index e326fe666769cedd6c06d0752ed30b9359745ac9..00db7a74885db4d97ed365e8e3e2d7cf7d8410a4 100644
> --- a/gcc/config/aarch64/aarch64-builtins.h
> +++ b/gcc/config/aarch64/aarch64-builtins.h
> @@ -54,6 +54,8 @@ enum aarch64_type_qualifiers
> /* Lane indices selected in quadtuplets. - must be in range, and flipped for
> bigendian. */
> qualifier_lane_quadtup_index = 0x1000,
> + /* Modal FP types. */
> + qualifier_modal_float = 0x2000,
> };
>
> #define ENTRY(E, M, Q, G) E,
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index 7d17df05a0f7e8c42596af7f1de4652b59563fe0..f6921ac1d5619e85d4239cd93d1bc84c1d55d4df 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -133,6 +133,7 @@
> #define MODE_d_f16 E_V4HFmode
> #define MODE_d_f32 E_V2SFmode
> #define MODE_d_f64 E_V1DFmode
> +#define MODE_d_mf8 E_V8QImode
> #define MODE_d_s8 E_V8QImode
> #define MODE_d_s16 E_V4HImode
> #define MODE_d_s32 E_V2SImode
> @@ -148,6 +149,7 @@
> #define MODE_q_f16 E_V8HFmode
> #define MODE_q_f32 E_V4SFmode
> #define MODE_q_f64 E_V2DFmode
> +#define MODE_q_mf8 E_V16QImode
> #define MODE_q_s8 E_V16QImode
> #define MODE_q_s16 E_V8HImode
> #define MODE_q_s32 E_V4SImode
> @@ -177,6 +179,7 @@
> #define QUAL_p16 qualifier_poly
> #define QUAL_p64 qualifier_poly
> #define QUAL_p128 qualifier_poly
> +#define QUAL_mf8 qualifier_modal_float
>
> #define LENGTH_d ""
> #define LENGTH_q "q"
> @@ -1087,7 +1090,8 @@ aarch64_lookup_simd_type_in_table (machine_mode mode,
> {
> int i;
> int nelts = ARRAY_SIZE (aarch64_simd_types);
> - int q = qualifiers & (qualifier_poly | qualifier_unsigned);
> + int q = qualifiers
> + & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);
>
> for (i = 0; i < nelts; i++)
> {
...this is probably better left to the patch that needs it.
Let's see what others think though.
Thanks,
Richard
@@ -54,6 +54,8 @@ enum aarch64_type_qualifiers
/* Lane indices selected in quadtuplets. - must be in range, and flipped for
bigendian. */
qualifier_lane_quadtup_index = 0x1000,
+ /* Modal FP types. */
+ qualifier_modal_float = 0x2000,
};
#define ENTRY(E, M, Q, G) E,
@@ -133,6 +133,7 @@
#define MODE_d_f16 E_V4HFmode
#define MODE_d_f32 E_V2SFmode
#define MODE_d_f64 E_V1DFmode
+#define MODE_d_mf8 E_V8QImode
#define MODE_d_s8 E_V8QImode
#define MODE_d_s16 E_V4HImode
#define MODE_d_s32 E_V2SImode
@@ -148,6 +149,7 @@
#define MODE_q_f16 E_V8HFmode
#define MODE_q_f32 E_V4SFmode
#define MODE_q_f64 E_V2DFmode
+#define MODE_q_mf8 E_V16QImode
#define MODE_q_s8 E_V16QImode
#define MODE_q_s16 E_V8HImode
#define MODE_q_s32 E_V4SImode
@@ -177,6 +179,7 @@
#define QUAL_p16 qualifier_poly
#define QUAL_p64 qualifier_poly
#define QUAL_p128 qualifier_poly
+#define QUAL_mf8 qualifier_modal_float
#define LENGTH_d ""
#define LENGTH_q "q"
@@ -1087,7 +1090,8 @@ aarch64_lookup_simd_type_in_table (machine_mode mode,
{
int i;
int nelts = ARRAY_SIZE (aarch64_simd_types);
- int q = qualifiers & (qualifier_poly | qualifier_unsigned);
+ int q = qualifiers
+ & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);
for (i = 0; i < nelts; i++)
{
@@ -1190,6 +1194,10 @@ aarch64_init_simd_builtin_types (void)
aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
+ /* Init FP8 element types. */
+ aarch64_simd_types[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
+ aarch64_simd_types[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;
+
for (i = 0; i < nelts; i++)
{
tree eltype = aarch64_simd_types[i].eltype;
@@ -52,3 +52,5 @@
ENTRY (Float64x2_t, V2DF, none, 13)
ENTRY (Bfloat16x4_t, V4BF, none, 14)
ENTRY (Bfloat16x8_t, V8BF, none, 14)
+ ENTRY (Mfloat8x8_t, V8QI, modal_float, 13)
+ ENTRY (Mfloat8x16_t, V16QI, modal_float, 14)
@@ -73,6 +73,8 @@ typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;
typedef __mfp8 mfloat8_t;
+typedef __Mfloat8x8_t mfloat8x8_t;
+typedef __Mfloat8x16_t mfloat8x16_t;
typedef __fp16 float16_t;
typedef float float32_t;