[v5,3/5] aarch64: add svcvt* FP8 intrinsics

Message ID 20241128211234.1714776-4-claudio.bantaloukas@arm.com
State New
Headers
Series aarch64: Add fp8 sve foundation |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply

Commit Message

Claudio Bantaloukas Nov. 28, 2024, 9:12 p.m. UTC
  This patch adds the following intrinsics:
- svcvt1_bf16[_mf8]_fpm
- svcvt1_f16[_mf8]_fpm
- svcvt2_bf16[_mf8]_fpm
- svcvt2_f16[_mf8]_fpm
- svcvtlt1_bf16[_mf8]_fpm
- svcvtlt1_f16[_mf8]_fpm
- svcvtlt2_bf16[_mf8]_fpm
- svcvtlt2_f16[_mf8]_fpm
- svcvtn_mf8[_f16_x2]_fpm (unpredicated)
- svcvtnb_mf8[_f32_x2]_fpm
- svcvtnt_mf8[_f32_x2]_fpm

The underlying instructions are only available when SVE2 is enabled and the PE
is not in streaming SVE mode. They are also available when SME2 is enabled and
the PE is in streaming SVE mode.

gcc/
	* config/aarch64/aarch64-sve-builtins-shapes.cc
	(parse_signature): Add an fpm_t (uint64_t) argument to functions that
	set the fpm register.
	(unary_convertxn_narrowt_def): New class.
	(unary_convertxn_narrowt): New shape.
	(unary_convertxn_narrow_def): New class.
	(unary_convertxn_narrow): New shape.
	* config/aarch64/aarch64-sve-builtins-shapes.h
	(unary_convertxn_narrowt): Declare.
	(unary_convertxn_narrow): Likewise.
	* config/aarch64/aarch64-sve-builtins-sve2.cc
	(svcvt_fp8_impl): New class.
	(svcvtn_impl): Handle fp8 cases.
	(svcvt1, svcvt2, svcvtlt1, svcvtlt2): Add new FUNCTION.
	(svcvtnb): Likewise.
	* config/aarch64/aarch64-sve-builtins-sve2.def
	(svcvt1, svcvt2, svcvtlt1, svcvtlt2): Add new DEF_SVE_FUNCTION_GS_FPM.
	(svcvtn): Likewise.
	(svcvtnb, svcvtnt): Likewise.
	* config/aarch64/aarch64-sve-builtins-sve2.h
	(svcvt1, svcvt2, svcvtlt1, svcvtlt2, svcvtnb, svcvtnt): Declare.
	* config/aarch64/aarch64-sve-builtins.cc
	(TYPES_cvt_mf8, TYPES_cvtn_mf8, TYPES_cvtnx_mf8): Add new types arrays.
	(function_builder::get_name): Append _fpm to functions that set fpmr.
	(function_resolver::check_gp_argument): Deal with the fpm_t argument.
	(function_expander::expand): Set the fpm register before
	calling the insn if the function warrants it.
	* config/aarch64/aarch64-sve2.md (@aarch64_sve2_fp8_cvt): Add new.
	(@aarch64_sve2_fp8_cvtn): Likewise.
	(@aarch64_sve2_fp8_cvtnb): Likewise.
	(@aarch64_sve_cvtnt): Likewise.
	* config/aarch64/aarch64.h (TARGET_SSVE_FP8): Add new.
	* config/aarch64/iterators.md
	(VNx8SF_ONLY, SVE_FULL_HFx2): New mode iterators.
	(UNSPEC_F1CVT, UNSPEC_F1CVTLT, UNSPEC_F2CVT, UNSPEC_F2CVTLT): Add new.
	(UNSPEC_FCVTNB, UNSPEC_FCVTNT): Likewise.
	(UNSPEC_FP8FCVTN): Likewise.
	(FP8CVT_UNS, fp8_cvt_uns_op): Likewise.

gcc/testsuite/

	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
	(TEST_DUAL_Z): Add fpm0 argument
	* gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrow_1.c:
	Add new tests.
	* gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrowt_1.c:
	Likewise.
	* gcc.target/aarch64/sve2/acle/asm/cvt_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/cvtlt_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/cvtn_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/cvtnb_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/cvtnt_mf8.c: Likewise.
	* lib/target-supports.exp: Add aarch64_asm_fp8_ok check.
---
 .../aarch64/aarch64-sve-builtins-shapes.cc    | 78 +++++++++++++++++++
 .../aarch64/aarch64-sve-builtins-shapes.h     |  2 +
 .../aarch64/aarch64-sve-builtins-sve2.cc      | 28 ++++++-
 .../aarch64/aarch64-sve-builtins-sve2.def     | 12 +++
 .../aarch64/aarch64-sve-builtins-sve2.h       |  6 ++
 gcc/config/aarch64/aarch64-sve-builtins.cc    | 31 +++++++-
 gcc/config/aarch64/aarch64-sve2.md            | 51 ++++++++++++
 gcc/config/aarch64/aarch64.h                  |  5 ++
 gcc/config/aarch64/iterators.md               | 24 ++++++
 .../aarch64/sve/acle/asm/test_sve_acle.h      |  2 +-
 .../acle/general-c/unary_convertxn_narrow_1.c | 60 ++++++++++++++
 .../general-c/unary_convertxn_narrowt_1.c     | 38 +++++++++
 .../aarch64/sve2/acle/asm/cvt_mf8.c           | 48 ++++++++++++
 .../aarch64/sve2/acle/asm/cvtlt_mf8.c         | 50 ++++++++++++
 .../aarch64/sve2/acle/asm/cvtn_mf8.c          | 30 +++++++
 .../aarch64/sve2/acle/asm/cvtnb_mf8.c         | 20 +++++
 .../aarch64/sve2/acle/asm/cvtnt_mf8.c         | 31 ++++++++
 gcc/testsuite/lib/target-supports.exp         |  2 +-
 18 files changed, 513 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrow_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrowt_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtn_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnb_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_mf8.c
  

Patch

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index ebe2e581728..62831b3c1e2 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -325,6 +325,8 @@  parse_signature (const function_instance &instance, const char *format,
 	argument_types.quick_push (argument_type);
     }
   gcc_assert (format[0] == 0);
+  if (instance.fpm_mode == FPM_set)
+    argument_types.quick_push (get_typenode_from_name (UINT64_TYPE));
   return return_type;
 }
 
@@ -4596,6 +4598,46 @@  struct unary_convert_narrowt_def : public overloaded_base<1>
 };
 SHAPE (unary_convert_narrowt)
 
+/* sv<t0>_t svfoo_t0[_t1_g](sv<t0>_t, sv<t1>x<g_t, fpm_t)
+
+   Similar to unary_convert_narrowt but for tuple arguments with support for
+   modal floating point.  */
+struct unary_convertxn_narrowt_def : public overloaded_base<1>
+{
+  bool
+  explicit_group_suffix_p () const override
+  {
+    return false;
+  }
+
+  bool
+  has_merge_argument_p (const function_instance &, unsigned int) const override
+  {
+    return true;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,t1", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    gcc_assert(r.fpm_mode == FPM_set);
+    sve_type type;
+    if (!r.check_num_arguments (3)
+        || !(type = r.infer_sve_type (1))
+	|| !r.require_scalar_type (2, "uint64_t"))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unary_convertxn_narrowt)
+
 /* sv<t0>x<g0>_t svfoo_t0[_t1_g](sv<t1>x<g1>_t)
 
    where the target type <t0> must be specified explicitly but the
@@ -4628,6 +4670,42 @@  struct unary_convertxn_def : public unary_convert_def
 };
 SHAPE (unary_convertxn)
 
+/* sv<t0>_t svfoo_t0[_t1_g](sv<t1>x<g1>_t)
+
+   where the target type <t0> must be specified explicitly but the
+   source type <t1> can be inferred.
+
+   Functions with a group suffix are unpredicated. */
+struct unary_convertxn_narrow_def : public unary_convert_def
+{
+  bool
+  explicit_group_suffix_p () const override
+  {
+    return false;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,t1", group, MODE_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    gcc_assert(r.fpm_mode == FPM_set);
+    sve_type type;
+    if (!r.check_num_arguments (2)
+        || !(type = r.infer_sve_type (0))
+	|| !r.require_scalar_type (1, "uint64_t"))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unary_convertxn_narrow)
+
 /* sv<t0>_t svfoo_<t0>(sv<t0>_t, uint64_t)
 
    where the final argument is an integer constant expression in the
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index e1d661c5a46..dc3d4557288 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -229,7 +229,9 @@  namespace aarch64_sve
     extern const function_shape *const unary;
     extern const function_shape *const unary_convert;
     extern const function_shape *const unary_convert_narrowt;
+    extern const function_shape *const unary_convertxn_narrowt;
     extern const function_shape *const unary_convertxn;
+    extern const function_shape *const unary_convertxn_narrow;
     extern const function_shape *const unary_lane;
     extern const function_shape *const unary_long;
     extern const function_shape *const unary_n;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 6bfc62bdce6..1a1d2c4c6ec 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -221,13 +221,34 @@  public:
   }
 };
 
+class svcvt_fp8_impl : public function_base
+{
+public:
+  CONSTEXPR
+  svcvt_fp8_impl (int unspec) : m_unspec (unspec) {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    auto icode = code_for_aarch64_sve2_fp8_cvt (m_unspec, e.result_mode ());
+    return e.use_exact_insn (icode);
+  }
+
+  int m_unspec;
+};
+
 class svcvtn_impl : public function_base
 {
 public:
   rtx
   expand (function_expander &e) const override
   {
-    return e.use_exact_insn (code_for_aarch64_sve_cvtn (e.result_mode ()));
+    insn_code icode;
+    if (e.fpm_mode == FPM_set)
+      icode = code_for_aarch64_sve2_fp8_cvtn (GET_MODE (e.args[0]));
+    else
+      icode = code_for_aarch64_sve_cvtn (e.result_mode ());
+    return e.use_exact_insn (icode);
   }
 };
 
@@ -922,9 +943,14 @@  FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
 FUNCTION (svcdot, svcdot_impl,)
 FUNCTION (svcdot_lane, svcdot_lane_impl,)
 FUNCTION (svclamp, svclamp_impl,)
+FUNCTION (svcvt1, svcvt_fp8_impl, (UNSPEC_F1CVT))
+FUNCTION (svcvt2, svcvt_fp8_impl, (UNSPEC_F2CVT))
+FUNCTION (svcvtlt1, svcvt_fp8_impl, (UNSPEC_F1CVTLT))
+FUNCTION (svcvtlt2, svcvt_fp8_impl, (UNSPEC_F2CVTLT))
 FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
 FUNCTION (svcvtl, svcvtl_impl,)
 FUNCTION (svcvtn, svcvtn_impl,)
+FUNCTION (svcvtnb, fixed_insn_function, (CODE_FOR_aarch64_sve2_fp8_cvtnbvnx16qi))
 FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
 FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
 FUNCTION (svdup_laneq, svdup_laneq_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 2189855d705..8a63998fcc6 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -367,3 +367,15 @@  DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, h_bfloat, x24, none)
 DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, h_bfloat, x24, none)
 DEF_SVE_FUNCTION_GS (svminnm, binary_opt_single_n, h_bfloat, x24, none)
 #undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+  sve_and_sme (AARCH64_FL_SVE2 | AARCH64_FL_FP8, \
+	       AARCH64_FL_SME2 | AARCH64_FL_FP8)
+DEF_SVE_FUNCTION_GS_FPM (svcvt1, unary_convert, cvt_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvt2, unary_convert, cvt_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtlt1, unary_convert, cvt_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtlt2, unary_convert, cvt_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtn, unary_convertxn_narrow, cvtn_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtnb, unary_convertxn_narrow, cvtnx_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtnt, unary_convertxn_narrowt, cvtnx_mf8, x2, none, set)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index bfe3d170e70..d26751e8042 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -62,8 +62,14 @@  namespace aarch64_sve
     extern const function_base *const svclamp;
     extern const function_base *const svcntp;
     extern const function_base *const svcvtl;
+    extern const function_base *const svcvt1;
+    extern const function_base *const svcvt2;
+    extern const function_base *const svcvtlt1;
+    extern const function_base *const svcvtlt2;
     extern const function_base *const svcvtlt;
     extern const function_base *const svcvtn;
+    extern const function_base *const svcvtnb;
+    extern const function_base *const svcvtnt;
     extern const function_base *const svcvtx;
     extern const function_base *const svcvtxnt;
     extern const function_base *const svdup_laneq;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 66320be9adc..4201ece9d59 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -481,6 +481,20 @@  CONSTEXPR const group_suffix_info group_suffixes[] = {
   D (f32, s32), \
   D (f32, u32)
 
+/* _f16_mf8
+   _bf16_mf8.  */
+#define TYPES_cvt_mf8(S, D) \
+  D (f16, mf8), D (bf16, mf8)
+
+/* _mf8_f16
+   _mf8_bf16.  */
+#define TYPES_cvtn_mf8(S, D) \
+  D (mf8, f16), D (mf8, bf16)
+
+/* _mf8_f32.  */
+#define TYPES_cvtnx_mf8(S, D) \
+  D (mf8, f32)
+
 /* { _s32 _s64 } x { _b8 _b16 _b32 _b64 }
    { _u32 _u64 }.  */
 #define TYPES_inc_dec_n1(D, A) \
@@ -793,9 +807,12 @@  DEF_SVE_TYPES_ARRAY (cvt_bfloat);
 DEF_SVE_TYPES_ARRAY (cvt_h_s_float);
 DEF_SVE_TYPES_ARRAY (cvt_f32_f16);
 DEF_SVE_TYPES_ARRAY (cvt_long);
+DEF_SVE_TYPES_ARRAY (cvt_mf8);
 DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
 DEF_SVE_TYPES_ARRAY (cvt_narrow);
 DEF_SVE_TYPES_ARRAY (cvt_s_s);
+DEF_SVE_TYPES_ARRAY (cvtn_mf8);
+DEF_SVE_TYPES_ARRAY (cvtnx_mf8);
 DEF_SVE_TYPES_ARRAY (inc_dec_n);
 DEF_SVE_TYPES_ARRAY (qcvt_x2);
 DEF_SVE_TYPES_ARRAY (qcvt_x4);
@@ -1428,6 +1445,8 @@  function_builder::get_name (const function_instance &instance,
   if (!overloaded_p || instance.shape->explicit_group_suffix_p ())
     append_name (instance.group_suffix ().string);
   append_name (pred_suffixes[instance.pred]);
+  if (instance.fpm_mode == FPM_set)
+    append_name ("_fpm");
   return finish_name ();
 }
 
@@ -3063,11 +3082,12 @@  function_resolver::check_gp_argument (unsigned int nops,
 {
   gcc_assert (pred != PRED_za_m);
   i = 0;
+  unsigned int nfpm_args = (fpm_mode == FPM_set)? 1:0;
   if (pred != PRED_none)
     {
       /* Unary merge operations should use resolve_unary instead.  */
       gcc_assert (!shape->has_merge_argument_p (*this, nops));
-      nargs = nops + 1;
+      nargs = nops + nfpm_args + 1;
       if (!check_num_arguments (nargs)
 	  || !require_vector_type (i, gp_type_index ()))
 	return false;
@@ -3075,7 +3095,7 @@  function_resolver::check_gp_argument (unsigned int nops,
     }
   else
     {
-      nargs = nops;
+      nargs = nops + nfpm_args;
       if (!check_num_arguments (nargs))
 	return false;
     }
@@ -4512,6 +4532,13 @@  function_expander::expand ()
   for (unsigned int i = 0; i < nargs; ++i)
     args.quick_push (expand_normal (CALL_EXPR_ARG (call_expr, i)));
 
+  if (fpm_mode == FPM_set)
+    {
+      /* The last element of these functions is always an fpm_t that must be
+         written to FPMR before the call to the instruction itself. */
+      gcc_assert (args.last ()->mode == DImode);
+      emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), args.last ());
+    }
   return base->expand (*this);
 }
 
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 66affa85d36..e5bd2861b48 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2936,6 +2936,14 @@  (define_insn "@aarch64_<optab>_lane_<mode>"
 ;; ---- [FP<-FP] Widening conversions
 ;; -------------------------------------------------------------------------
 ;; Includes:
+;; - BF1CVT
+;; - BF1CVTLT
+;; - BF2CVT
+;; - BF2CVTLT
+;; - F1CVT
+;; - F1CVTLT
+;; - F2CVT
+;; - F2CVTLT
 ;; - FCVTLT
 ;; -------------------------------------------------------------------------
 
@@ -3001,6 +3009,16 @@  (define_insn "*cond_<sve_fp_op><mode>_strict"
   "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
 )
 
+(define_insn "@aarch64_sve2_fp8_cvt_<fp8_cvt_uns_op><mode>"
+  [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
+	(unspec:SVE_FULL_HF
+	  [(match_operand:VNx16QI 1 "register_operand" "w")
+	  (reg:DI FPM_REGNUM)]
+	  FP8CVT_UNS))]
+  "TARGET_SSVE_FP8"
+  "<b><fp8_cvt_uns_op>\t%0.h, %1.b"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-FP] Narrowing conversions
 ;; -------------------------------------------------------------------------
@@ -3150,6 +3168,8 @@  (define_insn "@aarch64_sve_cvtl<mode>"
 ;; - BFCVTN
 ;; - FCVT
 ;; - FCVTN
+;; - FCVTNB
+;; - FCVTNT
 ;; -------------------------------------------------------------------------
 
 (define_insn "truncvnx8sf<mode>2"
@@ -3169,6 +3189,37 @@  (define_insn "@aarch64_sve_cvtn<mode>"
   "<b>fcvtn\t%0.h, %1"
 )
 
+(define_insn "@aarch64_sve2_fp8_cvtn<mode>"
+  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+	(unspec:VNx16QI
+	  [(match_operand:SVE_FULL_HFx2 1 "aligned_register_operand" "Uw2")
+	   (reg:DI FPM_REGNUM)]
+	  UNSPEC_FP8FCVTN))]
+  "TARGET_SSVE_FP8"
+  "<b>fcvtn\t%0.b, %1"
+)
+
+(define_insn "@aarch64_sve2_fp8_cvtnb<mode>"
+  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+	(unspec:VNx16QI_ONLY
+	  [(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")
+	   (reg:DI FPM_REGNUM)]
+	  UNSPEC_FCVTNB))]
+  "TARGET_SSVE_FP8"
+  "fcvtnb\t%0.b, %1"
+)
+
+(define_insn "@aarch64_sve_cvtnt<mode>"
+  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
+	(unspec:VNx16QI_ONLY
+	  [(match_operand:VNx16QI_ONLY 1 "register_operand" "0")
+	   (match_operand:VNx8SF 2 "aligned_register_operand" "Uw2")
+	   (reg:DI FPM_REGNUM)]
+	  UNSPEC_FCVTNT))]
+  "TARGET_SSVE_FP8"
+  "fcvtnt\t%0.b, %2"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-INT] Multi-vector conversions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index b063c315fba..f43b1659db6 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -513,6 +513,11 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 #define TARGET_SSVE_B16B16 \
   (AARCH64_HAVE_ISA (SVE_B16B16) && TARGET_SVE2_OR_SME2)
 
+/* Some fp8 instructions require +fp8 and one of +sve2 or +sme2.  */
+#define TARGET_SSVE_FP8 (TARGET_FP8 \
+			 && (TARGET_SVE2 || TARGET_STREAMING) \
+			 && (TARGET_SME2 || TARGET_NON_STREAMING))
+
 /* Standard register usage.  */
 
 /* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 023893d35f3..26716d593de 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -477,6 +477,9 @@  (define_mode_iterator SVE_FULL_BHSIx2 [VNx32QI VNx16HI VNx8SI])
 ;; Fully-packed SVE vector modes that have 16-bit float elements.
 (define_mode_iterator SVE_FULL_HF [VNx8BF VNx8HF])
 
+;; Pairs of the above.
+(define_mode_iterator SVE_FULL_HFx2 [VNx16BF VNx16HF])
+
 ;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements.
 (define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI
 				    VNx8BF VNx8HF VNx4SF VNx2DF])
@@ -960,7 +963,13 @@  (define_c_enum "unspec"
     UNSPEC_COND_FLOGB	; Used in aarch64-sve2.md.
     UNSPEC_EORBT	; Used in aarch64-sve2.md.
     UNSPEC_EORTB	; Used in aarch64-sve2.md.
+    UNSPEC_F1CVT	; Used in aarch64-sve2.md.
+    UNSPEC_F1CVTLT	; Used in aarch64-sve2.md.
+    UNSPEC_F2CVT	; Used in aarch64-sve2.md.
+    UNSPEC_F2CVTLT	; Used in aarch64-sve2.md.
     UNSPEC_FADDP	; Used in aarch64-sve2.md.
+    UNSPEC_FCVTNB	; Used in aarch64-sve2.md.
+    UNSPEC_FCVTNT	; Used in aarch64-sve2.md.
     UNSPEC_FMAXNMP	; Used in aarch64-sve2.md.
     UNSPEC_FMAXP	; Used in aarch64-sve2.md.
     UNSPEC_FMINNMP	; Used in aarch64-sve2.md.
@@ -969,6 +978,7 @@  (define_c_enum "unspec"
     UNSPEC_FMLALT	; Used in aarch64-sve2.md.
     UNSPEC_FMLSLB	; Used in aarch64-sve2.md.
     UNSPEC_FMLSLT	; Used in aarch64-sve2.md.
+    UNSPEC_FP8FCVTN	; Used in aarch64-sve2.md.
     UNSPEC_HISTCNT	; Used in aarch64-sve2.md.
     UNSPEC_HISTSEG	; Used in aarch64-sve2.md.
     UNSPEC_LD1_COUNT	; Used in aarch64-sve2.md.
@@ -4731,3 +4741,17 @@  (define_int_attr faminmax_uns_op
 
 (define_code_attr faminmax_op
   [(smax "famax") (smin "famin")])
+
+;; Iterators and attributes for fp8 sve/sme conversions
+
+(define_int_iterator FP8CVT_UNS
+  [UNSPEC_F1CVT
+   UNSPEC_F2CVT
+   UNSPEC_F1CVTLT
+   UNSPEC_F2CVTLT])
+
+(define_int_attr fp8_cvt_uns_op
+  [(UNSPEC_F1CVT "f1cvt")
+   (UNSPEC_F2CVT "f2cvt")
+   (UNSPEC_F1CVTLT "f1cvtlt")
+   (UNSPEC_F2CVTLT "f2cvtlt")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index e9112c02b3e..4a146c3e157 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -75,7 +75,7 @@ 
 #define TEST_DUAL_Z(NAME, TYPE1, TYPE2, CODE1, CODE2)		\
   PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3,	\
 		       TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7,	\
-		       svbool_t p0, svbool_t p1))		\
+		       svbool_t p0, svbool_t p1, fpm_t fpm0))	\
   {								\
     INVOKE (CODE1, CODE2);					\
     return z0;							\
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrow_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrow_1.c
new file mode 100644
index 00000000000..d312e857d81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrow_1.c
@@ -0,0 +1,60 @@ 
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2+fp8"
+
+void
+test (svfloat16x2_t f16x2, svbfloat16x2_t bf16x2, svfloat32x2_t f32x2,
+      svfloat16x3_t f16x3, svfloat16x4_t f16x4,
+      svfloat32x3_t f32x3, svfloat32x4_t f32x4,
+      fpm_t fpm0,
+      svbool_t pg, float f, svint8_t s8, svint32x2_t s32x2)
+  __arm_streaming
+{
+  svcvtn_mf8_fpm (f16x2, fpm0);
+  svcvtn_mf8_fpm (bf16x2, fpm0);
+
+  svcvtn_mf8_fpm (); /* { dg-error {too few arguments to function 'svcvtn_mf8_fpm'} } */
+  
+  svcvtn_mf8_fpm (f16x2); /* { dg-error {too few arguments to function 'svcvtn_mf8_fpm'} } */
+  svcvtn_mf8_fpm (fpm0); /* { dg-error {too few arguments to function 'svcvtn_mf8_fpm'} } */
+  
+  svcvtn_mf8_fpm (f); /* { dg-error {too few arguments to function 'svcvtn_mf8_fpm'} } */
+  svcvtn_mf8_fpm (pg); /* { dg-error {too few arguments to function 'svcvtn_mf8_fpm'} } */
+  svcvtn_mf8_fpm (s8); /* { dg-error {too few arguments to function 'svcvtn_mf8_fpm'} } */
+
+  svcvtn_mf8_fpm (f16x2, f16x2, fpm0); /* { dg-error {too many arguments to function 'svcvtn_mf8_fpm'} } */
+
+  svcvtn_mf8_fpm (f16x3, fpm0); /* { dg-error {'svcvtn_mf8_fpm' has no form that takes 'svfloat16x3_t' arguments} } */
+  svcvtn_mf8_fpm (f16x4, fpm0); /* { dg-error {'svcvtn_mf8_fpm' has no form that takes 'svfloat16x4_t' arguments} } */
+  svcvtn_mf8_fpm (0, fpm0); /* { dg-error {passing 'int' to argument 1 of 'svcvtn_mf8_fpm', which expects an SVE type rather than a scalar type} } */
+  svcvtn_mf8_fpm (f, fpm0); /* { dg-error {passing 'float' to argument 1 of 'svcvtn_mf8_fpm', which expects an SVE type rather than a scalar type} } */
+  svcvtn_mf8_fpm (pg, fpm0); /* { dg-error {'svcvtn_mf8_fpm' has no form that takes 'svbool_t' arguments} } */
+  svcvtn_mf8_fpm (s8, fpm0); /* { dg-error {'svcvtn_mf8_fpm' has no form that takes 'svint8_t' arguments} } */
+  svcvtn_mf8_fpm (s32x2, fpm0); /* { dg-error {'svcvtn_mf8_fpm' has no form that takes 'svint32x2_t' arguments} } */
+  
+  svcvtn_mf8_fpm (f16x2, f16x2); /* { dg-error {passing 'svfloat16x2_t' to argument 2 of 'svcvtn_mf8_fpm', which expects 'uint64_t'} } */
+
+
+  svcvtnb_mf8_fpm (f32x2, fpm0);
+
+  svcvtnb_mf8_fpm (); /* { dg-error {too few arguments to function 'svcvtnb_mf8_fpm'} } */
+  
+  svcvtnb_mf8_fpm (f32x2); /* { dg-error {too few arguments to function 'svcvtnb_mf8_fpm'} } */
+  svcvtnb_mf8_fpm (fpm0); /* { dg-error {too few arguments to function 'svcvtnb_mf8_fpm'} } */
+  
+  svcvtnb_mf8_fpm (f); /* { dg-error {too few arguments to function 'svcvtnb_mf8_fpm'} } */
+  svcvtnb_mf8_fpm (pg); /* { dg-error {too few arguments to function 'svcvtnb_mf8_fpm'} } */
+  svcvtnb_mf8_fpm (s8); /* { dg-error {too few arguments to function 'svcvtnb_mf8_fpm'} } */
+
+  svcvtnb_mf8_fpm (f32x2, f32x2, fpm0); /* { dg-error {too many arguments to function 'svcvtnb_mf8_fpm'} } */
+
+  svcvtnb_mf8_fpm (f32x3, fpm0); /* { dg-error {'svcvtnb_mf8_fpm' has no form that takes 'svfloat32x3_t' arguments} } */
+  svcvtnb_mf8_fpm (f32x4, fpm0); /* { dg-error {'svcvtnb_mf8_fpm' has no form that takes 'svfloat32x4_t' arguments} } */
+  svcvtnb_mf8_fpm (0, fpm0); /* { dg-error {passing 'int' to argument 1 of 'svcvtnb_mf8_fpm', which expects an SVE type rather than a scalar type} } */
+  svcvtnb_mf8_fpm (f, fpm0); /* { dg-error {passing 'float' to argument 1 of 'svcvtnb_mf8_fpm', which expects an SVE type rather than a scalar type} } */
+  svcvtnb_mf8_fpm (pg, fpm0); /* { dg-error {'svcvtnb_mf8_fpm' has no form that takes 'svbool_t' arguments} } */
+  svcvtnb_mf8_fpm (s8, fpm0); /* { dg-error {'svcvtnb_mf8_fpm' has no form that takes 'svint8_t' arguments} } */
+  svcvtnb_mf8_fpm (s32x2, fpm0); /* { dg-error {'svcvtnb_mf8_fpm' has no form that takes 'svint32x2_t' arguments} } */
+  
+  svcvtnb_mf8_fpm (f32x2, f32x2); /* { dg-error {passing 'svfloat32x2_t' to argument 2 of 'svcvtnb_mf8_fpm', which expects 'uint64_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrowt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrowt_1.c
new file mode 100644
index 00000000000..ab97eef3472
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convertxn_narrowt_1.c
@@ -0,0 +1,38 @@ 
+#include <arm_sve.h>
+
+#pragma GCC target "+sme2+fp8"
+
+void
+test (svmfloat8_t f8, svfloat32x2_t f32x2, fpm_t fpm0,
+      svfloat16x2_t f16x2, svfloat16x4_t f16x4,
+      svfloat32x3_t f32x3, svfloat32x4_t f32x4,
+      svbool_t pg, float f, svint8_t s8, svint32x2_t s32x2)
+  __arm_streaming
+{
+  svcvtnt_mf8_fpm (f8, f32x2, fpm0);
+
+  svcvtnt_mf8_fpm (); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  
+  svcvtnt_mf8_fpm (f8); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (f32x2); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (fpm0); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (f); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (f8, f32x2); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (f32x2, fpm0); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (f8, fpm0); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (pg); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+  svcvtnt_mf8_fpm (s8); /* { dg-error {too few arguments to function 'svcvtnt_mf8_fpm'} } */
+
+  svcvtnt_mf8_fpm (f8, f16x2, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svfloat16x2_t' arguments} } */
+  svcvtnt_mf8_fpm (f8, f16x4, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svfloat16x4_t' arguments} } */
+  svcvtnt_mf8_fpm (f8, f32x3, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svfloat32x3_t' arguments} } */
+  svcvtnt_mf8_fpm (f8, f32x4, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svfloat32x4_t' arguments} } */
+
+  svcvtnt_mf8_fpm (f8, 0, fpm0); /* { dg-error {passing 'int' to argument 2 of 'svcvtnt_mf8_fpm', which expects an SVE type rather than a scalar type} } */
+  svcvtnt_mf8_fpm (f8, f, fpm0); /* { dg-error {passing 'float' to argument 2 of 'svcvtnt_mf8_fpm', which expects an SVE type rather than a scalar type} } */
+  svcvtnt_mf8_fpm (f8, pg, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svbool_t' arguments} } */
+  svcvtnt_mf8_fpm (f8, s8, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svint8_t' arguments} } */
+  svcvtnt_mf8_fpm (f8, s32x2, fpm0); /* { dg-error {'svcvtnt_mf8_fpm' has no form that takes 'svint32x2_t' arguments} } */
+  
+  svcvtnt_mf8_fpm (f8, f32x2, f32x2); /* { dg-error {passing 'svfloat32x2_t' to argument 3 of 'svcvtnt_mf8_fpm', which expects 'uint64_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_mf8.c
new file mode 100644
index 00000000000..4fd915ee73a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvt_mf8.c
@@ -0,0 +1,48 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+bf16+fp8"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cvt1_f16_mf8_fpm:
+**	msr	fpmr, x0
+**	f1cvt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvt1_f16_mf8_fpm, svfloat16_t, svmfloat8_t,
+	     z0 = svcvt1_f16_mf8_fpm (z4, fpm0), z0 = svcvt1_f16_fpm (z4, fpm0))
+
+/*
+** cvt1_bf16_mf8_fpm:
+**	msr	fpmr, x0
+**	bf1cvt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvt1_bf16_mf8_fpm, svbfloat16_t, svmfloat8_t,
+	     z0 = svcvt1_bf16_mf8_fpm (z4, fpm0),
+	     z0 = svcvt1_bf16_fpm (z4, fpm0))
+
+/*
+** cvt2_f16_mf8_fpm:
+**	msr	fpmr, x0
+**	f2cvt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvt2_f16_mf8_fpm, svfloat16_t, svmfloat8_t,
+	     z0 = svcvt2_f16_mf8_fpm (z4, fpm0), z0 = svcvt2_f16_fpm (z4, fpm0))
+
+/*
+** cvt2_bf16_mf8_fpm:
+**	msr	fpmr, x0
+**	bf2cvt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvt2_bf16_mf8_fpm, svbfloat16_t, svmfloat8_t,
+	     z0 = svcvt2_bf16_mf8_fpm (z4, fpm0),
+	     z0 = svcvt2_bf16_fpm (z4, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_mf8.c
new file mode 100644
index 00000000000..fb645eed630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_mf8.c
@@ -0,0 +1,50 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+bf16+fp8"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cvtlt1_f16_mf8_fpm:
+**	msr	fpmr, x0
+**	f1cvtlt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvtlt1_f16_mf8_fpm, svfloat16_t, svmfloat8_t,
+	     z0 = svcvtlt1_f16_mf8_fpm (z4, fpm0),
+	     z0 = svcvtlt1_f16_fpm (z4, fpm0))
+
+/*
+** cvtlt1_bf16_mf8_fpm:
+**	msr	fpmr, x0
+**	bf1cvtlt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvtlt1_bf16_mf8_fpm, svbfloat16_t, svmfloat8_t,
+	     z0 = svcvtlt1_bf16_mf8_fpm (z4, fpm0),
+	     z0 = svcvtlt1_bf16_fpm (z4, fpm0))
+
+/*
+** cvtlt2_f16_mf8_fpm:
+**	msr	fpmr, x0
+**	f2cvtlt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvtlt2_f16_mf8_fpm, svfloat16_t, svmfloat8_t,
+	     z0 = svcvtlt2_f16_mf8_fpm (z4, fpm0),
+	     z0 = svcvtlt2_f16_fpm (z4, fpm0))
+
+/*
+** cvtlt2_bf16_mf8_fpm:
+**	msr	fpmr, x0
+**	bf2cvtlt	z0\.h, z4\.b
+**	ret
+*/
+TEST_DUAL_Z (cvtlt2_bf16_mf8_fpm, svbfloat16_t, svmfloat8_t,
+	     z0 = svcvtlt2_bf16_mf8_fpm (z4, fpm0),
+	     z0 = svcvtlt2_bf16_fpm (z4, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtn_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtn_mf8.c
new file mode 100644
index 00000000000..b0bff2ffb0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtn_mf8.c
@@ -0,0 +1,30 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+bf16+fp8"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cvtn_mf8_f16_x2_fpm:
+**	msr	fpmr, x2
+**	fcvtn	z0\.b, {z4\.h(?:, | - )z5\.h}
+**	ret
+*/
+TEST_DUAL_Z (cvtn_mf8_f16_x2_fpm, svmfloat8_t, svfloat16x2_t,
+	     z0 = svcvtn_mf8_f16_x2_fpm (z4, fpm0),
+	     z0 = svcvtn_mf8_fpm (z4, fpm0))
+
+/*
+** cvtn_mf8_bf16_x2_fpm:
+**	msr	fpmr, x2
+**	bfcvtn	z0\.b, {z4\.h(?:, | - )z5\.h}
+**	ret
+*/
+TEST_DUAL_Z (cvtn_mf8_bf16_x2_fpm, svmfloat8_t, svbfloat16x2_t,
+	     z0 = svcvtn_mf8_bf16_x2_fpm (z4, fpm0),
+	     z0 = svcvtn_mf8_fpm (z4, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnb_mf8.c
new file mode 100644
index 00000000000..c7c58ebff53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnb_mf8.c
@@ -0,0 +1,20 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+bf16+fp8"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cvtnb_mf8_f32_x2_fpm:
+**	msr	fpmr, x2
+**	fcvtnb	z0\.b, {z4\.s(?:, | - )z5\.s}
+**	ret
+*/
+TEST_DUAL_Z (cvtnb_mf8_f32_x2_fpm, svmfloat8_t, svfloat32x2_t,
+	     z0 = svcvtnb_mf8_f32_x2_fpm (z4, fpm0),
+	     z0 = svcvtnb_mf8_fpm (z4, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_mf8.c
new file mode 100644
index 00000000000..46b42c4318d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_mf8.c
@@ -0,0 +1,31 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+bf16+fp8"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cvtnt_mf8_f32_x2_fpm_untied:
+**	msr	fpmr, x2
+**	fcvtnt	z1\.b, {z4\.s(?:, | - )z5\.s}
+**	mov	z0.d, z1.d
+**	ret
+*/
+TEST_DUAL_Z (cvtnt_mf8_f32_x2_fpm_untied, svmfloat8_t, svfloat32x2_t,
+	     z0 = svcvtnt_mf8_f32_x2_fpm (z1, z4, fpm0),
+	     z0 = svcvtnt_mf8_fpm (z1, z4, fpm0))
+
+/*
+** cvtnt_mf8_f32_x2_fpm_tied:
+**	msr	fpmr, x2
+**	fcvtnt	z0\.b, {z4\.s(?:, | - )z5\.s}
+**	ret
+*/
+TEST_DUAL_Z (cvtnt_mf8_f32_x2_fpm_tied, svmfloat8_t, svfloat32x2_t,
+	     z0 = svcvtnt_mf8_f32_x2_fpm (z0, z4, fpm0),
+	     z0 = svcvtnt_mf8_fpm (z0, z4, fpm0))
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4d3e3ac04d4..a3edccf1fda 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12140,7 +12140,7 @@  proc check_effective_target_aarch64_tiny { } {
 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
 			  "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
 			  "sme" "sme-i16i64" "sme2" "sve-b16b16"
-			  "sme-b16b16" "sme-f16f16" "sme2p1" } {
+			  "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" } {
     eval [string map [list FUNC $aarch64_ext] {
 	proc check_effective_target_aarch64_asm_FUNC_ok { } {
 	  if { [istarget aarch64*-*-*] } {