[v5,4/5] aarch64: add SVE2 FP8 multiply accumulate intrinsics

Message ID 20241128211234.1714776-5-claudio.bantaloukas@arm.com
State New
Headers
Series aarch64: Add fp8 sve foundation |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Patch failed to apply

Commit Message

Claudio Bantaloukas Nov. 28, 2024, 9:12 p.m. UTC
  This patch adds support for the following intrinsics:
- svmlalb[_f16_mf8]_fpm
- svmlalb[_n_f16_mf8]_fpm
- svmlalt[_f16_mf8]_fpm
- svmlalt[_n_f16_mf8]_fpm
- svmlalb_lane[_f16_mf8]_fpm
- svmlalt_lane[_f16_mf8]_fpm
- svmlallbb[_f32_mf8]_fpm
- svmlallbb[_n_f32_mf8]_fpm
- svmlallbt[_f32_mf8]_fpm
- svmlallbt[_n_f32_mf8]_fpm
- svmlalltb[_f32_mf8]_fpm
- svmlalltb[_n_f32_mf8]_fpm
- svmlalltt[_f32_mf8]_fpm
- svmlalltt[_n_f32_mf8]_fpm
- svmlallbb_lane[_f32_mf8]_fpm
- svmlallbt_lane[_f32_mf8]_fpm
- svmlalltb_lane[_f32_mf8]_fpm
- svmlalltt_lane[_f32_mf8]_fpm

These are available under a combination of the FP8FMA and SVE2 features.
Alternatively, they are available under the SSVE_FP8FMA feature in streaming mode.

gcc/
	* config/aarch64/aarch64-option-extensions.def
	(fp8fma, ssve-fp8fma): Add new options.
	* config/aarch64/aarch64-sve-builtins-functions.h
	(unspec_based_function_base): Add unspec_for_mfp8.
	(unspec_for): Return unspec_for_mfp8 on fpm-using cases.
	(sme_1mode_function): Fix call to parent ctor.
	(sme_2mode_function_t): Likewise.
	(unspec_based_mla_function, unspec_based_mla_lane_function): Handle
	fpm-using cases.
	* config/aarch64/aarch64-sve-builtins-shapes.cc
	(parse_element_type): Treat M as TYPE_SUFFIX_mf8.
	(ternary_mfloat8_lane_def): Add new class.
	(ternary_mfloat8_opt_n_def): Likewise.
	(ternary_mfloat8_lane): Add new shape.
	(ternary_mfloat8_opt_n): Likewise.
	* config/aarch64/aarch64-sve-builtins-shapes.h
	(ternary_mfloat8_lane, ternary_mfloat8_opt_n): Declare.
	* config/aarch64/aarch64-sve-builtins-sve2.cc
	(svmlalb_lane, svmlalb, svmlalt_lane, svmlalt): Update definitions
	with mfloat8_t unspec in ctor.
	(svmlallbb_lane, svmlallbb, svmlallbt_lane, svmlallbt, svmlalltb_lane,
	svmlalltb, svmlalltt_lane, svmlalltt, svmlal_impl): Add new FUNCTIONs.
	(svqrshr, svqrshrn, svqrshru, svqrshrun): Update definitions with
	nop mfloat8 unspec in ctor.
	* config/aarch64/aarch64-sve-builtins-sve2.def
	(svmlalb, svmlalt, svmlalb_lane, svmlalt_lane, svmlallbb, svmlallbt,
	svmlalltb, svmlalltt, svmlalltt_lane, svmlallbb_lane, svmlallbt_lane,
	svmlalltb_lane): Add new DEF_SVE_FUNCTION_GS_FPMs.
	* config/aarch64/aarch64-sve-builtins-sve2.h
	(svmlallbb_lane, svmlallbb, svmlallbt_lane, svmlallbt, svmlalltb_lane,
	svmlalltb, svmlalltt_lane, svmlalltt): Declare.
	* config/aarch64/aarch64-sve-builtins.cc
	(TYPES_h_float_mf8, TYPES_s_float_mf8): Add new types.
	(h_float_mf8, s_float_mf8): Add new SVE_TYPES_ARRAY.
	* config/aarch64/aarch64-sve2.md
	(@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>): Add new.
	(@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>): Add new.
	(@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>): Likewise.
	(@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>): Likewise.
	* config/aarch64/aarch64.h
	(TARGET_FP8FMA, TARGET_SSVE_FP8FMA): Likewise.
	* config/aarch64/iterators.md
	(VNx8HF_ONLY): Add new.
	(UNSPEC_FMLALB_FP8, UNSPEC_FMLALLBB_FP8, UNSPEC_FMLALLBT_FP8,
	UNSPEC_FMLALLTB_FP8, UNSPEC_FMLALLTT_FP8, UNSPEC_FMLALT_FP8): Likewise.
	(SVE2_FP8_TERNARY_VNX8HF, SVE2_FP8_TERNARY_VNX4SF): Likewise.
	(SVE2_FP8_TERNARY_LANE_VNX8HF, SVE2_FP8_TERNARY_LANE_VNX4SF): Likewise.
	(sve2_fp8_fma_op_vnx8hf, sve2_fp8_fma_op_vnx4sf): Likewise.
	* doc/invoke.texi: Document fp8fma and ssve-fp8fma extensions.

gcc/testsuite/

	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
	(TEST_DUAL_Z_REV, TEST_DUAL_LANE_REG, TEST_DUAL_ZD): Add fpm0 argument.
	* gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c: Add
	new shape test.
	* gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c:
	Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c: Add new test.
	* gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c: Likewise.
	* lib/target-supports.exp: Add check_effective_target for fp8fma and
	ssve-fp8fma.
---
 .../aarch64/aarch64-option-extensions.def     |  4 +
 .../aarch64/aarch64-sve-builtins-functions.h  | 16 +++-
 .../aarch64/aarch64-sve-builtins-shapes.cc    | 78 ++++++++++++++++
 .../aarch64/aarch64-sve-builtins-shapes.h     |  2 +
 .../aarch64/aarch64-sve-builtins-sve2.cc      | 46 +++++++---
 .../aarch64/aarch64-sve-builtins-sve2.def     | 17 ++++
 .../aarch64/aarch64-sve-builtins-sve2.h       |  8 ++
 gcc/config/aarch64/aarch64-sve-builtins.cc    | 10 ++
 gcc/config/aarch64/aarch64-sve2.md            | 81 +++++++++++++++++
 gcc/config/aarch64/aarch64.h                  |  9 ++
 gcc/config/aarch64/iterators.md               | 37 ++++++++
 gcc/doc/invoke.texi                           |  5 +
 .../aarch64/sve/acle/asm/test_sve_acle.h      |  6 +-
 .../acle/general-c/ternary_mfloat8_lane_1.c   | 84 +++++++++++++++++
 .../acle/general-c/ternary_mfloat8_opt_n_1.c  | 60 ++++++++++++
 .../aarch64/sve2/acle/asm/mlalb_lane_mf8.c    | 91 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalb_mf8.c         | 78 ++++++++++++++++
 .../aarch64/sve2/acle/asm/mlallbb_lane_mf8.c  | 91 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/mlallbb_mf8.c       | 78 ++++++++++++++++
 .../aarch64/sve2/acle/asm/mlallbt_lane_mf8.c  | 91 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/mlallbt_mf8.c       | 78 ++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalltb_lane_mf8.c  | 91 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalltb_mf8.c       | 78 ++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalltt_lane_mf8.c  | 91 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalltt_mf8.c       | 78 ++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalt_lane_mf8.c    | 91 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/mlalt_mf8.c         | 78 ++++++++++++++++
 gcc/testsuite/lib/target-supports.exp         |  3 +-
 28 files changed, 1458 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c
  

Patch

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index f4cf6618238..f39c9e6f897 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -245,6 +245,10 @@  AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
 
 AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
 
+AARCH64_OPT_EXTENSION("fp8fma", FP8FMA, (FP8), (), (), "fp8fma")
+
+AARCH64_OPT_EXTENSION("ssve-fp8fma", SSVE_FP8FMA, (SME2,FP8), (), (), "ssve-fp8fma")
+ 
 AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
 
 #undef AARCH64_OPT_FMV_EXTENSION
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 409062ca3dd..3dad0c02972 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -270,10 +270,12 @@  public:
   CONSTEXPR unspec_based_function_base (int unspec_for_sint,
 					int unspec_for_uint,
 					int unspec_for_fp,
+					int unspec_for_mfp8 = -1,
 					unsigned int suffix_index = 0)
     : m_unspec_for_sint (unspec_for_sint),
       m_unspec_for_uint (unspec_for_uint),
       m_unspec_for_fp (unspec_for_fp),
+      m_unspec_for_mfp8 (unspec_for_mfp8),
       m_suffix_index (suffix_index)
   {}
 
@@ -281,6 +283,9 @@  public:
   int
   unspec_for (const function_instance &instance) const
   {
+    if (instance.fpm_mode == FPM_set)
+      return m_unspec_for_mfp8;
+
     auto &suffix = instance.type_suffix (m_suffix_index);
     return (!suffix.integer_p ? m_unspec_for_fp
 	    : suffix.unsigned_p ? m_unspec_for_uint
@@ -292,6 +297,7 @@  public:
   int m_unspec_for_sint;
   int m_unspec_for_uint;
   int m_unspec_for_fp;
+  int m_unspec_for_mfp8;
 
   /* Which type suffix is used to choose between the unspecs.  */
   unsigned int m_suffix_index;
@@ -427,7 +433,7 @@  public:
 
   CONSTEXPR sme_1mode_function (int unspec_for_sint, int unspec_for_uint,
 				int unspec_for_fp)
-    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1)
+    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, -1, 1)
   {}
 
   rtx
@@ -457,7 +463,7 @@  public:
 
   CONSTEXPR sme_2mode_function_t (int unspec_for_sint, int unspec_for_uint,
 				  int unspec_for_fp)
-    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1)
+    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, -1, 1)
   {}
 
   rtx
@@ -496,7 +502,8 @@  public:
   {
     int unspec = unspec_for (e);
     insn_code icode;
-    if (e.type_suffix (m_suffix_index).float_p)
+    if (e.type_suffix (m_suffix_index).float_p
+        && e.fpm_mode != FPM_set)
       {
 	/* Put the operands in the normal (fma ...) order, with the accumulator
 	   last.  This fits naturally since that's also the unprinted operand
@@ -526,7 +533,8 @@  public:
   {
     int unspec = unspec_for (e);
     insn_code icode;
-    if (e.type_suffix (m_suffix_index).float_p)
+    if (e.type_suffix (m_suffix_index).float_p
+        && e.fpm_mode != FPM_set)
       {
 	/* Put the operands in the normal (fma ...) order, with the accumulator
 	   last.  This fits naturally since that's also the unprinted operand
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 62831b3c1e2..94f4da8ce31 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -96,6 +96,7 @@  apply_predication (const function_instance &instance, tree return_type,
    B       - bfloat16_t
    c       - a predicate-as-counter
    h<elt>  - a half-sized version of <elt>
+   M       - mfloat8_t
    p       - a predicate (represented as TYPE_SUFFIX_b)
    q<elt>  - a quarter-sized version of <elt>
    s<bits> - a signed type with the given number of bits
@@ -140,6 +141,9 @@  parse_element_type (const function_instance &instance, const char *&format)
   if (ch == 'B')
     return TYPE_SUFFIX_bf16;
 
+  if (ch == 'M')
+    return TYPE_SUFFIX_mf8;
+
   if (ch == 'q')
     {
       type_suffix_index suffix = parse_element_type (instance, format);
@@ -4015,6 +4019,44 @@  SHAPE (ternary_bfloat_lane)
 typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def;
 SHAPE (ternary_bfloat_lanex2)
 
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t, uint64_t)
+
+   where the final argument is an integer constant expression in the range
+   [0, 15].  */
+struct ternary_mfloat8_lane_def
+    : public ternary_resize2_lane_base<8, TYPE_mfloat, TYPE_mfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    gcc_assert (group.fpm_mode == FPM_set);
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vM,vM,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return c.require_immediate_lane_index (3, 2, 1);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    type_suffix_index type;
+    if (!r.check_num_arguments (5)
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+	|| !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t)
+	|| !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t)
+	|| !r.require_integer_immediate (3)
+	|| !r.require_scalar_type (4, "uint64_t"))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8, GROUP_none);
+  }
+};
+SHAPE (ternary_mfloat8_lane)
+
 /* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloatt16_t, svbfloat16_t)
    sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t).  */
 struct ternary_bfloat_opt_n_def
@@ -4030,6 +4072,42 @@  struct ternary_bfloat_opt_n_def
 };
 SHAPE (ternary_bfloat_opt_n)
 
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t)
+   sv<t0>_t svfoo[_n_t0](sv<t0>_t, svmfloat8_t, mfloat8_t).  */
+struct ternary_mfloat8_opt_n_def
+    : public ternary_resize2_opt_n_base<8, TYPE_mfloat, TYPE_mfloat>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    gcc_assert (group.fpm_mode == FPM_set);
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "v0,v0,vM,vM", group, MODE_none);
+    build_all (b, "v0,v0,vM,sM", group, MODE_n);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    type_suffix_index type;
+    if (!r.check_num_arguments (4)
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+	|| !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t)
+	|| !r.require_vector_or_scalar_type (2)
+	|| !r.require_scalar_type (3, "uint64_t"))
+      return error_mark_node;
+
+    auto mode = r.mode_suffix_id;
+    if (r.scalar_argument_p (2))
+      mode = MODE_n;
+    else if (!r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t))
+      return error_mark_node;
+
+    return r.resolve_to (mode, type, TYPE_SUFFIX_mf8, GROUP_none);
+  }
+};
+SHAPE (ternary_mfloat8_opt_n)
+
 /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t,
 		       uint64_t)
 
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index dc3d4557288..1c8937ae027 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -213,6 +213,8 @@  namespace aarch64_sve
     extern const function_shape *const ternary_lane_rotate;
     extern const function_shape *const ternary_long_lane;
     extern const function_shape *const ternary_long_opt_n;
+    extern const function_shape *const ternary_mfloat8_lane;
+    extern const function_shape *const ternary_mfloat8_opt_n;
     extern const function_shape *const ternary_opt_n;
     extern const function_shape *const ternary_qq_or_011_lane;
     extern const function_shape *const ternary_qq_lane_rotate;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 1a1d2c4c6ec..ad52030f226 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -990,16 +990,34 @@  FUNCTION (svminnmqv, reduction, (-1, -1, UNSPEC_FMINNMQV))
 FUNCTION (svminp, unspec_based_pred_function, (UNSPEC_SMINP, UNSPEC_UMINP,
 					       UNSPEC_FMINP))
 FUNCTION (svminqv, reduction, (UNSPEC_SMINQV, UNSPEC_UMINQV, UNSPEC_FMINQV))
-FUNCTION (svmlalb, unspec_based_mla_function, (UNSPEC_SMULLB,
-					       UNSPEC_UMULLB, UNSPEC_FMLALB))
-FUNCTION (svmlalb_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLB,
-							 UNSPEC_UMULLB,
-							 UNSPEC_FMLALB))
-FUNCTION (svmlalt, unspec_based_mla_function, (UNSPEC_SMULLT,
-					       UNSPEC_UMULLT, UNSPEC_FMLALT))
-FUNCTION (svmlalt_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLT,
-							 UNSPEC_UMULLT,
-							 UNSPEC_FMLALT))
+FUNCTION (svmlalb_lane, unspec_based_mla_lane_function,
+	  (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLALB,
+	   UNSPEC_FMLALB_FP8))
+FUNCTION (svmlalb, unspec_based_mla_function,
+	  (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLALB,
+	   UNSPEC_FMLALB_FP8))
+FUNCTION (svmlallbb_lane, unspec_based_mla_lane_function,
+	  (-1, -1, -1, UNSPEC_FMLALLBB_FP8))
+FUNCTION (svmlallbb, unspec_based_mla_function,
+	  (-1, -1, -1, UNSPEC_FMLALLBB_FP8))
+FUNCTION (svmlallbt_lane, unspec_based_mla_lane_function,
+	  (-1, -1, -1, UNSPEC_FMLALLBT_FP8))
+FUNCTION (svmlallbt, unspec_based_mla_function,
+	  (-1, -1, -1, UNSPEC_FMLALLBT_FP8))
+FUNCTION (svmlalltb_lane, unspec_based_mla_lane_function,
+	  (-1, -1, -1, UNSPEC_FMLALLTB_FP8))
+FUNCTION (svmlalltb, unspec_based_mla_function,
+	  (-1, -1, -1, UNSPEC_FMLALLTB_FP8))
+FUNCTION (svmlalltt_lane, unspec_based_mla_lane_function,
+	  (-1, -1, -1, UNSPEC_FMLALLTT_FP8))
+FUNCTION (svmlalltt, unspec_based_mla_function,
+	  (-1, -1, -1, UNSPEC_FMLALLTT_FP8))
+FUNCTION (svmlalt_lane, unspec_based_mla_lane_function,
+	  (UNSPEC_SMULLT, UNSPEC_UMULLT, UNSPEC_FMLALT,
+	   UNSPEC_FMLALT_FP8))
+FUNCTION (svmlalt, unspec_based_mla_function,
+	  (UNSPEC_SMULLT, UNSPEC_UMULLT, UNSPEC_FMLALT,
+	   UNSPEC_FMLALT_FP8))
 FUNCTION (svmlslb, unspec_based_mls_function, (UNSPEC_SMULLB,
 					       UNSPEC_UMULLB, UNSPEC_FMLSLB))
 FUNCTION (svmlslb_lane, unspec_based_mls_lane_function, (UNSPEC_SMULLB,
@@ -1072,15 +1090,15 @@  FUNCTION (svqrdmulh_lane, unspec_based_lane_function, (UNSPEC_SQRDMULH,
 						       -1, -1))
 FUNCTION (svqrshl, svqrshl_impl,)
 FUNCTION (svqrshr, unspec_based_uncond_function, (UNSPEC_SQRSHR,
-						  UNSPEC_UQRSHR, -1, 1))
+						  UNSPEC_UQRSHR, -1, -1, 1))
 FUNCTION (svqrshrn, unspec_based_uncond_function, (UNSPEC_SQRSHRN,
-						   UNSPEC_UQRSHRN, -1, 1))
+						   UNSPEC_UQRSHRN, -1, -1, 1))
 FUNCTION (svqrshrnb, unspec_based_function, (UNSPEC_SQRSHRNB,
 					     UNSPEC_UQRSHRNB, -1))
 FUNCTION (svqrshrnt, unspec_based_function, (UNSPEC_SQRSHRNT,
 					     UNSPEC_UQRSHRNT, -1))
-FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, 1))
-FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, 1))
+FUNCTION (svqrshru, unspec_based_uncond_function, (UNSPEC_SQRSHRU, -1, -1, -1, 1))
+FUNCTION (svqrshrun, unspec_based_uncond_function, (UNSPEC_SQRSHRUN, -1, -1, -1, 1))
 FUNCTION (svqrshrunb, unspec_based_function, (UNSPEC_SQRSHRUNB, -1, -1))
 FUNCTION (svqrshrunt, unspec_based_function, (UNSPEC_SQRSHRUNT, -1, -1))
 FUNCTION (svqshl, svqshl_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 8a63998fcc6..b489e8fad2f 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -379,3 +379,20 @@  DEF_SVE_FUNCTION_GS_FPM (svcvtn, unary_convertxn_narrow, cvtn_mf8, x2, none, set
 DEF_SVE_FUNCTION_GS_FPM (svcvtnb, unary_convertxn_narrow, cvtnx_mf8, x2, none, set)
 DEF_SVE_FUNCTION_GS_FPM (svcvtnt, unary_convertxn_narrowt, cvtnx_mf8, x2, none, set)
 #undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+  streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8FMA, \
+			AARCH64_FL_SSVE_FP8FMA)
+DEF_SVE_FUNCTION_GS_FPM (svmlalb, ternary_mfloat8_opt_n, h_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalt, ternary_mfloat8_opt_n, h_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalb_lane, ternary_mfloat8_lane, h_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalt_lane, ternary_mfloat8_lane, h_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlallbb, ternary_mfloat8_opt_n, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlallbt, ternary_mfloat8_opt_n, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalltb, ternary_mfloat8_opt_n, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalltt, ternary_mfloat8_opt_n, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalltt_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index d26751e8042..ff3e0cc0e9f 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -108,6 +108,14 @@  namespace aarch64_sve
     extern const function_base *const svminqv;
     extern const function_base *const svmlalb;
     extern const function_base *const svmlalb_lane;
+    extern const function_base *const svmlallbb_lane;
+    extern const function_base *const svmlallbb;
+    extern const function_base *const svmlallbt_lane;
+    extern const function_base *const svmlallbt;
+    extern const function_base *const svmlalltb_lane;
+    extern const function_base *const svmlalltb;
+    extern const function_base *const svmlalltt_lane;
+    extern const function_base *const svmlalltt;
     extern const function_base *const svmlalt;
     extern const function_base *const svmlalt_lane;
     extern const function_base *const svmlslb;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 4201ece9d59..00284162cc0 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -347,10 +347,18 @@  CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_s_data (S, D), \
   TYPES_d_data (S, D)
 
+/* _f16_mf8.  */
+#define TYPES_h_float_mf8(S, D) \
+  D (f16, mf8)
+
 /* _f32.  */
 #define TYPES_s_float(S, D) \
   S (f32)
 
+/* _f32_mf8.  */
+#define TYPES_s_float_mf8(S, D) \
+  D (f32, mf8)
+
 /*      _f32
    _s16 _s32 _s64
    _u16 _u32 _u64.  */
@@ -777,6 +785,7 @@  DEF_SVE_TYPES_ARRAY (bhs_widen);
 DEF_SVE_TYPES_ARRAY (c);
 DEF_SVE_TYPES_ARRAY (h_bfloat);
 DEF_SVE_TYPES_ARRAY (h_float);
+DEF_SVE_TYPES_ARRAY (h_float_mf8);
 DEF_SVE_TYPES_ARRAY (h_integer);
 DEF_SVE_TYPES_ARRAY (hs_signed);
 DEF_SVE_TYPES_ARRAY (hs_integer);
@@ -788,6 +797,7 @@  DEF_SVE_TYPES_ARRAY (hsd_integer);
 DEF_SVE_TYPES_ARRAY (hsd_data);
 DEF_SVE_TYPES_ARRAY (s_float);
 DEF_SVE_TYPES_ARRAY (s_float_hsd_integer);
+DEF_SVE_TYPES_ARRAY (s_float_mf8);
 DEF_SVE_TYPES_ARRAY (s_float_sd_integer);
 DEF_SVE_TYPES_ARRAY (s_signed);
 DEF_SVE_TYPES_ARRAY (s_unsigned);
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index e5bd2861b48..5498eac0b03 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -67,6 +67,7 @@ 
 ;; ---- [INT] Shift-and-accumulate operations
 ;; ---- [INT] Shift-and-insert operations
 ;; ---- [INT] Sum of absolute differences
+;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
 ;;
 ;; == Extending arithmetic
 ;; ---- [INT] Multi-register widening conversions
@@ -1993,6 +1994,86 @@  (define_insn "*aarch64_sve2_<su>aba<mode>"
   }
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLALB (vectors, FP8 to FP16)
+;; - FMLALT (vectors, FP8 to FP16)
+;; - FMLALB (indexed, FP8 to FP16)
+;; - FMLALT (indexed, FP8 to FP16)
+;; - FMLALLBB (vectors)
+;; - FMLALLBB (indexed)
+;; - FMLALLBT (vectors)
+;; - FMLALLBT (indexed)
+;; - FMLALLTB (vectors)
+;; - FMLALLTB (indexed)
+;; - FMLALLTT (vectors)
+;; - FMLALLTT (indexed)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>"
+  [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
+	(unspec:VNx8HF_ONLY
+	  [(match_operand:VNx8HF 1 "register_operand")
+	   (match_operand:VNx16QI 2 "register_operand")
+	   (match_operand:VNx16QI 3 "register_operand")
+	   (reg:DI FPM_REGNUM)]
+	  SVE2_FP8_TERNARY_VNX8HF))]
+  "TARGET_SSVE_FP8FMA"
+  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+     [ w        , 0 , w , w ; *              ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
+  }
+)
+
+(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>"
+  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
+	(unspec:VNx4SF_ONLY
+	  [(match_operand:VNx4SF 1 "register_operand")
+	   (match_operand:VNx16QI 2 "register_operand")
+	   (match_operand:VNx16QI 3 "register_operand")
+	   (reg:DI FPM_REGNUM)]
+	  SVE2_FP8_TERNARY_VNX4SF))]
+  "TARGET_SSVE_FP8FMA"
+  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+     [ w        , 0 , w , w ; *              ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
+  }
+)
+
+(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>"
+  [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
+	(unspec:VNx8HF_ONLY
+	  [(match_operand:VNx8HF 1 "register_operand")
+	   (match_operand:VNx16QI 2 "register_operand")
+	   (match_operand:VNx16QI 3 "register_operand")
+	   (match_operand:SI 4 "const_int_operand")
+	   (reg:DI FPM_REGNUM)]
+	  SVE2_FP8_TERNARY_LANE_VNX8HF))]
+  "TARGET_SSVE_FP8FMA"
+  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+     [ w        , 0 , w , y ; *              ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
+     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
+  }
+)
+
+(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>"
+  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
+	(unspec:VNx4SF_ONLY
+	  [(match_operand:VNx4SF 1 "register_operand")
+	   (match_operand:VNx16QI 2 "register_operand")
+	   (match_operand:VNx16QI 3 "register_operand")
+	   (match_operand:SI 4 "const_int_operand")
+	   (reg:DI FPM_REGNUM)]
+	  SVE2_FP8_TERNARY_LANE_VNX4SF))]
+  "TARGET_SSVE_FP8FMA"
+  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+     [ w        , 0 , w , y ; *              ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
+     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
+  }
+)
+
 ;; =========================================================================
 ;; == Extending arithmetic
 ;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index f43b1659db6..80a1fa40709 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -518,6 +518,15 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 			 && (TARGET_SVE2 || TARGET_STREAMING) \
 			 && (TARGET_SME2 || TARGET_NON_STREAMING))
 
+/* fp8 multiply-accumulate instructions are enabled through +fp8fma.  */
+#define TARGET_FP8FMA AARCH64_HAVE_ISA (FP8FMA)
+
+/* SVE2 versions of fp8 multiply-accumulate instructions are enabled for
+   non-streaming mode by +fp8fma and for streaming mode by +ssve-fp8fma.  */
+#define TARGET_SSVE_FP8FMA \
+  (((TARGET_SVE2 && TARGET_FP8FMA) || TARGET_STREAMING) \
+   && (AARCH64_HAVE_ISA (SSVE_FP8FMA) || TARGET_NON_STREAMING))
+
 /* Standard register usage.  */
 
 /* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 26716d593de..4b265a73d9a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -430,6 +430,7 @@  (define_mode_iterator VMULD [V4HI V8HI V2SI V4SI
 (define_mode_iterator VNx16QI_ONLY [VNx16QI])
 (define_mode_iterator VNx16SI_ONLY [VNx16SI])
 (define_mode_iterator VNx8HI_ONLY [VNx8HI])
+(define_mode_iterator VNx8HF_ONLY [VNx8HF])
 (define_mode_iterator VNx8BF_ONLY [VNx8BF])
 (define_mode_iterator VNx8SI_ONLY [VNx8SI])
 (define_mode_iterator VNx8SF_ONLY [VNx8SF])
@@ -975,7 +976,13 @@  (define_c_enum "unspec"
     UNSPEC_FMINNMP	; Used in aarch64-sve2.md.
     UNSPEC_FMINP	; Used in aarch64-sve2.md.
     UNSPEC_FMLALB	; Used in aarch64-sve2.md.
+    UNSPEC_FMLALB_FP8	; Used in aarch64-sve2.md.
+    UNSPEC_FMLALLBB_FP8	; Used in aarch64-sve2.md.
+    UNSPEC_FMLALLBT_FP8	; Used in aarch64-sve2.md.
+    UNSPEC_FMLALLTB_FP8	; Used in aarch64-sve2.md.
+    UNSPEC_FMLALLTT_FP8	; Used in aarch64-sve2.md.
     UNSPEC_FMLALT	; Used in aarch64-sve2.md.
+    UNSPEC_FMLALT_FP8	; Used in aarch64-sve2.md.
     UNSPEC_FMLSLB	; Used in aarch64-sve2.md.
     UNSPEC_FMLSLT	; Used in aarch64-sve2.md.
     UNSPEC_FP8FCVTN	; Used in aarch64-sve2.md.
@@ -4755,3 +4762,33 @@  (define_int_attr fp8_cvt_uns_op
    (UNSPEC_F2CVT "f2cvt")
    (UNSPEC_F1CVTLT "f1cvtlt")
    (UNSPEC_F2CVTLT "f2cvtlt")])
+
+(define_int_iterator SVE2_FP8_TERNARY_VNX8HF
+  [UNSPEC_FMLALB_FP8
+   UNSPEC_FMLALT_FP8])
+
+(define_int_iterator SVE2_FP8_TERNARY_VNX4SF
+  [UNSPEC_FMLALLBB_FP8
+   UNSPEC_FMLALLBT_FP8
+   UNSPEC_FMLALLTB_FP8
+   UNSPEC_FMLALLTT_FP8])
+
+(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX8HF
+  [UNSPEC_FMLALB_FP8
+   UNSPEC_FMLALT_FP8])
+
+(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX4SF
+  [UNSPEC_FMLALLBB_FP8
+   UNSPEC_FMLALLBT_FP8
+   UNSPEC_FMLALLTB_FP8
+   UNSPEC_FMLALLTT_FP8])
+
+(define_int_attr sve2_fp8_fma_op_vnx8hf
+  [(UNSPEC_FMLALB_FP8 "fmlalb")
+   (UNSPEC_FMLALT_FP8 "fmlalt")])
+
+(define_int_attr sve2_fp8_fma_op_vnx4sf
+  [(UNSPEC_FMLALLBB_FP8 "fmlallbb")
+   (UNSPEC_FMLALLBT_FP8 "fmlallbt")
+   (UNSPEC_FMLALLTB_FP8 "fmlalltb")
+   (UNSPEC_FMLALLTT_FP8 "fmlalltt")])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1b7b712085f..2a4f016e2df 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21952,6 +21952,11 @@  Enable support for Armv8.9-a/9.4-a translation hardening extension.
 Enable the RCpc3 (Release Consistency) extension.
 @item fp8
 Enable the fp8 (8-bit floating point) extension.
+@item fp8fma
+Enable the fp8 (8-bit floating point) multiply accumulate extension.
+@item ssve-fp8fma
+Enable the fp8 (8-bit floating point) multiply accumulate extension in streaming
+mode.
 @item faminmax
 Enable the Floating Point Absolute Maximum/Minimum extension.
 @item sve-b16b16
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 4a146c3e157..d3ae707ac49 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -84,7 +84,7 @@ 
 #define TEST_DUAL_Z_REV(NAME, TYPE1, TYPE2, CODE1, CODE2)	\
   PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3,	\
 		       TYPE1 z4, TYPE1 z5, TYPE1 z6, TYPE1 z7,	\
-		       svbool_t p0, svbool_t p1))		\
+		       svbool_t p0, svbool_t p1, fpm_t fpm0))	\
   {								\
     TYPE1 z0_res;						\
     INVOKE (CODE1, CODE2);					\
@@ -136,7 +136,7 @@ 
   }
 
 #define TEST_DUAL_LANE_REG(NAME, ZTYPE1, ZTYPE2, REG, CODE1, CODE2) \
-  PROTO (NAME, void, (void))					\
+  PROTO (NAME, void, (fpm_t fpm0))				\
   {								\
     register ZTYPE1 z0 __asm ("z0");				\
     register ZTYPE2 z1 __asm ("z1");				\
@@ -194,7 +194,7 @@ 
   PROTO (NAME, ZTYPE1, (ZTYPE1 z0, ZTYPE1 z1, ZTYPE1 z2,	\
 			ZTYPE1 z3, ZTYPE2 z4, ZTYPE2 z5,	\
 			ZTYPE2 z6, STYPE d7, svbool_t p0,	\
-			svbool_t p1))				\
+			svbool_t p1, fpm_t fpm0))		\
   {								\
     INVOKE (CODE1, CODE2);					\
     return z0;							\
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c
new file mode 100644
index 00000000000..6bdd3c06dc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_1.c
@@ -0,0 +1,84 @@ 
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+ssve-fp8fma")
+
+void
+f1 (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm,
+    svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f, int i)
+    __arm_streaming
+{
+  svmlalb_lane_fpm (f16, f8, f8, 0, fpm);
+  svmlalb_lane_fpm (f16, f8, f8, 7, fpm);
+  svmlalb_lane_fpm (f16, f8, f8, 8, fpm);
+  svmlalb_lane_fpm (f16, f8, f8, 15, fpm);
+
+  svmlalb_lane_fpm (f16); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8, f8, 0); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f8, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlalb_lane_fpm'} } */
+
+  svmlalb_lane_fpm (f16, f8, f8, 15, 0, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8, f8, 15, fpm, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f8, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */
+  svmlalb_lane_fpm (f16, f16, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlalb_lane_fpm'} } */
+
+  svmlalb_lane_fpm (f32, bf16, bf16, 0, fpm); /* { dg-error {passing 'svbfloat16_t' to argument 2 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_lane_fpm (0, f8, f8, 0, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_lane_fpm', which expects an SVE type rather than a scalar} } */
+  svmlalb_lane_fpm (pg, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_lane_fpm (u8, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_lane_fpm (u16, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_lane_fpm (f32, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svfloat32_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_lane_fpm (f64, f8, f8, 0, fpm); /* { dg-error {'svmlalb_lane_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_lane_fpm (f16, 0, f8, 0, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_lane_fpm (f16, f32, f8, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_lane_fpm (f16, f8, 0, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_lane_fpm (f16, f8, f32, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svmlalb_lane_fpm', which expects 'svmfloat8_t'} } */
+
+  svmlalb_lane_fpm (f16, f8, f8, s32, fpm); /* { dg-error {argument 4 of 'svmlalb_lane_fpm' must be an integer constant expression} } */
+  svmlalb_lane_fpm (f16, f8, f8, i, fpm); /* { dg-error {argument 4 of 'svmlalb_lane_fpm' must be an integer constant expression} } */
+  svmlalb_lane_fpm (f16, f8, f8, 16, fpm); /* { dg-error {passing 16 to argument 4 of 'svmlalb_lane_fpm', which expects a value in the range \[0, 15\]} } */
+  svmlalb_lane_fpm (f16, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svmlalb_lane_fpm', which expects a value in the range \[0, 15\]} } */
+  svmlalb_lane_fpm (f16, f8, f8, 15, f8); /* { dg-error {passing 'svmfloat8_t' to argument 5 of 'svmlalb_lane_fpm', which expects 'uint64_t'} } */
+
+
+  svmlallbb_lane_fpm (f32, f8, f8, 0, fpm);
+  svmlallbb_lane_fpm (f32, f8, f8, 7, fpm);
+  svmlallbb_lane_fpm (f32, f8, f8, 8, fpm);
+  svmlallbb_lane_fpm (f32, f8, f8, 15, fpm);
+
+  svmlallbb_lane_fpm (f32); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8, f8); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8, f8, 0); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f8, f8, 15, fpm); /* { dg-error {too few arguments to function 'svmlallbb_lane_fpm'} } */
+
+  svmlallbb_lane_fpm (f32, f8, f8, 15, 0, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8, f8, 15, fpm, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f8, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */
+  svmlallbb_lane_fpm (f32, f16, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svmlallbb_lane_fpm'} } */
+
+  svmlallbb_lane_fpm (f32, bf16, bf16, 0, fpm); /* { dg-error {passing 'svbfloat16_t' to argument 2 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_lane_fpm (0, f8, f8, 0, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlallbb_lane_fpm', which expects an SVE type rather than a scalar} } */
+  svmlallbb_lane_fpm (pg, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_lane_fpm (u8, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_lane_fpm (u16, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_lane_fpm (f16, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svfloat16_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_lane_fpm (f64, f8, f8, 0, fpm); /* { dg-error {'svmlallbb_lane_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_lane_fpm (f32, 0, f8, 0, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_lane_fpm (f32, f32, f8, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_lane_fpm (f32, f8, 0, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_lane_fpm (f32, f8, f32, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svmlallbb_lane_fpm', which expects 'svmfloat8_t'} } */
+
+  svmlallbb_lane_fpm (f32, f8, f8, s32, fpm); /* { dg-error {argument 4 of 'svmlallbb_lane_fpm' must be an integer constant expression} } */
+  svmlallbb_lane_fpm (f32, f8, f8, i, fpm); /* { dg-error {argument 4 of 'svmlallbb_lane_fpm' must be an integer constant expression} } */
+  svmlallbb_lane_fpm (f32, f8, f8, 16, fpm); /* { dg-error {passing 16 to argument 4 of 'svmlallbb_lane_fpm', which expects a value in the range \[0, 15\]} } */
+  svmlallbb_lane_fpm (f32, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svmlallbb_lane_fpm', which expects a value in the range \[0, 15\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c
new file mode 100644
index 00000000000..1b6ff882e68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_opt_n_1.c
@@ -0,0 +1,60 @@ 
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve2+fp8fma")
+
+void
+test (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm,
+    svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+    svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f)
+{
+  svmlalb_fpm (f16, f8, f8, fpm);
+  svmlalt_fpm (f16, f8, f8, fpm);
+  svmlalb_fpm (f16, f8, f, fpm);
+
+  svmlalb_fpm (f16); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */
+  svmlalb_fpm (f16, f8); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */
+  svmlalb_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */
+  svmlalb_fpm (f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */
+  svmlalb_fpm (f16, f8, fpm); /* { dg-error {too few arguments to function 'svmlalb_fpm'} } */
+  svmlalb_fpm (f16, f8, f8, fpm, 0); /* { dg-error {too many arguments to function 'svmlalb_fpm'} } */
+
+  svmlalt_fpm (f32, f8, f8, fpm); /* { dg-error {'svmlalt_fpm' has no form that takes 'svfloat32_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_fpm (0, f8, f8, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_fpm', which expects an SVE type rather than a scalar} } */
+  svmlalb_fpm (pg, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_fpm (u8, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_fpm (u16, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_fpm (f64, f8, f8, fpm); /* { dg-error {'svmlalb_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */
+  svmlalb_fpm (f16, 0, f8, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_fpm (f16, f16, f8, fpm); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmlalb_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_fpm (f16, f8, 0, fpm); /* { dg-error {invalid conversion to type 'mfloat8_t'} } */
+  svmlalb_fpm (f16, f8, f16, fpm); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmlalb_fpm', which expects 'svmfloat8_t'} } */
+  svmlalb_fpm (f16, f8, f8, f8); /* { dg-error {passing 'svmfloat8_t' to argument 4 of 'svmlalb_fpm', which expects 'uint64_t'} } */
+
+
+  svmlallbb_fpm (f32, f8, f8, fpm);
+  svmlallbt_fpm (f32, f8, f8, fpm);
+  svmlalltb_fpm (f32, f8, f8, fpm);
+  svmlalltt_fpm (f32, f8, f8, fpm);
+  svmlallbb_fpm (f32, f8, f, fpm);
+
+  svmlallbb_fpm (f16, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svfloat16_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_fpm (f32); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */
+  svmlallbb_fpm (f32, f8); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */
+  svmlallbb_fpm (f32, f8, f8); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */
+  svmlallbb_fpm (f8, f8, fpm); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */
+  svmlallbb_fpm (f32, f8, fpm); /* { dg-error {too few arguments to function 'svmlallbb_fpm'} } */
+  svmlallbb_fpm (f32, f8, f8, fpm, 0); /* { dg-error {too many arguments to function 'svmlallbb_fpm'} } */
+  svmlallbb_fpm (0, f8, f8, fpm); /* { dg-error {passing 'int' to argument 1 of 'svmlallbb_fpm', which expects an SVE type rather than a scalar} } */
+  svmlallbb_fpm (pg, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_fpm (u8, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_fpm (u16, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_fpm (f64, f8, f8, fpm); /* { dg-error {'svmlallbb_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */
+  svmlallbb_fpm (f32, 0, f8, fpm); /* { dg-error {passing 'int' to argument 2 of 'svmlallbb_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_fpm (f32, f16, f8, fpm); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmlallbb_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_fpm (f32, f8, 0, fpm); /* { dg-error {invalid conversion to type 'mfloat8_t'} } */
+  svmlallbb_fpm (f32, f8, f16, fpm); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmlallbb_fpm', which expects 'svmfloat8_t'} } */
+  svmlallbb_fpm (f32, f8, f8, f8); /* { dg-error {passing 'svmfloat8_t' to argument 4 of 'svmlallbb_fpm', which expects 'uint64_t'} } */
+
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
new file mode 100644
index 00000000000..e7af1b6dcc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_lane_mf8.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalb_lane_0_f16_tied1:
+** 	msr	fpmr, x0
+**	fmlalb	z0\.h, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalb_lane_0_f16_tied1, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalb_lane_f16_mf8_fpm (z0, z4, z5, 0, fpm0),
+	     z0 = svmlalb_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** mlalb_lane_0_f16_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalb	z0\.h, \1\.b, z1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalb_lane_0_f16_tied2, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalb_lane_f16_mf8_fpm (z4, z0, z1, 0, fpm0),
+		 z0_res = svmlalb_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** mlalb_lane_0_f16_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalb	z0\.h, z1\.b, \1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalb_lane_0_f16_tied3, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalb_lane_f16_mf8_fpm (z4, z1, z0, 0, fpm0),
+		 z0_res = svmlalb_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** mlalb_lane_0_f16_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalb	z0\.h, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalb_lane_0_f16_untied, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalb_lane_f16_mf8_fpm (z1, z4, z5, 0, fpm0),
+	     z0 = svmlalb_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** mlalb_lane_1_f16:
+** 	msr	fpmr, x0
+**	fmlalb	z0\.h, z4\.b, z5\.b\[1\]
+**	ret
+*/
+TEST_DUAL_Z (mlalb_lane_1_f16, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalb_lane_f16_mf8_fpm (z0, z4, z5, 1, fpm0),
+	     z0 = svmlalb_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** mlalb_lane_z8_f16:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z8\.d
+**	fmlalb	z0\.h, z1\.b, \1\.b\[1\]
+**	ldr	d8, \[sp\], 32
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalb_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
+		    z0 = svmlalb_lane_f16_mf8_fpm (z0, z1, z8, 1, fpm0),
+		    z0 = svmlalb_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** mlalb_lane_z16_f16:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z16\.d
+**	fmlalb	z0\.h, z1\.b, \1\.b\[15\]
+**	...
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalb_lane_z16_f16, svfloat16_t, svmfloat8_t, z16,
+		    z0 = svmlalb_lane_f16_mf8_fpm (z0, z1, z16, 15, fpm0),
+		    z0 = svmlalb_lane_fpm (z0, z1, z16, 15, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c
new file mode 100644
index 00000000000..424640031fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalb_mf8.c
@@ -0,0 +1,78 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalb_f16_mf8_tied1:
+** 	msr	fpmr, x0
+**	fmlalb	z0\.h, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalb_f16_mf8_tied1, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalb_f16_mf8_fpm (z0, z4, z5, fpm0),
+	     z0 = svmlalb_fpm (z0, z4, z5, fpm0))
+
+/*
+** mlalb_f16_mf8_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalb	z0\.h, \1\.b, z1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalb_f16_mf8_tied2, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalb_f16_mf8_fpm (z4, z0, z1, fpm0),
+		 z0_res = svmlalb_fpm (z4, z0, z1, fpm0))
+
+/*
+** mlalb_f16_mf8_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalb	z0\.h, z1\.b, \1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalb_f16_mf8_tied3, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalb_f16_mf8_fpm (z4, z1, z0, fpm0),
+		 z0_res = svmlalb_fpm (z4, z1, z0, fpm0))
+
+/*
+** mlalb_f16_mf8_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalb	z0\.h, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalb_f16_mf8_untied, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalb_f16_mf8_fpm (z1, z4, z5, fpm0),
+	     z0 = svmlalb_fpm (z1, z4, z5, fpm0))
+
+/*
+** mlalb_h7_f16_tied1:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	fmlalb	z0\.h, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalb_h7_f16_tied1, svfloat16_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalb_n_f16_mf8_fpm (z0, z4, d7, fpm0),
+	      z0 = svmlalb_fpm (z0, z4, d7, fpm0))
+
+/*
+** mlalb_h7_f16_untied:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	movprfx	z0, z1
+**	fmlalb	z0\.h, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalb_h7_f16_untied, svfloat16_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalb_n_f16_mf8_fpm (z1, z4, d7, fpm0),
+	      z0 = svmlalb_fpm (z1, z4, d7, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
new file mode 100644
index 00000000000..07a529d8dc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_lane_mf8.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlallbb_lane_0_f32_tied1:
+** 	msr	fpmr, x0
+**	fmlallbb	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlallbb_lane_0_f32_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbb_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0),
+	     z0 = svmlallbb_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** mlallbb_lane_0_f32_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbb	z0\.s, \1\.b, z1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbb_lane_0_f32_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbb_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0),
+		 z0_res = svmlallbb_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** mlallbb_lane_0_f32_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbb	z0\.s, z1\.b, \1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbb_lane_0_f32_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbb_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0),
+		 z0_res = svmlallbb_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** mlallbb_lane_0_f32_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlallbb	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlallbb_lane_0_f32_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbb_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0),
+	     z0 = svmlallbb_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** mlallbb_lane_1_f32:
+** 	msr	fpmr, x0
+**	fmlallbb	z0\.s, z4\.b, z5\.b\[1\]
+**	ret
+*/
+TEST_DUAL_Z (mlallbb_lane_1_f32, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbb_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0),
+	     z0 = svmlallbb_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** mlallbb_lane_z8_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z8\.d
+**	fmlallbb	z0\.s, z1\.b, \1\.b\[1\]
+**	ldr	d8, \[sp\], 32
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlallbb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
+		    z0 = svmlallbb_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0),
+		    z0 = svmlallbb_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** mlallbb_lane_z16_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z16\.d
+**	fmlallbb	z0\.s, z1\.b, \1\.b\[15\]
+**	...
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlallbb_lane_z16_f32, svfloat32_t, svmfloat8_t, z16,
+		    z0 = svmlallbb_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0),
+		    z0 = svmlallbb_lane_fpm (z0, z1, z16, 15, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c
new file mode 100644
index 00000000000..543cd9030d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbb_mf8.c
@@ -0,0 +1,78 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlallbb_f32_mf8_tied1:
+** 	msr	fpmr, x0
+**	fmlallbb	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlallbb_f32_mf8_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbb_f32_mf8_fpm (z0, z4, z5, fpm0),
+	     z0 = svmlallbb_fpm (z0, z4, z5, fpm0))
+
+/*
+** mlallbb_f32_mf8_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbb	z0\.s, \1\.b, z1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbb_f32_mf8_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbb_f32_mf8_fpm (z4, z0, z1, fpm0),
+		 z0_res = svmlallbb_fpm (z4, z0, z1, fpm0))
+
+/*
+** mlallbb_f32_mf8_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbb	z0\.s, z1\.b, \1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbb_f32_mf8_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbb_f32_mf8_fpm (z4, z1, z0, fpm0),
+		 z0_res = svmlallbb_fpm (z4, z1, z0, fpm0))
+
+/*
+** mlallbb_f32_mf8_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlallbb	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlallbb_f32_mf8_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbb_f32_mf8_fpm (z1, z4, z5, fpm0),
+	     z0 = svmlallbb_fpm (z1, z4, z5, fpm0))
+
+/*
+** mlallbb_h7_f32_tied1:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	fmlallbb	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlallbb_h7_f32_tied1, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlallbb_n_f32_mf8_fpm (z0, z4, d7, fpm0),
+	      z0 = svmlallbb_fpm (z0, z4, d7, fpm0))
+
+/*
+** mlallbb_h7_f32_untied:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	movprfx	z0, z1
+**	fmlallbb	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlallbb_h7_f32_untied, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlallbb_n_f32_mf8_fpm (z1, z4, d7, fpm0),
+	      z0 = svmlallbb_fpm (z1, z4, d7, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
new file mode 100644
index 00000000000..9da29fbfb0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_lane_mf8.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlallbt_lane_0_f32_tied1:
+** 	msr	fpmr, x0
+**	fmlallbt	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlallbt_lane_0_f32_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbt_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0),
+	     z0 = svmlallbt_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** mlallbt_lane_0_f32_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbt	z0\.s, \1\.b, z1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbt_lane_0_f32_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbt_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0),
+		 z0_res = svmlallbt_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** mlallbt_lane_0_f32_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbt	z0\.s, z1\.b, \1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbt_lane_0_f32_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbt_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0),
+		 z0_res = svmlallbt_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** mlallbt_lane_0_f32_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlallbt	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlallbt_lane_0_f32_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbt_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0),
+	     z0 = svmlallbt_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** mlallbt_lane_1_f32:
+** 	msr	fpmr, x0
+**	fmlallbt	z0\.s, z4\.b, z5\.b\[1\]
+**	ret
+*/
+TEST_DUAL_Z (mlallbt_lane_1_f32, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbt_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0),
+	     z0 = svmlallbt_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** mlallbt_lane_z8_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z8\.d
+**	fmlallbt	z0\.s, z1\.b, \1\.b\[1\]
+**	ldr	d8, \[sp\], 32
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlallbt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
+		    z0 = svmlallbt_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0),
+		    z0 = svmlallbt_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** mlallbt_lane_z16_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z16\.d
+**	fmlallbt	z0\.s, z1\.b, \1\.b\[15\]
+**	...
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlallbt_lane_z16_f32, svfloat32_t, svmfloat8_t, z16,
+		    z0 = svmlallbt_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0),
+		    z0 = svmlallbt_lane_fpm (z0, z1, z16, 15, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c
new file mode 100644
index 00000000000..aa8299c66b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlallbt_mf8.c
@@ -0,0 +1,78 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlallbt_f32_mf8_tied1:
+** 	msr	fpmr, x0
+**	fmlallbt	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlallbt_f32_mf8_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbt_f32_mf8_fpm (z0, z4, z5, fpm0),
+	     z0 = svmlallbt_fpm (z0, z4, z5, fpm0))
+
+/*
+** mlallbt_f32_mf8_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbt	z0\.s, \1\.b, z1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbt_f32_mf8_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbt_f32_mf8_fpm (z4, z0, z1, fpm0),
+		 z0_res = svmlallbt_fpm (z4, z0, z1, fpm0))
+
+/*
+** mlallbt_f32_mf8_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlallbt	z0\.s, z1\.b, \1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlallbt_f32_mf8_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlallbt_f32_mf8_fpm (z4, z1, z0, fpm0),
+		 z0_res = svmlallbt_fpm (z4, z1, z0, fpm0))
+
+/*
+** mlallbt_f32_mf8_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlallbt	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlallbt_f32_mf8_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlallbt_f32_mf8_fpm (z1, z4, z5, fpm0),
+	     z0 = svmlallbt_fpm (z1, z4, z5, fpm0))
+
+/*
+** mlallbt_h7_f32_tied1:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	fmlallbt	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlallbt_h7_f32_tied1, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlallbt_n_f32_mf8_fpm (z0, z4, d7, fpm0),
+	      z0 = svmlallbt_fpm (z0, z4, d7, fpm0))
+
+/*
+** mlallbt_h7_f32_untied:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	movprfx	z0, z1
+**	fmlallbt	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlallbt_h7_f32_untied, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlallbt_n_f32_mf8_fpm (z1, z4, d7, fpm0),
+	      z0 = svmlallbt_fpm (z1, z4, d7, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
new file mode 100644
index 00000000000..cbe297c188b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_lane_mf8.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalltb_lane_0_f32_tied1:
+** 	msr	fpmr, x0
+**	fmlalltb	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalltb_lane_0_f32_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltb_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0),
+	     z0 = svmlalltb_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** mlalltb_lane_0_f32_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltb	z0\.s, \1\.b, z1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltb_lane_0_f32_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltb_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0),
+		 z0_res = svmlalltb_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** mlalltb_lane_0_f32_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltb	z0\.s, z1\.b, \1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltb_lane_0_f32_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltb_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0),
+		 z0_res = svmlalltb_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** mlalltb_lane_0_f32_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalltb	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalltb_lane_0_f32_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltb_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0),
+	     z0 = svmlalltb_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** mlalltb_lane_1_f32:
+** 	msr	fpmr, x0
+**	fmlalltb	z0\.s, z4\.b, z5\.b\[1\]
+**	ret
+*/
+TEST_DUAL_Z (mlalltb_lane_1_f32, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltb_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0),
+	     z0 = svmlalltb_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** mlalltb_lane_z8_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z8\.d
+**	fmlalltb	z0\.s, z1\.b, \1\.b\[1\]
+**	ldr	d8, \[sp\], 32
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalltb_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
+		    z0 = svmlalltb_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0),
+		    z0 = svmlalltb_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** mlalltb_lane_z16_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z16\.d
+**	fmlalltb	z0\.s, z1\.b, \1\.b\[15\]
+**	...
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalltb_lane_z16_f32, svfloat32_t, svmfloat8_t, z16,
+		    z0 = svmlalltb_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0),
+		    z0 = svmlalltb_lane_fpm (z0, z1, z16, 15, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c
new file mode 100644
index 00000000000..a921dbd1881
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltb_mf8.c
@@ -0,0 +1,78 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalltb_f32_mf8_tied1:
+** 	msr	fpmr, x0
+**	fmlalltb	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalltb_f32_mf8_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltb_f32_mf8_fpm (z0, z4, z5, fpm0),
+	     z0 = svmlalltb_fpm (z0, z4, z5, fpm0))
+
+/*
+** mlalltb_f32_mf8_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltb	z0\.s, \1\.b, z1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltb_f32_mf8_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltb_f32_mf8_fpm (z4, z0, z1, fpm0),
+		 z0_res = svmlalltb_fpm (z4, z0, z1, fpm0))
+
+/*
+** mlalltb_f32_mf8_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltb	z0\.s, z1\.b, \1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltb_f32_mf8_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltb_f32_mf8_fpm (z4, z1, z0, fpm0),
+		 z0_res = svmlalltb_fpm (z4, z1, z0, fpm0))
+
+/*
+** mlalltb_f32_mf8_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalltb	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalltb_f32_mf8_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltb_f32_mf8_fpm (z1, z4, z5, fpm0),
+	     z0 = svmlalltb_fpm (z1, z4, z5, fpm0))
+
+/*
+** mlalltb_h7_f32_tied1:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	fmlalltb	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalltb_h7_f32_tied1, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalltb_n_f32_mf8_fpm (z0, z4, d7, fpm0),
+	      z0 = svmlalltb_fpm (z0, z4, d7, fpm0))
+
+/*
+** mlalltb_h7_f32_untied:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	movprfx	z0, z1
+**	fmlalltb	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalltb_h7_f32_untied, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalltb_n_f32_mf8_fpm (z1, z4, d7, fpm0),
+	      z0 = svmlalltb_fpm (z1, z4, d7, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
new file mode 100644
index 00000000000..fc5bfba7877
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_lane_mf8.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalltt_lane_0_f32_tied1:
+** 	msr	fpmr, x0
+**	fmlalltt	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalltt_lane_0_f32_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltt_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0),
+	     z0 = svmlalltt_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** mlalltt_lane_0_f32_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltt	z0\.s, \1\.b, z1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltt_lane_0_f32_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltt_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0),
+		 z0_res = svmlalltt_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** mlalltt_lane_0_f32_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltt	z0\.s, z1\.b, \1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltt_lane_0_f32_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltt_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0),
+		 z0_res = svmlalltt_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** mlalltt_lane_0_f32_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalltt	z0\.s, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalltt_lane_0_f32_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltt_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0),
+	     z0 = svmlalltt_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** mlalltt_lane_1_f32:
+** 	msr	fpmr, x0
+**	fmlalltt	z0\.s, z4\.b, z5\.b\[1\]
+**	ret
+*/
+TEST_DUAL_Z (mlalltt_lane_1_f32, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltt_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0),
+	     z0 = svmlalltt_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** mlalltt_lane_z8_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z8\.d
+**	fmlalltt	z0\.s, z1\.b, \1\.b\[1\]
+**	ldr	d8, \[sp\], 32
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalltt_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
+		    z0 = svmlalltt_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0),
+		    z0 = svmlalltt_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** mlalltt_lane_z16_f32:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z16\.d
+**	fmlalltt	z0\.s, z1\.b, \1\.b\[15\]
+**	...
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalltt_lane_z16_f32, svfloat32_t, svmfloat8_t, z16,
+		    z0 = svmlalltt_lane_f32_mf8_fpm (z0, z1, z16, 15, fpm0),
+		    z0 = svmlalltt_lane_fpm (z0, z1, z16, 15, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c
new file mode 100644
index 00000000000..5cd6beb348a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalltt_mf8.c
@@ -0,0 +1,78 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalltt_f32_mf8_tied1:
+** 	msr	fpmr, x0
+**	fmlalltt	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalltt_f32_mf8_tied1, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltt_f32_mf8_fpm (z0, z4, z5, fpm0),
+	     z0 = svmlalltt_fpm (z0, z4, z5, fpm0))
+
+/*
+** mlalltt_f32_mf8_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltt	z0\.s, \1\.b, z1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltt_f32_mf8_tied2, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltt_f32_mf8_fpm (z4, z0, z1, fpm0),
+		 z0_res = svmlalltt_fpm (z4, z0, z1, fpm0))
+
+/*
+** mlalltt_f32_mf8_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalltt	z0\.s, z1\.b, \1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalltt_f32_mf8_tied3, svfloat32_t, svmfloat8_t,
+		 z0_res = svmlalltt_f32_mf8_fpm (z4, z1, z0, fpm0),
+		 z0_res = svmlalltt_fpm (z4, z1, z0, fpm0))
+
+/*
+** mlalltt_f32_mf8_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalltt	z0\.s, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalltt_f32_mf8_untied, svfloat32_t, svmfloat8_t,
+	     z0 = svmlalltt_f32_mf8_fpm (z1, z4, z5, fpm0),
+	     z0 = svmlalltt_fpm (z1, z4, z5, fpm0))
+
+/*
+** mlalltt_h7_f32_tied1:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	fmlalltt	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalltt_h7_f32_tied1, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalltt_n_f32_mf8_fpm (z0, z4, d7, fpm0),
+	      z0 = svmlalltt_fpm (z0, z4, d7, fpm0))
+
+/*
+** mlalltt_h7_f32_untied:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	movprfx	z0, z1
+**	fmlalltt	z0\.s, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalltt_h7_f32_untied, svfloat32_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalltt_n_f32_mf8_fpm (z1, z4, d7, fpm0),
+	      z0 = svmlalltt_fpm (z1, z4, d7, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
new file mode 100644
index 00000000000..4f5a1045420
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_lane_mf8.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalt_lane_0_f16_tied1:
+** 	msr	fpmr, x0
+**	fmlalt	z0\.h, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalt_lane_0_f16_tied1, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalt_lane_f16_mf8_fpm (z0, z4, z5, 0, fpm0),
+	     z0 = svmlalt_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** mlalt_lane_0_f16_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalt	z0\.h, \1\.b, z1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalt_lane_0_f16_tied2, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalt_lane_f16_mf8_fpm (z4, z0, z1, 0, fpm0),
+		 z0_res = svmlalt_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** mlalt_lane_0_f16_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalt	z0\.h, z1\.b, \1\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalt_lane_0_f16_tied3, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalt_lane_f16_mf8_fpm (z4, z1, z0, 0, fpm0),
+		 z0_res = svmlalt_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** mlalt_lane_0_f16_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalt	z0\.h, z4\.b, z5\.b\[0\]
+**	ret
+*/
+TEST_DUAL_Z (mlalt_lane_0_f16_untied, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalt_lane_f16_mf8_fpm (z1, z4, z5, 0, fpm0),
+	     z0 = svmlalt_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** mlalt_lane_1_f16:
+** 	msr	fpmr, x0
+**	fmlalt	z0\.h, z4\.b, z5\.b\[1\]
+**	ret
+*/
+TEST_DUAL_Z (mlalt_lane_1_f16, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalt_lane_f16_mf8_fpm (z0, z4, z5, 1, fpm0),
+	     z0 = svmlalt_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** mlalt_lane_z8_f16:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z8\.d
+**	fmlalt	z0\.h, z1\.b, \1\.b\[1\]
+**	ldr	d8, \[sp\], 32
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalt_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
+		    z0 = svmlalt_lane_f16_mf8_fpm (z0, z1, z8, 1, fpm0),
+		    z0 = svmlalt_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** mlalt_lane_z16_f16:
+**	...
+** 	msr	fpmr, x0
+**	mov	(z[0-7])\.d, z16\.d
+**	fmlalt	z0\.h, z1\.b, \1\.b\[15\]
+**	...
+**	ret
+*/
+TEST_DUAL_LANE_REG (mlalt_lane_z16_f16, svfloat16_t, svmfloat8_t, z16,
+		    z0 = svmlalt_lane_f16_mf8_fpm (z0, z1, z16, 15, fpm0),
+		    z0 = svmlalt_lane_fpm (z0, z1, z16, 15, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c
new file mode 100644
index 00000000000..3a305d31cb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/mlalt_mf8.c
@@ -0,0 +1,78 @@ 
+/* { dg-do assemble { target aarch64_asm_fp8fma_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_fp8fma_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+fp8fma"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+ssve-fp8fma"
+#endif
+
+/*
+** mlalt_f16_mf8_tied1:
+** 	msr	fpmr, x0
+**	fmlalt	z0\.h, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalt_f16_mf8_tied1, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalt_f16_mf8_fpm (z0, z4, z5, fpm0),
+	     z0 = svmlalt_fpm (z0, z4, z5, fpm0))
+
+/*
+** mlalt_f16_mf8_tied2:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalt	z0\.h, \1\.b, z1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalt_f16_mf8_tied2, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalt_f16_mf8_fpm (z4, z0, z1, fpm0),
+		 z0_res = svmlalt_fpm (z4, z0, z1, fpm0))
+
+/*
+** mlalt_f16_mf8_tied3:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0, z4
+**	fmlalt	z0\.h, z1\.b, \1\.b
+**	ret
+*/
+TEST_DUAL_Z_REV (mlalt_f16_mf8_tied3, svfloat16_t, svmfloat8_t,
+		 z0_res = svmlalt_f16_mf8_fpm (z4, z1, z0, fpm0),
+		 z0_res = svmlalt_fpm (z4, z1, z0, fpm0))
+
+/*
+** mlalt_f16_mf8_untied:
+** 	msr	fpmr, x0
+**	movprfx	z0, z1
+**	fmlalt	z0\.h, z4\.b, z5\.b
+**	ret
+*/
+TEST_DUAL_Z (mlalt_f16_mf8_untied, svfloat16_t, svmfloat8_t,
+	     z0 = svmlalt_f16_mf8_fpm (z1, z4, z5, fpm0),
+	     z0 = svmlalt_fpm (z1, z4, z5, fpm0))
+
+/*
+** mlalt_h7_f16_tied1:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	fmlalt	z0\.h, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalt_h7_f16_tied1, svfloat16_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalt_n_f16_mf8_fpm (z0, z4, d7, fpm0),
+	      z0 = svmlalt_fpm (z0, z4, d7, fpm0))
+
+/*
+** mlalt_h7_f16_untied:
+** 	msr	fpmr, x0
+**	mov	(z[0-9]+\.b), b7
+**	movprfx	z0, z1
+**	fmlalt	z0\.h, z4\.b, \1
+**	ret
+*/
+TEST_DUAL_ZD (mlalt_h7_f16_untied, svfloat16_t, svmfloat8_t, mfloat8_t,
+	      z0 = svmlalt_n_f16_mf8_fpm (z1, z4, d7, fpm0),
+	      z0 = svmlalt_fpm (z1, z4, d7, fpm0))
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index a3edccf1fda..a122178bd21 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12140,7 +12140,8 @@  proc check_effective_target_aarch64_tiny { } {
 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
 			  "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
 			  "sme" "sme-i16i64" "sme2" "sve-b16b16"
-			  "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" } {
+			  "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" "fp8fma"
+			  "ssve-fp8fma" } {
     eval [string map [list FUNC $aarch64_ext] {
 	proc check_effective_target_aarch64_asm_FUNC_ok { } {
 	  if { [istarget aarch64*-*-*] } {