[4/5] aarch64: Add ZT0

Message ID mptwmugp9d2.fsf@arm.com
State New
Series [1/5] aarch64: Add +sme2

Commit Message

Richard Sandiford Nov. 17, 2023, 5:39 p.m. UTC
SME2 adds a 512-bit lookup table called ZT0.  It is enabled
and disabled by PSTATE.ZA, just like ZA itself.  This patch
adds support for the register, including saving and restoring
its contents.

The code reuses the V8DI mode that was added for LS64, including
the associated memory classification rules.  (The ZT0 range
is more restricted than the LS64 range, but that's enforced
by predicates and constraints.)
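
As a rough illustration of the source-level interface (hypothetical
function names; the attribute spellings match those used by the new
tests):

  #pragma GCC target "+sme2"

  void inout_za_zt0() __arm_inout("za", "zt0");
  void private_fn();  /* Shares neither ZA nor ZT0.  */

  /* ZT0 is call-clobbered, so the compiler saves it to a stack slot
     around the call to the private-ZA function private_fn.  */
  __arm_new("za", "zt0") void f() {
    inout_za_zt0();  /* Shares the current ZA and ZT0 contents.  */
    private_fn();    /* ZT0 is saved before and restored after.  */
    inout_za_zt0();
  }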

gcc/
	* config/aarch64/aarch64.md (ZT0_REGNUM): New constant.
	(LAST_FAKE_REGNUM): Bump to include it.
	* config/aarch64/aarch64.h (FIXED_REGISTERS): Add an entry for ZT0.
	(CALL_REALLY_USED_REGISTERS, REGISTER_NAMES): Likewise.
	(REG_CLASS_CONTENTS): Likewise.
	(machine_function): Add zt0_save_buffer.
	(CUMULATIVE_ARGS): Add shared_zt0_flags.
	* config/aarch64/aarch64.cc (aarch64_check_state_string): Handle zt0.
	(aarch64_fntype_pstate_za, aarch64_fndecl_pstate_za): Likewise.
	(aarch64_function_arg): Add the shared ZT0 flags as an extra
	limb of the parallel.
	(aarch64_init_cumulative_args): Initialize shared_zt0_flags.
	(aarch64_extra_live_on_entry): Handle ZT0_REGNUM.
	(aarch64_epilogue_uses): Likewise.
	(aarch64_get_zt0_save_buffer, aarch64_save_zt0): New functions.
	(aarch64_restore_zt0): Likewise.
	(aarch64_start_call_args): Reject calls to functions that share
	ZT0 from functions that have no ZT0 state.  Save ZT0 around shared-ZA
	calls that do not share ZT0.
	(aarch64_expand_call): Handle ZT0.  Reject calls to functions that
	share ZT0 but not ZA from functions with ZA state.
	(aarch64_end_call_args): Restore ZT0 after calls to shared-ZA functions
	that do not share ZT0.
	(aarch64_set_current_function): Require +sme2 for functions that
	have ZT0 state.
	(aarch64_function_attribute_inlinable_p): Don't allow functions to
	be inlined if they have local zt0 state.
	(AARCH64_IPA_CLOBBERS_ZT0): New constant.
	(aarch64_update_ipa_fn_target_info): Record asms that clobber ZT0.
	(aarch64_can_inline_p): Don't inline callees that clobber ZT0
	into functions that have ZT0 state.
	(aarch64_comp_type_attributes): Check for compatible ZT0 sharing.
	(aarch64_optimize_mode_switching): Use mode switching if the
	function has ZT0 state.
	(aarch64_mode_emit_local_sme_state): Save and restore ZT0 around
	calls to private-ZA functions.
	(aarch64_mode_needed_local_sme_state): Require ZA to be active
	for instructions that access ZT0.
	(aarch64_md_asm_adjust): Extend handling of ZA clobbers to ZT0.
	* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
	Define __ARM_STATE_ZT0.
	* config/aarch64/aarch64-sme.md (UNSPECV_ASM_UPDATE_ZT0): New unspecv.
	(aarch64_asm_update_zt0): New insn.
	(UNSPEC_RESTORE_ZT0): New unspec.
	(aarch64_sme_ldr_zt0, aarch64_restore_zt0): New insns.
	(aarch64_sme_str_zt0): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sme/zt0_state_1.c: New test.
	* gcc.target/aarch64/sme/zt0_state_2.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_3.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_4.c: Likewise.
	* gcc.target/aarch64/sme/zt0_state_5.c: Likewise.
---
 gcc/config/aarch64/aarch64-c.cc               |   1 +
 gcc/config/aarch64/aarch64-sme.md             |  63 +++++
 gcc/config/aarch64/aarch64.cc                 | 205 ++++++++++++--
 gcc/config/aarch64/aarch64.h                  |  14 +-
 gcc/config/aarch64/aarch64.md                 |   7 +-
 .../gcc.target/aarch64/sme/zt0_state_1.c      |  65 +++++
 .../gcc.target/aarch64/sme/zt0_state_2.c      |  31 +++
 .../gcc.target/aarch64/sme/zt0_state_3.c      |   6 +
 .../gcc.target/aarch64/sme/zt0_state_4.c      |  53 ++++
 .../gcc.target/aarch64/sme/zt0_state_5.c      | 260 ++++++++++++++++++
 10 files changed, 670 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
  

Patch

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 2a8ca46987a..017380b7563 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -74,6 +74,7 @@  aarch64_define_unconditional_macros (cpp_reader *pfile)
   builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
 
   builtin_define ("__ARM_STATE_ZA");
+  builtin_define ("__ARM_STATE_ZT0");
 
   /* Define keyword attributes like __arm_streaming as macros that expand
      to the associated [[...]] attribute.  Use __extension__ in the attribute
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index da0745f6570..505805e2ecf 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -27,7 +27,9 @@ 
 ;;
 ;; == Loads, stores and moves
 ;; ---- Single-vector loads
+;; ---- Table loads
 ;; ---- Single-vector stores
+;; ---- Table stores
 ;; ---- Single-vector moves
 ;; ---- Zeroing
 ;;
@@ -209,6 +211,7 @@  (define_c_enum "unspec" [
 
 (define_c_enum "unspecv" [
   UNSPECV_ASM_UPDATE_ZA
+  UNSPECV_ASM_UPDATE_ZT0
 ])
 
 ;; Use the ABI-defined routine to commit an uncommitted lazy save.
@@ -400,6 +403,19 @@  (define_insn "aarch64_asm_update_za"
   [(set_attr "type" "no_insn")]
 )
 
+;; A similar pattern for ZT0.
+(define_insn "aarch64_asm_update_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(unspec_volatile:V8DI
+	  [(reg:V8DI ZT0_REGNUM)
+	   (reg:DI SME_STATE_REGNUM)
+	   (match_operand 0 "const_int_operand")]
+	  UNSPECV_ASM_UPDATE_ZT0))]
+  ""
+  ""
+  [(set_attr "type" "no_insn")]
+)
+
 ;; This pseudo-instruction is emitted as part of a call to a private-ZA
 ;; function from a function with ZA state.  It marks a natural place to set
 ;; up a lazy save, if that turns out to be necessary.  The save itself
@@ -544,6 +560,38 @@  (define_insn "@aarch64_sme_ldrn<mode>"
   "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Table loads
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LDR
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+  UNSPEC_RESTORE_ZT0
+])
+
+(define_insn "aarch64_sme_ldr_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q"))
+   (use (reg:DI SME_STATE_REGNUM))]
+  "TARGET_SME2"
+  "ldr\tzt0, %0"
+)
+
+;; This version is used after calls to private-ZA functions.  Since ZT0_REGNUM
+;; represents the current function's state, it isn't clobbered by private-ZA
+;; functions, so we need to make it depend on the ZA reinitialization code.
+(define_insn "aarch64_restore_zt0"
+  [(set (reg:V8DI ZT0_REGNUM)
+	(unspec:V8DI
+	  [(reg:DI SME_STATE_REGNUM)
+	   (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")]
+	  UNSPEC_RESTORE_ZT0))]
+  "TARGET_SME2"
+  "ldr\tzt0, %0"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Single-vector stores
 ;; -------------------------------------------------------------------------
@@ -614,6 +662,21 @@  (define_insn "@aarch64_sme_strn<mode>"
   "str\tza[%w0, %1], [%2, #%1, mul vl]"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- Table stores
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - STR
+;; -------------------------------------------------------------------------
+
+(define_insn "aarch64_sme_str_zt0"
+  [(set (match_operand:V8DI 0 "aarch64_sync_memory_operand" "=Q")
+	(reg:V8DI ZT0_REGNUM))
+   (use (reg:DI SME_STATE_REGNUM))]
+  "TARGET_SME2"
+  "str\tzt0, %0"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- Single-vector moves
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 36576159b4f..6a6ae1c723c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2911,7 +2911,8 @@  aarch64_check_state_string (tree name, tree value)
     }
 
   const char *state_name = TREE_STRING_POINTER (value);
-  if (strcmp (state_name, "za") != 0)
+  if (strcmp (state_name, "za") != 0
+      && strcmp (state_name, "zt0") != 0)
     {
       error ("unrecognized state string %qs", state_name);
       return false;
@@ -4457,7 +4458,8 @@  aarch64_fntype_shared_flags (const_tree fntype, const char *state_name)
 static aarch64_feature_flags
 aarch64_fntype_pstate_za (const_tree fntype)
 {
-  if (aarch64_fntype_shared_flags (fntype, "za"))
+  if (aarch64_fntype_shared_flags (fntype, "za")
+      || aarch64_fntype_shared_flags (fntype, "zt0"))
     return AARCH64_FL_ZA_ON;
 
   return 0;
@@ -4512,7 +4514,8 @@  aarch64_fndecl_has_state (tree fndecl, const char *state_name)
 static aarch64_feature_flags
 aarch64_fndecl_pstate_za (const_tree fndecl)
 {
-  if (aarch64_fndecl_has_new_state (fndecl, "za"))
+  if (aarch64_fndecl_has_new_state (fndecl, "za")
+      || aarch64_fndecl_has_new_state (fndecl, "zt0"))
     return AARCH64_FL_ZA_ON;
 
   return aarch64_fntype_pstate_za (TREE_TYPE (fndecl));
@@ -9330,9 +9333,11 @@  aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 						  pcum->pcs_variant);
       rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum);
       rtx shared_za_flags = gen_int_mode (pcum->shared_za_flags, SImode);
-      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, abi_cookie,
+      rtx shared_zt0_flags = gen_int_mode (pcum->shared_zt0_flags, SImode);
+      return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, abi_cookie,
 						    sme_mode_switch_args,
-						    shared_za_flags));
+						    shared_za_flags,
+						    shared_zt0_flags));
     }
 
   aarch64_layout_arg (pcum_v, arg);
@@ -9370,6 +9375,8 @@  aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
   pcum->silent_p = silent_p;
   pcum->shared_za_flags
     = (fntype ? aarch64_fntype_shared_flags (fntype, "za") : 0U);
+  pcum->shared_zt0_flags
+    = (fntype ? aarch64_fntype_shared_flags (fntype, "zt0") : 0U);
   pcum->num_sme_mode_switch_args = 0;
 
   if (!silent_p
@@ -11516,6 +11523,13 @@  aarch64_extra_live_on_entry (bitmap regs)
       auto za_flags = aarch64_cfun_shared_flags ("za");
       if (za_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT))
 	bitmap_set_bit (regs, ZA_REGNUM);
+
+      /* Since ZT0 is call-clobbered, it is only live on input if
+	 it is explicitly shared, and is not a pure output.  */
+      auto zt0_flags = aarch64_cfun_shared_flags ("zt0");
+      if (zt0_flags != 0
+	  && zt0_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT))
+	bitmap_set_bit (regs, ZT0_REGNUM);
     }
 }
 
@@ -11544,6 +11558,8 @@  aarch64_epilogue_uses (int regno)
     return 1;
   if (regno == ZA_REGNUM && aarch64_cfun_shared_flags ("za") != 0)
     return 1;
+  if (regno == ZT0_REGNUM && aarch64_cfun_shared_flags ("zt0") != 0)
+    return 1;
   return 0;
 }
 
@@ -13237,6 +13253,40 @@  aarch64_restore_za (rtx tpidr2_block)
   emit_insn (gen_aarch64_tpidr2_restore ());
 }
 
+/* Return the ZT0 save buffer, creating one if necessary.  */
+
+static rtx
+aarch64_get_zt0_save_buffer ()
+{
+  if (!cfun->machine->zt0_save_buffer)
+    cfun->machine->zt0_save_buffer = assign_stack_local (V8DImode, 64, 128);
+  return cfun->machine->zt0_save_buffer;
+}
+
+/* Save ZT0 to the current function's save buffer.  */
+
+static void
+aarch64_save_zt0 ()
+{
+  rtx mem = aarch64_get_zt0_save_buffer ();
+  mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+  emit_insn (gen_aarch64_sme_str_zt0 (mem));
+}
+
+/* Restore ZT0 from the current function's save buffer.  FROM_LAZY_SAVE_P
+   is true if the load is happening after a call to a private-ZA function,
+   false if it can be treated as a normal load.  */
+
+static void
+aarch64_restore_zt0 (bool from_lazy_save_p)
+{
+  rtx mem = aarch64_get_zt0_save_buffer ();
+  mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+  emit_insn (from_lazy_save_p
+	     ? gen_aarch64_restore_zt0 (mem)
+	     : gen_aarch64_sme_ldr_zt0 (mem));
+}
+
 /* Implement TARGET_START_CALL_ARGS.  */
 
 static void
@@ -13257,6 +13307,10 @@  aarch64_start_call_args (cumulative_args_t ca_v)
       && !aarch64_cfun_has_state ("za"))
     error ("call to a function that shares %qs state from a function"
 	   " that has no %qs state", "za", "za");
+  else if ((ca->shared_zt0_flags & (AARCH64_STATE_IN | AARCH64_STATE_OUT))
+	   && !aarch64_cfun_has_state ("zt0"))
+    error ("call to a function that shares %qs state from a function"
+	   " that has no %qs state", "zt0", "zt0");
   else if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON))
     error ("call to a function that shares SME state from a function"
 	   " that has no SME state");
@@ -13266,6 +13320,13 @@  aarch64_start_call_args (cumulative_args_t ca_v)
      The code itself is inserted by the mode-switching pass.  */
   if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
     emit_insn (gen_aarch64_start_private_za_call ());
+
+  /* If this is a call to a shared-ZA function that doesn't share ZT0,
+     save and restore ZT0 around the call.  */
+  if (aarch64_cfun_has_state ("zt0")
+      && (ca->isa_mode & AARCH64_FL_ZA_ON)
+      && ca->shared_zt0_flags == 0)
+    aarch64_save_zt0 ();
 }
 
 /* This function is used by the call expanders of the machine description.
@@ -13278,8 +13339,8 @@  aarch64_start_call_args (cumulative_args_t ca_v)
        The second element is a PARALLEL that lists all the argument
        registers that need to be saved and restored around a change
        in PSTATE.SM, or const0_rtx if no such switch is needed.
-       The third element is a const_int that contains the sharing flags
-       for ZA.
+       The third and fourth elements are const_ints that contain the
+       sharing flags for ZA and ZT0 respectively.
    SIBCALL indicates whether this function call is normal call or sibling call.
    It will generate different pattern accordingly.  */
 
@@ -13293,16 +13354,28 @@  aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
   rtx callee_abi = cookie;
   rtx sme_mode_switch_args = const0_rtx;
   unsigned int shared_za_flags = 0;
+  unsigned int shared_zt0_flags = 0;
   if (GET_CODE (cookie) == PARALLEL)
     {
       callee_abi = XVECEXP (cookie, 0, 0);
       sme_mode_switch_args = XVECEXP (cookie, 0, 1);
       shared_za_flags = INTVAL (XVECEXP (cookie, 0, 2));
+      shared_zt0_flags = INTVAL (XVECEXP (cookie, 0, 3));
     }
 
   gcc_assert (CONST_INT_P (callee_abi));
   auto callee_isa_mode = aarch64_callee_isa_mode (callee_abi);
 
+  if (aarch64_cfun_has_state ("za")
+      && (callee_isa_mode & AARCH64_FL_ZA_ON)
+      && !shared_za_flags)
+    {
+      sorry ("call to a function that shares state other than %qs"
+	     " from a function that has %qs state", "za", "za");
+      inform (input_location, "use %<__arm_preserves(\"za\")%> if the"
+	      " callee preserves ZA");
+    }
+
   gcc_assert (MEM_P (mem));
   callee = XEXP (mem, 0);
   mode = GET_MODE (callee);
@@ -13335,6 +13408,8 @@  aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
      we want to know whether the call committed a lazy save.  */
   if (TARGET_ZA && !shared_za_flags)
     return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM));
+  if (shared_zt0_flags & AARCH64_STATE_OUT)
+    return_values.safe_push (gen_rtx_REG (V8DImode, ZT0_REGNUM));
 
   /* Create the new return value, if necessary.  */
   if (orig_num_return_values != return_values.length ())
@@ -13420,10 +13495,12 @@  aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
     }
 
   /* Add any ZA-related information.
+
      ZA_REGNUM represents the current function's ZA state, rather than
      the contents of the ZA register itself.  We ensure that the function's
      ZA state is preserved by private-ZA call sequences, so the call itself
-     does not use or clobber ZA_REGNUM.  */
+     does not use or clobber ZA_REGNUM.  The same thing applies to
+     ZT0_REGNUM.  */
   if (TARGET_ZA)
     {
       /* The callee requires ZA to be active if the callee is shared-ZA,
@@ -13443,10 +13520,14 @@  aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
 		 gen_rtx_REG (VNx16BImode, LOWERING_REGNUM));
 
       /* If the callee is a shared-ZA function, record whether it uses the
-	 current value of ZA.  */
+	 current value of ZA and ZT0.  */
       if (shared_za_flags & AARCH64_STATE_IN)
 	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
 		 gen_rtx_REG (VNx16BImode, ZA_REGNUM));
+
+      if (shared_zt0_flags & AARCH64_STATE_IN)
+	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
+		 gen_rtx_REG (V8DImode, ZT0_REGNUM));
     }
 }
 
@@ -13462,6 +13543,13 @@  aarch64_end_call_args (cumulative_args_t ca_v)
      The code itself is inserted by the mode-switching pass.  */
   if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
     emit_insn (gen_aarch64_end_private_za_call ());
+
+  /* If this is a call to a shared-ZA function that doesn't share ZT0,
+     save and restore ZT0 around the call.  */
+  if (aarch64_cfun_has_state ("zt0")
+      && (ca->isa_mode & AARCH64_FL_ZA_ON)
+      && ca->shared_zt0_flags == 0)
+    aarch64_restore_zt0 (false);
 }
 
 /* Emit call insn with PAT and do aarch64-specific handling.  */
@@ -20982,6 +21070,20 @@  aarch64_set_current_function (tree fndecl)
 		       : AARCH64_FL_DEFAULT_ISA_MODE);
   auto isa_flags = TREE_TARGET_OPTION (new_tree)->x_aarch64_isa_flags;
 
+  static bool reported_zt0_p;
+  if (!reported_zt0_p
+      && !(isa_flags & AARCH64_FL_SME2)
+      && fndecl
+      && aarch64_fndecl_has_state (fndecl, "zt0"))
+    {
+      error ("functions with %qs state require the ISA extension %qs",
+	     "zt0", "sme2");
+      inform (input_location, "you can enable %qs using the command-line"
+	      " option %<-march%>, or by using the %<target%>"
+	      " attribute or pragma", "sme2");
+      reported_zt0_p = true;
+    }
+
   /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
      the default have been handled by aarch64_save_restore_target_globals from
      aarch64_pragma_target_parse.  */
@@ -21593,9 +21695,10 @@  aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
 static bool
 aarch64_function_attribute_inlinable_p (const_tree fndecl)
 {
-  /* A function that has local ZA state cannot be inlined into its caller,
-     since we only support managing ZA switches at function scope.  */
-  return !aarch64_fndecl_has_new_state (fndecl, "za");
+  /* A function that has local SME state cannot be inlined into its caller,
+     since we only support managing PSTATE.ZA switches at function scope.  */
+  return (!aarch64_fndecl_has_new_state (fndecl, "za")
+	  && !aarch64_fndecl_has_new_state (fndecl, "zt0"));
 }
 
 /* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
@@ -21626,9 +21729,10 @@  aarch64_tribools_ok_for_inlining_p (int caller, int callee,
    Not meaningful for streaming-compatible functions.  */
 constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0;
 
-/* Set if the function clobbers ZA.  Not meaningful for functions that
+/* Set if the function clobbers ZA and ZT0.  Not meaningful for functions that
    have ZA state.  */
 constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1;
+constexpr auto AARCH64_IPA_CLOBBERS_ZT0 = 1U << 2;
 
 /* Implement TARGET_NEED_IPA_FN_TARGET_INFO.  */
 
@@ -21656,6 +21760,8 @@  aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
 	  const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op));
 	  if (strcmp (clobber, "za") == 0)
 	    info |= AARCH64_IPA_CLOBBERS_ZA;
+	  if (strcmp (clobber, "zt0") == 0)
+	    info |= AARCH64_IPA_CLOBBERS_ZT0;
 	}
     }
   if (auto *call = dyn_cast<const gcall *> (stmt))
@@ -21731,21 +21837,25 @@  aarch64_can_inline_p (tree caller, tree callee)
       && callee_has_property (AARCH64_IPA_SM_FIXED))
     return false;
 
-  /* aarch64_function_attribute_inlinable_p prevents new-ZA functions
-     from being inlined into others.  We also need to prevent inlining
-     of shared-ZA functions into functions without ZA state, since this
-     is an error condition.
+  /* aarch64_function_attribute_inlinable_p prevents new-ZA and new-ZT0
+     functions from being inlined into others.  We also need to prevent
+     inlining of shared-ZA functions into functions without ZA state,
+     since this is an error condition.
 
      The only other problematic case for ZA is inlining a function that
-     directly clobbers ZA into a function that has ZA state.  */
+     directly clobbers ZA or ZT0 into a function that has ZA or ZT0 state.  */
   auto caller_za = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON);
   auto callee_za = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON);
   if (!caller_za && callee_za)
     return false;
-  if (caller_za
-      && !callee_za
+  if (!callee_za
+      && aarch64_fndecl_has_state (caller, "za")
       && callee_has_property (AARCH64_IPA_CLOBBERS_ZA))
     return false;
+  if (!callee_za
+      && aarch64_fndecl_has_state (caller, "zt0")
+      && callee_has_property (AARCH64_IPA_CLOBBERS_ZT0))
+    return false;
 
   /* Allow non-strict aligned functions inlining into strict
      aligned ones.  */
@@ -29826,6 +29936,9 @@  aarch64_comp_type_attributes (const_tree type1, const_tree type2)
   if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "za")
       != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "za"))
     return 0;
+  if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "zt0")
+      != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "zt0"))
+    return 0;
   return 1;
 }
 
@@ -30293,7 +30406,9 @@  aarch64_optimize_mode_switching (aarch64_mode_entity entity)
 {
   bool have_sme_state = (aarch64_cfun_incoming_pstate_za () != 0
 			 || (aarch64_cfun_has_new_state ("za")
-			     && df_regs_ever_live_p (ZA_REGNUM)));
+			     && df_regs_ever_live_p (ZA_REGNUM))
+			 || (aarch64_cfun_has_new_state ("zt0")
+			     && df_regs_ever_live_p (ZT0_REGNUM)));
 
   if (have_sme_state && nonlocal_goto_handler_labels)
     {
@@ -30380,6 +30495,11 @@  aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 	     In that case, ZA still contains the current function's ZA state,
 	     and we just need to cancel the lazy save.  */
 	  emit_insn (gen_aarch64_clear_tpidr2 ());
+
+	  /* Restore the ZT0 state, if we have some.  */
+	  if (aarch64_cfun_has_state ("zt0"))
+	    aarch64_restore_zt0 (true);
+
 	  return;
 	}
 
@@ -30388,6 +30508,10 @@  aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 	  /* Retrieve the current function's ZA state from the lazy save
 	     buffer.  */
 	  aarch64_restore_za (aarch64_get_tpidr2_ptr ());
+
+	  /* Restore the ZT0 state, if we have some.  */
+	  if (aarch64_cfun_has_state ("zt0"))
+	    aarch64_restore_zt0 (true);
 	  return;
 	}
 
@@ -30404,6 +30528,11 @@  aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 
 	     Both cases leave ZA zeroed.  */
 	  emit_insn (gen_aarch64_smstart_za ());
+
+	  /* Restore the ZT0 state, if we have some.  */
+	  if (prev_mode == aarch64_local_sme_state::OFF
+	      && aarch64_cfun_has_state ("zt0"))
+	    aarch64_restore_zt0 (true);
 	  return;
 	}
 
@@ -30422,6 +30551,10 @@  aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
 	  || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD
 	  || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER)
 	{
+	  /* Save the ZT0 state, if we have some.  */
+	  if (aarch64_cfun_has_state ("zt0"))
+	    aarch64_save_zt0 ();
+
 	  /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual
 	     case of setting up a lazy save buffer before a call.
 	     A transition from INACTIVE_CALLER is similar, except that
@@ -30449,6 +30582,13 @@  aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
   if (mode == aarch64_local_sme_state::INACTIVE_CALLER
       || mode == aarch64_local_sme_state::OFF)
     {
+      /* Save the ZT0 state, if we have some.  */
+      if ((prev_mode == aarch64_local_sme_state::ACTIVE_LIVE
+	   || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD)
+	  && mode == aarch64_local_sme_state::OFF
+	  && aarch64_cfun_has_state ("zt0"))
+	aarch64_save_zt0 ();
+
       /* The transition to INACTIVE_CALLER is used before returning from
 	 new("za") functions.  Any state in ZA belongs to the current
 	 function rather than a caller, but that state is no longer
@@ -30597,8 +30737,10 @@  aarch64_mode_needed_local_sme_state (rtx_insn *insn, HARD_REG_SET live)
 	    : aarch64_local_sme_state::OFF);
 
   /* Force ZA to contain the current function's ZA state if INSN wants
-     to access it.  */
-  if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM))
+     to access it.  Do the same for accesses to ZT0, since ZA and ZT0
+     are both controlled by PSTATE.ZA.  */
+  if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM)
+      || aarch64_insn_references_sme_state_p (insn, ZT0_REGNUM))
     return (TEST_HARD_REG_BIT (live, ZA_REGNUM)
 	    ? aarch64_local_sme_state::ACTIVE_LIVE
 	    : aarch64_local_sme_state::ACTIVE_DEAD);
@@ -30883,27 +31025,34 @@  aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
      write directly.   Use a separate insn to model the effect.
 
      We must ensure that ZA is active on entry, which is enforced by using
-     SME_STATE_REGNUM.  The asm must ensure that ZA is active on return.  */
+     SME_STATE_REGNUM.  The asm must ensure that ZA is active on return.
+
+     The same thing applies to ZT0.  */
   if (TARGET_ZA)
     for (unsigned int i = clobbers.length (); i-- > 0; )
       {
 	rtx x = clobbers[i];
-	if (REG_P (x) && REGNO (x) == ZA_REGNUM)
+	if (REG_P (x)
+	    && (REGNO (x) == ZA_REGNUM || REGNO (x) == ZT0_REGNUM))
 	  {
 	    auto id = cfun->machine->next_asm_update_za_id++;
 
 	    start_sequence ();
 	    if (seq)
 	      emit_insn (seq);
-	    emit_insn (gen_aarch64_asm_update_za (gen_int_mode (id, SImode)));
+	    rtx id_rtx = gen_int_mode (id, SImode);
+	    emit_insn (REGNO (x) == ZA_REGNUM
+		       ? gen_aarch64_asm_update_za (id_rtx)
+		       : gen_aarch64_asm_update_zt0 (id_rtx));
 	    seq = get_insns ();
 	    end_sequence ();
 
-	    uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM));
+	    auto mode = REGNO (x) == ZA_REGNUM ? VNx16QImode : V8DImode;
+	    uses.safe_push (gen_rtx_REG (mode, REGNO (x)));
 	    uses.safe_push (gen_rtx_REG (DImode, SME_STATE_REGNUM));
 
 	    clobbers.ordered_remove (i);
-	    CLEAR_HARD_REG_BIT (clobbered_regs, ZA_REGNUM);
+	    CLEAR_HARD_REG_BIT (clobbered_regs, REGNO (x));
 	  }
       }
   return seq;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 14205ce34b3..e42be08bbd3 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -485,7 +485,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
     0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
     1, 1,			/* FFR and FFRT */	\
-    1, 1, 1, 1, 1, 1, 1		/* Fake registers */	\
+    1, 1, 1, 1, 1, 1, 1, 1	/* Fake registers */	\
   }
 
 /* X30 is marked as caller-saved which is in line with regular function call
@@ -509,7 +509,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     1, 1, 1, 1,   1, 1, 1, 1,	/* P0 - P7 */		\
     1, 1, 1, 1,   1, 1, 1, 1,	/* P8 - P15 */		\
     1, 1,			/* FFR and FFRT */	\
-    0, 0, 0, 0, 0, 0, 0		/* Fake registers */	\
+    0, 0, 0, 0, 0, 0, 0, 0	/* Fake registers */	\
   }
 
 #define REGISTER_NAMES						\
@@ -527,7 +527,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
     "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15",	\
     "ffr", "ffrt",						\
     "lowering", "tpidr2_block", "sme_state", "tpidr2_setup",	\
-    "za_free", "za_saved", "za"					\
+    "za_free", "za_saved", "za", "zt0"				\
   }
 
 /* Generate the register aliases for core register N */
@@ -770,7 +770,7 @@  enum reg_class
   { 0x00000000, 0x00000000, 0x000ffff0 },	/* PR_REGS */		\
   { 0x00000000, 0x00000000, 0x00300000 },	/* FFR_REGS */		\
   { 0x00000000, 0x00000000, 0x003ffff0 },	/* PR_AND_FFR_REGS */	\
-  { 0x00000000, 0x00000000, 0x1fc00000 },	/* FAKE_REGS */		\
+  { 0x00000000, 0x00000000, 0x3fc00000 },	/* FAKE_REGS */		\
   { 0xffffffff, 0xffffffff, 0x000fffff }	/* ALL_REGS */		\
 }
 
@@ -980,6 +980,9 @@  typedef struct GTY (()) machine_function
      or null if none.  */
   rtx za_save_buffer;
 
+  /* A stack slot that stores the contents of the function's ZT0 state.  */
+  rtx zt0_save_buffer;
+
   bool label_is_assembled;
 
   /* True if we've expanded at least one call to a function that changes
@@ -1061,8 +1064,9 @@  typedef struct
 				   raise an error for invalid calls.  */
 
   /* AARCH64_STATE_* flags that describe whether the function shares ZA
-     with its callers.  */
+     and ZT0 with its callers.  */
   unsigned int shared_za_flags;
+  unsigned int shared_zt0_flags;
 
   /* A list of registers that need to be saved and restored around a
      change to PSTATE.SM.  An auto_vec would be more convenient, but those
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 79d4614924d..a50c3ea50c9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -156,9 +156,12 @@  (define_constants
     ;; The contents persist even when the architected ZA is off.  Private-ZA
     ;; functions have no effect on its contents.
     (ZA_REGNUM 92)
-    ;; ----------------------------------------------------------------
+
+    ;; Similarly represents the contents of the current function's ZT0 state.
+    (ZT0_REGNUM 93)
+
     (FIRST_FAKE_REGNUM	LOWERING_REGNUM)
-    (LAST_FAKE_REGNUM	ZA_REGNUM)
+    (LAST_FAKE_REGNUM	ZT0_REGNUM)
     ;; ----------------------------------------------------------------
 
     ;; The pair of scratch registers used for stack probing with -fstack-check.
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
new file mode 100644
index 00000000000..05da587d4b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_1.c
@@ -0,0 +1,65 @@ 
+// { dg-options "" }
+
+#pragma GCC target "+sme2"
+
+void share_za_zt0_a() __arm_inout("za", "zt0");
+void share_za_zt0_b() __arm_inout("za", "zt0");
+
+void share_za_preserve_zt0() __arm_inout("za") __arm_preserves("zt0");
+void share_zt0_preserve_za() __arm_inout("zt0") __arm_preserves("za");
+
+__arm_new("za", "zt0") void new_za_zt0_a() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0", "za") void new_za_zt0_b() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0") void new_za_zt0_c();
+__arm_new("za") void new_za_zt0_c() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("za") void new_za_zt0_d();
+__arm_new("zt0") void new_za_zt0_d() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0", "za") void new_za_zt0_e();
+void new_za_zt0_e() {
+  share_za_zt0_a();
+  share_za_zt0_b();
+}
+
+__arm_new("zt0") void new_zt0_a() {
+  share_za_zt0_a(); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
+}
+
+__arm_new("zt0") void new_zt0_b();
+void new_zt0_b() {
+  share_za_preserve_zt0(); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
+}
+
+__arm_new("zt0") void new_zt0_c();
+void new_zt0_c() {
+  share_zt0_preserve_za();
+}
+
+__arm_new("za") void new_za_a() {
+  share_za_zt0_a(); // { dg-error {call to a function that shares 'zt0' state from a function that has no 'zt0' state} }
+}
+
+__arm_new("za") void new_za_b();
+void new_za_b() {
+  share_za_preserve_zt0();
+}
+
+__arm_new("za") void new_za_c();
+void new_za_c() {
+  share_zt0_preserve_za(); // { dg-error {call to a function that shares 'zt0' state from a function that has no 'zt0' state} }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
new file mode 100644
index 00000000000..17cd84437d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_2.c
@@ -0,0 +1,31 @@ 
+// { dg-options "" }
+
+void invalid_a() __arm_inout("za");
+void invalid_a() __arm_inout("za", "zt0"); // { dg-error {conflicting types} }
+
+void invalid_b() __arm_inout("za", "zt0");
+void invalid_b() __arm_inout("zt0"); // { dg-error {conflicting types} }
+
+void invalid_c() __arm_in("zt0") __arm_inout("za");
+void invalid_c() __arm_inout("zt0", "za"); // { dg-error {conflicting types} }
+
+void invalid_d() __arm_inout("zt0");
+void invalid_d() __arm_out("zt0"); // { dg-error {conflicting types} }
+
+void invalid_e() __arm_in("zt0");
+void invalid_e() __arm_out("zt0"); // { dg-error {conflicting types} }
+
+void invalid_f() __arm_in("zt0");
+void invalid_f() __arm_preserves("zt0"); // { dg-error {conflicting types} }
+
+void valid_a() __arm_inout("zt0") __arm_inout("za");
+void valid_a() __arm_inout("zt0", "za");
+
+void valid_b() __arm_inout("za") __arm_inout("zt0");
+void valid_b() __arm_inout("zt0") __arm_inout("za");
+
+void valid_c() __arm_inout("za", "zt0");
+void valid_c() __arm_inout("zt0", "za");
+
+void valid_d() __arm_inout("zt0", "za");
+void valid_d() __arm_inout("za", "zt0");
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
new file mode 100644
index 00000000000..2489ea21de9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_3.c
@@ -0,0 +1,6 @@ 
+// { dg-options "" }
+
+#pragma GCC target "+sme2"
+
+void foo() __arm_inout("zt0");
+void bar() __arm_inout("za", "zt0") { foo(); } // { dg-message {call to a function that shares state other than 'za' from a function that has 'za' state} }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
new file mode 100644
index 00000000000..29999003d8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_4.c
@@ -0,0 +1,53 @@ 
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+sme2"
+
+void inout_za() __arm_inout("za");
+void inout_za_zt0() __arm_inout("za", "zt0");
+
+void inout_za_out_zt0() __arm_inout("za") __arm_out("zt0");
+void inout_za_in_zt0() __arm_inout("za") __arm_in("zt0");
+
+/*
+** test1:
+**	str	x30, \[sp, #?-16\]!
+**	bl	inout_za_zt0
+**	ldr	x30, \[sp\], #?16
+**	ret
+*/
+void test1() __arm_inout("za", "zt0")
+{
+  inout_za_zt0();
+}
+
+/*
+** test2:
+**	...
+**	str	zt0, \[(?:x[0-9]+|sp)\]
+**	...
+**	bl	inout_za
+**	...
+**	ldr	zt0, \[(?:x[0-9]+|sp)\]
+**	...
+**	ret
+*/
+void test2() __arm_inout("za", "zt0")
+{
+  inout_za();
+}
+
+/*
+** test3:
+**	...
+**	bl	inout_za
+**	bl	inout_za_out_zt0
+**	[^\n]+
+**	ret
+*/
+void test3() __arm_inout("za", "zt0")
+{
+  inout_za_in_zt0();
+  inout_za();
+  inout_za_out_zt0();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
new file mode 100644
index 00000000000..e18b395476c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
@@ -0,0 +1,260 @@ 
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+sme2"
+
+void private_zt0();
+void out_zt0() __arm_out("zt0");
+void in_zt0() __arm_in("zt0");
+void inout_zt0() __arm_inout("zt0");
+void preserves_zt0() __arm_preserves("zt0");
+
+/*
+** test1:
+**	ret
+*/
+__arm_new("zt0") void test1()
+{
+}
+
+/*
+** test2:
+**	ldr	w0, \[x0\]
+**	ret
+*/
+__arm_new("zt0") int test2(int *ptr)
+{
+  return *ptr;
+}
+
+/*
+** test3:
+**	stp	[^\n]+
+**	mov	x29, sp
+**	bl	private_zt0
+** (
+**	mov	w0, 0
+**	ldp	[^\n]+
+** |
+**	ldp	[^\n]+
+**	mov	w0, 0
+** )
+**	ret
+*/
+__arm_new("zt0") int test3()
+{
+  private_zt0();
+  return 0;
+}
+
+/*
+** test4:
+**	...
+**	mrs	x0, tpidr2_el0
+**	cbz	x0, [^\n]+
+**	bl	__arm_tpidr2_save
+**	msr	tpidr2_el0, xzr
+**	smstart	za
+**	bl	in_zt0
+**	smstop	za
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test4()
+{
+  in_zt0(); // Uses zeroed contents.
+}
+
+/*
+** test5:
+**	...
+**	mrs	x0, tpidr2_el0
+**	cbz	x0, [^\n]+
+**	bl	__arm_tpidr2_save
+**	msr	tpidr2_el0, xzr
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test5()
+{
+  private_zt0();
+  out_zt0();
+  in_zt0();
+  private_zt0();
+}
+
+// Despite the long test, there shouldn't be too much scope for variation
+// here.  The point is both to test correctness and code quality.
+/*
+** test6:
+**	stp	[^\n]+
+**	mov	x29, sp
+**	mrs	x0, tpidr2_el0
+**	cbz	x0, [^\n]+
+**	bl	__arm_tpidr2_save
+**	msr	tpidr2_el0, xzr
+**	smstart	za
+**	bl	out_zt0
+**	...
+**	str	zt0, [^\n]+
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	...
+**	ldr	zt0, [^\n]+
+**	bl	in_zt0
+**	smstop	za
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test6()
+{
+  out_zt0();
+  private_zt0();
+  in_zt0();
+}
+
+// Rely on previous tests for the part leading up to the smstart.
+/*
+** test7:
+**	...
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	smstop	za
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test7()
+{
+  out_zt0();
+  in_zt0();
+  private_zt0();
+  out_zt0();
+  in_zt0();
+}
+
+/*
+** test8:
+**	...
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	smstart	za
+**	bl	out_zt0
+**	bl	in_zt0
+**	...
+**	smstop	za
+**	bl	private_zt0
+**	ldp	[^\n]+
+**	ret
+*/
+__arm_new("zt0") void test8()
+{
+  out_zt0();
+  in_zt0();
+  private_zt0();
+  out_zt0();
+  in_zt0();
+  private_zt0();
+}
+
+/*
+** test9:
+**	...
+**	str	zt0, [^\n]+
+**	smstop	za
+**	bl	private_zt0
+**	bl	private_zt0
+**	bl	private_zt0
+**	bl	private_zt0
+**	smstart	za
+**	...
+**	ldr	zt0, [^\n]+
+**	bl	in_zt0
+**	smstop	za
+**	...
+*/
+__arm_new("zt0") void test9()
+{
+  out_zt0();
+  private_zt0();
+  private_zt0();
+  private_zt0();
+  private_zt0();
+  in_zt0();
+}
+
+/*
+** test10:
+**	ldr	(w[0-9]+), \[x0\]
+**	cbz	\1, [^\n]+
+**	ldr	[^\n]+
+**	add	[^\n]+
+**	str	[^\n]+
+**	ret
+**	...
+*/
+__arm_new("zt0") void test10(volatile int *ptr)
+{
+  if (__builtin_expect (*ptr != 0, 1))
+    *ptr = *ptr + 1;
+  else
+    inout_zt0();
+}
+
+/*
+** test11:
+**	...
+**	ldr	w[0-9]+, [^\n]+
+**	add	(w[0-9]+), [^\n]+
+**	str	\1, [^\n]+
+**	...
+**	ret
+**	mrs	x[0-9]+, tpidr2_el0
+**	...
+**	smstart	za
+**	bl	inout_zt0
+**	ldr	(w[0-9]+), [^\n]+
+**	cbnz	\2, [^\n]+
+**	smstop	za
+**	...
+*/
+__arm_new("zt0") void test11(volatile int *ptr)
+{
+  if (__builtin_expect (*ptr == 0, 0))
+    do
+      inout_zt0();
+    while (*ptr);
+  else
+    *ptr += 1;
+}
+
+__arm_new("zt0") void test12(volatile int *ptr)
+{
+  do
+    {
+      inout_zt0();
+      private_zt0();
+    }
+  while (*ptr);
+  out_zt0();
+  in_zt0();
+}