[04/22] aarch64: Add __builtin_aarch64_chkfeat

Message ID 20241023110528.487830-5-yury.khrustalev@arm.com
State New
Headers
Series aarch64: Add support for Guarded Control Stack extension |

Commit Message

Yury Khrustalev Oct. 23, 2024, 11:05 a.m. UTC
  From: Szabolcs Nagy <szabolcs.nagy@arm.com>

Builtin for chkfeat: the input argument is used to initialize x16 then
execute chkfeat and return the updated x16.

Note: ACLE __chkfeat(x) plans to flip the bits to be more intuitive
(xor the input to output), but for the builtin that seems unnecessary
complication.

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
	Define AARCH64_BUILTIN_CHKFEAT.
	(aarch64_general_init_builtins): Handle chkfeat.
	(aarch64_general_expand_builtin): Handle chkfeat.
---
 gcc/config/aarch64/aarch64-builtins.cc | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
  

Comments

Richard Sandiford Oct. 24, 2024, 11:10 a.m. UTC | #1
Yury Khrustalev <yury.khrustalev@arm.com> writes:
> From: Szabolcs Nagy <szabolcs.nagy@arm.com>
>
> Builtin for chkfeat: the input argument is used to initialize x16 then
> execute chkfeat and return the updated x16.
>
> Note: ACLE __chkfeat(x) plans to flip the bits to be more intuitive
> (xor the input to output), but for the builtin that seems unnecessary
> complication.

Sounds good.

> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
> 	Define AARCH64_BUILTIN_CHKFEAT.
> 	(aarch64_general_init_builtins): Handle chkfeat.
> 	(aarch64_general_expand_builtin): Handle chkfeat.
> ---
>  gcc/config/aarch64/aarch64-builtins.cc | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index 7d737877e0b..765f2091504 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -875,6 +875,8 @@ enum aarch64_builtins
>    AARCH64_PLDX,
>    AARCH64_PLI,
>    AARCH64_PLIX,
> +  /* Armv8.9-A / Armv9.4-A builtins.  */
> +  AARCH64_BUILTIN_CHKFEAT,
>    AARCH64_BUILTIN_MAX
>  };
>  
> @@ -2280,6 +2282,12 @@ aarch64_general_init_builtins (void)
>    if (!TARGET_ILP32)
>      aarch64_init_pauth_hint_builtins ();
>  
> +  tree ftype_chkfeat
> +    = build_function_type_list (uint64_type_node, uint64_type_node, NULL);
> +  aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT]
> +    = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
> +				   AARCH64_BUILTIN_CHKFEAT);
> +
>    if (in_lto_p)
>      handle_arm_acle_h ();
>  }
> @@ -3484,6 +3492,16 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
>      case AARCH64_PLIX:
>        aarch64_expand_prefetch_builtin (exp, fcode);
>        return target;
> +
> +    case AARCH64_BUILTIN_CHKFEAT:
> +      {
> +	rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
> +	op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +	emit_move_insn (x16_reg, op0);
> +	expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0);
> +	emit_move_insn (target, x16_reg);
> +	return target;

target isn't required to be nonnull, so this would be safer as:

  return copy_to_reg (x16_reg);

(I don't think it's worth complicating things by trying to reuse target,
since this code isn't going to be performance/memory critical.)

Looks good otherwise.

Thanks,
Richard

> +      }
>      }
>  
>    if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
  

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 7d737877e0b..765f2091504 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -875,6 +875,8 @@  enum aarch64_builtins
   AARCH64_PLDX,
   AARCH64_PLI,
   AARCH64_PLIX,
+  /* Armv8.9-A / Armv9.4-A builtins.  */
+  AARCH64_BUILTIN_CHKFEAT,
   AARCH64_BUILTIN_MAX
 };
 
@@ -2280,6 +2282,12 @@  aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
     aarch64_init_pauth_hint_builtins ();
 
+  tree ftype_chkfeat
+    = build_function_type_list (uint64_type_node, uint64_type_node, NULL);
+  aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT]
+    = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
+				   AARCH64_BUILTIN_CHKFEAT);
+
   if (in_lto_p)
     handle_arm_acle_h ();
 }
@@ -3484,6 +3492,16 @@  aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
     case AARCH64_PLIX:
       aarch64_expand_prefetch_builtin (exp, fcode);
       return target;
+
+    case AARCH64_BUILTIN_CHKFEAT:
+      {
+	rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
+	op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+	emit_move_insn (x16_reg, op0);
+	expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0);
+	emit_move_insn (target, x16_reg);
+	return target;
+      }
     }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)