aarch64: modify Ampere CPU tunings on reassociation/FMA

Message ID SN6PR01MB4240A5E3CCDD06E80624CE9BE882A@SN6PR01MB4240.prod.exchangelabs.com
State Committed
Commit 6563d6767ed0f702458f8975cd5d73676c4604cc
Headers
Series aarch64: modify Ampere CPU tunings on reassociation/FMA |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed

Commit Message

Di Zhao OS Nov. 30, 2023, 8:27 a.m. UTC
  This patch modifies tunings for ampere1/ampere1a/ampere1b, to:

1. Allow reassociation on FP additions.
2. Avoid generating loop-dependant FMA chains. Added a tuning
option for this.

Bootstrapped and tested. Is this ok for trunk?

Thanks,
Di Zhao

gcc/ChangeLog:

        * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNING_OPTION):
        New tuing option to avoid cross-loop FMA.
        * config/aarch64/aarch64.cc (aarch64_override_options_internal): Set
        param_avoid_fma_max_bits according to tuning option.
        * config/aarch64/tuning_models/ampere1.h: Modify tunings related with
        FMA.
        * config/aarch64/tuning_models/ampere1a.h: Modify tunings related with
        FMA.
        * config/aarch64/tuning_models/ampere1b.h: Modify tunings related with
        FMA.

---
 gcc/config/aarch64/aarch64-tuning-flags.def | 2 ++
 gcc/config/aarch64/aarch64.cc               | 6 ++++++
 gcc/config/aarch64/tuning_models/ampere1.h  | 2 +-
 gcc/config/aarch64/tuning_models/ampere1a.h | 4 ++--
 gcc/config/aarch64/tuning_models/ampere1b.h | 5 +++--
 5 files changed, 14 insertions(+), 5 deletions(-)
  

Comments

Richard Earnshaw Nov. 30, 2023, 12:20 p.m. UTC | #1
On 30/11/2023 08:27, Di Zhao OS wrote:
> This patch modifies tunings for ampere1/ampere1a/ampere1b, to:
> 
> 1. Allow reassociation on FP additions.
> 2. Avoid generating loop-dependant FMA chains. Added a tuning
> option for this.
> 
> Bootstrapped and tested. Is this ok for trunk?
> 
> Thanks,
> Di Zhao
> 
> gcc/ChangeLog:
> 
>          * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNING_OPTION):
>          New tuing option to avoid cross-loop FMA.

typo: tuning

>          * config/aarch64/aarch64.cc (aarch64_override_options_internal): Set
>          param_avoid_fma_max_bits according to tuning option.
>          * config/aarch64/tuning_models/ampere1.h: Modify tunings related with
>          FMA.
>          * config/aarch64/tuning_models/ampere1a.h: Modify tunings related with
>          FMA.
>          * config/aarch64/tuning_models/ampere1b.h: Modify tunings related with
>          FMA.
> 

You need to mention the name of the structure you're modifying.  Also we 
usually just write 'Likewise.' if the change is identical in effect to 
the change immediately above, so

	* config/aarch64/tuning_models/ampere1.h (ampere1_tunings):
	Modify tunings related with FMA.
	* config/aarch64/tuning_models/ampere1a.h (ampere1a_tunings):
	Likewise.
	* config/aarch64/tuning_models/ampere1b.h (ampere1b_tunings):
	Likewise.

Finally, watch your line length.  The total length of the line should 
not go beyond column 72 in commit log entries, unless that involves 
breaking a single word on a line.

Otherwise, this is OK.

R.

> ---
>   gcc/config/aarch64/aarch64-tuning-flags.def | 2 ++
>   gcc/config/aarch64/aarch64.cc               | 6 ++++++
>   gcc/config/aarch64/tuning_models/ampere1.h  | 2 +-
>   gcc/config/aarch64/tuning_models/ampere1a.h | 4 ++--
>   gcc/config/aarch64/tuning_models/ampere1b.h | 5 +++--
>   5 files changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
> index 774568e9106..f28a73839a6 100644
> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> @@ -47,4 +47,6 @@ AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS)
>   
>   AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGHPUT)
>   
> +AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
> +
>   #undef AARCH64_EXTRA_TUNING_OPTION
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 64684258b7b..28bc70a787f 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -16083,6 +16083,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
>         && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
>       opts->x_flag_prefetch_loop_arrays = 1;
>   
> +  /* Avoid loop-dependant FMA chains.  */
> +  if (aarch64_tune_params.extra_tuning_flags
> +      & AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA)
> +    SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
> +			 512);
> +
>     aarch64_override_options_after_change_1 (opts);
>   }
>   
> diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
> index 8d2a1c69610..a144e8f94b3 100644
> --- a/gcc/config/aarch64/tuning_models/ampere1.h
> +++ b/gcc/config/aarch64/tuning_models/ampere1.h
> @@ -104,7 +104,7 @@ static const struct tune_params ampere1_tunings =
>     2,	/* min_div_recip_mul_df.  */
>     0,	/* max_case_values.  */
>     tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
> +  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
>     &ampere1_prefetch_tune,
>     AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
>     AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h b/gcc/config/aarch64/tuning_models/ampere1a.h
> index c419ffb3c1a..f688ed08a79 100644
> --- a/gcc/config/aarch64/tuning_models/ampere1a.h
> +++ b/gcc/config/aarch64/tuning_models/ampere1a.h
> @@ -50,13 +50,13 @@ static const struct tune_params ampere1a_tunings =
>     "32:16",	/* loop_align.  */
>     2,	/* int_reassoc_width.  */
>     4,	/* fp_reassoc_width.  */
> -  1,	/* fma_reassoc_width.  */
> +  4,	/* fma_reassoc_width.  */
>     2,	/* vec_reassoc_width.  */
>     2,	/* min_div_recip_mul_sf.  */
>     2,	/* min_div_recip_mul_df.  */
>     0,	/* max_case_values.  */
>     tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
> +  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
>     &ampere1_prefetch_tune,
>     AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
>     AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
> index c4928f50d29..a98b6a980f7 100644
> --- a/gcc/config/aarch64/tuning_models/ampere1b.h
> +++ b/gcc/config/aarch64/tuning_models/ampere1b.h
> @@ -99,13 +99,14 @@ static const struct tune_params ampere1b_tunings =
>     "32:16",	/* loop_align.  */
>     2,	/* int_reassoc_width.  */
>     4,	/* fp_reassoc_width.  */
> -  1,	/* fma_reassoc_width.  */
> +  4,	/* fma_reassoc_width.  */
>     2,	/* vec_reassoc_width.  */
>     2,	/* min_div_recip_mul_sf.  */
>     2,	/* min_div_recip_mul_df.  */
>     0,	/* max_case_values.  */
>     tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
> +  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
> +   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
>     &ampere1b_prefetch_tune,
>     AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
>     AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
  
Di Zhao OS Dec. 1, 2023, 9:07 a.m. UTC | #2
Hello Richard,

Thank you for the review. Fixed the problems and committed to master.

Thanks,
Di

> -----Original Message-----
> From: Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
> Sent: Thursday, November 30, 2023 8:21 PM
> To: Di Zhao OS <dizhao@os.amperecomputing.com>; gcc-patches@gcc.gnu.org
> Cc: Philipp Tomsich <philipp.tomsich@vrull.eu>
> Subject: Re: [PATCH] aarch64: modify Ampere CPU tunings on reassociation/FMA
> 
> 
> 
> On 30/11/2023 08:27, Di Zhao OS wrote:
> > This patch modifies tunings for ampere1/ampere1a/ampere1b, to:
> >
> > 1. Allow reassociation on FP additions.
> > 2. Avoid generating loop-dependant FMA chains. Added a tuning
> > option for this.
> >
> > Bootstrapped and tested. Is this ok for trunk?
> >
> > Thanks,
> > Di Zhao
> >
> > gcc/ChangeLog:
> >
> >          * config/aarch64/aarch64-tuning-flags.def
> (AARCH64_EXTRA_TUNING_OPTION):
> >          New tuing option to avoid cross-loop FMA.
> 
> typo: tuning
> 
> >          * config/aarch64/aarch64.cc (aarch64_override_options_internal):
> Set
> >          param_avoid_fma_max_bits according to tuning option.
> >          * config/aarch64/tuning_models/ampere1.h: Modify tunings related
> with
> >          FMA.
> >          * config/aarch64/tuning_models/ampere1a.h: Modify tunings related
> with
> >          FMA.
> >          * config/aarch64/tuning_models/ampere1b.h: Modify tunings related
> with
> >          FMA.
> >
> 
> You need to mention the name of the structure you're modifying.  Also we
> usually just write 'Likewise.' if the change is identical in effect to
> the change immediately above, so
> 
> 	* config/aarch64/tuning_models/ampere1.h (ampere1_tunings):
> 	Modify tunings related with FMA.
> 	* config/aarch64/tuning_models/ampere1a.h (ampere1a_tunings):
> 	Likewise.
> 	* config/aarch64/tuning_models/ampere1b.h (ampere1b_tunings):
> 	Likewise.
> 
> Finally, watch your line length.  The total length of the line should
> not go beyond column 72 in commit log entries, unless that involves
> breaking a single word on a line.
> 
> Otherwise, this is OK.
> 
> R.
> 
> > ---
> >   gcc/config/aarch64/aarch64-tuning-flags.def | 2 ++
> >   gcc/config/aarch64/aarch64.cc               | 6 ++++++
> >   gcc/config/aarch64/tuning_models/ampere1.h  | 2 +-
> >   gcc/config/aarch64/tuning_models/ampere1a.h | 4 ++--
> >   gcc/config/aarch64/tuning_models/ampere1b.h | 5 +++--
> >   5 files changed, 14 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def
> b/gcc/config/aarch64/aarch64-tuning-flags.def
> > index 774568e9106..f28a73839a6 100644
> > --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> > +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> > @@ -47,4 +47,6 @@ AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs",
> USE_NEW_VECTOR_COSTS)
> >
> >   AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput",
> MATCHED_VECTOR_THROUGHPUT)
> >
> > +AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
> > +
> >   #undef AARCH64_EXTRA_TUNING_OPTION
> > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> > index 64684258b7b..28bc70a787f 100644
> > --- a/gcc/config/aarch64/aarch64.cc
> > +++ b/gcc/config/aarch64/aarch64.cc
> > @@ -16083,6 +16083,12 @@ aarch64_override_options_internal (struct
> gcc_options *opts)
> >         && opts->x_optimize >= aarch64_tune_params.prefetch-
> >default_opt_level)
> >       opts->x_flag_prefetch_loop_arrays = 1;
> >
> > +  /* Avoid loop-dependant FMA chains.  */
> > +  if (aarch64_tune_params.extra_tuning_flags
> > +      & AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA)
> > +    SET_OPTION_IF_UNSET (opts, &global_options_set,
> param_avoid_fma_max_bits,
> > +			 512);
> > +
> >     aarch64_override_options_after_change_1 (opts);
> >   }
> >
> > diff --git a/gcc/config/aarch64/tuning_models/ampere1.h
> b/gcc/config/aarch64/tuning_models/ampere1.h
> > index 8d2a1c69610..a144e8f94b3 100644
> > --- a/gcc/config/aarch64/tuning_models/ampere1.h
> > +++ b/gcc/config/aarch64/tuning_models/ampere1.h
> > @@ -104,7 +104,7 @@ static const struct tune_params ampere1_tunings =
> >     2,	/* min_div_recip_mul_df.  */
> >     0,	/* max_case_values.  */
> >     tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
> > -  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
> > +  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
> >     &ampere1_prefetch_tune,
> >     AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
> >     AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
> > diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h
> b/gcc/config/aarch64/tuning_models/ampere1a.h
> > index c419ffb3c1a..f688ed08a79 100644
> > --- a/gcc/config/aarch64/tuning_models/ampere1a.h
> > +++ b/gcc/config/aarch64/tuning_models/ampere1a.h
> > @@ -50,13 +50,13 @@ static const struct tune_params ampere1a_tunings =
> >     "32:16",	/* loop_align.  */
> >     2,	/* int_reassoc_width.  */
> >     4,	/* fp_reassoc_width.  */
> > -  1,	/* fma_reassoc_width.  */
> > +  4,	/* fma_reassoc_width.  */
> >     2,	/* vec_reassoc_width.  */
> >     2,	/* min_div_recip_mul_sf.  */
> >     2,	/* min_div_recip_mul_df.  */
> >     0,	/* max_case_values.  */
> >     tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
> > -  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
> > +  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
> >     &ampere1_prefetch_tune,
> >     AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
> >     AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
> > diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h
> b/gcc/config/aarch64/tuning_models/ampere1b.h
> > index c4928f50d29..a98b6a980f7 100644
> > --- a/gcc/config/aarch64/tuning_models/ampere1b.h
> > +++ b/gcc/config/aarch64/tuning_models/ampere1b.h
> > @@ -99,13 +99,14 @@ static const struct tune_params ampere1b_tunings =
> >     "32:16",	/* loop_align.  */
> >     2,	/* int_reassoc_width.  */
> >     4,	/* fp_reassoc_width.  */
> > -  1,	/* fma_reassoc_width.  */
> > +  4,	/* fma_reassoc_width.  */
> >     2,	/* vec_reassoc_width.  */
> >     2,	/* min_div_recip_mul_sf.  */
> >     2,	/* min_div_recip_mul_df.  */
> >     0,	/* max_case_values.  */
> >     tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
> > -  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
> > +  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
> > +   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
> >     &ampere1b_prefetch_tune,
> >     AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
> >     AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
  

Patch

diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 774568e9106..f28a73839a6 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -47,4 +47,6 @@  AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS)
 
 AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGHPUT)
 
+AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 64684258b7b..28bc70a787f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16083,6 +16083,12 @@  aarch64_override_options_internal (struct gcc_options *opts)
       && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
     opts->x_flag_prefetch_loop_arrays = 1;
 
+  /* Avoid loop-dependant FMA chains.  */
+  if (aarch64_tune_params.extra_tuning_flags
+      & AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA)
+    SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
+			 512);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
index 8d2a1c69610..a144e8f94b3 100644
--- a/gcc/config/aarch64/tuning_models/ampere1.h
+++ b/gcc/config/aarch64/tuning_models/ampere1.h
@@ -104,7 +104,7 @@  static const struct tune_params ampere1_tunings =
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h b/gcc/config/aarch64/tuning_models/ampere1a.h
index c419ffb3c1a..f688ed08a79 100644
--- a/gcc/config/aarch64/tuning_models/ampere1a.h
+++ b/gcc/config/aarch64/tuning_models/ampere1a.h
@@ -50,13 +50,13 @@  static const struct tune_params ampere1a_tunings =
   "32:16",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
-  1,	/* fma_reassoc_width.  */
+  4,	/* fma_reassoc_width.  */
   2,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
index c4928f50d29..a98b6a980f7 100644
--- a/gcc/config/aarch64/tuning_models/ampere1b.h
+++ b/gcc/config/aarch64/tuning_models/ampere1b.h
@@ -99,13 +99,14 @@  static const struct tune_params ampere1b_tunings =
   "32:16",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
-  1,	/* fma_reassoc_width.  */
+  4,	/* fma_reassoc_width.  */
   2,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
+   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
   &ampere1b_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */