[aarch64] update reg-costs to include predicate move costs

Message ID 1f472c95-12bc-bf0d-fa83-385405cc195a@arm.com
State New
Headers
Series [aarch64] update reg-costs to include predicate move costs |

Commit Message

Andre Vieira (lists) March 8, 2022, 3:16 p.m. UTC
  Hi,

This patch adds predicate move costs to several SVE-enabled cores.


2022-02-25  Tamar Christina  <tamar.christina@arm.com>
                        Andre Vieira <andre.simoesdiasvieira@arm.com>

gcc/ChangeLog:

         * config/aarch64/aarch64-protos.h (struct cpu_regmove_cost): 
Add PR2PR member.
         * config/aarch64/aarch64.cc (aarch64_register_move_cost): Use 
PR2PR costs when moving a predicate.
         (generic_regmove_cost, cortexa57_regmove_cost, 
cortexa53_regmove_cost, exynosm1_regmove_cost, thunderx_regmove_cost, 
xgene1_regmove_cost, qdf24xx_regmove_cost, thunderx2t99_regmove_cost, 
thunderx3t110_regmove_cost, tsv110_regmove_cost, a64fx_regmove_cost): 
Add PR2PR entry.
         (neoversen1_regmove_cost): New.
         (neoversen1_tunings): Use neoversen1_regmove_cost.
  

Comments

Kyrylo Tkachov March 9, 2022, 10:29 a.m. UTC | #1
> -----Original Message-----
> From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com>
> Sent: Tuesday, March 8, 2022 3:16 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Subject: [aarch64] update reg-costs to include predicate move costs
> 
> Hi,
> 
> This patch adds predicate move costs to several SVE enabled cores.
> 

Ok. It's a bit odd to have them in non-SVE tuning structs too, but we do reuse these structs across different cores, so it makes sense to have them.

Thanks,
Kyrill

> 
> 2022-02-25  Tamar Christina  <tamar.christina@arm.com>
>                         Andre Vieira <andre.simoesdiasvieira@arm.com>
> 
> gcc/ChangeLog:
> 
>          * config/aarch64/aarch64-protos.h (struct cpu_regmove_cost):
> Add PR2PR member.
>          * config/aarch64/aarch64.cc (aarch64_register_move_cost): Use
> PR2PR costs when moving a predicate.
>          (generic_regmove_cost, cortexa57_regmove_cost,
> cortexa53_regmove_cost, exynosm1_regmove_cost, thunderx_regmove_cost,
> xgene1_regmove_cost, qdf24xx_regmove_cost, thunderx2t99_regmove_cost,
> thunderx3t110_regmove_cost, tsv110_regmove_cost, a64fx_regmove_cost):
> Add PR2PR entry.
>          (neoversen1_regmove_cost): New.
>          (neoversen1_tunings): Use neoversen1_regmove_cost.
  

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index d0e78d6a559a7c310b7f8c7877081a0e2baf6a05..f2fde35c6eb4989af8736db8fad004171c160282 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -192,6 +192,7 @@  struct cpu_regmove_cost
   const int GP2FP;
   const int FP2GP;
   const int FP2FP;
+  const int PR2PR;
 };
 
 struct simd_vec_cost
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index dbeaaf484dbc070ae3fcc08530ec9bd20b8ab651..9a94f3a30b0f1acc3c9b8a0e3d703e60780d0cbc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -526,7 +526,8 @@  static const struct cpu_regmove_cost generic_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   5, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost cortexa57_regmove_cost =
@@ -536,7 +537,8 @@  static const struct cpu_regmove_cost cortexa57_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   5, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost cortexa53_regmove_cost =
@@ -546,7 +548,8 @@  static const struct cpu_regmove_cost cortexa53_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   5, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost exynosm1_regmove_cost =
@@ -556,7 +559,8 @@  static const struct cpu_regmove_cost exynosm1_regmove_cost =
      their cost higher than memmov_cost (actual, 4 and 9).  */
   9, /* GP2FP  */
   9, /* FP2GP  */
-  1 /* FP2FP  */
+  1, /* FP2FP  */
+  1 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost thunderx_regmove_cost =
@@ -564,7 +568,8 @@  static const struct cpu_regmove_cost thunderx_regmove_cost =
   2, /* GP2GP  */
   2, /* GP2FP  */
   6, /* FP2GP  */
-  4 /* FP2FP  */
+  4, /* FP2FP  */
+  4 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost xgene1_regmove_cost =
@@ -574,7 +579,8 @@  static const struct cpu_regmove_cost xgene1_regmove_cost =
      their cost higher than memmov_cost.  */
   8, /* GP2FP  */
   8, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost qdf24xx_regmove_cost =
@@ -583,7 +589,8 @@  static const struct cpu_regmove_cost qdf24xx_regmove_cost =
   /* Avoid the use of int<->fp moves for spilling.  */
   6, /* GP2FP  */
   6, /* FP2GP  */
-  4 /* FP2FP  */
+  4, /* FP2FP  */
+  4 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
@@ -593,6 +600,7 @@  static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
   5, /* GP2FP  */
   6, /* FP2GP  */
   3, /* FP2FP  */
+  3 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
@@ -601,7 +609,8 @@  static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
   /* Avoid the use of int<->fp moves for spilling.  */
   4, /* GP2FP  */
   5, /* FP2GP  */
-  4  /* FP2FP  */
+  4,  /* FP2FP  */
+  4 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost tsv110_regmove_cost =
@@ -611,7 +620,8 @@  static const struct cpu_regmove_cost tsv110_regmove_cost =
      their cost higher than memmov_cost.  */
   2, /* GP2FP  */
   3, /* FP2GP  */
-  2  /* FP2FP  */
+  2,  /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost a64fx_regmove_cost =
@@ -621,7 +631,19 @@  static const struct cpu_regmove_cost a64fx_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   7, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
+};
+
+static const struct cpu_regmove_cost neoversen1_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Spilling to int<->fp instead of memory is recommended so set
+     realistic costs compared to memmov_cost.  */
+  3, /* GP2FP  */
+  2, /* FP2GP  */
+  2, /* FP2FP  */
+  1 /* PR2PR.  */
 };
 
 /* Generic costs for Advanced SIMD vector operations.   */
@@ -1698,7 +1720,7 @@  static const struct tune_params neoversen1_tunings =
 {
   &cortexa76_extra_costs,
   &generic_addrcost_table,
-  &generic_regmove_cost,
+  &neoversen1_regmove_cost,
   &cortexa57_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
@@ -14438,6 +14460,11 @@  aarch64_register_move_cost (machine_mode mode,
       || (to == GENERAL_REGS && from == STACK_REG))
     return regmove_cost->GP2GP;
 
+  /* Predicate to predicate moves are usually very cheap so cost them
+     separately.  */
+  if (from == PR_REGS && to == PR_REGS)
+    return regmove_cost->PR2PR;
+
   /* To/From the stack register, we move via the gprs.  */
   if (to == STACK_REG || from == STACK_REG)
     return aarch64_register_move_cost (mode, from, GENERAL_REGS)