Hi,
This patch adds predicate move costs to several SVE enabled cores.
2022-02-25 Tamar Christina <tamar.christina@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
gcc/ChangeLog:
* config/aarch64/aarch64-protos.h (struct cpu_regmove_cost):
Add PR2PR member.
* config/aarch64/aarch64.cc (aarch64_register_move_cost): Use
PR2PR costs when moving a predicate.
(generic_regmove_cost, cortexa57_regmove_cost,
cortexa53_regmove_cost, exynosm1_regmove_cost, thunderx_regmove_cost,
xgene1_regmove_cost, qdf24xx_regmove_cost, thunderx2t99_regmove_cost,
thunderx3t110_regmove_cost, tsv110_regmove_cost, a64fx_regmove_cost):
Add PR2PR entry.
(neoversen1_regmove_cost): New.
(neoversen1_tunings): Use neoversen1_regmove_cost.
> -----Original Message-----
> From: Andre Vieira (lists) <andre.simoesdiasvieira@arm.com>
> Sent: Tuesday, March 8, 2022 3:16 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Subject: [aarch64] update reg-costs to include predicate move costs
>
> Hi,
>
> This patch adds predicate move costs to several SVE enabled cores.
>
Ok. It's a bit odd to have them in non-SVE tuning structs too, but we do reuse these structs across different cores, so it makes sense to have them.
Thanks,
Kyrill
>
> 2022-02-25 Tamar Christina <tamar.christina@arm.com>
> Andre Vieira <andre.simoesdiasvieira@arm.com>
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-protos.h (struct cpu_regmove_cost):
> Add PR2PR member.
> * config/aarch64/aarch64.cc (aarch64_register_move_cost): Use
> PR2PR costs when moving a predicate.
> (generic_regmove_cost, cortexa57_regmove_cost,
> cortexa53_regmove_cost, exynosm1_regmove_cost, thunderx_regmove_cost,
> xgene1_regmove_cost, qdf24xx_regmove_cost, thunderx2t99_regmove_cost,
> thunderx3t110_regmove_cost, tsv110_regmove_cost, a64fx_regmove_cost):
> Add PR2PR entry.
> (neoversen1_regmove_cost): New.
> (neoversen1_tunings): Use neoversen1_regmove_cost.
@@ -192,6 +192,7 @@ struct cpu_regmove_cost
const int GP2FP;
const int FP2GP;
const int FP2FP;
+ const int PR2PR;
};
struct simd_vec_cost
@@ -526,7 +526,8 @@ static const struct cpu_regmove_cost generic_regmove_cost =
their cost higher than memmov_cost. */
5, /* GP2FP */
5, /* FP2GP */
- 2 /* FP2FP */
+ 2, /* FP2FP */
+ 2 /* PR2PR. */
};
static const struct cpu_regmove_cost cortexa57_regmove_cost =
@@ -536,7 +537,8 @@ static const struct cpu_regmove_cost cortexa57_regmove_cost =
their cost higher than memmov_cost. */
5, /* GP2FP */
5, /* FP2GP */
- 2 /* FP2FP */
+ 2, /* FP2FP */
+ 2 /* PR2PR. */
};
static const struct cpu_regmove_cost cortexa53_regmove_cost =
@@ -546,7 +548,8 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
their cost higher than memmov_cost. */
5, /* GP2FP */
5, /* FP2GP */
- 2 /* FP2FP */
+ 2, /* FP2FP */
+ 2 /* PR2PR. */
};
static const struct cpu_regmove_cost exynosm1_regmove_cost =
@@ -556,7 +559,8 @@ static const struct cpu_regmove_cost exynosm1_regmove_cost =
their cost higher than memmov_cost (actual, 4 and 9). */
9, /* GP2FP */
9, /* FP2GP */
- 1 /* FP2FP */
+ 1, /* FP2FP */
+ 1 /* PR2PR. */
};
static const struct cpu_regmove_cost thunderx_regmove_cost =
@@ -564,7 +568,8 @@ static const struct cpu_regmove_cost thunderx_regmove_cost =
2, /* GP2GP */
2, /* GP2FP */
6, /* FP2GP */
- 4 /* FP2FP */
+ 4, /* FP2FP */
+ 4 /* PR2PR. */
};
static const struct cpu_regmove_cost xgene1_regmove_cost =
@@ -574,7 +579,8 @@ static const struct cpu_regmove_cost xgene1_regmove_cost =
their cost higher than memmov_cost. */
8, /* GP2FP */
8, /* FP2GP */
- 2 /* FP2FP */
+ 2, /* FP2FP */
+ 2 /* PR2PR. */
};
static const struct cpu_regmove_cost qdf24xx_regmove_cost =
@@ -583,7 +589,8 @@ static const struct cpu_regmove_cost qdf24xx_regmove_cost =
/* Avoid the use of int<->fp moves for spilling. */
6, /* GP2FP */
6, /* FP2GP */
- 4 /* FP2FP */
+ 4, /* FP2FP */
+ 4 /* PR2PR. */
};
static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
@@ -593,6 +600,7 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
5, /* GP2FP */
6, /* FP2GP */
3, /* FP2FP */
+ 3 /* PR2PR. */
};
static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
@@ -601,7 +609,8 @@ static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
/* Avoid the use of int<->fp moves for spilling. */
4, /* GP2FP */
5, /* FP2GP */
- 4 /* FP2FP */
+ 4, /* FP2FP */
+ 4 /* PR2PR. */
};
static const struct cpu_regmove_cost tsv110_regmove_cost =
@@ -611,7 +620,8 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
their cost higher than memmov_cost. */
2, /* GP2FP */
3, /* FP2GP */
- 2 /* FP2FP */
+ 2, /* FP2FP */
+ 2 /* PR2PR. */
};
static const struct cpu_regmove_cost a64fx_regmove_cost =
@@ -621,7 +631,19 @@ static const struct cpu_regmove_cost a64fx_regmove_cost =
their cost higher than memmov_cost. */
5, /* GP2FP */
7, /* FP2GP */
- 2 /* FP2FP */
+ 2, /* FP2FP */
+ 2 /* PR2PR. */
+};
+
+static const struct cpu_regmove_cost neoversen1_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Spilling to int<->fp instead of memory is recommended so set
+ realistic costs compared to memmov_cost. */
+ 3, /* GP2FP */
+ 2, /* FP2GP */
+ 2, /* FP2FP */
+ 1 /* PR2PR. */
};
/* Generic costs for Advanced SIMD vector operations. */
@@ -1698,7 +1720,7 @@ static const struct tune_params neoversen1_tunings =
{
&cortexa76_extra_costs,
&generic_addrcost_table,
- &generic_regmove_cost,
+ &neoversen1_regmove_cost,
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
@@ -14438,6 +14460,11 @@ aarch64_register_move_cost (machine_mode mode,
|| (to == GENERAL_REGS && from == STACK_REG))
return regmove_cost->GP2GP;
+ /* Predicate to predicate moves are usually very cheap so cost them
+ separately. */
+ if (from == PR_REGS && to == PR_REGS)
+ return regmove_cost->PR2PR;
+
/* To/From the stack register, we move via the gprs. */
if (to == STACK_REG || from == STACK_REG)
return aarch64_register_move_cost (mode, from, GENERAL_REGS)