[2/3,ARM] STAR-MC1 CPU Support - arm: Add individual star-mc1 cost tables and cost functions

Message ID b751121d-cdf2-e18d-328c-d73f0db0fbfb@gmail.com
State Dropped
Headers
Series [1/3,ARM] STAR-MC1 CPU Support - arm: Add star-mc1 core |

Commit Message

Chung-Ju Wu May 26, 2022, 7:18 a.m. UTC
  Hi,

Attached is the patch to provide star-mc1-specific cost functions and tables.
Given these individual implementations, developers are able to make
their own adjustments to fine-tune star-mc1 performance without affecting
other CPU configurations.

Bootstrapped and tested on arm-none-eabi.

Is it OK for trunk?

Regards,
jasonwucj
From e9081bb6d7fc1521036dbceec59ba2eae532c04c Mon Sep 17 00:00:00 2001
From: Chung-Ju Wu <jasonwucj@gmail.com>
Date: Thu, 26 May 2022 03:47:23 +0000
Subject: [PATCH 2/3] arm: Add individual star-mc1 cost tables and cost
 functions

Signed-off-by: Chung-Ju Wu <jasonwucj@gmail.com>

gcc/ChangeLog:

	* config/arm/arm-cpus.in (star-mc1): Use star_mc1 costs.
	* config/arm/arm.cc (arm_star_mc1_branch_cost): New function.
	(star_mc1_extra_costs): New struct.
	(arm_star_mc1_tune): New struct.
---
 gcc/config/arm/arm-cpus.in |   2 +-
 gcc/config/arm/arm.cc      | 139 +++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 1 deletion(-)
  

Comments

Kyrylo Tkachov June 6, 2022, 2:18 p.m. UTC | #1
Hi jasonwucj,

> -----Original Message-----
> From: Gcc-patches <gcc-patches-
> bounces+kyrylo.tkachov=arm.com@gcc.gnu.org> On Behalf Of Chung-Ju Wu
> via Gcc-patches
> Sent: Thursday, May 26, 2022 8:18 AM
> To: Richard Earnshaw <Richard.Earnshaw@arm.com>; gcc-patches <gcc-
> patches@gcc.gnu.org>
> Cc: Jason.Wu@anshingtek.com.tw
> Subject: [PATCH 2/3][ARM] STAR-MC1 CPU Support - arm: Add individual
> star-mc1 cost tables and cost functions
> 
> Hi,
> 
> Attached is the patch to provide star-mc1 specific cost functions and tables.
> Given these individual implementation, developers are able to make
> their own adjustment to fine-tune star-mc1 performance without affecting
> other cpu configurations.
> 
> Bootstrapped and tested on arm-none-eabi.
> 
> Is it OK for trunk?

It looks like arm_star_mc1_tune, star_mc1_extra_costs and arm_star_mc1_branch_cost are identical to arm_v7m_tune, v7m_extra_costs and arm_cortex_m_branch_cost.
I'd rather not duplicate those structures and functions in the master branch, as they provide a maintenance burden to the community.
If some tuning parameters need to be modified in the future for better performance we can create star-mc1-specific structures on demand then.
Thus, I think we don't want this patch.
Thanks,
Kyrill
  
Chung-Ju Wu June 8, 2022, 8:32 a.m. UTC | #2
Hi Kyrylo,

On 2022/06/06 22:18 UTC+8, Kyrylo Tkachov wrote:
> I'd rather not duplicate those structures and functions in the master branch, as they provide a maintenance burden to the community.
> If some tuning parameters need to be modified in the future for better performance we can create star-mc1-specific structures on demand then.
> Thus, I think we don't want this patch.
> Thanks,
> Kyrill

Thanks for the comment.

Indeed, considering the maintenance burden to the community, having those duplicate
structures in the master branch is not a good idea.

I am planning to contribute the star-mc1 pipeline machine description in the future.
Maybe it would be better to propose new star-mc1-specific structures along with the pipeline
implementation then.

Thanks for the review.  I won't apply this 2/3 patch.

Regards,
jasonwucj
  

Patch

diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 5a63bc548e5..6a346e4a93d 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -1645,7 +1645,7 @@  begin cpu star-mc1
  option nofp remove ALL_FP
  option nodsp remove armv7em
  isa quirk_no_asmcpu quirk_vlldm
- costs v7m
+ costs star_mc1
 end cpu star-mc1
 
 # V8 R-profile implementations.
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 70c2d50f0cc..c8f96f92a59 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -295,6 +295,7 @@  static int arm_default_branch_cost (bool, bool);
 static int arm_cortex_a5_branch_cost (bool, bool);
 static int arm_cortex_m_branch_cost (bool, bool);
 static int arm_cortex_m7_branch_cost (bool, bool);
+static int arm_star_mc1_branch_cost (bool, bool);
 
 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
 					  const vec_perm_indices &);
@@ -1847,6 +1848,113 @@  const struct cpu_cost_table v7m_extra_costs =
   }
 };
 
+const struct cpu_cost_table star_mc1_extra_costs =
+{
+  /* ALU */
+  {
+    0,			/* arith.  */
+    0,			/* logical.  */
+    0,			/* shift.  */
+    0,			/* shift_reg.  */
+    0,			/* arith_shift.  */
+    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
+    0,			/* log_shift.  */
+    COSTS_N_INSNS (1),	/* log_shift_reg.  */
+    0,			/* extend.  */
+    COSTS_N_INSNS (1),	/* extend_arith.  */
+    0,			/* bfi.  */
+    0,			/* bfx.  */
+    0,			/* clz.  */
+    0,			/* rev.  */
+    COSTS_N_INSNS (1),	/* non_exec.  */
+    false		/* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (1),	/* simple.  */
+      COSTS_N_INSNS (1),	/* flag_setting.  */
+      COSTS_N_INSNS (2),	/* extend.  */
+      COSTS_N_INSNS (1),	/* add.  */
+      COSTS_N_INSNS (3),	/* extend_add.  */
+      COSTS_N_INSNS (8)		/* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      0,			/* simple (N/A).  */
+      0,			/* flag_setting (N/A).  */
+      COSTS_N_INSNS (2),	/* extend.  */
+      0,			/* add (N/A).  */
+      COSTS_N_INSNS (3),	/* extend_add.  */
+      0				/* idiv (N/A).  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (2),	/* load.  */
+    0,			/* load_sign_extend.  */
+    COSTS_N_INSNS (3),	/* ldrd.  */
+    COSTS_N_INSNS (2),	/* ldm_1st.  */
+    1,			/* ldm_regs_per_insn_1st.  */
+    1,			/* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (2),	/* loadf.  */
+    COSTS_N_INSNS (3),	/* loadd.  */
+    COSTS_N_INSNS (1),  /* load_unaligned.  */
+    COSTS_N_INSNS (2),	/* store.  */
+    COSTS_N_INSNS (3),	/* strd.  */
+    COSTS_N_INSNS (2),	/* stm_1st.  */
+    1,			/* stm_regs_per_insn_1st.  */
+    1,			/* stm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (2),	/* storef.  */
+    COSTS_N_INSNS (3),	/* stored.  */
+    COSTS_N_INSNS (1),	/* store_unaligned.  */
+    COSTS_N_INSNS (1),	/* loadv.  */
+    COSTS_N_INSNS (1)	/* storev.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (7),	/* div.  */
+      COSTS_N_INSNS (2),	/* mult.  */
+      COSTS_N_INSNS (5),	/* mult_addsub.  */
+      COSTS_N_INSNS (3),	/* fma.  */
+      COSTS_N_INSNS (1),	/* addsub.  */
+      0,			/* fpconst.  */
+      0,			/* neg.  */
+      0,			/* compare.  */
+      0,			/* widen.  */
+      0,			/* narrow.  */
+      0,			/* toint.  */
+      0,			/* fromint.  */
+      0				/* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (15),	/* div.  */
+      COSTS_N_INSNS (5),	/* mult.  */
+      COSTS_N_INSNS (7),	/* mult_addsub.  */
+      COSTS_N_INSNS (7),	/* fma.  */
+      COSTS_N_INSNS (3),	/* addsub.  */
+      0,			/* fpconst.  */
+      0,			/* neg.  */
+      0,			/* compare.  */
+      0,			/* widen.  */
+      0,			/* narrow.  */
+      0,			/* toint.  */
+      0,			/* fromint.  */
+      0				/* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1),	/* alu.  */
+    COSTS_N_INSNS (4),	/* mult.  */
+    COSTS_N_INSNS (1),	/* movi.  */
+    COSTS_N_INSNS (2),	/* dup.  */
+    COSTS_N_INSNS (2)	/* extract.  */
+  }
+};
+
 const struct addr_mode_cost_table generic_addr_mode_costs =
 {
   /* int.  */
@@ -2370,6 +2478,30 @@  const struct tune_params arm_cortex_m7_tune =
   tune_params::SCHED_AUTOPREF_OFF
 };
 
+/* star-mc1 tuning.  */
+
+const struct tune_params arm_star_mc1_tune =
+{
+  &star_mc1_extra_costs,
+  &generic_addr_mode_costs,		/* Addressing mode costs.  */
+  NULL,					/* Sched adj cost.  */
+  arm_star_mc1_branch_cost,
+  &arm_default_vec_cost,
+  1,						/* Constant limit.  */
+  2,						/* Max cond insns.  */
+  8,						/* Memset max inline.  */
+  1,						/* Issue rate.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  tune_params::PREF_CONST_POOL_TRUE,
+  tune_params::PREF_LDRD_FALSE,
+  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
+  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
+  tune_params::DISPARAGE_FLAGS_NEITHER,
+  tune_params::PREF_NEON_STRINGOPS_FALSE,
+  tune_params::FUSE_NOTHING,
+  tune_params::SCHED_AUTOPREF_OFF
+};
+
 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
    arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
    cortex-m23.  */
@@ -12622,6 +12754,13 @@  arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
 }
 
+static int
+arm_star_mc1_branch_cost (bool speed_p, bool predictable_p)
+{
+  return (TARGET_32BIT && speed_p) ? 1
+	 : arm_default_branch_cost (speed_p, predictable_p);
+}
+
 static bool fp_consts_inited = false;
 
 static REAL_VALUE_TYPE value_fp0;