[58/61] Add EHB after last load if branch within 16 inst.

Message ID 20250131171232.1018281-60-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: "dragan.mladjenovic" <dragan.mladjenovic@rt-rk.com>

This workaround adds the -mfix-i6400 and -mfix-i6500 options. If either of
them is active, an EHB is inserted after the last load instruction in a
sequence when a branch follows within 16 instructions.

The options are rejected with an error on pre-R6 or compressed ISA targets.

Inline assembly is treated as safe. It is up to the user to insert the
required EHB instructions after the loads and before the branches/jumps.

gcc/

    * config/mips/mips.cc (MIPS_JR): Define as JR.HB for
    TARGET_FIX_I6500.
    (mips_idiv_insns): Account for extra ehb instruction for
    TARGET_FIX_I6500.
    (mips_adjust_insn_length): Likewise for long pic jumps.
    (mips_output_conditional_branch): Output ehb in long pic jumps
    for TARGET_FIX_I6500.
    (mips_process_sync_loop): Output ehb before the first branch in
    sequence for TARGET_FIX_I6500.
    (mips_output_division): Likewise for -mdivide-breaks.
    (mips_msa_output_division): Likewise.
    (mips_avoid_hazard): Add new state to track loads and handle
    ehb insertion.
    (mips_reorg_process_insns): Setup new state for calling
    mips_avoid_hazard.
    (mips_set_compression_mode): Report an error if TARGET_FIX_I6400 or
    TARGET_FIX_I6500 is used with a compressed ISA.
    (mips_option_override): Allow TARGET_FIX_I6400 and TARGET_FIX_I6500
    only for R6 ISA.
    (mips_trampoline_init): Do not use compact branches with
    TARGET_FIX_I6500.
    * config/mips/mips.md (can_delay): Set to "no" for load instruction
    when TARGET_FIX_I6500 is enabled.
    (jump_pic): Output ehb for TARGET_FIX_I6500.
    * config/mips/mips.opt (-mfix-i6400): New option.
    (-mfix-i6500): Likewise.
    * doc/invoke.texi (-mfix-i6400): Document.
    (-mfix-i6500): Likewise.

gcc/testsuite/

    * gcc.target/mips/fix-i6500.c: New file.

Cherry-picked 784408360ef462711181e5cb59f1b0ff575f92ca
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic <dragan.mladjenovic@rt-rk.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/mips.cc                   | 81 ++++++++++++++++++++---
 gcc/config/mips/mips.md                   |  8 ++-
 gcc/config/mips/mips.opt                  |  8 +++
 gcc/doc/invoke.texi                       |  8 +++
 gcc/testsuite/gcc.target/mips/fix-i6500.c | 18 +++++
 5 files changed, 112 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/fix-i6500.c
  

Patch

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 63b7bdd255c..b09794eab15 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -179,10 +179,13 @@  static int *consumer_luid = NULL;
   ((0xf << 26) | ((DEST) << 16) | (VALUE))
 
 /* Return the opcode to jump to register DEST.  When the JR opcode is not
-   available use JALR $0, DEST.  */
+   available use JALR $0, DEST.
+   Use hazard barrier for TARGET_FIX_I6500.  */
 #define MIPS_JR(DEST) \
-  (TARGET_CB_ALWAYS ? ((0x1b << 27) | ((DEST) << 16)) \
-		    : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9)))
+  (TARGET_CB_ALWAYS && !TARGET_FIX_I6500 \
+  ? ((0x1b << 27) | ((DEST) << 16)) \
+  : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9) \
+      | (TARGET_FIX_I6500 ? (0x1 << 10) : 0x0)))
 
 /* Return the opcode for:
 
@@ -3993,7 +3996,7 @@  mips_idiv_insns (machine_mode mode)
       if (GENERATE_DIVIDE_TRAPS && !MSA_SUPPORTED_MODE_P (mode))
         count++;
       else
-        count += 2;
+	count += !TARGET_FIX_I6500 ? 2 : 3;
     }
 
   if (TARGET_FIX_R4000 || TARGET_FIX_R4400)
@@ -15601,6 +15604,9 @@  mips_adjust_insn_length (rtx_insn *insn, int length)
 
       /* Add the length of an indirect jump, ignoring the delay slot.  */
       length += TARGET_COMPRESSION ? 2 : 4;
+
+      if (TARGET_FIX_I6500 && !TARGET_ABSOLUTE_JUMPS)
+	length += 4;
     }
 
   /* A unconditional jump has an unfilled delay slot if it is not part
@@ -15769,6 +15775,10 @@  mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
   else
     {
       mips_output_load_label (taken);
+
+      if (TARGET_FIX_I6500)
+	output_asm_insn ("ehb", 0);
+
       if (TARGET_CB_MAYBE)
 	output_asm_insn ("jrc\t%@%]", 0);
       else
@@ -16149,6 +16159,10 @@  mips_process_sync_loop (rtx_insn *insn, rtx *operands)
 			       at, oldval, inclusive_mask, NULL);
 	  tmp1 = at;
 	}
+
+      if (TARGET_FIX_I6500)
+	mips_multi_add_insn ("ehb", NULL);
+
       if (TARGET_CB_NEVER)
 	mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
 
@@ -16413,6 +16427,9 @@  mips_output_division (const char *division, rtx *operands)
 	}
       else
 	{
+	  if (TARGET_FIX_I6500)
+	    output_asm_insn ("ehb", NULL);
+
 	  if (flag_delayed_branch)
 	    {
 	      output_asm_insn ("%(bne\t%2,%.,1f", operands);
@@ -16441,6 +16458,9 @@  mips_msa_output_division (const char *division, rtx *operands)
   s = division;
   if (TARGET_CHECK_ZERO_DIV)
     {
+      if (TARGET_FIX_I6500)
+	output_asm_insn ("ehb", NULL);
+
       output_asm_insn ("%(bnz.%v0\t%w2,1f", operands);
       output_asm_insn (s, operands);
       s = "break\t7%)\n1:";
@@ -21095,7 +21115,8 @@  mips_classify_branch_p6600 (rtx_insn *insn)
 
 static void
 mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
-		   rtx *delayed_reg, rtx lo_reg, bool *fs_delay)
+		   rtx *delayed_reg, rtx lo_reg, bool *fs_delay,
+		   rtx_insn **last_load, int *load_delay)
 {
   rtx pattern, set;
   int nops, ninsns;
@@ -21113,6 +21134,15 @@  mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
   if (get_attr_length (insn) == 0)
     return;
 
+  if (TARGET_FIX_I6500
+      && (CALL_P (insn) || JUMP_P (insn))
+      && (*last_load != 0 && *load_delay > 0))
+   {
+     emit_insn_after (gen_mips_ehb (), *last_load);
+     *last_load = 0;
+     *load_delay = 0;
+   }
+
   /* Work out how many nops are needed.  Note that we only care about
      registers that are explicitly mentioned in the instruction's pattern.
      It doesn't matter that calls use the argument registers or that they
@@ -21170,6 +21200,10 @@  mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
   *hilo_delay += ninsns;
   *delayed_reg = 0;
   *fs_delay = false;
+
+  if (*last_load && *load_delay > 0)
+    *load_delay -= ninsns;
+
   if (INSN_CODE (insn) >= 0)
     switch (get_attr_hazard (insn))
       {
@@ -21200,6 +21234,21 @@  mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
 	*delayed_reg = SET_DEST (set);
 	break;
       }
+
+   if (TARGET_FIX_I6500 && INSN_CODE (insn) >= 0)
+     switch (get_attr_type (insn))
+      {
+      case TYPE_LOAD:
+      case TYPE_FPLOAD:
+      case TYPE_FPIDXLOAD:
+      case TYPE_SIMD_LOAD:
+	gcc_assert (!insn->deleted ());
+	*last_load = insn;
+	*load_delay = 16;
+	break;
+      default:
+	break;
+      }
 }
 
 /* Emit a speculation barrier.
@@ -21246,9 +21295,9 @@  mips_break_sequence (rtx_insn *insn)
 static void
 mips_reorg_process_insns (void)
 {
-  rtx_insn *insn, *last_insn, *subinsn, *next_insn;
+  rtx_insn *insn, *last_insn, *subinsn, *next_insn, *last_load;
   rtx lo_reg, delayed_reg;
-  int hilo_delay;
+  int hilo_delay, load_delay;
   bool fs_delay;
 
   /* Force all instructions to be split into their final form.  */
@@ -21315,7 +21364,9 @@  mips_reorg_process_insns (void)
 	}
 
   last_insn = 0;
+  last_load = 0;
   hilo_delay = 2;
+  load_delay = 0;
   delayed_reg = 0;
   lo_reg = gen_rtx_REG (SImode, LO_REGNUM);
   fs_delay = false;
@@ -21404,7 +21455,8 @@  mips_reorg_process_insns (void)
 			INSN_CODE (subinsn) = CODE_FOR_nop;
 		      }
 		    mips_avoid_hazard (last_insn, subinsn, &hilo_delay,
-				       &delayed_reg, lo_reg, &fs_delay);
+				       &delayed_reg, lo_reg, &fs_delay,
+				       &last_load, &load_delay);
 		  }
 	      last_insn = insn;
 	    }
@@ -21425,7 +21477,8 @@  mips_reorg_process_insns (void)
 	      else
 		{
 		  mips_avoid_hazard (last_insn, insn, &hilo_delay,
-				     &delayed_reg, lo_reg, &fs_delay);
+				     &delayed_reg, lo_reg, &fs_delay,
+				     &last_load, &load_delay);
 		  /* When a compact branch introduces a forbidden slot hazard
 		     and the next useful instruction is a SEQUENCE of a jump
 		     and a non-nop instruction in the delay slot, remove the
@@ -21919,6 +21972,10 @@  mips_set_compression_mode (unsigned int compression_mode)
   target_flags &= ~(MASK_MIPS16 | MASK_MICROMIPS);
   target_flags |= compression_mode;
 
+  if (compression_mode && (TARGET_FIX_I6500 || TARGET_FIX_I6400))
+    error ("-mfix-i6500 (-mfix-i6400) not compatible with "
+	   "-mmips16 or -mmicromips");
+
   if (compression_mode & MASK_MIPS16)
     {
       /* Switch to MIPS16 mode.  */
@@ -22526,6 +22583,10 @@  mips_option_override (void)
   SUBTARGET_OVERRIDE_OPTIONS;
 #endif
 
+  if (mips_isa_rev < 6 && (TARGET_FIX_I6500 || TARGET_FIX_I6400))
+    error ("-mfix-i6500 (-mfix-i6400) not compatible with "
+	   "pre-R6 target: %qs", mips_arch_info->name);
+
   /* MIPS16 and microMIPS cannot coexist.  */
   if (TARGET_MICROMIPS && TARGET_MIPS16)
     error ("unsupported combination: %s", "-mips16 -mmicromips");
@@ -24180,7 +24241,7 @@  mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
      place the instruction that was in the delay slot before the JRC
      instruction.  */
 
-  if (TARGET_CB_ALWAYS)
+  if (TARGET_CB_ALWAYS && !TARGET_FIX_I6500)
     {
       rtx temp;
       temp = trampoline[i-2];
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 0c93ce17ae4..52abb9c1119 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -784,7 +784,9 @@ 
 (define_attr "can_delay" "no,yes"
   (if_then_else (and (eq_attr "type" "!branch,call,jump,simd_branch")
 		     (eq_attr "hazard" "none")
-		     (match_test "get_attr_insn_count (insn) == 1"))
+		     (match_test "get_attr_insn_count (insn) == 1")
+	 (ior (match_test "!TARGET_FIX_I6500")
+	     (eq_attr "type" "!load,fpload,fpidxload,simd_load")))
 		(const_string "yes")
 		(const_string "no")))
 
@@ -6716,6 +6718,10 @@ 
   else
     {
       mips_output_load_label (operands[0]);
+
+      if (TARGET_FIX_I6500)
+	output_asm_insn ("ehb", 0);
+
       if (TARGET_CB_MAYBE)
 	return "%*jr%:\t%@%]";
       else
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 804f4fecbc9..36c9d567a24 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -218,6 +218,14 @@  mfix4300
 Target Var(TARGET_4300_MUL_FIX)
 Work around an early 4300 hardware bug.
 
+mfix-i6400
+Target Var(TARGET_FIX_I6400) Init(0)
+Work around certain I6400 errata.
+
+mfix-i6500
+Target Var(TARGET_FIX_I6500) Init(0)
+Work around certain I6500 errata.
+
 mfp-exceptions
 Target Var(TARGET_FP_EXCEPTIONS) Init(1)
 FP exceptions are enabled.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b2e11a7fd0d..63d97c73efb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -29177,6 +29177,14 @@  Work around certain SB-1 CPU core errata.
 (This flag currently works around the SB-1 revision 2
 ``F1'' and ``F2'' floating-point errata.)
 
+@opindex mfix-i6500
+@item -mfix-i6500
+@itemx -mno-fix-i6500
+@itemx -mfix-i6400
+@itemx -mno-fix-i6400
+Work around certain I6500/I6400 core errata.
+(These flags currently work around the ``E75'' errata for I6500.)
+
 @opindex mr10k-cache-barrier
 @item -mr10k-cache-barrier=@var{setting}
 Specify whether GCC should insert cache barriers to avoid the
diff --git a/gcc/testsuite/gcc.target/mips/fix-i6500.c b/gcc/testsuite/gcc.target/mips/fix-i6500.c
new file mode 100644
index 00000000000..07488a6a83e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fix-i6500.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mno-micromips -mno-mips16 (HAS_LSA)" } */
+/* { dg-additional-options "-mfix-i6500" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+
+int
+foo (int ***p)
+{
+  return ***p;
+}
+
+float
+bar (float *p)
+{
+  return *p;
+}
+
+/* { dg-final { scan-assembler-times "ehb" 2 } } */