[26/61] Load/store bonding improvements

Message ID 20250131171232.1018281-28-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: Robert Suchanek <robert.suchanek@imgtec.com>

gcc/ChangeLog:

	* config/mips/mips-protos.h (mips_load_store_bonding_p): New
	prototype.
	* config/mips/mips.cc (mips_load_store_bond_insns): New static
	function.
	(mips_block_move_straight): Bond insns where possible.
	(mips_for_each_saved_gpr_and_fpr): Save/restore registers with
	increasing offsets if load store pairs optimisation is enabled.
	(mips_expand_prologue): Bond insns in the prologue.
	(mips_expand_epilogue): Bond insns in the epilogue.
	(mips_multipass_dfa_lookahead): Fix sched_fusion with compiler
	checking enabled.
	(mips_sched_fusion_priority): New static function.
	(mips_avoid_hazard): Check if instruction is not in forbidden
	slot.
	(mips_reorg_process_insns): Likewise.
	(mips_option_override): Disable schedule_fusion for MSA.
	(mips_load_store_p): New function.
	(mips_load_store_insn_p): Likewise.
	(mips_load_store_bond_insns_in_range): Likewise.
	(mips_load_store_bonding_p): Remove load_p argument.
	(mips_load_store_bonding_insn_p): Add more rules for bonding.
	(TARGET_SCHED_FUSION_PRIORITY): Define macro.
	* config/mips/mips.md (can_forbidden): New attribute.
	(JOIN_MODE): Add DI mode to the mode iterator.
	(join2_load_store<JOIN_MODE:mode>): Change this to named
	pattern.  Add 0 operand to constraints.  Add `can_forbidden'
	attribute.
	(*join2_loadhi): Add `can_forbidden' attribute.
	* config/mips/predicates.md (nonimmediate_or_0_operand): New
	predicate.

Cherry-picked 65c0efe581901a706fbe2d4a9d96337090ac220a
and 4a2432906766a48b7f3f9aaad8a1358604ce2f88
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek <robert.suchanek@mips.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/mips-protos.h |   2 +-
 gcc/config/mips/mips.cc       | 399 ++++++++++++++++++++++++++++++++--
 gcc/config/mips/mips.md       |  28 ++-
 gcc/config/mips/predicates.md |   5 +
 4 files changed, 404 insertions(+), 30 deletions(-)
  

Patch

diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 6b8f2370752..1ec6f386f5f 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -379,7 +379,7 @@  extern bool mips_epilogue_uses (unsigned int);
 extern void mips_final_prescan_insn (rtx_insn *, rtx *, int);
 extern int mips_trampoline_code_size (void);
 extern void mips_function_profiler (FILE *);
-extern bool mips_load_store_bonding_p (rtx *, machine_mode, bool);
+extern bool mips_load_store_bonding_p (rtx *, machine_mode);
 
 typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx);
 #ifdef RTX_CODE
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 508435cc9eb..36ce297085b 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -1790,6 +1790,9 @@  static int mips_register_move_cost (machine_mode, reg_class_t,
 				    reg_class_t);
 static unsigned int mips_function_arg_boundary (machine_mode, const_tree);
 static rtx mips_gen_const_int_vector_shuffle (machine_mode, int);
+static bool mips_load_store_insn_p (rtx_insn *, rtx *,
+				    HOST_WIDE_INT *, bool *);
+static void mips_load_store_bond_insns ();
 
 /* This hash table keeps track of implicit "mips16" and "nomips16" attributes
    for -mflip_mips16.  It maps decl names onto a boolean mode setting.  */
@@ -9398,6 +9401,9 @@  mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
       move_by_pieces (dest, src, length - offset,
 		      MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
     }
+
+  if (ENABLE_LD_ST_PAIRS)
+    mips_load_store_bond_insns ();
 }
 
 /* Helper function for doing a loop-based block operation on memory
@@ -13279,8 +13285,9 @@  mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
   machine_mode fpr_mode;
   int regno;
   const struct mips_frame_info *frame = &cfun->machine->frame;
-  HOST_WIDE_INT offset;
+  HOST_WIDE_INT offset, offset_dec;
   unsigned int mask;
+  bool increasing_order_p = false;
 
   /* Save registers starting from high to low.  The debuggers prefer at least
      the return register be stored at func+4, and also it allows us not to
@@ -13292,20 +13299,53 @@  mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
   if (TARGET_MICROMIPS)
     umips_build_save_restore (fn, &mask, &offset);
 
-  for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+  if (ENABLE_LD_ST_PAIRS)
+    increasing_order_p = true;
+
+  if (BITSET_P (mask, (regno = GP_REG_LAST - GP_REG_FIRST)))
+    {
+      /* Record the ra offset for use by mips_function_profiler.  */
+      if (regno == RETURN_ADDR_REGNUM)
+	cfun->machine->frame.ra_fp_offset = offset + sp_offset;
+      mips_save_restore_reg (word_mode, regno, offset, fn);
+      offset -= UNITS_PER_WORD;
+    }
+
+  if (increasing_order_p)
+    {
+      offset_dec = 0;
+      for (regno = GP_REG_LAST - 1; regno >= GP_REG_FIRST; regno--)
+	if (BITSET_P (mask, regno - GP_REG_FIRST))
+	  offset_dec += UNITS_PER_WORD;
+      offset -= (offset_dec - UNITS_PER_WORD);
+    }
+
+  for (regno = GP_REG_LAST - 1; regno >= GP_REG_FIRST; regno--)
     if (BITSET_P (mask, regno - GP_REG_FIRST))
       {
-	/* Record the ra offset for use by mips_function_profiler.  */
-	if (regno == RETURN_ADDR_REGNUM)
-	  cfun->machine->frame.ra_fp_offset = offset + sp_offset;
 	mips_save_restore_reg (word_mode, regno, offset, fn);
-	offset -= UNITS_PER_WORD;
+	if (increasing_order_p)
+	  offset += UNITS_PER_WORD;
+	else
+	  offset -= UNITS_PER_WORD;
       }
 
   /* This loop must iterate over the same space as its companion in
      mips_compute_frame_info.  */
   offset = cfun->machine->frame.fp_sp_offset - sp_offset;
   fpr_mode = (TARGET_SINGLE_FLOAT ? SFmode : DFmode);
+
+  if (increasing_order_p)
+    {
+      offset_dec = 0;
+      for (regno = FP_REG_LAST - MAX_FPRS_PER_FMT + 1;
+	   regno >= FP_REG_FIRST;
+	   regno -= MAX_FPRS_PER_FMT)
+	if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+	  offset_dec += GET_MODE_SIZE (fpr_mode);
+      offset -= (offset_dec - GET_MODE_SIZE (fpr_mode));
+    }
+
   for (regno = FP_REG_LAST - MAX_FPRS_PER_FMT + 1;
        regno >= FP_REG_FIRST;
        regno -= MAX_FPRS_PER_FMT)
@@ -13321,7 +13361,10 @@  mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
 	  }
 	else
 	  mips_save_restore_reg (fpr_mode, regno, offset, fn);
-	offset -= GET_MODE_SIZE (fpr_mode);
+	if (increasing_order_p)
+	  offset += GET_MODE_SIZE (fpr_mode);
+	else
+	  offset -= GET_MODE_SIZE (fpr_mode);
       }
 }
 
@@ -14078,6 +14121,9 @@  mips_expand_prologue (void)
      the call to mcount.  */
   if (crtl->profile)
     emit_insn (gen_blockage ());
+
+  if (ENABLE_LD_ST_PAIRS)
+    mips_load_store_bond_insns ();
 }
 
 /* Attach all pending register saves to the previous instruction.
@@ -14430,6 +14476,9 @@  mips_expand_epilogue (bool sibcall_p)
       emit_insn_before (gen_mips_di (), insn);
       emit_insn_before (gen_mips_ehb (), insn);
     }
+
+  if (ENABLE_LD_ST_PAIRS)
+    mips_load_store_bond_insns ();
 }
 
 /* Return nonzero if this function is known to have a null epilogue.
@@ -16486,6 +16535,9 @@  mips_dfa_post_advance_cycle (void)
 static int
 mips_multipass_dfa_lookahead (void)
 {
+  if (sched_fusion)
+    return 0;
+
   /* Can schedule up to 4 of the 6 function units in any one cycle.  */
   if (TUNE_SB1)
     return 4;
@@ -16876,6 +16928,48 @@  mips_set_sched_flags (spec_info_t spec_info ATTRIBUTE_UNUSED)
   *flags |= DONT_BREAK_DEPENDENCIES;
 }
 
+/* Implement TARGET_SCHED_FUSION_PRIORITY.  An insn that is not a bondable
+   load or store gets MAX_PRI - 1 in both *FUSION_PRI and *PRI.  Otherwise
+   loads rank above stores in *FUSION_PRI, and *PRI favours the smaller
+   base register number and then the smaller offset.  */
+
+static void
+mips_sched_fusion_priority (rtx_insn *insn, int max_pri,
+			   int *fusion_pri, int *pri)
+{
+  rtx base_reg;
+  HOST_WIDE_INT mem_offset;
+  bool load_p;
+  int prio;
+
+  gcc_assert (INSN_P (insn));
+
+  prio = max_pri - 1;
+  if (!mips_load_store_insn_p (insn, &base_reg, &mem_offset, &load_p))
+    {
+      /* Nothing to fuse: both priorities take the default value.  */
+      *pri = prio;
+      *fusion_pri = prio;
+      return;
+    }
+
+  /* Loads are grouped ahead of stores.  */
+  *fusion_pri = load_p ? prio - 1 : prio - 2;
+
+  prio /= 2;
+
+  /* A lower base register number ranks first.  */
+  prio -= ((REGNO (base_reg) & 0xff) << 20);
+
+  /* So does a lower offset.  */
+  if (mem_offset < 0)
+    prio += ((-mem_offset) & 0xfffff);
+  else
+    prio -= (mem_offset & 0xfffff);
+
+  *pri = prio;
+}
+
 static void
 mips_weight_finish_global ()
 {
@@ -20791,7 +20885,7 @@  mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
      imply it is not actually a compact branch anyway) and the current
      insn is not an inline asm, and can't go in a delay slot.  */
   else if (TARGET_FORBIDDEN_SLOTS && *fs_delay
-	   && get_attr_can_delay (insn) == CAN_DELAY_NO
+	   && get_attr_can_forbidden (insn) == CAN_FORBIDDEN_NO
 	   && GET_CODE (PATTERN (after)) != SEQUENCE
 	   && GET_CODE (pattern) != ASM_INPUT
 	   && asm_noperands (pattern) < 0)
@@ -21007,7 +21101,8 @@  mips_reorg_process_insns (void)
 		  && ((next_active
 		       && INSN_P (next_active)
 		       && GET_CODE (PATTERN (next_active)) != SEQUENCE
-		       && get_attr_can_delay (next_active) == CAN_DELAY_YES)
+		       && (get_attr_can_forbidden (next_active)
+			   == CAN_FORBIDDEN_YES))
 		      || !optimize_size))
 		{
 		  /* To hide a potential pipeline bubble, if we scan backwards
@@ -22615,6 +22710,11 @@  mips_option_override (void)
   if (ISA_HAS_MSA && !(TARGET_FLOAT64 && TARGET_HARD_FLOAT_ABI))
     error ("%<-mmsa%> must be used with %<-mfp64%> and %<-mhard-float%>");
 
+  /* Disable fusion for MSA as it can significantly interfere and schedule
+     loads too close to their use.  */
+  if (ISA_HAS_MSA)
+    flag_schedule_fusion = 0;
+
   /* Make sure that -mpaired-single is only used on ISAs that support it.
      We must disable it otherwise since it relies on other ISA properties
      like ISA_HAS_8CC having their normal values.  */
@@ -23258,12 +23358,96 @@  umips_load_store_pair_p_1 (bool load_p, bool swap_p,
   return true;
 }
 
+/* Return TRUE if OPERANDS (destination, source) is a non-volatile register
+   load, or a store of a register or zero, whose address is [BASE+OFFSET]
+   with a register BASE; set *LOAD_P for a load.  Return FALSE otherwise.  */
+
+static bool
+mips_load_store_p (rtx *operands, rtx *base, HOST_WIDE_INT *offset,
+		   bool *load_p)
+{
+  rtx lhs = operands[0];
+  rtx rhs = operands[1];
+  rtx addr_mem;
+
+  if (MEM_P (lhs) && (REG_P (rhs) || rhs == const0_rtx))
+    {
+      addr_mem = lhs;
+      *load_p = false;
+    }
+  else if (MEM_P (rhs) && REG_P (lhs))
+    {
+      addr_mem = rhs;
+      *load_p = true;
+    }
+  else
+    return false;
+
+  mips_split_plus (XEXP (addr_mem, 0), base, offset);
+
+  /* The base must be a register, and a volatile load is rejected.  */
+  return REG_P (*base) && !(*load_p && MEM_VOLATILE_P (addr_mem));
+}
+
+/* Return TRUE if INSN is a single SET that mips_load_store_p accepts,
+   passing BASE, OFFSET and LOAD_P on to it.  Return FALSE otherwise.  */
+
+static bool
+mips_load_store_insn_p (rtx_insn *insn, rtx *base, HOST_WIDE_INT *offset,
+			bool *load_p)
+{
+  rtx operands[2];
+
+  gcc_assert (INSN_P (insn));
+
+  rtx pat = PATTERN (insn);
+  if (GET_CODE (pat) == SET)
+    {
+      operands[0] = SET_DEST (pat);
+      operands[1] = SET_SRC (pat);
+      return mips_load_store_p (operands, base, offset, load_p);
+    }
+  return false;
+}
+
+/* Return TRUE if operands OPERANDS represent two consecutive instructions
+   that can be bonded as a load-load/store-store pair in mode MODE.
+   Return FALSE otherwise.  */
+
 bool
-mips_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
+mips_load_store_bonding_p (rtx *operands, machine_mode mode)
 {
   rtx reg1, reg2, mem1, mem2, base1, base2;
   enum reg_class rc1, rc2;
   HOST_WIDE_INT offset1, offset2;
+  bool load_p, load_p2;
+
+  /* Check the supported modes.  */
+  switch (mode)
+    {
+    case E_HImode:
+    case E_SImode:
+      break;
+    case E_DImode:
+      if (!TARGET_64BIT)
+	return false;
+      break;
+    case E_SFmode:
+      if (!TARGET_HARD_FLOAT)
+	return false;
+      break;
+    case E_DFmode:
+      if (!(TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT))
+	return false;
+      break;
+    default:
+      return false;
+    }
+
+  if (!mips_load_store_p (&operands[0], &base1, &offset1, &load_p)
+      || !mips_load_store_p (&operands[2], &base2, &offset2, &load_p2)
+      || load_p != load_p2)
+    return false;
 
   if (load_p)
     {
@@ -23280,12 +23464,17 @@  mips_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
       mem2 = operands[2];
     }
 
+  if (MEM_ALIGN (mem1) < GET_MODE_BITSIZE (GET_MODE (mem1))
+      || MEM_ALIGN (mem2) < GET_MODE_BITSIZE (GET_MODE (mem2)))
+    return false;
+
   if (mips_address_insns (XEXP (mem1, 0), mode, false) == 0
       || mips_address_insns (XEXP (mem2, 0), mode, false) == 0)
     return false;
 
-  mips_split_plus (XEXP (mem1, 0), &base1, &offset1);
-  mips_split_plus (XEXP (mem2, 0), &base2, &offset2);
+  if ((!REG_P (reg1) && reg1 != const0_rtx)
+      || (!REG_P (reg2) && reg2 != const0_rtx))
+    return false;
 
   /* Base regs do not match.  */
   if (!REG_P (base1) || !rtx_equal_p (base1, base2))
@@ -23295,29 +23484,196 @@  mips_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
      loads if second load clobbers base register.  However, hardware does not
      support such bonding.  */
   if (load_p
-      && (REGNO (reg1) == REGNO (base1)
-	  || (REGNO (reg2) == REGNO (base1))))
+      && ((REG_P (reg1) && REGNO (reg1) == REGNO (base1))
+	  || (REG_P (reg2) && REGNO (reg2) == REGNO (base1))))
     return false;
 
   /* Loading in same registers.  */
   if (load_p
+      && REG_P (reg1) && REG_P (reg2)
       && REGNO (reg1) == REGNO (reg2))
     return false;
 
-  /* The loads/stores are not of same type.  */
-  rc1 = REGNO_REG_CLASS (REGNO (reg1));
-  rc2 = REGNO_REG_CLASS (REGNO (reg2));
-  if (rc1 != rc2
-      && !reg_class_subset_p (rc1, rc2)
-      && !reg_class_subset_p (rc2, rc1))
+  /* Check if the loads/stores are of the same mode.  */
+  if (GET_MODE (mem1) != GET_MODE (mem2))
     return false;
 
+  /* The loads/stores are not of same type.  */
+  if (reload_completed
+      && reg1 != const0_rtx
+      && reg2 != const0_rtx)
+    {
+      rc1 = REGNO_REG_CLASS (REGNO (reg1));
+      rc2 = REGNO_REG_CLASS (REGNO (reg2));
+      if (rc1 != rc2
+	  && !reg_class_subset_p (rc1, rc2)
+	  && !reg_class_subset_p (rc2, rc1))
+	return false;
+    }
+
   if (abs (offset1 - offset2) != GET_MODE_SIZE (mode))
     return false;
 
   return true;
 }
 
+/* Return TRUE if INSN1 and INSN2 are both plain SETs whose operands form
+   a bondable load/store pair, FALSE otherwise.  */
+
+bool
+mips_load_store_bonding_insn_p (rtx insn1, rtx insn2)
+{
+  rtx ops[4];
+  rtx set1, set2;
+  machine_mode mode;
+
+  gcc_assert (INSN_P (insn1) && INSN_P (insn2));
+
+  set1 = PATTERN (insn1);
+  set2 = PATTERN (insn2);
+
+  /* Only a pair of plain SETs can be bonded.  */
+  if (GET_CODE (set1) != SET || GET_CODE (set2) != SET)
+    return false;
+
+  /* Lay the operands out as dest1, src1, dest2, src2.  */
+  ops[0] = SET_DEST (set1);
+  ops[1] = SET_SRC (set1);
+  ops[2] = SET_DEST (set2);
+  ops[3] = SET_SRC (set2);
+
+  /* The mode comes from the first destination; the other operands and
+     their modes are validated by mips_load_store_bonding_p.  */
+  mode = GET_MODE (ops[0]);
+
+  return mips_load_store_bonding_p (ops, mode);
+}
+
+/* Find and bond adjacent load/store pairs among the insns from FROM to TO
+   inclusive.  A pair that mips_load_store_bonding_insn_p accepts and that
+   is recognized as one PARALLEL insn replaces the two original insns.  */
+
+static void
+mips_load_store_bond_insns_in_range (rtx_insn *from, rtx_insn *to)
+{
+  rtx_insn *cur, *next, *stop;
+
+  if (!ENABLE_LD_ST_PAIRS)
+    return;
+
+  if (from == NULL || to == NULL || from == to)
+    return;
+
+  /* STOP is the first insn past the range.  Only insns before it are ever
+     removed, so it stays a valid end marker while pairs are replaced.  */
+  stop = NEXT_INSN (to);
+
+  for (cur = from, next = NEXT_INSN (from);
+       next && next != stop;
+       cur = next, next = NEXT_INSN (next))
+    {
+      if (INSN_P (cur) && INSN_P (next)
+	  && mips_load_store_bonding_insn_p (cur, next))
+	{
+	  rtx_insn *bonded;
+	  int code;
+	  rtx base1, base2;
+	  HOST_WIDE_INT offset1, offset2;
+	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+	  XVECEXP (par, 0, 0) = PATTERN (cur);
+	  XVECEXP (par, 0, 1) = PATTERN (next);
+
+	  bonded = emit_insn_before (par, cur);
+	  code = recog_memoized (bonded);
+
+	  if (code < 0)
+	    {
+	      delete_insn (bonded);
+	      continue;
+	    }
+
+	  base1 = base2 = NULL_RTX;
+
+	  /* Only a register store can reach the frame-note handling below.  */
+	  if (GET_CODE (SET_SRC (single_set (cur))) == REG
+	      && GET_CODE (SET_DEST (single_set (cur))) == MEM)
+	    {
+	      mips_split_plus (XEXP (SET_DEST (single_set (cur)), 0),
+			       &base1, &offset1);
+	      mips_split_plus (XEXP (SET_DEST (single_set (next)), 0),
+			       &base2, &offset2);
+	    }
+
+	  if (base1 != NULL_RTX
+	      && GET_CODE (base1) == REG
+	      && REGNO (base1) == STACK_POINTER_REGNUM)
+	    {
+	      rtx dwarf, dwarf1 = NULL_RTX, dwarf2 = NULL_RTX;
+	      rtx note1, note2;
+	      int len = 0;
+	      int dwarf_index = 0;
+
+	      gcc_assert (base2 != NULL_RTX && GET_CODE (base2) == REG
+			  && REGNO (base2) == STACK_POINTER_REGNUM);
+
+	      if ((note1 = find_reg_note (cur, REG_FRAME_RELATED_EXPR, 0)))
+		{
+		  dwarf1 = XEXP (note1, 0);
+		  if (GET_CODE (dwarf1) == PARALLEL)
+		    len += XVECLEN (dwarf1, 0);
+		  else
+		    len += 1;
+		}
+
+	      if ((note2 = find_reg_note (next, REG_FRAME_RELATED_EXPR, 0)))
+		{
+		  dwarf2 = XEXP (note2, 0);
+		  if (GET_CODE (dwarf2) == PARALLEL)
+		    len += XVECLEN (dwarf2, 0);
+		  else
+		    len += 1;
+		}
+
+	      /* Merge both insns' frame expressions, CUR's first.
+		 NOTE(review): LEN is 0 if neither insn has a note -- confirm
+		 that an empty PARALLEL is intended in that case.  */
+	      dwarf = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (len));
+
+	      if (dwarf1 && GET_CODE (dwarf1) == PARALLEL)
+		for (int i = 0; i < XVECLEN (dwarf1, 0); i++)
+		  XVECEXP (dwarf, 0, dwarf_index++) = XVECEXP (dwarf1, 0, i);
+	      else if (dwarf1)
+		XVECEXP (dwarf, 0, dwarf_index++) = dwarf1;
+
+	      if (dwarf2 && GET_CODE (dwarf2) == PARALLEL)
+		for (int i = 0; i < XVECLEN (dwarf2, 0); i++)
+		  XVECEXP (dwarf, 0, dwarf_index++) = XVECEXP (dwarf2, 0, i);
+	      else if (dwarf2)
+		XVECEXP (dwarf, 0, dwarf_index++) = dwarf2;
+
+	      RTX_FRAME_RELATED_P (bonded) = 1;
+	      add_reg_note (bonded, REG_FRAME_RELATED_EXPR, dwarf);
+	    }
+
+	  /* BONDED now stands in for both insns.  Continue from it so the
+	     loop step advances to the insn that followed NEXT.  */
+	  remove_insn (cur);
+	  remove_insn (next);
+	  cur = bonded;
+	  next = bonded;
+	}
+    }
+}
+
+/* Find and bond load/store pairs from get_insns () to get_last_insn ().  */
+
+static void
+mips_load_store_bond_insns ()
+{
+  mips_load_store_bond_insns_in_range (get_insns (), get_last_insn ());
+}
+
 /* OPERANDS describes the operands to a pair of SETs, in the order
    dest1, src1, dest2, src2.  Return true if the operands can be used
    in an LWP or SWP instruction; LOAD_P says which.  */
@@ -25491,6 +25847,9 @@  mips_bit_clear_p (enum machine_mode mode, unsigned HOST_WIDE_INT m)
 #undef TARGET_SCHED_SET_SCHED_FLAGS
 #define TARGET_SCHED_SET_SCHED_FLAGS mips_set_sched_flags
 
+#undef TARGET_SCHED_FUSION_PRIORITY
+#define TARGET_SCHED_FUSION_PRIORITY mips_sched_fusion_priority
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-mips.h"
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 21f31a5595a..814692aecf1 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -786,6 +786,12 @@ 
 		(const_string "yes")
 		(const_string "no")))
 
+;; Whether the insn may go in a forbidden slot; defaults to "can_delay".
+(define_attr "can_forbidden" "no,yes"
+  (if_then_else (eq_attr "can_delay" "yes")
+		(const_string "yes")
+		(const_string "no")))
+
 ;; Attribute defining whether or not we can use the branch-likely
 ;; instructions.
 (define_attr "branch_likely" "no,yes"
@@ -817,6 +823,7 @@ 
 (define_mode_iterator MOVEP2 [SI SF])
 (define_mode_iterator JOIN_MODE [HI
 				 SI
+				 (DI "TARGET_64BIT")
 				 (SF "TARGET_HARD_FLOAT")
 				 (DF "TARGET_HARD_FLOAT
 				      && TARGET_DOUBLE_FLOAT")])
@@ -8015,12 +8022,13 @@ 
    (set_attr "insn_count" "3")])
 
 ;; Match paired HI/SI/SF/DFmode load/stores.
-(define_insn "*join2_load_store<JOIN_MODE:mode>"
+(define_insn "join2_load_store<JOIN_MODE:mode>"
   [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" "=d,f,m,m")
-	(match_operand:JOIN_MODE 1 "nonimmediate_operand" "m,m,d,f"))
+	(match_operand:JOIN_MODE 1 "nonimmediate_or_0_operand" "m,m,dJ,f"))
    (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" "=d,f,m,m")
-	(match_operand:JOIN_MODE 3 "nonimmediate_operand" "m,m,d,f"))]
-  "ENABLE_LD_ST_PAIRS && reload_completed"
+	(match_operand:JOIN_MODE 3 "nonimmediate_or_0_operand" "m,m,dJ,f"))]
+  "ENABLE_LD_ST_PAIRS
+   && mips_load_store_bonding_p (operands, <JOIN_MODE:MODE>mode)"
   {
     bool load_p = (which_alternative == 0 || which_alternative == 1);
     /* Reg-renaming pass reuses base register if it is dead after bonded loads.
@@ -8043,6 +8051,7 @@ 
     return "";
   }
   [(set_attr "move_type" "load,fpload,store,fpstore")
+   (set_attr "can_forbidden" "yes")
    (set_attr "insn_count" "2,2,2,2")])
 
 ;; 2 HI/SI/SF/DF loads are joined.
@@ -8055,7 +8064,7 @@ 
    (set (match_operand:JOIN_MODE 2 "register_operand")
 	(match_operand:JOIN_MODE 3 "non_volatile_mem_operand"))]
   "ENABLE_LD_ST_PAIRS
-   && mips_load_store_bonding_p (operands, <JOIN_MODE:MODE>mode, true)"
+   && mips_load_store_bonding_p (operands, <JOIN_MODE:MODE>mode)"
   [(parallel [(set (match_dup 0)
 		   (match_dup 1))
 	      (set (match_dup 2)
@@ -8066,11 +8075,11 @@ 
 ;; P5600 does not support bonding of two SBs, hence QI mode is not included.
 (define_peephole2
   [(set (match_operand:JOIN_MODE 0 "memory_operand")
-	(match_operand:JOIN_MODE 1 "register_operand"))
+	(match_operand:JOIN_MODE 1 "reg_or_0_operand"))
    (set (match_operand:JOIN_MODE 2 "memory_operand")
-	(match_operand:JOIN_MODE 3 "register_operand"))]
+	(match_operand:JOIN_MODE 3 "reg_or_0_operand"))]
   "ENABLE_LD_ST_PAIRS
-   && mips_load_store_bonding_p (operands, <JOIN_MODE:MODE>mode, false)"
+   && mips_load_store_bonding_p (operands, <JOIN_MODE:MODE>mode)"
   [(parallel [(set (match_dup 0)
 		   (match_dup 1))
 	      (set (match_dup 2)
@@ -8102,6 +8111,7 @@ 
     return "";
   }
   [(set_attr "move_type" "load")
+   (set_attr "can_forbidden" "yes")
    (set_attr "insn_count" "2")])
 
 ;;
@@ -8153,7 +8163,7 @@ 
    (set (match_operand:SI 2 "register_operand")
 	(any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand")))]
   "ENABLE_LD_ST_PAIRS
-   && mips_load_store_bonding_p (operands, HImode, true)"
+   && mips_load_store_bonding_p (operands, HImode)"
   [(parallel [(set (match_dup 0)
 		   (any_extend:SI (match_dup 1)))
 	      (set (match_dup 2)
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index 31cc57af435..604b1676f2b 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -120,6 +120,11 @@ 
 				  (match_test "ISA_HAS_MIPS16E2")))
        (match_operand 0 "register_operand")))
 
+(define_predicate "nonimmediate_or_0_operand"
+  (ior (and (match_operand 0 "const_0_operand")
+	    (not (match_test "TARGET_MIPS16")))
+       (match_operand 0 "nonimmediate_operand")))
+
 (define_predicate "const_1_operand"
   (and (match_code "const_int,const_double,const_vector")
        (match_test "op == CONST1_RTX (GET_MODE (op))")))