[20/61] Add -march=interaptiv-mr2 with MIPS16E2

Message ID 20250131171232.1018281-22-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: Robert Suchanek <robert.suchanek@imgtec.com>

- Bugfix [MIPS16E2]: the split of moves of negative constants should
exclude the zero constant.

- Add support for every style of ZEB/ZEH generation that has been tried:

An earlier attempt to improve generation of ZEB/ZEH led to a chaotic
effect of sometimes generating the instructions and sometimes retaining
the ANDI 0xffff. Also occasional generation of LHU/LBU appeared where
the original value was not already in memory.

Performance results are showing wild and unexpected variation which
appears to correlate with the way in which ZEH/ZEB handling is or is
not implemented. Support all forms tried so far with a hidden option
defaulting to the preferred method.

- Check to see if it is safe to use the SAVE/RESTORE instruction in a
function.

- Add interaptiv-mr2 architecture with COPYW/UCOPYW.

- Add -muse-copyw-ucopyw option (hidden from help).

- Skip tests at -O0 because a frame is introduced:

SAVE/RESTORE ends up introducing a frame because it saves more data
than strictly necessary.

gcc/
        * config/mips/mips.cc (mips_option_override): Set default for
        TARGET_USE_COPYW_UCOPYW.
        * config/mips/mips.h (ISA_HAS_COPY): Update to reference
        TARGET_USE_COPYW_UCOPYW.
        * config/mips/mips.opt (-muse-copyw-ucopyw): New hidden option.
        * config/mips/mips-cpus.def: Set PTF_AVOID_BRANCHLIKELY_ALWAYS
        flag for interAptiv-mr2 CPU.

gcc/testsuite/
        * gcc.target/mips/iamr2.c: New test.
        * gcc.target/mips/memcpy-3.c: New test.
        * gcc.target/mips/memcpy-4.c: Likewise.
        * gcc.target/mips/mips.exp: Accept -muse-copyw-ucopyw and
        isa=interaptiv-mr2.
        (mips-dg-init): Add memcpy option.
        * gcc.target/mips/r10k-cache-barrier-9.c: Skip test for -O0.
        * gcc.target/mips/stack-1.c: Likewise.

Cherry-picked 01dbcc401881f2e4ed063fe43406f8670e4e0cac,
34e4b01b6e6afea14f51c093520c58e7eb3ddb66,
3475f16f5ce9d1247758f5d3a858af5163116d71 and
aecf341540d1462145eaf47e3cfa7e7780ee7adc
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek <robert.suchanek@imgtec.com>
Signed-off-by: Matthew Fortune <matthew.fortune@imgtec.com>
Signed-off-by: Mihailo Stojanovic <mistojanovic@wavecomp.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/24k.md                        |  68 ++--
 gcc/config/mips/mips-cpus.def                 |   2 +
 gcc/config/mips/mips-protos.h                 |   3 +-
 gcc/config/mips/mips-tables.opt               |  57 ++--
 gcc/config/mips/mips.cc                       | 300 ++++++++++++++++--
 gcc/config/mips/mips.h                        |  35 +-
 gcc/config/mips/mips.md                       |  94 +++++-
 gcc/config/mips/mips.opt                      |   6 +
 gcc/config/mips/predicates.md                 |   2 +-
 gcc/doc/invoke.texi                           |   2 +-
 gcc/testsuite/gcc.target/mips/iamr2.c         |  51 +++
 gcc/testsuite/gcc.target/mips/memcpy-3.c      |  14 +
 gcc/testsuite/gcc.target/mips/memcpy-4.c      |  14 +
 gcc/testsuite/gcc.target/mips/mips.exp        |   6 +
 .../gcc.target/mips/r10k-cache-barrier-9.c    |   1 +
 gcc/testsuite/gcc.target/mips/stack-1.c       |   1 +
 16 files changed, 557 insertions(+), 99 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/iamr2.c
 create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-3.c
 create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-4.c
  

Patch

diff --git a/gcc/config/mips/24k.md b/gcc/config/mips/24k.md
index 1d09c929ab4..8e49456eac0 100644
--- a/gcc/config/mips/24k.md
+++ b/gcc/config/mips/24k.md
@@ -41,7 +41,7 @@ 
 
 ;; 1. Loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs
 (define_insn_reservation "r24k_int_load" 2
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "load"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -53,7 +53,7 @@ 
 ;; (movn/movz is not matched, we'll need to split condmov to
 ;;  differentiate between integer/float moves)
 (define_insn_reservation "r24k_int_arith" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "arith,const,logical,move,nop,shift,signext,slt"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -61,13 +61,13 @@ 
 ;; 3. Links: bgezal, bgezall, bltzal, bltzall, jal, jalr, jalx
 ;; 3a. jr/jalr consumer
 (define_insn_reservation "r24k_int_jump" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "call,jump"))
   "r24k_iss+r24k_ixu_arith")
 
 ;; 3b. branch consumer
 (define_insn_reservation "r24k_int_branch" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "branch"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -75,38 +75,38 @@ 
 ;; 4. MDU: fully pipelined multiplier
 ;; mult - delivers result to hi/lo in 1 cycle (pipelined)
 (define_insn_reservation "r24k_int_mult" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "imul"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; madd, msub - delivers result to hi/lo in 1 cycle (pipelined)
 (define_insn_reservation "r24k_int_madd" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "imadd"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; mul - delivers result to gpr in 5 cycles
 (define_insn_reservation "r24k_int_mul3" 5
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "imul3"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)*5")
 
 ;; mfhi, mflo, mflhxu - deliver result to gpr in 5 cycles
 (define_insn_reservation "r24k_int_mfhilo" 5
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "mfhi,mflo"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; mthi, mtlo, mtlhx - deliver result to hi/lo, thence madd, handled as bypass
 (define_insn_reservation "r24k_int_mthilo" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "mthi,mtlo"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; div - default to 36 cycles for 32bit operands.  Faster for 24bit, 16bit and
 ;; 8bit, but is tricky to identify.
 (define_insn_reservation "r24k_int_div" 36
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "idiv"))
   "r24k_iss+(r24k_mul3a+r24k_mul3b+r24k_mul3c)*36")
 
@@ -114,21 +114,21 @@ 
 ;; 5. Cop: cfc1, di, ei, mfc0, mtc0
 ;; (Disabled until we add proper cop0 support)
 ;;(define_insn_reservation "r24k_int_cop" 3
-;;  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+;;  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
 ;;       (eq_attr "type" "cop0"))
 ;;  "r24k_iss+r24k_ixu_arith")
 
 
 ;; 6. Store
 (define_insn_reservation "r24k_int_store" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "store"))
   "r24k_iss+r24k_ixu_arith")
 
 
 ;; 7. Multiple instructions
 (define_insn_reservation "r24k_int_multi" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "multi"))
   "r24k_iss+r24k_ixu_arith+r24k_fpu_arith+(r24k_mul3a+r24k_mul3b+r24k_mul3c)")
 
@@ -137,14 +137,14 @@ 
 ;;    rtls. They do not really affect scheduling latency, (blockage affects
 ;;    scheduling via log links, but not used here).
 (define_insn_reservation "r24k_int_unknown" 0
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "unknown,atomic,syncloop"))
   "r24k_iss")
 
 
 ;; 9. Prefetch
 (define_insn_reservation "r24k_int_prefetch" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "prefetch,prefetchx"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -211,31 +211,31 @@ 
 ;; packrl, pick, preceq, preceu, precequ, precrq, precrqu, raddu, rddsp, repl,
 ;; replv, shll, shllv, shra, shrav, shrl, shrlv, subq, subu, wrdsp
 (define_insn_reservation "r24k_dsp_alu" 2
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "dspalu,dspalusat"))
   "r24k_iss+r24k_ixu_arith")
 
 ;; dpaq_s, dpau, dpsq_s, dpsu, maq_s, mulsaq
 (define_insn_reservation "r24k_dsp_mac" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "dspmac"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; dpaq_sa, dpsq_sa, maq_sa
 (define_insn_reservation "r24k_dsp_mac_sat" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "dspmacsat"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; extp, extpdp, extpdpv, extpv, extr, extrv
 (define_insn_reservation "r24k_dsp_acc_ext" 5
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "accext"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
 ;; mthlip, shilo, shilov
 (define_insn_reservation "r24k_dsp_acc_mod" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
        (eq_attr "type" "accmod"))
   "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
 
@@ -435,37 +435,37 @@ 
 
 ;; fadd, fabs, fneg
 (define_insn_reservation "r24kf1_1_fadd" 4
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (eq_attr "type" "fadd,fabs,fneg"))
   "r24kf1_1_fpu_iss")
 
 ;; fmove, fcmove
 (define_insn_reservation "r24kf1_1_fmove" 4
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (eq_attr "type" "fmove,condmove"))
   "r24kf1_1_fpu_iss")
 
 ;; fload
 (define_insn_reservation "r24kf1_1_fload" 3
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (eq_attr "type" "fpload,fpidxload"))
   "r24kf1_1_fpu_iss")
 
 ;; fstore
 (define_insn_reservation "r24kf1_1_fstore" 1
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (eq_attr "type" "fpstore"))
   "r24kf1_1_fpu_iss")
 
 ;; fmul, fmadd
 (define_insn_reservation "r24kf1_1_fmul_sf" 4
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fmul,fmadd")
 	    (eq_attr "mode" "SF")))
   "r24kf1_1_fpu_iss")
 
 (define_insn_reservation "r24kf1_1_fmul_df" 5
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fmul,fmadd")
 	    (eq_attr "mode" "DF")))
   "r24kf1_1_fpu_iss,r24k_fpu_arith")
@@ -473,27 +473,27 @@ 
 
 ;; fdiv, fsqrt, frsqrt
 (define_insn_reservation "r24kf1_1_fdiv_sf" 17
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fdiv,fsqrt,frsqrt")
 	    (eq_attr "mode" "SF")))
   "r24kf1_1_fpu_iss,(r24k_fpu_arith*13)")
 
 (define_insn_reservation "r24kf1_1_fdiv_df" 32
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fdiv,fsqrt")
 	    (eq_attr "mode" "DF")))
   "r24kf1_1_fpu_iss,(r24k_fpu_arith*28)")
 
 ;; frsqrt
 (define_insn_reservation "r24kf1_1_frsqrt_df" 35
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "frsqrt")
 	    (eq_attr "mode" "DF")))
   "r24kf1_1_fpu_iss,(r24k_fpu_arith*30)")
 
 ;; fcmp
 (define_insn_reservation "r24kf1_1_fcmp" 2
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (eq_attr "type" "fcmp"))
   "r24kf1_1_fpu_iss")
 
@@ -502,28 +502,28 @@ 
 
 ;; fcvt (cvt.d.s, cvt.[sd].[wl])
 (define_insn_reservation "r24kf1_1_fcvt_i2f_s2d" 4
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fcvt")
 	    (eq_attr "cnv_mode" "I2S,I2D,S2D")))
   "r24kf1_1_fpu_iss")
 
 ;; fcvt (cvt.s.d)
 (define_insn_reservation "r24kf1_1_fcvt_s2d" 6
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fcvt")
 	    (eq_attr "cnv_mode" "D2S")))
   "r24kf1_1_fpu_iss")
 
 ;; fcvt (cvt.[wl].[sd], etc)
 (define_insn_reservation "r24kf1_1_fcvt_f2i" 5
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (and (eq_attr "type" "fcvt")
 	    (eq_attr "cnv_mode" "S2I,D2I")))
   "r24kf1_1_fpu_iss")
 
 ;; fxfer (mfc1, mfhc1, mtc1, mthc1)
 (define_insn_reservation "r24kf1_1_fxfer" 2
-  (and (eq_attr "cpu" "24kf1_1")
+  (and (eq_attr "cpu" "24kf1_1,interaptiv_mr2")
        (eq_attr "type" "mfc,mtc"))
   "r24kf1_1_fpu_iss")
 
diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def
index 50843480b03..b52a609e12a 100644
--- a/gcc/config/mips/mips-cpus.def
+++ b/gcc/config/mips/mips-cpus.def
@@ -145,6 +145,8 @@  MIPS_CPU ("1004kf", PROCESSOR_24KF2_1, MIPS_ISA_MIPS32R2, 0)
 MIPS_CPU ("1004kf1_1", PROCESSOR_24KF1_1, MIPS_ISA_MIPS32R2, 0)
 
 MIPS_CPU ("interaptiv", PROCESSOR_24KF2_1, MIPS_ISA_MIPS32R2, 0)
+MIPS_CPU ("interaptiv-mr2", PROCESSOR_INTERAPTIV_MR2, MIPS_ISA_MIPS32R3,
+		      PTF_AVOID_BRANCHLIKELY_ALWAYS)
 
 /* MIPS32 Release 5 processors.  */
 MIPS_CPU ("p5600", PROCESSOR_P5600, MIPS_ISA_MIPS32R5, (PTF_AVOID_BRANCHLIKELY_SPEED
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index c514c0711de..6b8f2370752 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -242,7 +242,8 @@  extern bool mips_get_pic_call_symbol (rtx *, int);
 extern void mips_set_return_address (rtx, rtx);
 extern bool mips_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
 extern bool mips_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
-extern bool mips_expand_block_move (rtx, rtx, rtx);
+extern bool mips_expand_block_move (rtx, rtx, rtx, rtx);
+extern bool mips16_expand_copy (rtx, rtx, rtx, rtx);
 extern void mips_expand_synci_loop (rtx, rtx);
 
 extern void mips_init_cumulative_args (CUMULATIVE_ARGS *, tree);
diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt
index c26009cfb5c..9f5da087bbd 100644
--- a/gcc/config/mips/mips-tables.opt
+++ b/gcc/config/mips/mips-tables.opt
@@ -634,83 +634,86 @@  EnumValue
 Enum(mips_arch_opt_value) String(interaptiv) Value(85) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(p5600) Value(86) Canonical
+Enum(mips_arch_opt_value) String(interaptiv-mr2) Value(86) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(m5100) Value(87) Canonical
+Enum(mips_arch_opt_value) String(p5600) Value(87) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(m5101) Value(88) Canonical
+Enum(mips_arch_opt_value) String(m5100) Value(88) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(m6201) Value(89) Canonical
+Enum(mips_arch_opt_value) String(m5101) Value(89) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(5kc) Value(90) Canonical
+Enum(mips_arch_opt_value) String(m6201) Value(90) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5kc) Value(90)
+Enum(mips_arch_opt_value) String(5kc) Value(91) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(5kf) Value(91) Canonical
+Enum(mips_arch_opt_value) String(r5kc) Value(91)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5kf) Value(91)
+Enum(mips_arch_opt_value) String(5kf) Value(92) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(20kc) Value(92) Canonical
+Enum(mips_arch_opt_value) String(r5kf) Value(92)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r20kc) Value(92)
+Enum(mips_arch_opt_value) String(20kc) Value(93) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(sb1) Value(93) Canonical
+Enum(mips_arch_opt_value) String(r20kc) Value(93)
 
 EnumValue
-Enum(mips_arch_opt_value) String(sb1a) Value(94) Canonical
+Enum(mips_arch_opt_value) String(sb1) Value(94) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(sr71000) Value(95) Canonical
+Enum(mips_arch_opt_value) String(sb1a) Value(95) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(sr71k) Value(95)
+Enum(mips_arch_opt_value) String(sr71000) Value(96) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(xlr) Value(96) Canonical
+Enum(mips_arch_opt_value) String(sr71k) Value(96)
 
 EnumValue
-Enum(mips_arch_opt_value) String(loongson3a) Value(97) Canonical
+Enum(mips_arch_opt_value) String(xlr) Value(97) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(gs464) Value(98) Canonical
+Enum(mips_arch_opt_value) String(loongson3a) Value(98) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(gs464e) Value(99) Canonical
+Enum(mips_arch_opt_value) String(gs464) Value(99) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(gs264e) Value(100) Canonical
+Enum(mips_arch_opt_value) String(gs464e) Value(100) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon) Value(101) Canonical
+Enum(mips_arch_opt_value) String(gs264e) Value(101) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon+) Value(102) Canonical
+Enum(mips_arch_opt_value) String(octeon) Value(102) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon2) Value(103) Canonical
+Enum(mips_arch_opt_value) String(octeon+) Value(103) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon3) Value(104) Canonical
+Enum(mips_arch_opt_value) String(octeon2) Value(104) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(xlp) Value(105) Canonical
+Enum(mips_arch_opt_value) String(octeon3) Value(105) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(i6400) Value(106) Canonical
+Enum(mips_arch_opt_value) String(xlp) Value(106) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(i6500) Value(107) Canonical
+Enum(mips_arch_opt_value) String(i6400) Value(107) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(p6600) Value(108) Canonical
+Enum(mips_arch_opt_value) String(i6500) Value(108) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(p6600) Value(109) Canonical
 
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index d9c913f2e23..9808fda286c 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -106,7 +106,7 @@  along with GCC; see the file COPYING3.  If not see
    to save and restore registers, and to allocate and deallocate the top
    part of the frame.  */
 #define MIPS_MAX_FIRST_STACK_STEP					\
-  (!TARGET_COMPRESSION ? 0x7ff0						\
+  (!TARGET_COMPRESSION && !TARGET_USE_SAVE_RESTORE ? 0x7ff0	\
    : TARGET_MICROMIPS || GENERATE_MIPS16E_SAVE_RESTORE ? 0x7f8		\
    : TARGET_64BIT ? 0x100 : 0x400)
 
@@ -1413,6 +1413,19 @@  static const struct mips_rtx_cost_data
 		     1,           /* branch_cost */
 		     4            /* memory_latency */
   },
+  { /* INTERAPTIV_MR2 (identical to 24KF1_1) */
+    COSTS_N_INSNS (4),            /* fp_add */
+    COSTS_N_INSNS (4),            /* fp_mult_sf */
+    COSTS_N_INSNS (5),            /* fp_mult_df */
+    COSTS_N_INSNS (17),           /* fp_div_sf */
+    COSTS_N_INSNS (32),           /* fp_div_df */
+    COSTS_N_INSNS (5),            /* int_mult_si */
+    COSTS_N_INSNS (5),            /* int_mult_di */
+    COSTS_N_INSNS (41),           /* int_div_si */
+    COSTS_N_INSNS (41),           /* int_div_di */
+		     1,           /* branch_cost */
+		     4            /* memory_latency */
+  },
   { /* Loongson-2E */
     DEFAULT_COSTS
   },
@@ -1752,7 +1765,7 @@  static const struct mips_rtx_cost_data
     COSTS_N_INSNS (68),           /* int_div_di */
 		     1,           /* branch_cost */
 		     4            /* memory_latency */
-   }
+  }
 };
 
 static rtx mips_find_pic_call_symbol (rtx_insn *, rtx, bool);
@@ -2454,7 +2467,10 @@  mips_build_lower (struct mips_integer_op *codes, unsigned HOST_WIDE_INT value)
       /* Either this is a simple LUI/ORI pair, or clearing the lowest 16
 	 bits gives a value with at least 17 trailing zeros.  */
       i = mips_build_integer (codes, high);
-      codes[i].code = IOR;
+      if (ISA_HAS_MIPS16E2 && (value & 0x8000) == 0)
+	codes[i].code = PLUS;
+      else
+	codes[i].code = IOR;
       codes[i].value = value & 0xffff;
     }
   return i + 1;
@@ -4660,7 +4676,7 @@  mips_rewrite_small_data_p (rtx x, enum mips_symbol_context context)
 /* Return true if OP refers to small data symbols directly, not through
    a LO_SUM.  CONTEXT is the context in which X appears.  */
 
-static int
+static bool
 mips_small_data_pattern_1 (rtx x, enum mips_symbol_context context)
 {
   subrtx_var_iterator::array_type array;
@@ -4760,6 +4776,11 @@  mips16_constant_cost (int code, HOST_WIDE_INT x)
 	return COSTS_N_INSNS (1);
       return -1;
 
+    case IOR:
+      if (ISA_HAS_MIPS16E2 && SMALL_OPERAND_UNSIGNED (x))
+	return COSTS_N_INSNS (1);
+      return -1;
+
     case LEU:
       /* Like LE, but reject the always-true case.  */
       if (x == -1)
@@ -6060,6 +6081,15 @@  mips_split_move_insn (rtx dest, rtx src, rtx insn)
 /* Return the appropriate instructions to move SRC into DEST.  Assume
    that SRC is operand 1 and DEST is operand 0.  */
 
+bool
+mips_constant_pool_symbol_in_sdata (rtx x, enum mips_symbol_context context)
+{
+  enum mips_symbol_type symbol_type;
+  return (mips_symbolic_constant_p (x, context, &symbol_type)
+	  && symbol_type == SYMBOL_GP_RELATIVE
+	  && CONSTANT_POOL_ADDRESS_P (x));
+}
+
 const char *
 mips_output_move (rtx insn, rtx dest, rtx src)
 {
@@ -6234,7 +6264,13 @@  mips_output_move (rtx insn, rtx dest, rtx src)
 	}
 
       if (src_code == HIGH)
-	return (TARGET_MIPS16 && !ISA_HAS_MIPS16E2) ? "#" : "lui\t%0,%h1";
+	{
+	  if (mips_constant_pool_symbol_in_sdata (XEXP (src, 0),
+	      SYMBOL_CONTEXT_MEM))
+	    return "move\t%0,$28";
+
+	  return (TARGET_MIPS16 && !ISA_HAS_MIPS16E2) ? "#" : "lui\t%0,%h1";
+	}
 
       if (CONST_GP_P (src))
 	return "move\t%0,%1";
@@ -9133,6 +9169,10 @@  mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
 	return false;
       if (align < BITS_PER_WORD)
 	return size < UNITS_PER_WORD;
+      /* It is more profitable to use COPYW for at least 2 words.  */
+      if (ISA_HAS_COPY
+	  && align >= BITS_PER_WORD && size >= 2 * UNITS_PER_WORD)
+	return false;
       return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
     }
 
@@ -9202,7 +9242,8 @@  mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
    Assume that the areas do not overlap.  */
 
 static void
-mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
+			  HOST_WIDE_INT alignment ATTRIBUTE_UNUSED)
 {
   HOST_WIDE_INT offset, delta;
   unsigned HOST_WIDE_INT bits;
@@ -9302,6 +9343,7 @@  mips_adjust_block_mem (rtx mem, HOST_WIDE_INT length,
 
 static void
 mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+		      HOST_WIDE_INT alignment,
 		      HOST_WIDE_INT bytes_per_iter)
 {
   rtx_code_label *label;
@@ -9325,7 +9367,7 @@  mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
   emit_label (label);
 
   /* Emit the loop body.  */
-  mips_block_move_straight (dest, src, bytes_per_iter);
+  mips_block_move_straight (dest, src, bytes_per_iter, alignment);
 
   /* Move on to the next block.  */
   mips_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
@@ -9340,36 +9382,176 @@  mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
 
   /* Mop up any left-over bytes.  */
   if (leftover)
-    mips_block_move_straight (dest, src, leftover);
+    mips_block_move_straight (dest, src, leftover, alignment);
   else
     /* Temporary fix for PR79150.  */
     emit_insn (gen_nop ());
 }
 
+/* Expand a cpymemsi instruction using the MIPS16 COPYW/UCOPYW instructions.
+   DEST and SRC are the memory references, LENGTH is the number of bytes to
+   copy and ALIGNMENT is the alignment, in bytes, shared by DEST and SRC.
+   Return true if a copy sequence was emitted and false otherwise.  */
+
+bool
+mips16_expand_copy (rtx dest, rtx src, rtx length, rtx alignment)
+{
+  rtx base_dest, base_src, temp;
+  HOST_WIDE_INT offset_dest, offset_src;
+  int word_count, byte_count, offset = 0;
+  rtx first_dest = dest, first_src = src;
+  rtx xdest = XEXP (dest, 0), xsrc = XEXP (src, 0);
+  int align = INTVAL (alignment);
+  bool word_by_pieces_p = false;
+
+  if (!ISA_HAS_COPY)
+    return false;
+
+  gcc_assert (!TARGET_64BIT);
+  gcc_assert (MEM_P (src) && MEM_P (dest));
+
+  if (!CONST_INT_P (length))
+    return false;
+
+  byte_count = INTVAL (length);
+  if (byte_count > (mips_movmem_limit == -1
+		    ? MIPS_MAX_MOVE_BYTES_STRAIGHT
+		    : mips_movmem_limit))
+    return false;
+
+  if (byte_count >= MIPS_MAX_MOVE_BYTES_STRAIGHT && align < 4)
+    return false;
+
+  word_count = byte_count / UNITS_PER_WORD;
+  byte_count = byte_count % UNITS_PER_WORD;
+
+  mips_split_plus (xdest, &base_dest, &offset_dest);
+  mips_split_plus (xsrc, &base_src, &offset_src);
+
+  /* In some cases, it's better to move by pieces rather than generating
+     COPYW/UCOPYW:
+     1. Copying 4 bytes when both dest and src are aligned but base+offset is
+	likely to be squashed.
+     2. Copying 4 bytes when the lowest alignment is 2-bytes iff the offsets
+	are not the same or multiples of 16 bytes.  */
+
+  /* Case (1).  */
+  if (word_count == 1
+      && MEM_ALIGN (dest) >= 4 * BITS_PER_UNIT
+      && MEM_ALIGN (src) >= 4 * BITS_PER_UNIT
+      && (offset_dest >= 0 || offset_src >= 0))
+    word_by_pieces_p = true;
+
+  /* Case (2).  */
+  if (word_count == 1 && align >= 2
+      && !(offset_src == offset_dest && offset_src % 16 != 0))
+    word_by_pieces_p = true;
+
+  if (word_by_pieces_p)
+    {
+      rtx src2 = adjust_address (src, BLKmode, offset);
+      rtx dest2 = adjust_address (dest, BLKmode, offset);
+      /* move_by_pieces expects its alignment argument in bits.  */
+      move_by_pieces (dest2, src2, 4, align * BITS_PER_UNIT, RETURN_BEGIN);
+      offset += 4;
+      word_count = 0;
+    }
+
+  if (word_count > 0 && !REG_P (XEXP (dest, 0)))
+    {
+      rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
+      first_dest = replace_equiv_address (first_dest, dest_reg);
+    }
+
+  if (word_count > 0 && !REG_P (XEXP (src, 0)))
+    {
+      rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
+      first_src = replace_equiv_address (first_src, src_reg);
+    }
+
+  while (word_count > 0)
+    {
+      int new_offset = offset;
+      int new_word_count = MIN (word_count, 4);
+      rtx adj_src, adj_dest;
+
+      /* Using a COPYW dst,src,*,1 instruction causes the core to stall
+	 so we generate a lw/sw sequence to get around this core bug.  */
+      if (new_word_count == 1 && align >= 4)
+	{
+	  temp = gen_reg_rtx (SImode);
+	  adj_src = adjust_address (first_src, Pmode, new_offset);
+	  adj_dest = adjust_address (first_dest, Pmode, new_offset);
+	  mips_emit_move (temp, adj_src);
+	  mips_emit_move (adj_dest, temp);
+	}
+      else
+	{
+	  adj_src = adjust_address (first_src, BLKmode, new_offset);
+	  adj_dest = adjust_address (first_dest, BLKmode, new_offset);
+	  set_mem_size (adj_src, new_word_count * 4);
+	  set_mem_size (adj_dest, new_word_count * 4);
+	  emit_insn (gen_mips16_copy (adj_dest, adj_src, GEN_INT (new_offset),
+				      GEN_INT (new_word_count), alignment));
+	}
+
+      offset += new_word_count * 4;
+      word_count -= new_word_count;
+      /* Rebase FIRST_* on the chunk just copied and restart OFFSET just past
+	 it so that later chunks and the tail bytes follow on correctly.  */
+      if (offset > 496)
+	{
+	  rtx dest_reg = copy_addr_to_reg (XEXP (adj_dest, 0));
+	  rtx src_reg = copy_addr_to_reg (XEXP (adj_src, 0));
+	  first_dest = replace_equiv_address (first_dest, dest_reg);
+	  first_src = replace_equiv_address (first_src, src_reg);
+	  offset = new_word_count * 4;
+	}
+    }
+
+  if (byte_count > 0)
+    {
+      rtx src2 = adjust_address (first_src, BLKmode, offset);
+      rtx dest2 = adjust_address (first_dest, BLKmode, offset);
+      move_by_pieces (dest2, src2, byte_count, align * BITS_PER_UNIT,
+		      RETURN_BEGIN);
+    }
+
+  return true;
+}
+
 /* Expand a cpymemsi instruction, which copies LENGTH bytes from
-   memory reference SRC to memory reference DEST.  */
+   memory reference SRC to memory reference DEST.  The lowest alignment
+   of SRC and DEST is specified by ALIGNMENT.  */
 
 bool
-mips_expand_block_move (rtx dest, rtx src, rtx length)
+mips_expand_block_move (rtx dest, rtx src, rtx length, rtx alignment)
 {
   if (!CONST_INT_P (length))
     return false;
 
+  if (TARGET_MIPS16 && !ISA_HAS_COPY)
+    return false;
+
   if (mips_isa_rev >= 6 && !ISA_HAS_UNALIGNED_ACCESS
-      && (MEM_ALIGN (src) < MIPS_MIN_MOVE_MEM_ALIGN
-	  || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN))
+      && !(INTVAL (alignment) * BITS_PER_UNIT >= MIPS_MIN_MOVE_MEM_ALIGN
+	  || ISA_HAS_COPY))
     return false;
 
   if (mips_movmem_limit == -1 || INTVAL (length) < mips_movmem_limit)
     {
-      if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER)
+      if (ISA_HAS_COPY)
+	  return mips16_expand_copy (dest, src, length, alignment);
+      else if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER)
   {
-    mips_block_move_straight (dest, src, INTVAL (length));
+    mips_block_move_straight (dest, src, INTVAL (length),
+				INTVAL (alignment));
     return true;
   }
       else if (optimize)
   {
     mips_block_move_loop (dest, src, INTVAL (length),
+			  INTVAL (alignment),
 			  MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER);
     return true;
   }
@@ -12287,6 +12469,8 @@  mips_compute_frame_info (void)
   struct mips_frame_info *frame;
   HOST_WIDE_INT offset, size;
   unsigned int regno, i;
+  int global_reg_used;
+  int local_reg_used;
 
   /* Skip re-computing the frame info after reload completed.  */
   if (reload_completed)
@@ -12401,10 +12585,61 @@  mips_compute_frame_info (void)
 	frame->mask |= 1 << (EH_RETURN_DATA_REGNO (i) - GP_REG_FIRST);
       }
 
+  /* The SAVE and RESTORE instructions have two ranges of registers:
+     $a3-$a0 and $s2-$s8.  If we save one register in the range, we must
+     save all later registers too.  This can cause problems if the user has
+     placed a global value into a register that falls into one of these
+     ranges and the function uses a callee saved register that is also in the
+     same range.  In this case the global value could be accidentally saved
+     and restored on function entry and exit which means any changes made to
+     its value in the function will be lost.
+
+     The code below checks for this case, and if it is found it turns off
+     the use of the SAVE/RESTORE instruction in this function.
+
+     This approach is not optimal because it should really just check that
+     the number of the register used for the global value occurs before
+     one of the callee saved registers.  However, as forcing global values
+     into a register is rare, it is fine to use the suboptimal version of
+     the code for the moment.  */
+  cfun->machine->safe_to_use_save_restore = true;
+
+  global_reg_used = 0;
+  local_reg_used = 0;
+
+  for (i = 0 ; i < ARRAY_SIZE (mips16e_s2_s8_regs) ; i++)
+     {
+       regno = mips16e_s2_s8_regs[i];
+       if (global_regs[regno])
+	 global_reg_used = 1;
+
+       if (BITSET_P (frame->mask, regno))
+	 local_reg_used = 1;
+     }
+
+  if (global_reg_used && local_reg_used)
+    cfun->machine->safe_to_use_save_restore = false;
+
+  global_reg_used = 0;
+  local_reg_used = 0;
+
+  for (i = 0 ; i < ARRAY_SIZE (mips16e_a0_a3_regs) ; i++)
+     {
+       regno = mips16e_a0_a3_regs[i];
+       if (global_regs[regno])
+	 global_reg_used = 1;
+
+       if (BITSET_P (frame->mask, regno))
+	 local_reg_used = 1;
+     }
+
+  if (global_reg_used && local_reg_used)
+    cfun->machine->safe_to_use_save_restore = false;
+
   /* The MIPS16e SAVE and RESTORE instructions have two ranges of registers:
      $a3-$a0 and $s2-$s8.  If we save one register in the range, we must
      save all later registers too.  */
-  if (GENERATE_MIPS16E_SAVE_RESTORE)
+  if (GENERATE_MIPS16E_SAVE_RESTORE && cfun->machine->safe_to_use_save_restore)
     {
       mips16e_mask_registers (&frame->mask, mips16e_s2_s8_regs,
  			      ARRAY_SIZE (mips16e_s2_s8_regs), &frame->num_gp);
@@ -13495,7 +13730,9 @@  mips_expand_prologue (void)
       HOST_WIDE_INT step1;
 
       step1 = MIN (size, MIPS_MAX_FIRST_STACK_STEP);
-      if (GENERATE_MIPS16E_SAVE_RESTORE)
+      if (GENERATE_MIPS16E_SAVE_RESTORE
+	  && !cfun->machine->interrupt_handler_p
+	  && cfun->machine->safe_to_use_save_restore)
  	{
  	  HOST_WIDE_INT offset;
  	  unsigned int mask, regno;
@@ -13945,7 +14182,9 @@  mips_expand_epilogue (bool sibcall_p)
     emit_insn (gen_blockage ());
 
   mips_epilogue.cfa_restore_sp_offset = step2;
-  if (GENERATE_MIPS16E_SAVE_RESTORE && frame->mask != 0)
+  if (GENERATE_MIPS16E_SAVE_RESTORE && frame->mask != 0
+      && !cfun->machine->interrupt_handler_p
+      && cfun->machine->safe_to_use_save_restore)
     {
       unsigned int regno, mask;
       HOST_WIDE_INT offset;
@@ -21555,6 +21794,32 @@  mips_option_override (void)
 	      "-mcompact-branches=never");
     }
 
+  /* Enable the use of interAptiv MIPS32 SAVE/RESTORE instructions.  */
+  if (TARGET_USE_SAVE_RESTORE == -1)
+    {
+      if (TARGET_INTERAPTIV_MR2)
+	TARGET_USE_SAVE_RESTORE = 1;
+      else
+	TARGET_USE_SAVE_RESTORE = 0;
+    }
+  else if (TARGET_USE_SAVE_RESTORE
+	   && !TARGET_INTERAPTIV_MR2)
+    error ("unsupported combination: %qs %s",
+	   mips_arch_info->name, "-muse-save-restore");
+
+  /* Default -muse-copyw-ucopyw (MIPS16 COPYW/UCOPYW) to on for interAptiv
+     MR2 only; reject an explicit request on any other architecture.  */
+  if (TARGET_USE_COPYW_UCOPYW == -1)
+    {
+      if (TARGET_INTERAPTIV_MR2)
+	TARGET_USE_COPYW_UCOPYW = 1;
+      else
+	TARGET_USE_COPYW_UCOPYW = 0;
+    }
+  else if (TARGET_USE_COPYW_UCOPYW && !TARGET_INTERAPTIV_MR2)
+    error ("unsupported combination: %qs %s",
+	   mips_arch_info->name, "-muse-copyw-ucopyw");
+
   /* Require explicit relocs for MIPS R6 onwards.  This enables simplification
      of the compact branch and jump support through the backend.  */
   if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
@@ -24221,7 +24486,6 @@  mips_bit_clear_p (enum machine_mode mode, unsigned HOST_WIDE_INT m)
 
   return false;
 }
-
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 70a7b2032dc..b727074bf53 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -265,7 +265,11 @@  struct mips_cpu_info {
 /* Generate mips16e code. Default 16bit ASE for mips32* and mips64* */
 #define GENERATE_MIPS16E	(TARGET_MIPS16 && mips_isa >= MIPS_ISA_MIPS32)
 /* Generate mips16e register save/restore sequences.  */
-#define GENERATE_MIPS16E_SAVE_RESTORE (GENERATE_MIPS16E && mips_abi == ABI_32)
+#define GENERATE_MIPS16E_SAVE_RESTORE ((GENERATE_MIPS16E \
+					|| (TARGET_USE_SAVE_RESTORE \
+					    && !TARGET_MICROMIPS \
+					    && TARGET_SOFT_FLOAT)) \
+				       && mips_abi == ABI_32)
 
 /* True if we're generating a form of MIPS16 code in which general
    text loads are allowed.  */
@@ -319,6 +323,7 @@  struct mips_cpu_info {
 				     || mips_arch == PROCESSOR_SB1A)
 #define TARGET_SR71K                (mips_arch == PROCESSOR_SR71000)
 #define TARGET_XLP                  (mips_arch == PROCESSOR_XLP)
+#define TARGET_INTERAPTIV_MR2	    (mips_arch == PROCESSOR_INTERAPTIV_MR2)
 
 /* Scheduling target defines.  */
 #define TUNE_20KC		    (mips_tune == PROCESSOR_20KC)
@@ -431,6 +436,8 @@  struct mips_cpu_info {
       for (p = macro; *p != 0; p++)				\
         if (*p == '+')                                          \
           *p = 'P';                                             \
+	else if (*p == '-')					\
+	  *p = '_';						\
         else                                                    \
           *p = TOUPPER (*p);                                    \
 								\
@@ -844,7 +851,7 @@  struct mips_cpu_info {
      %{march=mips32r2|march=m4k|march=4ke*|march=4ksd|march=24k* \
        |march=34k*|march=74k*|march=m14k*|march=1004k* \
        |march=interaptiv: -mips32r2} \
-     %{march=mips32r3: -mips32r3} \
+     %{march=mips32r3|march=interaptiv-mr2: -mips32r3} \
      %{march=mips32r5|march=p5600|march=m5100|march=m5101: -mips32r5} \
      %{march=mips32r6|march=m6201: -mips32r6} \
      %{march=mips64|march=5k*|march=20k*|march=sb1*|march=sr71000 \
@@ -963,10 +970,12 @@  struct mips_cpu_info {
 #define MIPS_ASE_DSP_SPEC \
   "%{!mno-dsp: \
      %{march=24ke*|march=34kc*|march=34kf*|march=34kx*|march=1004k* \
-       |march=interaptiv: -mdsp} \
+       |march=interaptiv*: -mdsp} \
      %{march=74k*|march=m14ke*: %{!mno-dspr2: -mdspr2 -mdsp}}}" \
   "%{!mforbidden-slots: \
-     %{mips32r6|mips64r6:%{mmicromips:-mno-forbidden-slots}}}"
+     %{mips32r6|mips64r6:%{mmicromips:-mno-forbidden-slots}}}" \
+  "%{!mno-mips16e2: \
+     %{march=interaptiv-mr2: -mmips16e2}}"
 
 #define MIPS_ASE_LOONGSON_MMI_SPEC						\
   "%{!mno-loongson-mmi:								\
@@ -1334,6 +1343,10 @@  struct mips_cpu_info {
 #define ISA_HAS_MIPS16E2       (TARGET_MIPS16 && TARGET_MIPS16E2 \
 				&& !TARGET_64BIT)
 
+/* The interAptiv MR2 COPYW/UCOPYW instructions are available.  */
+#define ISA_HAS_COPY		(TARGET_MIPS16 && TARGET_INTERAPTIV_MR2 \
+				 && TARGET_USE_COPYW_UCOPYW)
+
 /* True if the result of a load is not available to the next instruction.
    A nop will then be needed between instructions like "lw $4,..."
    and "addiu $4,$4,1".  */
@@ -1501,6 +1514,7 @@  struct mips_cpu_info {
 %{mtune=*}" \
 FP_ASM_SPEC "\
 %{mmips16e2} \
+%{mmips16-copy:-mmips16cp} \
 %(subtarget_asm_spec)"
 
 /* Extra switches sometimes passed to the linker.  */
@@ -2733,7 +2747,6 @@  typedef struct mips_args {
    do not truncate the shift amount at all.  */
 #define SHIFT_COUNT_TRUNCATED (!TARGET_LOONGSON_MMI)
 
-
 /* Specify the machine mode that pointers have.
    After generation of rtl, the compiler makes no further distinction
    between pointers and any other objects of this machine mode.  */
@@ -3143,7 +3156,9 @@  while (0)
 /* The maximum number of bytes that can be copied by one iteration of
    a cpymemsi loop; see mips_block_move_loop.  */
 #define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \
-  (UNITS_PER_WORD * 4)
+  (ISA_HAS_COPY	  			  \
+  ? UNITS_PER_WORD * 4 * 4		  \
+  : UNITS_PER_WORD * 4)
 
 /* The maximum number of bytes that can be copied by a straight-line
    implementation of cpymemsi; see mips_block_move_straight.  We want
@@ -3174,7 +3189,9 @@  while (0)
 
 #define MOVE_RATIO(speed)				\
   (HAVE_cpymemsi					\
-   ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX		\
+   ? (ISA_HAS_COPY					\
+      ? MIPS_MAX_MOVE_BYTES_STRAIGHT / 4 / MOVE_MAX	\
+      : MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX)	\
    : MIPS_CALL_RATIO / 2)
 
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
@@ -3435,6 +3452,10 @@  struct GTY(())  machine_function {
 
   /* True if the function should generate hazard barrier return.  */
   bool use_hazard_barrier_return_p;
+
+  /* True if it is safe to use the SAVE/RESTORE instructions in the
+     prologue/epilogue.  */
+  bool safe_to_use_save_restore;
 };
 #endif
 
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index b1e55428682..21f31a5595a 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -35,6 +35,7 @@ 
   74kf2_1
   74kf1_1
   74kf3_2
+  interaptiv_mr2
   loongson_2e
   loongson_2f
   gs464
@@ -4332,7 +4333,7 @@ 
 	(sign_extract:GPR (match_operand:BLK 1 "memory_operand")
 			  (match_operand 2 "const_int_operand")
 			  (match_operand 3 "const_int_operand")))]
-  "ISA_HAS_LWL_LWR"
+  "ISA_HAS_LWL_LWR || ISA_HAS_MIPS16E2"
 {
   if (mips_expand_ext_as_unaligned_load (operands[0], operands[1],
 					 INTVAL (operands[2]),
@@ -4369,7 +4370,7 @@ 
 	(zero_extract:GPR (match_operand:BLK 1 "memory_operand")
 			  (match_operand 2 "const_int_operand")
 			  (match_operand 3 "const_int_operand")))]
-  "ISA_HAS_LWL_LWR"
+  "ISA_HAS_LWL_LWR || ISA_HAS_MIPS16E2"
 {
   if (mips_expand_ext_as_unaligned_load (operands[0], operands[1],
 					 INTVAL (operands[2]),
@@ -4445,7 +4446,7 @@ 
 			  (match_operand 1 "const_int_operand")
 			  (match_operand 2 "const_int_operand"))
 	(match_operand:GPR 3 "reg_or_0_operand"))]
-  "ISA_HAS_LWL_LWR"
+  "ISA_HAS_LWL_LWR || ISA_HAS_MIPS16E2"
 {
   if (mips_expand_ins_as_unaligned_store (operands[0], operands[3],
 					  INTVAL (operands[1]),
@@ -4891,7 +4892,7 @@ 
 
 (define_insn "*movdi_32bit_mips16"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=d,y,d,d,d,d,m,*d")
-	(match_operand:DI 1 "move_operand" "d,d,y,K,N,m,d,*x"))]
+	(match_operand:DI 1 "move_operand" "d,d,y,i,N,m,d,*x"))]
   "!TARGET_64BIT && TARGET_MIPS16
    && (register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode))"
@@ -4919,6 +4920,74 @@ 
   [(set_attr "move_type" "move,move,move,const,constN,const,loadpool,load,store,mflo")
    (set_attr "mode" "DI")])
 
+;; Operand 0 is the register containing the destination address
+;; Operand 1 is the register containing the source address
+;; Operand 2 is a byte offset to use for both the source and dest addresses
+;; Operand 3 is the number of words to copy (1,2,3, or 4)
+;; Operand 4 is a constant integer value for the known alignment.
+
+(define_expand "mips16_copy"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (match_operand 1 "" ""))
+     (use (match_operand 2 "" ""))
+     (use (match_operand 3 "" ""))
+     (use (match_operand 4 "" ""))
+     (clobber (reg:SI 12))
+     (clobber (reg:SI 13))
+     (clobber (reg:SI 14))
+     (clobber (reg:SI 15))])]
+  "ISA_HAS_COPY"
+  {
+    /* Using a COPYW dst,src,*,1 instruction causes the core to stall
+       so we cannot use mips16_copy in this case.  */
+    gcc_assert (!(INTVAL (operands[3]) == 1 && INTVAL (operands[4]) >= 4));
+  })
+
+(define_insn ""
+  [(set (mem:BLK (match_operand:SI 0 "register_operand" "d"))
+	(mem:BLK (match_operand:SI 1 "register_operand" "d")))
+   (use (match_operand:SI 2 "const_int_operand"))
+   (use (match_operand:SI 3 "const_int_operand"))
+   (use (match_operand:SI 4 "const_int_operand"))
+   (clobber (reg:SI 12))
+   (clobber (reg:SI 13))
+   (clobber (reg:SI 14))
+   (clobber (reg:SI 15))]
+  "ISA_HAS_COPY"
+  {
+    if (INTVAL (operands[4]) < 4)
+      return "ucopyw\t%0,%1,%2,%3";
+    else
+      return "copyw\t%0,%1,%2,%3";
+  }
+  [(set_attr "move_type" "store")
+   (set_attr "mode" "SI")
+   (set_attr "extended_mips16" "yes")])
+
+(define_insn "mips16_copy_ofs"
+  [(set (mem:BLK (plus:SI (match_operand:SI 0 "register_operand" "d")
+			  (match_operand:SI 2 "const_int_operand")))
+	(mem:BLK (plus:SI (match_operand:SI 1 "register_operand" "d")
+			  (match_dup 2))))
+   (use (match_dup 2))
+   (use (match_operand:SI 3 "const_int_operand"))
+   (use (match_operand:SI 4 "const_int_operand"))
+   (clobber (reg:SI 12))
+   (clobber (reg:SI 13))
+   (clobber (reg:SI 14))
+   (clobber (reg:SI 15))]
+  "ISA_HAS_COPY"
+  {
+    if (INTVAL (operands[4]) < 4)
+      return "ucopyw\t%0,%1,%2,%3";
+    else
+      return "copyw\t%0,%1,%2,%3";
+  }
+  [(set_attr "move_type" "store")
+   (set_attr "mode" "SI")
+   (set_attr "extended_mips16" "yes")])
+
 ;; On the mips16, we can split ld $r,N($r) into an add and a load,
 ;; when the original load is a 4 byte instruction but the add and the
 ;; load are 2 2 byte instructions.
@@ -5426,7 +5495,11 @@ 
 (define_split
   [(set (match_operand 0 "d_operand")
 	(match_operand 1 "const_int_operand"))]
-  "TARGET_MIPS16 && reload_completed && INTVAL (operands[1]) < 0"
+  "TARGET_MIPS16 && reload_completed
+   && (ISA_HAS_MIPS16E2
+       ? SMALL_OPERAND_UNSIGNED (-INTVAL (operands[1]))
+	 && INTVAL (operands[1]) != 0
+       : INTVAL (operands[1]) < 0)"
   [(set (match_dup 2)
 	(match_dup 3))
    (set (match_dup 2)
@@ -5842,12 +5915,12 @@ 
 		   (match_operand:BLK 1 "general_operand"))
 	      (use (match_operand:SI 2 ""))
 	      (use (match_operand:SI 3 "const_int_operand"))])]
-  "!TARGET_MIPS16 && !TARGET_MEMCPY"
+  "(!TARGET_MIPS16 || ISA_HAS_COPY) && !TARGET_MEMCPY"
 {
-  if (mips_expand_block_move (operands[0], operands[1], operands[2]))
+  if (mips_expand_block_move (operands[0], operands[1],
+			      operands[2], operands[3]))
     DONE;
-  else
-    FAIL;
+  FAIL;
 })
 
 ;;
@@ -7779,7 +7852,8 @@ 
    && mips16e_save_restore_pattern_p (operands[0], INTVAL (operands[2]), NULL)"
   { return mips16e_output_save_restore (operands[0], INTVAL (operands[2])); }
   [(set_attr "type" "arith")
-   (set_attr "extended_mips16" "yes")])
+   (set_attr "extended_mips16" "yes")
+   (set_attr "can_delay" "no")])
 
 ;; Thread-Local Storage
 
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index a4b93de924d..c5a3addbc55 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -559,3 +559,9 @@  mfunc-opt-list=FILE	Use to specify per function optimizations.
 
 mblockmov-limit=
 Target RejectNegative Undocumented Joined UInteger Var(mips_movmem_limit) Init(-1)
+
+muse-save-restore
+Target Undocumented Var(TARGET_USE_SAVE_RESTORE) Init(-1)
+
+muse-copyw-ucopyw
+Target Undocumented Var(TARGET_USE_COPYW_UCOPYW) Init(-1)
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index a64900d25ef..31cc57af435 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -578,7 +578,7 @@ 
        (and (match_code "ne") (not (match_test "TARGET_MIPS16")))))
 
 (define_predicate "small_data_pattern"
-  (and (match_code "set,parallel,unspec,unspec_volatile,prefetch")
+  (and (match_code "set,parallel,prefetch")
        (match_test "mips_small_data_pattern_p (op)")))
 
 (define_predicate "mem_noofs_operand"
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index cd84cafafd5..d607f8e430c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -28195,7 +28195,7 @@  The processor names are:
 @samp{74kc}, @samp{74kf2_1}, @samp{74kf1_1}, @samp{74kf3_2},
 @samp{1004kc}, @samp{1004kf2_1}, @samp{1004kf1_1},
 @samp{i6400}, @samp{i6500},
-@samp{interaptiv},
+@samp{interaptiv}, @samp{interaptiv-mr2},
 @samp{loongson2e}, @samp{loongson2f}, @samp{loongson3a}, @samp{gs464},
 @samp{gs464e}, @samp{gs264e},
 @samp{m4k},
diff --git a/gcc/testsuite/gcc.target/mips/iamr2.c b/gcc/testsuite/gcc.target/mips/iamr2.c
new file mode 100644
index 00000000000..40e425ddcd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/iamr2.c
@@ -0,0 +1,51 @@ 
+/* { dg-options "-march=interaptiv-mr2 -mno-abicalls -mgpopt -G8 -mabi=32 -mips16 -mmips16e2 -mno-memcpy" } */
+
+/* Test UCOPYW.  */
+
+/* { dg-final { scan-assembler "test01:.*\tucopyw\t.*\tucopyw\t.*test01\n" } } */
+/* { dg-final { scan-assembler-times "\tucopyw\t" 2 } } */
+struct node01
+{
+  int i;
+  int j;
+  int k;
+  int l;
+  int a;
+  int b;
+  int c;
+  int d;
+} __attribute__ ((packed));
+
+struct node01 dst01;
+struct node01 src01;
+
+void
+test01 (void)
+{
+  dst01 = src01;
+}
+
+/* Test COPYW.  */
+
+/* { dg-final { scan-assembler "test02:.*\tcopyw\t.*\tcopyw\t.*test02\n" } } */
+/* { dg-final { scan-assembler-times "\tcopyw\t" 2 } } */
+struct node02
+{
+  int i;
+  int j;
+  int k;
+  int l;
+  int a;
+  int b;
+  int c;
+  int d;
+};
+
+struct node02 dst02;
+struct node02 src02;
+
+void
+test02 (void)
+{
+  dst02 = src02;
+}
diff --git a/gcc/testsuite/gcc.target/mips/memcpy-3.c b/gcc/testsuite/gcc.target/mips/memcpy-3.c
new file mode 100644
index 00000000000..cf1b073c9ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/memcpy-3.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "isa=interaptiv-mr2 -mno-memcpy (-mips16)" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "" } { "" } } */
+
+char * ref = "123456789012";
+
+__attribute__((mips16))
+void
+f1 (int *p)
+{
+  __builtin_memcpy (p, ref, 12);
+}
+
+/* { dg-final { scan-assembler "\tucopyw\t" } } */
diff --git a/gcc/testsuite/gcc.target/mips/memcpy-4.c b/gcc/testsuite/gcc.target/mips/memcpy-4.c
new file mode 100644
index 00000000000..fc4f3761c9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/memcpy-4.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "isa=interaptiv-mr2 -mno-memcpy -mno-use-copyw-ucopyw (-mips16)" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "" } { "" } } */
+
+char * ref = "123456789012";
+
+__attribute__((mips16))
+void
+f1 (int *p)
+{
+  __builtin_memcpy (p, ref, 12);
+}
+
+/* { dg-final { scan-assembler-not "\tucopyw\t" } } */
diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp
index b0825ca4339..3907fe2a778 100644
--- a/gcc/testsuite/gcc.target/mips/mips.exp
+++ b/gcc/testsuite/gcc.target/mips/mips.exp
@@ -289,6 +289,7 @@  foreach option {
     long-calls
     lxc1-sxc1
     madd4
+    memcpy
     paired-single
     plt
     shared
@@ -303,6 +304,7 @@  foreach option {
     loongson-ext
     loongson-ext2
     mips16e2
+    use-copyw-ucopyw
 } {
     lappend mips_option_groups $option "-m(no-|)$option"
 }
@@ -1135,6 +1137,10 @@  proc mips-dg-options { args } {
 	    if { ![regexp {^-march=p5600} $arch] } {
 		set arch "-march=p5600"
 	    }
+	} elseif { [string equal $spec "isa=interaptiv-mr2"] } {
+	    if { ![regexp {^-march=interaptiv-mr2} $arch] } {
+		set arch "-march=interaptiv-mr2"
+	    }
 	} else {
 	    if { ![regexp {^(isa(?:|_rev))(=|<=|>=)([0-9]*)$} \
 		       $spec dummy prop relation value nocpus] } {
diff --git a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c
index 2516b663ca1..103dd82caa1 100644
--- a/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c
+++ b/gcc/testsuite/gcc.target/mips/r10k-cache-barrier-9.c
@@ -1,4 +1,5 @@ 
 /* { dg-options "-mr10k-cache-barrier=store -G8 -w" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
 
 /* Test that out-of-range stores to components of static objects
    are protected by a cache barrier.  */
diff --git a/gcc/testsuite/gcc.target/mips/stack-1.c b/gcc/testsuite/gcc.target/mips/stack-1.c
index 5f25c21a0a9..227e6c9201b 100644
--- a/gcc/testsuite/gcc.target/mips/stack-1.c
+++ b/gcc/testsuite/gcc.target/mips/stack-1.c
@@ -1,3 +1,4 @@ 
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
 /* { dg-final { scan-assembler "\td?addiu\t(\\\$sp,)?\\\$sp,\[1-9\]" } } */
 /* { dg-final { scan-assembler "\tlw\t" } } */
 /* { dg-final { scan-assembler-not "\td?addiu\t(\\\$sp,)?\\\$sp,\[1-9\].*\tlw\t" } } */