@@ -531,6 +531,7 @@ riscv*)
cpu_type=riscv
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
+ extra_objs="${extra_objs} thead.o"
d_target_objs="riscv-d.o"
extra_headers="riscv_vector.h"
target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
@@ -38,3 +38,59 @@ (define_peephole2
{
operands[5] = GEN_INT (INTVAL (operands[2]) - INTVAL (operands[5]));
})
+
+;; XTheadMemPair: merge two SI or DI loads
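+;; For example (RV64, base address in a0), the pair
+;;   "ld a4,0(a0); ld a5,8(a0)" becomes "th.ldd a4,a5,(a0),0,4".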
+(define_peephole2
+ [(set (match_operand:GPR 0 "register_operand" "")
+ (match_operand:GPR 1 "memory_operand" ""))
+ (set (match_operand:GPR 2 "register_operand" "")
+ (match_operand:GPR 3 "memory_operand" ""))]
+ "TARGET_XTHEADMEMPAIR
+ && th_mempair_operands_p (operands, true, <GPR:MODE>mode)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])]
+{
+ th_mempair_order_operands (operands, true, <GPR:MODE>mode);
+})
+
+;; XTheadMemPair: merge two SI or DI stores
+(define_peephole2
+ [(set (match_operand:GPR 0 "memory_operand" "")
+ (match_operand:GPR 1 "register_operand" ""))
+ (set (match_operand:GPR 2 "memory_operand" "")
+ (match_operand:GPR 3 "register_operand" ""))]
+ "TARGET_XTHEADMEMPAIR
+ && th_mempair_operands_p (operands, false, <GPR:MODE>mode)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])]
+{
+ th_mempair_order_operands (operands, false, <GPR:MODE>mode);
+})
+
+;; XTheadMemPair: merge two SI loads with sign-extension
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "memory_operand" "")))
+ (set (match_operand:DI 2 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))]
+ "TARGET_XTHEADMEMPAIR && TARGET_64BIT
+ && th_mempair_operands_p (operands, true, SImode)"
+ [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1)))
+ (set (match_dup 2) (sign_extend:DI (match_dup 3)))])]
+{
+ th_mempair_order_operands (operands, true, SImode);
+})
+
+;; XTheadMemPair: merge two SI loads with zero-extension
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "memory_operand" "")))
+ (set (match_operand:DI 2 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))]
+ "TARGET_XTHEADMEMPAIR && TARGET_64BIT
+ && th_mempair_operands_p (operands, true, SImode)"
+ [(parallel [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
+ (set (match_dup 2) (zero_extend:DI (match_dup 3)))])]
+{
+ th_mempair_order_operands (operands, true, SImode);
+})
@@ -54,6 +54,7 @@ extern bool riscv_split_64bit_move_p (rtx, rtx);
extern void riscv_split_doubleword_move (rtx, rtx);
extern const char *riscv_output_move (rtx, rtx);
extern const char *riscv_output_return ();
+
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx);
extern void riscv_expand_float_scc (rtx, enum rtx_code, rtx, rtx);
@@ -200,4 +201,17 @@ const unsigned int RISCV_BUILTIN_SHIFT = 1;
/* Mask that selects the riscv_builtin_class part of a function code. */
const unsigned int RISCV_BUILTIN_CLASS = (1 << RISCV_BUILTIN_SHIFT) - 1;
+/* Routines implemented in thead.cc. */
+extern bool th_mempair_operands_p (rtx[4], bool, machine_mode);
+extern void th_mempair_order_operands (rtx[4], bool, machine_mode);
+extern void th_mempair_prepare_save_restore_operands (rtx[4], bool,
+ machine_mode,
+ int, HOST_WIDE_INT,
+ int, HOST_WIDE_INT);
+extern void th_mempair_save_restore_regs (rtx[4], bool, machine_mode);
+#ifdef RTX_CODE
+extern const char *th_mempair_output_move (rtx[4], bool, machine_mode,
+                                           RTX_CODE);
+#endif
+
#endif /* ! GCC_RISCV_PROTOS_H */
@@ -4963,6 +4963,35 @@ riscv_set_return_address (rtx address, rtx scratch)
riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
}
+/* Save register REG to MEM. Make the instruction frame-related. */
+
+static void
+riscv_save_reg (rtx reg, rtx mem)
+{
+ riscv_emit_move (mem, reg);
+ riscv_set_frame_expr (riscv_frame_set (mem, reg));
+}
+
+/* Restore register REG from MEM. */
+
+static void
+riscv_restore_reg (rtx reg, rtx mem)
+{
+ rtx insn = riscv_emit_move (reg, mem);
+ rtx dwarf = NULL_RTX;
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+ if (epilogue_cfa_sp_offset && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
+ {
+ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (epilogue_cfa_sp_offset));
+ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
+ }
+
+ REG_NOTES (insn) = dwarf;
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
/* A function to save or store a register. The first argument is the
register and the second is the stack slot. */
typedef void (*riscv_save_restore_fn) (rtx, rtx);
@@ -5057,6 +5086,36 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
&& riscv_is_eh_return_data_register (regno))
continue;
+ if (TARGET_XTHEADMEMPAIR)
+ {
+ /* Get the next reg/offset pair. */
+ HOST_WIDE_INT offset2 = offset;
+ unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
+
+ /* Validate everything before emitting a mempair instruction. */
+ if (regno2 != INVALID_REGNUM
+ && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && !(epilogue && !maybe_eh_return
+ && riscv_is_eh_return_data_register (regno2)))
+ {
+ bool load_p = (fn == riscv_restore_reg);
+ rtx operands[4];
+ th_mempair_prepare_save_restore_operands (operands,
+ load_p, word_mode,
+ regno, offset,
+ regno2, offset2);
+
+ /* If the operands fit into a mempair insn, then emit one. */
+ if (th_mempair_operands_p (operands, load_p, word_mode))
+ {
+ th_mempair_save_restore_regs (operands, load_p, word_mode);
+ offset = offset2;
+ regno = regno2;
+ continue;
+ }
+ }
+ }
+
riscv_save_restore_reg (word_mode, regno, offset, fn);
}
@@ -5075,35 +5134,6 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
}
}
-/* Save register REG to MEM. Make the instruction frame-related. */
-
-static void
-riscv_save_reg (rtx reg, rtx mem)
-{
- riscv_emit_move (mem, reg);
- riscv_set_frame_expr (riscv_frame_set (mem, reg));
-}
-
-/* Restore register REG from MEM. */
-
-static void
-riscv_restore_reg (rtx reg, rtx mem)
-{
- rtx insn = riscv_emit_move (reg, mem);
- rtx dwarf = NULL_RTX;
- dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
-
- if (epilogue_cfa_sp_offset && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
- {
- rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (epilogue_cfa_sp_offset));
- dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
- }
-
- REG_NOTES (insn) = dwarf;
- RTX_FRAME_RELATED_P (insn) = 1;
-}
-
/* For stack frames that can't be allocated with a single ADDI instruction,
compute the best value to initially allocate. It must at a minimum
allocate enough space to spill the callee-saved registers. If TARGET_RVC,
@@ -75,6 +75,10 @@ riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc
$(COMPILE) $<
$(POSTCOMPILE)
+thead.o: $(srcdir)/config/riscv/thead.cc
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def
$(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \
new file mode 100644
@@ -0,0 +1,427 @@
+/* Subroutines used for code generation for the T-Head extensions of RISC-V.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ Contributed by Christoph Müllner (christoph.muellner@vrull.eu).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "target.h"
+#include "backend.h"
+#include "rtl.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "poly-int.h"
+#include "output.h"
+
+/* If MEM is in the form of "base+offset", extract the two parts
+   of the address and set them to BASE and OFFSET, otherwise return
+   false after clearing BASE and OFFSET.  */
+
+static bool
+extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
+{
+ rtx addr;
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ if (REG_P (addr))
+ {
+ *base = addr;
+ *offset = const0_rtx;
+ return true;
+ }
+
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
+ {
+ *base = XEXP (addr, 0);
+ *offset = XEXP (addr, 1);
+ return true;
+ }
+
+ *base = NULL_RTX;
+ *offset = NULL_RTX;
+
+ return false;
+}
+
+/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
+ and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */
+
+static void
+split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
+{
+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
+ {
+ *base_ptr = XEXP (x, 0);
+ *offset_ptr = INTVAL (XEXP (x, 1));
+ }
+ else
+ {
+ *base_ptr = x;
+ *offset_ptr = 0;
+ }
+}
+
+/* Output a mempair instruction with the provided OPERANDS.
+   LOAD_P is true if we have a pair of loads (stores otherwise).
+   MODE is the access mode (DI or SI).
+   CODE is the extension code (UNKNOWN, SIGN_EXTEND or ZERO_EXTEND).
+   This function does not handle invalid inputs gracefully,
+   but is full of assertions to ensure that only valid instructions
+   are emitted.  */
+
+const char *
+th_mempair_output_move (rtx operands[4], bool load_p,
+ machine_mode mode, RTX_CODE code)
+{
+ rtx reg1, reg2, mem1, mem2, base1, base2;
+ HOST_WIDE_INT offset1, offset2;
+ rtx output_operands[5];
+ const char* format;
+
+ gcc_assert (mode == SImode || mode == DImode);
+
+ /* Paired 64-bit access instructions have a fixed shift amount of 4.
+ Paired 32-bit access instructions have a fixed shift amount of 3. */
+ unsigned shamt = (mode == DImode) ? 4 : 3;
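+  /* E.g. a DImode pair at base+32 and base+40 is encoded with
+     imm2 = 32 >> 4 = 2 and emitted as "th.ldd rd1, rd2, (base), 2, 4".  */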
+
+ if (load_p)
+ {
+ reg1 = copy_rtx (operands[0]);
+ reg2 = copy_rtx (operands[2]);
+ mem1 = copy_rtx (operands[1]);
+ mem2 = copy_rtx (operands[3]);
+
+      if (mode == SImode)
+        {
+          if (code == ZERO_EXTEND)
+            format = "th.lwud\t%0, %1, (%2), %3, %4";
+          else /* SIGN_EXTEND or UNKNOWN.  */
+            format = "th.lwd\t%0, %1, (%2), %3, %4";
+        }
+      else
+        format = "th.ldd\t%0, %1, (%2), %3, %4";
+ }
+ else
+ {
+ reg1 = copy_rtx (operands[1]);
+ reg2 = copy_rtx (operands[3]);
+ mem1 = copy_rtx (operands[0]);
+ mem2 = copy_rtx (operands[2]);
+
+ if (mode == SImode)
+ format = "th.swd\t%z0, %z1, (%2), %3, %4";
+ else
+ format = "th.sdd\t%z0, %z1, (%2), %3, %4";
+ }
+
+ split_plus (XEXP (mem1, 0), &base1, &offset1);
+ split_plus (XEXP (mem2, 0), &base2, &offset2);
+ gcc_assert (rtx_equal_p (base1, base2));
+ auto size1 = MEM_SIZE (mem1);
+ auto size2 = MEM_SIZE (mem2);
+ gcc_assert (known_eq (size1, size2));
+ gcc_assert (known_eq (offset1 + size1, offset2));
+
+ HOST_WIDE_INT imm2 = offset1 >> shamt;
+
+ /* Make sure all mempair instruction constraints are met. */
+ gcc_assert (imm2 >= 0 && imm2 < 4);
+ gcc_assert ((imm2 << shamt) == offset1);
+ gcc_assert (REG_P (reg1));
+ gcc_assert (REG_P (reg2));
+ gcc_assert (REG_P (base1));
+ if (load_p)
+ {
+ gcc_assert (REGNO (reg1) != REGNO (reg2));
+ gcc_assert (REGNO (reg1) != REGNO (base1));
+ gcc_assert (REGNO (reg2) != REGNO (base1));
+ }
+
+ /* Output the mempair instruction. */
+ output_operands[0] = copy_rtx (reg1);
+ output_operands[1] = copy_rtx (reg2);
+ output_operands[2] = copy_rtx (base1);
+ output_operands[3] = gen_rtx_CONST_INT (mode, imm2);
+ output_operands[4] = gen_rtx_CONST_INT (mode, shamt);
+ output_asm_insn (format, output_operands);
+
+ return "";
+}
+
+/* Analyse if a pair of loads/stores MEM1 and MEM2 with given MODE
+   are consecutive so they can be merged into a mempair instruction.
+   REVERSED will be set to true if a reversal of the accesses is
+   required (false otherwise).  Returns true if the accesses can be
+   merged (even if reversing is necessary) and false if not.  */
+
+static bool
+th_mempair_check_consecutive_mems (machine_mode mode, rtx *mem1, rtx *mem2,
+ bool *reversed)
+{
+ rtx base1, base2, offset1, offset2;
+ extract_base_offset_in_addr (*mem1, &base1, &offset1);
+ extract_base_offset_in_addr (*mem2, &base2, &offset2);
+
+ /* Make sure both mems are in base+offset form. */
+ if (!base1 || !base2)
+ return false;
+
+ /* If both mems use the same base register, just check the offsets. */
+ if (rtx_equal_p (base1, base2))
+ {
+ auto size = GET_MODE_SIZE (mode);
+
+ if (known_eq (UINTVAL (offset1) + size, UINTVAL (offset2)))
+ {
+ *reversed = false;
+ return true;
+ }
+
+ if (known_eq (UINTVAL (offset2) + size, UINTVAL (offset1)))
+ {
+ *reversed = true;
+ return true;
+ }
+
+ return false;
+ }
+
+ return false;
+}
+
+/* Check if the given MEM can be used to define the address of a mempair
+ instruction. */
+
+static bool
+th_mempair_operand_p (rtx mem, machine_mode mode)
+{
+ if (!MEM_SIZE_KNOWN_P (mem))
+ return false;
+
+ /* Only DI or SI mempair instructions exist. */
+ gcc_assert (mode == SImode || mode == DImode);
+ auto mem_sz = MEM_SIZE (mem);
+ auto mode_sz = GET_MODE_SIZE (mode);
+ if (!known_eq (mem_sz, mode_sz))
+ return false;
+
+ /* Paired 64-bit access instructions have a fixed shift amount of 4.
+ Paired 32-bit access instructions have a fixed shift amount of 3. */
+ machine_mode mem_mode = GET_MODE (mem);
+ unsigned shamt = (mem_mode == DImode) ? 4 : 3;
+
+ rtx base;
+ HOST_WIDE_INT offset;
+ split_plus (XEXP (mem, 0), &base, &offset);
+ HOST_WIDE_INT imm2 = offset >> shamt;
+
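+  /* The immediate is a 2-bit field, so the first offset must equal
+     imm2 << shamt: 0/16/32/48 for DImode, 0/8/16/24 for SImode.  */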
+ if (imm2 < 0 || imm2 >= 4)
+ return false;
+
+ if ((imm2 << shamt) != offset)
+ return false;
+
+ return true;
+}
+
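+/* Return true if the load destinations REG1/REG2 conflict with each
+   other or (conservatively) with the address in MEM, in which case
+   the two loads cannot be merged into a mempair instruction.  */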
+static bool
+th_mempair_load_overlap_p (rtx reg1, rtx reg2, rtx mem)
+{
+ if (REGNO (reg1) == REGNO (reg2))
+ return true;
+
+ if (reg_overlap_mentioned_p (reg1, mem))
+ return true;
+
+ rtx base;
+ HOST_WIDE_INT offset;
+ split_plus (XEXP (mem, 0), &base, &offset);
+
+  if (!REG_P (base))
+    return true;
+
+  if (REGNO (base) == REGNO (reg1)
+      || REGNO (base) == REGNO (reg2))
+    return true;
+
+ return false;
+}
+
+/* Given OPERANDS of consecutive load/store, check if we can merge
+   them into load-pair or store-pair instructions.
+   LOAD_P is true if they are load instructions.
+   MODE is the mode of the memory operation.  */
+
+bool
+th_mempair_operands_p (rtx operands[4], bool load_p,
+ machine_mode mode)
+{
+ rtx mem_1, mem_2, reg_1, reg_2;
+
+ if (load_p)
+ {
+ reg_1 = operands[0];
+ mem_1 = operands[1];
+ reg_2 = operands[2];
+ mem_2 = operands[3];
+ if (!REG_P (reg_1) || !REG_P (reg_2))
+ return false;
+ if (th_mempair_load_overlap_p (reg_1, reg_2, mem_1))
+ return false;
+ if (th_mempair_load_overlap_p (reg_1, reg_2, mem_2))
+ return false;
+ }
+ else
+ {
+ mem_1 = operands[0];
+ reg_1 = operands[1];
+ mem_2 = operands[2];
+ reg_2 = operands[3];
+ }
+
+ /* Check if the registers are GP registers. */
+ if (!REG_P (reg_1) || !GP_REG_P (REGNO (reg_1))
+ || !REG_P (reg_2) || !GP_REG_P (REGNO (reg_2)))
+ return false;
+
+ /* The mems cannot be volatile. */
+ if (!MEM_P (mem_1) || !MEM_P (mem_2))
+ return false;
+ if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
+ return false;
+
+ /* If we have slow unaligned access, we only accept aligned memory. */
+ if (riscv_slow_unaligned_access_p
+ && known_lt (MEM_ALIGN (mem_1), GET_MODE_SIZE (mode) * BITS_PER_UNIT))
+ return false;
+
+ /* Check if the addresses are in the form of [base+offset]. */
+ bool reversed = false;
+ if (!th_mempair_check_consecutive_mems (mode, &mem_1, &mem_2, &reversed))
+ return false;
+
+  /* The first memory access must be a valid mempair operand.  */
+ if ((!reversed && !th_mempair_operand_p (mem_1, mode))
+ || (reversed && !th_mempair_operand_p (mem_2, mode)))
+ return false;
+
+ /* The operands must be of the same size. */
+ gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
+ GET_MODE_SIZE (GET_MODE (mem_2))));
+
+ return true;
+}
+
+/* Given OPERANDS of consecutive load/store that can be merged,
+   swap them so that the access at the lower address comes first.  */
+void
+th_mempair_order_operands (rtx operands[4], bool load_p, machine_mode mode)
+{
+ int mem_op = load_p ? 1 : 0;
+ bool reversed = false;
+ if (!th_mempair_check_consecutive_mems (mode,
+ operands + mem_op,
+ operands + mem_op + 2,
+ &reversed))
+ gcc_unreachable ();
+
+ if (reversed)
+ {
+ /* Irrespective of whether this is a load or a store,
+ we do the same swap. */
+ std::swap (operands[0], operands[2]);
+ std::swap (operands[1], operands[3]);
+ }
+}
+
+/* Similar to riscv_save_reg, but saves two registers to memory
+   and marks the resulting instruction as frame-related.  */
+
+static void
+th_mempair_save_regs (rtx operands[4])
+{
+ rtx set1 = gen_rtx_SET (operands[0], operands[1]);
+ rtx set2 = gen_rtx_SET (operands[2], operands[3]);
+ rtx insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set1, set2)));
+ RTX_FRAME_RELATED_P (insn) = 1;
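+  /* Attach a REG_CFA_OFFSET note per store, so the generated CFI
+     describes both register saves in the PARALLEL.  */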
+ add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set1));
+ add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set2));
+}
+
+/* Similar to riscv_restore_reg, but restores two registers from memory
+   and marks the instruction frame-related.  */
+
+static void
+th_mempair_restore_regs (rtx operands[4])
+{
+ rtx set1 = gen_rtx_SET (operands[0], operands[1]);
+ rtx set2 = gen_rtx_SET (operands[2], operands[3]);
+ rtx insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set1, set2)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, operands[0]);
+ add_reg_note (insn, REG_CFA_RESTORE, operands[2]);
+}
+
+/* Prepare the OPERANDS array to emit a mempair instruction using the
+   provided information.  No checks are performed; the resulting array
+   should be validated using th_mempair_operands_p ().  */
+
+void
+th_mempair_prepare_save_restore_operands (rtx operands[4],
+ bool load_p, machine_mode mode,
+ int regno, HOST_WIDE_INT offset,
+ int regno2, HOST_WIDE_INT offset2)
+{
+ int reg_op = load_p ? 0 : 1;
+ int mem_op = load_p ? 1 : 0;
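+  /* Resulting layout: {reg1, mem1, reg2, mem2} for loads,
+     {mem1, reg1, mem2, reg2} for stores.  */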
+
+ rtx mem1 = plus_constant (mode, stack_pointer_rtx, offset);
+ mem1 = gen_frame_mem (mode, mem1);
+ rtx mem2 = plus_constant (mode, stack_pointer_rtx, offset2);
+ mem2 = gen_frame_mem (mode, mem2);
+
+ operands[reg_op] = gen_rtx_REG (mode, regno);
+ operands[mem_op] = mem1;
+ operands[2 + reg_op] = gen_rtx_REG (mode, regno2);
+ operands[2 + mem_op] = mem2;
+}
+
+/* Emit a mempair instruction to save/restore two registers to/from stack. */
+
+void
+th_mempair_save_restore_regs (rtx operands[4], bool load_p,
+ machine_mode mode)
+{
+ gcc_assert (th_mempair_operands_p (operands, load_p, mode));
+
+ th_mempair_order_operands (operands, load_p, mode);
+
+ if (load_p)
+ th_mempair_restore_regs (operands);
+ else
+ th_mempair_save_regs (operands);
+}
@@ -292,3 +292,55 @@ (define_insn "*th_sextw_msubhisi4"
[(set_attr "type" "imul")
(set_attr "mode" "SI")]
)
+
+;; XTheadMemPair
+
+;; MEMPAIR load 64/32 bit
+(define_insn "*th_mempair_load_<GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (match_operand:GPR 1 "memory_operand" "m"))
+ (set (match_operand:GPR 2 "register_operand" "=r")
+ (match_operand:GPR 3 "memory_operand" "m"))]
+ "TARGET_XTHEADMEMPAIR && reload_completed
+ && th_mempair_operands_p (operands, true, <GPR:MODE>mode)"
+ { return th_mempair_output_move (operands, true, <GPR:MODE>mode, UNKNOWN); }
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "<GPR:MODE>")])
+
+;; MEMPAIR store 64/32 bit
+(define_insn "*th_mempair_store_<GPR:mode>2"
+ [(set (match_operand:GPR 0 "memory_operand" "=m")
+ (match_operand:GPR 1 "register_operand" "r"))
+ (set (match_operand:GPR 2 "memory_operand" "=m")
+ (match_operand:GPR 3 "register_operand" "r"))]
+ "TARGET_XTHEADMEMPAIR && reload_completed
+ && th_mempair_operands_p (operands, false, <GPR:MODE>mode)"
+ { return th_mempair_output_move (operands, false, <GPR:MODE>mode, UNKNOWN); }
+ [(set_attr "move_type" "store")
+ (set_attr "mode" "<GPR:MODE>")])
+
+;; MEMPAIR load DI, sign-extended from SI
+(define_insn "*th_mempair_load_extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI (match_operand:SI 1 "memory_operand" "m")))
+   (set (match_operand:DI 2 "register_operand" "=r")
+        (sign_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
+ "TARGET_XTHEADMEMPAIR && TARGET_64BIT && reload_completed
+ && th_mempair_operands_p (operands, true, SImode)"
+ { return th_mempair_output_move (operands, true, SImode, SIGN_EXTEND); }
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+;; MEMPAIR load DI, zero-extended from SI
+(define_insn "*th_mempair_load_zero_extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))
+   (set (match_operand:DI 2 "register_operand" "=r")
+        (zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
+ "TARGET_XTHEADMEMPAIR && TARGET_64BIT && reload_completed
+ && th_mempair_operands_p (operands, true, SImode)"
+ { return th_mempair_output_move (operands, true, SImode, ZERO_EXTEND); }
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
new file mode 100644
@@ -0,0 +1,98 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-g" "-Oz" "-Os" "-flto" } } */
+/* { dg-options "-march=rv64gc_xtheadmempair -mtune=thead-c906" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_xtheadmempair -mtune=thead-c906" { target { rv32 } } } */
+
+#include <inttypes.h>
+
+#if __riscv_xlen == 32
+typedef uint32_t xlen_t;
+#else
+typedef uint64_t xlen_t;
+#endif
+
+void foof (xlen_t*, xlen_t, xlen_t);
+void foor (xlen_t*, xlen_t, xlen_t);
+void foowu (uint32_t*, uint64_t, uint64_t);
+void foows (int32_t*, int64_t, int64_t);
+
+#define LxD_TEST(f, T, i1, i2) \
+void \
+f ## i1 ## i2(T *arr) \
+{ \
+ foo ## f(arr, arr[i1], arr[i2]); \
+}
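+// E.g. LxD_TEST(f, xlen_t, 0, 1) expands to
+//   void f01(xlen_t *arr) { foof(arr, arr[0], arr[1]); }
+// i.e. a function that loads two adjacent array elements.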
+
+// works
+LxD_TEST(f, xlen_t, 0, 1)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(f, xlen_t, 1, 2)
+// works
+LxD_TEST(f, xlen_t, 2, 3)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(f, xlen_t, 3, 4)
+// works
+LxD_TEST(f, xlen_t, 4, 5)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(f, xlen_t, 5, 6)
+// works
+LxD_TEST(f, xlen_t, 6, 7)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(f, xlen_t, 7, 8)
+// does not work (out of range)
+LxD_TEST(f, xlen_t, 8, 9)
+
+// works with reordering
+LxD_TEST(r, xlen_t, 1, 0)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(r, xlen_t, 2, 1)
+// works with reordering
+LxD_TEST(r, xlen_t, 3, 2)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(r, xlen_t, 4, 3)
+// works with reordering
+LxD_TEST(r, xlen_t, 5, 4)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(r, xlen_t, 6, 5)
+// works with reordering
+LxD_TEST(r, xlen_t, 7, 6)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(r, xlen_t, 8, 7)
+// does not work (out of range)
+LxD_TEST(r, xlen_t, 9, 8)
+
+#if __riscv_xlen != 32
+// works
+LxD_TEST(wu, uint32_t, 0, 1)
+LxD_TEST(ws, int32_t, 0, 1)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(wu, uint32_t, 1, 2)
+LxD_TEST(ws, int32_t, 1, 2)
+// works
+LxD_TEST(wu, uint32_t, 2, 3)
+LxD_TEST(ws, int32_t, 2, 3)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(wu, uint32_t, 3, 4)
+LxD_TEST(ws, int32_t, 3, 4)
+// works
+LxD_TEST(wu, uint32_t, 4, 5)
+LxD_TEST(ws, int32_t, 4, 5)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(wu, uint32_t, 5, 6)
+LxD_TEST(ws, int32_t, 5, 6)
+// works
+LxD_TEST(wu, uint32_t, 6, 7)
+LxD_TEST(ws, int32_t, 6, 7)
+// does not work (can't merge with unaligned offset)
+LxD_TEST(wu, uint32_t, 7, 8)
+LxD_TEST(ws, int32_t, 7, 8)
+// does not work (out of range)
+LxD_TEST(wu, uint32_t, 8, 9)
+LxD_TEST(ws, int32_t, 8, 9)
+#endif
+
+/* { dg-final { scan-assembler-times "th.ldd\t" 8 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "th.lwud\t" 4 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "th.lwd\t" 4 { target { rv64 } } } } */
+
+/* { dg-final { scan-assembler-times "th.lwd\t" 8 { target { rv32 } } } } */
new file mode 100644
@@ -0,0 +1,84 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-g" "-Oz" "-Os" "-flto" } } */
+/* { dg-options "-march=rv64gc_xtheadmempair -mtune=thead-c906" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_xtheadmempair -mtune=thead-c906" { target { rv32 } } } */
+
+#include <inttypes.h>
+
+#if __riscv_xlen == 32
+typedef uint32_t xlen_t;
+#else
+typedef uint64_t xlen_t;
+#endif
+
+#define SxD_TEST(f, T, i1, i2) \
+void \
+f ## i1 ## i2(T *arr, T x, T y) \
+{ \
+ arr[i1] = x; \
+ arr[i2] = y; \
+}
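+// E.g. SxD_TEST(f, xlen_t, 0, 1) expands to
+//   void f01(xlen_t *arr, xlen_t x, xlen_t y) { arr[0] = x; arr[1] = y; }
+// i.e. a function that stores to two adjacent array elements.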
+
+// works
+SxD_TEST(f, xlen_t, 0, 1)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(f, xlen_t, 1, 2)
+// works
+SxD_TEST(f, xlen_t, 2, 3)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(f, xlen_t, 3, 4)
+// works
+SxD_TEST(f, xlen_t, 4, 5)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(f, xlen_t, 5, 6)
+// works
+SxD_TEST(f, xlen_t, 6, 7)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(f, xlen_t, 7, 8)
+// does not work (out of range)
+SxD_TEST(f, xlen_t, 8, 9)
+
+// works with reordering
+SxD_TEST(r, xlen_t, 1, 0)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(r, xlen_t, 2, 1)
+// works with reordering
+SxD_TEST(r, xlen_t, 3, 2)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(r, xlen_t, 4, 3)
+// works with reordering
+SxD_TEST(r, xlen_t, 5, 4)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(r, xlen_t, 6, 5)
+// works with reordering
+SxD_TEST(r, xlen_t, 7, 6)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(r, xlen_t, 8, 7)
+// does not work (out of range)
+SxD_TEST(r, xlen_t, 9, 8)
+
+#if __riscv_xlen != 32
+// works
+SxD_TEST(w, uint32_t, 0, 1)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(w, uint32_t, 1, 2)
+// works
+SxD_TEST(w, uint32_t, 2, 3)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(w, uint32_t, 3, 4)
+// works
+SxD_TEST(w, uint32_t, 4, 5)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(w, uint32_t, 5, 6)
+// works
+SxD_TEST(w, uint32_t, 6, 7)
+// does not work (can't merge with unaligned offset)
+SxD_TEST(w, uint32_t, 7, 8)
+// does not work (out of range)
+SxD_TEST(w, uint32_t, 8, 9)
+#endif
+
+/* { dg-final { scan-assembler-times "th.sdd\t" 8 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "th.swd\t" 4 { target { rv64 } } } } */
+
+/* { dg-final { scan-assembler-times "th.swd\t" 8 { target { rv32 } } } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-g" "-Oz" "-Os" "-flto" } } */
+/* { dg-options "-march=rv64gc_xtheadmempair -mtune=thead-c906" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_xtheadmempair -mtune=thead-c906" { target { rv32 } } } */
+
+#include <inttypes.h>
+
+#if __riscv_xlen == 32
+typedef uint32_t xlen_t;
+#else
+typedef uint64_t xlen_t;
+#endif
+
+void foo (xlen_t, xlen_t, xlen_t, xlen_t, xlen_t, xlen_t, xlen_t, xlen_t);
+void bar (xlen_t, xlen_t, xlen_t, xlen_t, xlen_t, xlen_t, xlen_t, xlen_t);
+
+void baz (xlen_t a, xlen_t b, xlen_t c, xlen_t d, xlen_t e, xlen_t f, xlen_t g, xlen_t h)
+{
+ foo (a, b, c, d, e, f, g, h);
+ /* RV64: We don't use 0(sp), therefore we can only get 3 mempairs. */
+ /* RV32: We don't use 0(sp)-8(sp), therefore we can only get 2 mempairs. */
+ bar (a, b, c, d, e, f, g, h);
+}
+
+/* { dg-final { scan-assembler-times "th.ldd\t" 3 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "th.sdd\t" 3 { target { rv64 } } } } */
+
+/* { dg-final { scan-assembler-times "th.lwd\t" 2 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "th.swd\t" 2 { target { rv32 } } } } */