@@ -401,4 +401,6 @@ extern void mips_split_msa_subreg_move (rtx, rtx);
extern const char *mips_output_compare (const char *fpcmp, const char *fcond,
const char *fmt, const char *fpcc_mode, bool swap);
+extern bool mips_expand_mod_pow2 (rtx, rtx, rtx);
+
#endif /* ! GCC_MIPS_PROTOS_H */
@@ -5292,6 +5292,19 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
return true;
}
+ /* Don't force the constant into register during modulo by power of two.
+ This is needed so that the MIPS-specific modulo pattern will be
+ selected during the expand phase. */
+ if (!TARGET_64BIT
+ && !TARGET_MIPS16
+ && outer_code == MOD
+ && mode == DImode
+ && (exact_log2 (INTVAL (x)) > 0))
+ {
+ *total = 0;
+ return true;
+ }
+
if (TARGET_MIPS16)
{
cost = mips16_constant_cost (outer_code, INTVAL (x));
@@ -5615,8 +5628,20 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
/* Fall through. */
- case SQRT:
case MOD:
+ /* Modulo by power of two produces (at most) nine instructions. */
+ if (CONST_INT_P (XEXP (x, 1))
+ && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+ && !TARGET_64BIT
+ && !TARGET_MIPS16
+ && mode == DImode)
+ {
+ *total = COSTS_N_INSNS (9);
+ return true;
+ }
+ /* Fall through. */
+
+ case SQRT:
if (float_mode_p)
{
*total = mips_fp_div_cost (mode);
@@ -26107,6 +26132,123 @@ mips_prune_insertions_deletions (struct edge_list* edge_list,
sbitmap_free (ifcv_blocks);
sbitmap_free (insertions);
}
+
+/* Expand modulo by power of two of DImode values on 32-bit targets. */
+
+bool
+mips_expand_mod_pow2 (rtx target, rtx op1, rtx op2)
+{
+ HOST_WIDE_INT val, reg_width;
+ rtx out_low, out_high;
+ rtx in_low, in_high;
+ rtx at, temp;
+ rtx comp, cond_operands[4];
+
+ gcc_assert (GET_CODE (op2) == CONST_INT);
+
+ val = INTVAL (op2);
+
+ int logd = exact_log2 (val);
+
+ if (logd <= 0)
+ return false;
+
+ /* Extract lower and upper words of DImode source and destination. */
+ out_low = mips_subword (target, 0);
+ out_high = mips_subword (target, 1);
+
+ in_low = mips_subword (op1, 0);
+ in_high = mips_subword (op1, 1);
+
+ at = gen_reg_rtx (SImode);
+ temp = gen_reg_rtx (SImode);
+
+ reg_width = GET_MODE_BITSIZE (SImode);
+
+ /* Divisor equals 2. */
+ if (logd == 1)
+ {
+ mips_emit_binary (AND, at, in_low, const1_rtx);
+ mips_emit_binary (ASHIFT, temp, in_low,
+ gen_int_mode (reg_width - 1, SImode));
+ mips_emit_binary (AND, temp, in_high, temp);
+ mips_emit_binary (ASHIFTRT, out_high, temp,
+ gen_int_mode (reg_width - 1, SImode));
+ mips_emit_binary (IOR, out_low, out_high, at);
+
+ return true;
+ }
+ /* Divisor fits into 32 bits. */
+ else if (logd <= reg_width)
+ {
+ mips_emit_binary (ASHIFTRT, at, in_high,
+ gen_int_mode (reg_width - 1, SImode));
+
+ if (logd == reg_width)
+ mips_emit_move (out_low, in_low);
+ else if (ISA_HAS_EXT_INS || logd <= 16)
+ mips_emit_binary (AND, out_low, in_low,
+ gen_int_mode ((1 << logd) - 1, SImode));
+ else
+ {
+ mips_emit_binary (ASHIFT, out_low, in_low,
+ gen_int_mode (reg_width - logd, SImode));
+ mips_emit_binary (LSHIFTRT, out_low, out_low,
+ gen_int_mode (reg_width - logd, SImode));
+ }
+
+ comp = gen_rtx_EQ (VOIDmode, out_low, const0_rtx);
+ cond_operands[0] = at;
+ cond_operands[1] = comp;
+ cond_operands[2] = const0_rtx;
+ cond_operands[3] = at;
+
+ mips_expand_conditional_move (cond_operands);
+ mips_emit_move (out_high, at);
+ if (logd < reg_width)
+ {
+ mips_emit_binary (ASHIFT, at, at, gen_int_mode (logd, SImode));
+ mips_emit_binary (IOR, out_low, out_low, at);
+ }
+
+ return true;
+ }
+ /* Divisor is wider than 32 bits. */
+ else if (logd <= 2 * reg_width - 1)
+ {
+ mips_emit_binary (ASHIFTRT, at, in_high,
+ gen_int_mode (reg_width - 1, SImode));
+ mips_emit_move (temp, in_low);
+
+ if (ISA_HAS_EXT_INS || logd <= 16)
+ mips_emit_binary (AND, out_high, in_high,
+ gen_int_mode ((1 << (logd - reg_width)) - 1, SImode));
+ else
+ {
+ mips_emit_binary (ASHIFT, out_high, in_high,
+ gen_int_mode (2 * reg_width - logd, SImode));
+ mips_emit_binary (LSHIFTRT, out_high, out_high,
+ gen_int_mode (2 * reg_width - logd, SImode));
+ }
+ mips_emit_move (out_low, temp);
+ mips_emit_binary (IOR, temp, temp, out_high);
+
+ comp = gen_rtx_EQ (VOIDmode, temp, const0_rtx);
+ cond_operands[0] = at;
+ cond_operands[1] = comp;
+ cond_operands[2] = const0_rtx;
+ cond_operands[3] = at;
+
+ mips_expand_conditional_move (cond_operands);
+ mips_emit_binary (ASHIFT, at, at,
+ gen_int_mode (logd - reg_width, SImode));
+ mips_emit_binary (IOR, out_high, out_high, at);
+
+ return true;
+ }
+
+ return false;
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -821,6 +821,10 @@
;; modes.
(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")])
+;; This mode iterator is used in the same way as GPR, except that it allows
+;; 64-bit patterns on 32-bit targets.
+(define_mode_iterator GPR3 [SI DI])
+
(define_mode_iterator MOVEP1 [SI SF])
(define_mode_iterator MOVEP2 [SI SF])
(define_mode_iterator JOIN_MODE [HI
@@ -3098,7 +3102,31 @@
[(set_attr "type" "idiv3")
(set_attr "mode" "<MODE>")])
-(define_insn "<u>mod<mode>3"
+(define_expand "<u>mod<mode>3"
+ [(set (match_operand:GPR3 0 "register_operand")
+ (any_mod:GPR3 (match_operand:GPR3 1 "register_operand")
+ (match_operand:GPR3 2 "nonmemory_operand")))]
+ "(TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>DIV)
+ || (!TARGET_64BIT && !TARGET_MIPS16 && <MODE>mode == DImode)"
+{
+ /* In case of DImode signed modulo with power of 2 on 32-bit targets, call
+ the custom expander. */
+ if (<CODE> == MOD
+ && !TARGET_64BIT
+ && !TARGET_MIPS16
+ && GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (mips_expand_mod_pow2 (operands[0], operands[1], operands[2]))
+ DONE;
+ FAIL;
+ }
+ /* Else, forbid the expand of DImode modulo on 32-bit targets. */
+ else if ((!TARGET_64BIT && <MODE>mode == DImode)
+ || GET_CODE (operands[2]) == CONST_INT)
+ FAIL;
+})
+
+(define_insn "*<u>mod<mode>3"
[(set (match_operand:GPR 0 "register_operand" "=&d")
(any_mod:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))]
@@ -3113,7 +3141,6 @@
}
[(set_attr "type" "idiv3")
(set_attr "mode" "<MODE>")])
-
;;
;; ....................
;;
new file mode 100644
@@ -0,0 +1,176 @@
+/* { dg-do run } */
+/* { dg-options "-mabi=32 (REQUIRES_STDLIB)" } */
+
+/* Test modulo by power of two expansion. */
+#include <stdlib.h>
+
+#define ARGPASTE(X, Y) X ## Y
+#define ARGPASTE2(X) ARGPASTE (mod_, X)
+
+/* Create 64 distinct functions which calculate the modulo by power of two */
+#define MOD(LOG) \
+ __attribute__((noinline)) long long ARGPASTE2 (__COUNTER__) (long long x) \
+ { return x % (1LL << (LOG)); }
+#define MOD_1(x) \
+ MOD (x) \
+ MOD (x + 1)
+#define MOD_2(x) \
+ MOD_1 (x) \
+ MOD_1 (x + 2)
+#define MOD_4(x) \
+ MOD_2 (x) \
+ MOD_2 (x + 4)
+#define MOD_8(x) \
+ MOD_4 (x) \
+ MOD_4 (x + 8)
+#define MOD_16(x) \
+ MOD_8 (x) \
+ MOD_8 (x + 16)
+#define MOD_32(x) \
+ MOD_16 (x) \
+ MOD_16 (x + 32)
+
+MOD_32 (0)
+
+#define TEST_NUM_POS 0x7fffffffffffffff
+#define TEST_NUM_NEG 0xffffffffffffffff
+
+/* Test the modulo by power of two with a positive number. */
+#define TEST_MOD_POS(LOG) \
+ ARGPASTE2 (LOG) (TEST_NUM_POS) == (TEST_NUM_POS & ((1LL << (LOG)) - 1)) \
+ ? (void) 0 : abort ()
+
+/* Test the modulo by power of two with a negative number. */
+#define TEST_MOD_NEG(LOG) \
+ ARGPASTE2 (LOG) (TEST_NUM_NEG) == -1LL ? (void) 0 : abort()
+
+int main() {
+ TEST_MOD_POS(1);
+ TEST_MOD_POS(2);
+ TEST_MOD_POS(3);
+ TEST_MOD_POS(4);
+ TEST_MOD_POS(5);
+ TEST_MOD_POS(6);
+ TEST_MOD_POS(7);
+ TEST_MOD_POS(8);
+ TEST_MOD_POS(9);
+ TEST_MOD_POS(10);
+ TEST_MOD_POS(11);
+ TEST_MOD_POS(12);
+ TEST_MOD_POS(13);
+ TEST_MOD_POS(14);
+ TEST_MOD_POS(15);
+ TEST_MOD_POS(16);
+ TEST_MOD_POS(17);
+ TEST_MOD_POS(18);
+ TEST_MOD_POS(19);
+ TEST_MOD_POS(20);
+ TEST_MOD_POS(21);
+ TEST_MOD_POS(22);
+ TEST_MOD_POS(23);
+ TEST_MOD_POS(24);
+ TEST_MOD_POS(25);
+ TEST_MOD_POS(26);
+ TEST_MOD_POS(27);
+ TEST_MOD_POS(28);
+ TEST_MOD_POS(29);
+ TEST_MOD_POS(30);
+ TEST_MOD_POS(31);
+ TEST_MOD_POS(32);
+ TEST_MOD_POS(33);
+ TEST_MOD_POS(34);
+ TEST_MOD_POS(35);
+ TEST_MOD_POS(36);
+ TEST_MOD_POS(37);
+ TEST_MOD_POS(38);
+ TEST_MOD_POS(39);
+ TEST_MOD_POS(40);
+ TEST_MOD_POS(41);
+ TEST_MOD_POS(42);
+ TEST_MOD_POS(43);
+ TEST_MOD_POS(44);
+ TEST_MOD_POS(45);
+ TEST_MOD_POS(46);
+ TEST_MOD_POS(47);
+ TEST_MOD_POS(48);
+ TEST_MOD_POS(49);
+ TEST_MOD_POS(50);
+ TEST_MOD_POS(51);
+ TEST_MOD_POS(52);
+ TEST_MOD_POS(53);
+ TEST_MOD_POS(54);
+ TEST_MOD_POS(55);
+ TEST_MOD_POS(56);
+ TEST_MOD_POS(57);
+ TEST_MOD_POS(58);
+ TEST_MOD_POS(59);
+ TEST_MOD_POS(60);
+ TEST_MOD_POS(61);
+ TEST_MOD_POS(62);
+
+ TEST_MOD_NEG(1);
+ TEST_MOD_NEG(2);
+ TEST_MOD_NEG(3);
+ TEST_MOD_NEG(4);
+ TEST_MOD_NEG(5);
+ TEST_MOD_NEG(6);
+ TEST_MOD_NEG(7);
+ TEST_MOD_NEG(8);
+ TEST_MOD_NEG(9);
+ TEST_MOD_NEG(10);
+ TEST_MOD_NEG(11);
+ TEST_MOD_NEG(12);
+ TEST_MOD_NEG(13);
+ TEST_MOD_NEG(14);
+ TEST_MOD_NEG(15);
+ TEST_MOD_NEG(16);
+ TEST_MOD_NEG(17);
+ TEST_MOD_NEG(18);
+ TEST_MOD_NEG(19);
+ TEST_MOD_NEG(20);
+ TEST_MOD_NEG(21);
+ TEST_MOD_NEG(22);
+ TEST_MOD_NEG(23);
+ TEST_MOD_NEG(24);
+ TEST_MOD_NEG(25);
+ TEST_MOD_NEG(26);
+ TEST_MOD_NEG(27);
+ TEST_MOD_NEG(28);
+ TEST_MOD_NEG(29);
+ TEST_MOD_NEG(30);
+ TEST_MOD_NEG(31);
+ TEST_MOD_NEG(32);
+ TEST_MOD_NEG(33);
+ TEST_MOD_NEG(34);
+ TEST_MOD_NEG(35);
+ TEST_MOD_NEG(36);
+ TEST_MOD_NEG(37);
+ TEST_MOD_NEG(38);
+ TEST_MOD_NEG(39);
+ TEST_MOD_NEG(40);
+ TEST_MOD_NEG(41);
+ TEST_MOD_NEG(42);
+ TEST_MOD_NEG(43);
+ TEST_MOD_NEG(44);
+ TEST_MOD_NEG(45);
+ TEST_MOD_NEG(46);
+ TEST_MOD_NEG(47);
+ TEST_MOD_NEG(48);
+ TEST_MOD_NEG(49);
+ TEST_MOD_NEG(50);
+ TEST_MOD_NEG(51);
+ TEST_MOD_NEG(52);
+ TEST_MOD_NEG(53);
+ TEST_MOD_NEG(54);
+ TEST_MOD_NEG(55);
+ TEST_MOD_NEG(56);
+ TEST_MOD_NEG(57);
+ TEST_MOD_NEG(58);
+ TEST_MOD_NEG(59);
+ TEST_MOD_NEG(60);
+ TEST_MOD_NEG(61);
+ TEST_MOD_NEG(62);
+ TEST_MOD_NEG(63);
+
+ exit (0);
+}