[56/61] Inefficient 64-bit signed modulo by powers of two

Message ID 20250131171232.1018281-58-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: Mihailo Stojanovic <mihailo.stojanovic@rt-rk.com>

This adds the custom MIPS-specific modulo by power of two expander,
which uses a modified algorithm, tailored to MIPS instruction set.

gcc/

    * config/mips/mips-protos.h (mips_expand_mod_pow2): New prototype.
    * config/mips/mips.cc (mips_rtx_costs): Don't force power of two
    constants into registers during modulo operations. Modify the cost
    modulo by power of two.
    (mips_expand_mod_pow2): New expander for modulo by power of two of
    64-bit values on 32-bit targets.
    * config/mips/mips.md (define_expand "<u>mod<mode>3"): Separate
    define_expand for "<u>mod<mode>3" from the define_insn and call the
    new expander for 64-bit values on 32-bit targets.
    (define_insn "*<u>mod<mode>3"): Add * to the pattern name to avoid
    clash with the define_expand pattern.

gcc/testsuite/

    * gcc.target/mips/mod-pow2.c: New test.

Cherry-picked e683ed1717b3f689c959c738a764174fdcdc7998
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic <mistojanovic@wavecomp.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/mips-protos.h            |   2 +
 gcc/config/mips/mips.cc                  | 144 ++++++++++++++++++-
 gcc/config/mips/mips.md                  |  31 +++-
 gcc/testsuite/gcc.target/mips/mod-pow2.c | 176 +++++++++++++++++++++++
 4 files changed, 350 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/mod-pow2.c
  

Patch

diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 435b2e7e179..5782cd5d1b7 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -401,4 +401,6 @@  extern void mips_split_msa_subreg_move (rtx, rtx);
 
 extern const char *mips_output_compare (const char *fpcmp, const char *fcond,
 			const char *fmt, const char *fpcc_mode, bool swap);
+extern bool mips_expand_mod_pow2 (rtx, rtx, rtx);
+
 #endif /* ! GCC_MIPS_PROTOS_H */
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index d23c30a43be..19d428e6ed6 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -5292,6 +5292,19 @@  mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	  return true;
 	}
 
+      /* Don't force the constant into register during modulo by power of two.
+	 This is needed so that the MIPS-specific modulo pattern will be
+	 selected during the expand phase.  */
+      if (!TARGET_64BIT
+	  && !TARGET_MIPS16
+	  && outer_code == MOD
+	  && mode == DImode
+	  && (exact_log2 (INTVAL (x)) > 0))
+	{
+	  *total = 0;
+	  return true;
+	}
+
       if (TARGET_MIPS16)
 	{
 	  cost = mips16_constant_cost (outer_code, INTVAL (x));
@@ -5615,8 +5628,20 @@  mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	}
       /* Fall through.  */
 
-    case SQRT:
     case MOD:
+      /* Modulo by power of two produces (at most) nine instructions.  */
+      if (CONST_INT_P (XEXP (x, 1))
+	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+	  && !TARGET_64BIT
+	  && !TARGET_MIPS16
+	  && mode == DImode)
+	{
+	  *total = COSTS_N_INSNS (9);
+	  return true;
+	}
+      /* Fall through.  */
+
+    case SQRT:
       if (float_mode_p)
 	{
 	  *total = mips_fp_div_cost (mode);
@@ -26107,6 +26132,123 @@  mips_prune_insertions_deletions (struct edge_list* edge_list,
   sbitmap_free (ifcv_blocks);
   sbitmap_free (insertions);
 }
+
+/* Expand modulo by power of two of DImode values on 32-bit targets.  */
+
+bool
+mips_expand_mod_pow2 (rtx target, rtx op1, rtx op2)
+{
+  HOST_WIDE_INT val, reg_width;
+  rtx out_low, out_high;
+  rtx in_low, in_high;
+  rtx at, temp;
+  rtx comp, cond_operands[4];
+
+  gcc_assert (GET_CODE (op2) == CONST_INT);
+
+  val = INTVAL (op2);
+
+  int logd = exact_log2 (val);
+
+  if (logd <= 0)
+    return false;
+
+  /* Extract lower and upper words of DImode source and destination.  */
+  out_low = mips_subword (target, 0);
+  out_high = mips_subword (target, 1);
+
+  in_low = mips_subword (op1, 0);
+  in_high = mips_subword (op1, 1);
+
+  at = gen_reg_rtx (SImode);
+  temp = gen_reg_rtx (SImode);
+
+  reg_width = GET_MODE_BITSIZE (SImode);
+
+  /* Divisor equals 2.  */
+  if (logd == 1)
+    {
+      mips_emit_binary (AND, at, in_low, const1_rtx);
+      mips_emit_binary (ASHIFT, temp, in_low,
+			gen_int_mode (reg_width - 1, SImode));
+      mips_emit_binary (AND, temp, in_high, temp);
+      mips_emit_binary (ASHIFTRT, out_high, temp,
+			gen_int_mode (reg_width - 1, SImode));
+      mips_emit_binary (IOR, out_low, out_high, at);
+
+      return true;
+    }
+  /* Divisor fits into 32 bits.  */
+  else if (logd <= reg_width)
+    {
+      mips_emit_binary (ASHIFTRT, at, in_high,
+			gen_int_mode (reg_width - 1, SImode));
+
+      if (logd == reg_width)
+	mips_emit_move (out_low, in_low);
+      else if (ISA_HAS_EXT_INS || logd <= 16)
+	mips_emit_binary (AND, out_low, in_low,
+			  gen_int_mode ((1 << logd) - 1, SImode));
+      else
+	{
+	  mips_emit_binary (ASHIFT, out_low, in_low,
+			    gen_int_mode (reg_width - logd, SImode));
+	  mips_emit_binary (LSHIFTRT, out_low, out_low,
+			    gen_int_mode (reg_width - logd, SImode));
+	}
+
+      comp = gen_rtx_EQ (VOIDmode, out_low, const0_rtx);
+      cond_operands[0] = at;
+      cond_operands[1] = comp;
+      cond_operands[2] = const0_rtx;
+      cond_operands[3] = at;
+
+      mips_expand_conditional_move (cond_operands);
+      mips_emit_move (out_high, at);
+      if (logd < reg_width)
+	{
+	  mips_emit_binary (ASHIFT, at, at, gen_int_mode (logd, SImode));
+	  mips_emit_binary (IOR, out_low, out_low, at);
+	}
+
+      return true;
+    }
+  /* Divisor is wider than 32 bits.  */
+  else if (logd <= 2 * reg_width - 1)
+    {
+      mips_emit_binary (ASHIFTRT, at, in_high,
+			gen_int_mode (reg_width - 1, SImode));
+      mips_emit_move (temp, in_low);
+
+      if (ISA_HAS_EXT_INS || logd <= 16)
+	mips_emit_binary (AND, out_high, in_high,
+			  gen_int_mode ((1 << (logd - reg_width)) - 1, SImode));
+      else
+	{
+	  mips_emit_binary (ASHIFT, out_high, in_high,
+			    gen_int_mode (2 * reg_width - logd, SImode));
+	  mips_emit_binary (LSHIFTRT, out_high, out_high,
+			    gen_int_mode (2 * reg_width - logd, SImode));
+	}
+      mips_emit_move (out_low, temp);
+      mips_emit_binary (IOR, temp, temp, out_high);
+
+      comp = gen_rtx_EQ (VOIDmode, temp, const0_rtx);
+      cond_operands[0] = at;
+      cond_operands[1] = comp;
+      cond_operands[2] = const0_rtx;
+      cond_operands[3] = at;
+
+      mips_expand_conditional_move (cond_operands);
+      mips_emit_binary (ASHIFT, at, at,
+			gen_int_mode (logd - reg_width, SImode));
+      mips_emit_binary (IOR, out_high, out_high, at);
+
+      return true;
+    }
+
+  return false;
+}
 
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 1243f20f344..0c93ce17ae4 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -821,6 +821,10 @@ 
 ;; modes.
 (define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")])
 
+;; This mode iterator is used in the same way as GPR, except that it allows
+;; 64-bit patterns on 32-bit targets.
+(define_mode_iterator GPR3 [SI DI])
+
 (define_mode_iterator MOVEP1 [SI SF])
 (define_mode_iterator MOVEP2 [SI SF])
 (define_mode_iterator JOIN_MODE [HI
@@ -3098,7 +3102,31 @@ 
   [(set_attr "type" "idiv3")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<u>mod<mode>3"
+(define_expand "<u>mod<mode>3"
+  [(set (match_operand:GPR3 0 "register_operand")
+    (any_mod:GPR3 (match_operand:GPR3 1 "register_operand")
+	     (match_operand:GPR3 2 "nonmemory_operand")))]
+  "(TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>DIV)
+   || (!TARGET_64BIT && !TARGET_MIPS16 && <MODE>mode == DImode)"
+{
+  /* In case of DImode signed modulo with power of 2 on 32-bit targets, call
+     the custom expander.  */
+  if (<CODE> == MOD
+      && !TARGET_64BIT
+      && !TARGET_MIPS16
+      && GET_CODE (operands[2]) == CONST_INT)
+    {
+      if (mips_expand_mod_pow2 (operands[0], operands[1], operands[2]))
+	DONE;
+      FAIL;
+    }
+  /* Else, forbid the expand of DImode modulo on 32-bit targets.  */
+  else if ((!TARGET_64BIT && <MODE>mode == DImode)
+	   || GET_CODE (operands[2]) == CONST_INT)
+    FAIL;
+})
+
+(define_insn "*<u>mod<mode>3"
   [(set (match_operand:GPR 0 "register_operand" "=&d")
 	(any_mod:GPR (match_operand:GPR 1 "register_operand" "d")
 		     (match_operand:GPR 2 "register_operand" "d")))]
@@ -3113,7 +3141,6 @@ 
   }
   [(set_attr "type" "idiv3")
    (set_attr "mode" "<MODE>")])
-
 ;;
 ;;  ....................
 ;;
diff --git a/gcc/testsuite/gcc.target/mips/mod-pow2.c b/gcc/testsuite/gcc.target/mips/mod-pow2.c
new file mode 100644
index 00000000000..4fee299a777
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/mod-pow2.c
@@ -0,0 +1,176 @@ 
+/* { dg-do run } */
+/* { dg-options "-mabi=32 (REQUIRES_STDLIB)" } */
+
+/* Test modulo by power of two expansion.  */
+#include <stdlib.h>
+
+#define ARGPASTE(X, Y) X ## Y
+#define ARGPASTE2(X) ARGPASTE (mod_, X)
+
+/* Create 64 distinct functions which calculate the modulo by power of two  */
+#define MOD(LOG) \
+  __attribute__((noinline)) long long ARGPASTE2 (__COUNTER__) (long long x) \
+  { return x % (1LL << (LOG)); }
+#define MOD_1(x) \
+  MOD (x) \
+  MOD (x + 1)
+#define MOD_2(x) \
+  MOD_1 (x) \
+  MOD_1 (x + 2)
+#define MOD_4(x) \
+  MOD_2 (x) \
+  MOD_2 (x + 4)
+#define MOD_8(x) \
+  MOD_4 (x) \
+  MOD_4 (x + 8)
+#define MOD_16(x) \
+  MOD_8 (x) \
+  MOD_8 (x + 16)
+#define MOD_32(x) \
+  MOD_16 (x) \
+  MOD_16 (x + 32)
+
+MOD_32 (0)
+
+#define TEST_NUM_POS 0x7fffffffffffffff
+#define TEST_NUM_NEG 0xffffffffffffffff
+
+/* Test the modulo by power of two with a positive number.  */
+#define TEST_MOD_POS(LOG) \
+  ARGPASTE2 (LOG) (TEST_NUM_POS) == (TEST_NUM_POS & ((1LL << (LOG)) - 1)) \
+  ? (void) 0 : abort ()
+
+/* Test the modulo by power of two with a negative number.  */
+#define TEST_MOD_NEG(LOG) \
+  ARGPASTE2 (LOG) (TEST_NUM_NEG) == -1LL ? (void) 0 : abort()
+
+int main() {
+    TEST_MOD_POS(1);
+    TEST_MOD_POS(2);
+    TEST_MOD_POS(3);
+    TEST_MOD_POS(4);
+    TEST_MOD_POS(5);
+    TEST_MOD_POS(6);
+    TEST_MOD_POS(7);
+    TEST_MOD_POS(8);
+    TEST_MOD_POS(9);
+    TEST_MOD_POS(10);
+    TEST_MOD_POS(11);
+    TEST_MOD_POS(12);
+    TEST_MOD_POS(13);
+    TEST_MOD_POS(14);
+    TEST_MOD_POS(15);
+    TEST_MOD_POS(16);
+    TEST_MOD_POS(17);
+    TEST_MOD_POS(18);
+    TEST_MOD_POS(19);
+    TEST_MOD_POS(20);
+    TEST_MOD_POS(21);
+    TEST_MOD_POS(22);
+    TEST_MOD_POS(23);
+    TEST_MOD_POS(24);
+    TEST_MOD_POS(25);
+    TEST_MOD_POS(26);
+    TEST_MOD_POS(27);
+    TEST_MOD_POS(28);
+    TEST_MOD_POS(29);
+    TEST_MOD_POS(30);
+    TEST_MOD_POS(31);
+    TEST_MOD_POS(32);
+    TEST_MOD_POS(33);
+    TEST_MOD_POS(34);
+    TEST_MOD_POS(35);
+    TEST_MOD_POS(36);
+    TEST_MOD_POS(37);
+    TEST_MOD_POS(38);
+    TEST_MOD_POS(39);
+    TEST_MOD_POS(40);
+    TEST_MOD_POS(41);
+    TEST_MOD_POS(42);
+    TEST_MOD_POS(43);
+    TEST_MOD_POS(44);
+    TEST_MOD_POS(45);
+    TEST_MOD_POS(46);
+    TEST_MOD_POS(47);
+    TEST_MOD_POS(48);
+    TEST_MOD_POS(49);
+    TEST_MOD_POS(50);
+    TEST_MOD_POS(51);
+    TEST_MOD_POS(52);
+    TEST_MOD_POS(53);
+    TEST_MOD_POS(54);
+    TEST_MOD_POS(55);
+    TEST_MOD_POS(56);
+    TEST_MOD_POS(57);
+    TEST_MOD_POS(58);
+    TEST_MOD_POS(59);
+    TEST_MOD_POS(60);
+    TEST_MOD_POS(61);
+    TEST_MOD_POS(62);
+
+    TEST_MOD_NEG(1);
+    TEST_MOD_NEG(2);
+    TEST_MOD_NEG(3);
+    TEST_MOD_NEG(4);
+    TEST_MOD_NEG(5);
+    TEST_MOD_NEG(6);
+    TEST_MOD_NEG(7);
+    TEST_MOD_NEG(8);
+    TEST_MOD_NEG(9);
+    TEST_MOD_NEG(10);
+    TEST_MOD_NEG(11);
+    TEST_MOD_NEG(12);
+    TEST_MOD_NEG(13);
+    TEST_MOD_NEG(14);
+    TEST_MOD_NEG(15);
+    TEST_MOD_NEG(16);
+    TEST_MOD_NEG(17);
+    TEST_MOD_NEG(18);
+    TEST_MOD_NEG(19);
+    TEST_MOD_NEG(20);
+    TEST_MOD_NEG(21);
+    TEST_MOD_NEG(22);
+    TEST_MOD_NEG(23);
+    TEST_MOD_NEG(24);
+    TEST_MOD_NEG(25);
+    TEST_MOD_NEG(26);
+    TEST_MOD_NEG(27);
+    TEST_MOD_NEG(28);
+    TEST_MOD_NEG(29);
+    TEST_MOD_NEG(30);
+    TEST_MOD_NEG(31);
+    TEST_MOD_NEG(32);
+    TEST_MOD_NEG(33);
+    TEST_MOD_NEG(34);
+    TEST_MOD_NEG(35);
+    TEST_MOD_NEG(36);
+    TEST_MOD_NEG(37);
+    TEST_MOD_NEG(38);
+    TEST_MOD_NEG(39);
+    TEST_MOD_NEG(40);
+    TEST_MOD_NEG(41);
+    TEST_MOD_NEG(42);
+    TEST_MOD_NEG(43);
+    TEST_MOD_NEG(44);
+    TEST_MOD_NEG(45);
+    TEST_MOD_NEG(46);
+    TEST_MOD_NEG(47);
+    TEST_MOD_NEG(48);
+    TEST_MOD_NEG(49);
+    TEST_MOD_NEG(50);
+    TEST_MOD_NEG(51);
+    TEST_MOD_NEG(52);
+    TEST_MOD_NEG(53);
+    TEST_MOD_NEG(54);
+    TEST_MOD_NEG(55);
+    TEST_MOD_NEG(56);
+    TEST_MOD_NEG(57);
+    TEST_MOD_NEG(58);
+    TEST_MOD_NEG(59);
+    TEST_MOD_NEG(60);
+    TEST_MOD_NEG(61);
+    TEST_MOD_NEG(62);
+    TEST_MOD_NEG(63);
+
+    exit (0);
+}