[middle-end] Only call targetm.truly_noop_truncation for truncations.

Message ID 009701da39a3$7b406080$71c12180$@nextmovesoftware.com
State New
Headers
Series [middle-end] Only call targetm.truly_noop_truncation for truncations. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Testing failed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Testing failed

Commit Message

Roger Sayle Dec. 28, 2023, 3:35 p.m. UTC
  The truly_noop_truncation target hook is documented, in target.def, as
"true if it is safe to convert a value of inprec bits to one of outprec
bits (where outprec is smaller than inprec) by merely operating on it
as if it had only outprec bits", i.e. the middle-end can use a SUBREG
instead of a TRUNCATE.

What is perhaps a little ambiguous in the above description is whether it
is the caller or the callee that is responsible for ensuring (or checking)
that "outprec < inprec".  The name TRULY_NOOP_TRUNCATION_P, like
SUBREG_PROMOTED_P, is prone to being read as a predicate that confirms that
something is a no-op truncation or a promoted subreg, when in fact the
caller must first confirm that this is a truncation/subreg, and only then
call the "classification" macro.

Alas, making the following minor tweak (for testing) to the i386 backend:

/* Testing-only implementation of the truly_noop_truncation hook: asserts
   that OUTPREC is less than INPREC, i.e. that the caller really is asking
   about a truncation, and then reports the truncation as a no-op.  */
static bool
ix86_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
{
  gcc_assert (outprec < inprec);
  return true;
}

#undef TARGET_TRULY_NOOP_TRUNCATION
#define TARGET_TRULY_NOOP_TRUNCATION ix86_truly_noop_truncation

reveals that there are numerous callers in the middle-end that rely on the
default behaviour of silently returning true for any (invalid) input.
These are fixed below.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2023-12-28  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * combine.cc (make_extraction): Confirm that OUTPREC is less than
        INPREC before calling TRULY_NOOP_TRUNCATION_MODES_P.
        * expmed.cc (store_bit_field_using_insv): Likewise.
        (extract_bit_field_using_extv): Likewise.
        (extract_bit_field_as_subreg): Likewise.
        * optabs-query.cc (get_best_extraction_insn): Likewise.
        * optabs.cc (expand_parity): Likewise.
        * rtlhooks.cc (gen_lowpart_general): Likewise.
        * simplify-rtx.cc (simplify_truncation): Disallow truncations
        to the same precision.
        (simplify_unary_operation_1) <case TRUNCATE>: Move optimization
        of truncations to the same mode earlier.


Thanks in advance,
Roger
--
  

Patch

diff --git a/gcc/combine.cc b/gcc/combine.cc
index f2c64a9..5aa2f57 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -7613,7 +7613,8 @@  make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
 	   && (pos == 0 || REG_P (inner))
 	   && (inner_mode == tmode
 	       || !REG_P (inner)
-	       || TRULY_NOOP_TRUNCATION_MODES_P (tmode, inner_mode)
+	       || (known_lt (GET_MODE_SIZE (tmode), GET_MODE_SIZE (inner_mode))
+		   && TRULY_NOOP_TRUNCATION_MODES_P (tmode, inner_mode))
 	       || reg_truncated_to_mode (tmode, inner))
 	   && (! in_dest
 	       || (REG_P (inner)
@@ -7856,6 +7857,8 @@  make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
       /* On the LHS, don't create paradoxical subregs implicitely truncating
 	 the register unless TARGET_TRULY_NOOP_TRUNCATION.  */
       if (in_dest
+	  && known_lt (GET_MODE_SIZE (GET_MODE (inner)),
+		       GET_MODE_SIZE (wanted_inner_mode))
 	  && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (inner),
 					     wanted_inner_mode))
 	return NULL_RTX;
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 0bba93f..8940d47 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26707,6 +26707,16 @@  ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
 #endif /* #if CHECKING_P */
 
+static bool
+ix86_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
+{
+  gcc_assert (outprec < inprec);
+  return true;
+}
+
+#undef TARGET_TRULY_NOOP_TRUNCATION
+#define TARGET_TRULY_NOOP_TRUNCATION ix86_truly_noop_truncation
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 05331dd..6398bf9 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -651,6 +651,7 @@  store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
      X) 0)) is (reg:N X).  */
   if (GET_CODE (xop0) == SUBREG
       && REG_P (SUBREG_REG (xop0))
+      && paradoxical_subreg_p (xop0)
       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 					 op_mode))
     {
@@ -1585,7 +1586,11 @@  extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
 	 mode.  Instead, create a temporary and use convert_move to set
 	 the target.  */
       if (REG_P (target)
-	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
+	  && (known_lt (GET_MODE_SIZE (GET_MODE (target)),
+			GET_MODE_SIZE (ext_mode))
+	      ? TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
+	      : known_eq (GET_MODE_SIZE (GET_MODE (target)),
+			  GET_MODE_SIZE (ext_mode)))
 	  && (temp = gen_lowpart_if_possible (ext_mode, target)))
 	{
 	  target = temp;
@@ -1626,7 +1631,9 @@  extract_bit_field_as_subreg (machine_mode mode, rtx op0,
   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
       && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
-      && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
+      && (known_lt (GET_MODE_SIZE (mode), GET_MODE_SIZE (op0_mode))
+	  ? TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode)
+	  : known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (op0_mode))))
     return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
   return NULL_RTX;
 }
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 947ccef..f33253f 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -213,7 +213,7 @@  get_best_extraction_insn (extraction_insn *insn,
 	  FOR_EACH_MODE_FROM (mode_iter, mode)
 	    {
 	      mode = mode_iter.require ();
-	      if (maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (field_mode))
+	      if (maybe_ge (GET_MODE_SIZE (mode), GET_MODE_SIZE (field_mode))
 		  || TRULY_NOOP_TRUNCATION_MODES_P (insn->field_mode,
 						    field_mode))
 		break;
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 6a34276..fad0d59 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -2954,7 +2954,11 @@  expand_parity (scalar_int_mode mode, rtx op0, rtx target)
 	  if (temp)
 	    {
 	      if (mclass != MODE_INT
-		  || !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode))
+		  || (known_lt (GET_MODE_SIZE (mode),
+				GET_MODE_SIZE (wider_mode))
+		      ? !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode)
+		      : maybe_ne (GET_MODE_SIZE (mode),
+				  GET_MODE_SIZE (wider_mode))))
 		return convert_to_mode (mode, temp, 0);
 	      else
 		return gen_lowpart (mode, temp);
diff --git a/gcc/rtlhooks.cc b/gcc/rtlhooks.cc
index 989d3c9..9a53ab0 100644
--- a/gcc/rtlhooks.cc
+++ b/gcc/rtlhooks.cc
@@ -66,7 +66,9 @@  gen_lowpart_general (machine_mode mode, rtx x)
       scalar_int_mode xmode;
       if (is_a <scalar_int_mode> (GET_MODE (x), &xmode)
 	  && GET_MODE_SIZE (xmode) <= UNITS_PER_WORD
-	  && TRULY_NOOP_TRUNCATION_MODES_P (mode, xmode)
+	  && (known_lt (GET_MODE_SIZE (mode), GET_MODE_SIZE (xmode))
+	      ? TRULY_NOOP_TRUNCATION_MODES_P (mode, xmode)
+	      : known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (xmode)))
 	  && !reload_completed)
 	return gen_lowpart_general (mode, force_reg (xmode, x));
 
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index f3745d8..27518f5 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -617,7 +617,7 @@  simplify_context::simplify_truncation (machine_mode mode, rtx op,
   unsigned int op_precision = GET_MODE_UNIT_PRECISION (op_mode);
   scalar_int_mode int_mode, int_op_mode, subreg_mode;
 
-  gcc_assert (precision <= op_precision);
+  gcc_assert (precision < op_precision);
 
   /* Optimize truncations of zero and sign extended values.  */
   if (GET_CODE (op) == ZERO_EXTEND
@@ -1207,6 +1207,10 @@  simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
       break;
 
     case TRUNCATE:
+      /* Check for useless truncation.  */
+      if (GET_MODE (op) == mode)
+	return op;
+
       /* Don't optimize (lshiftrt (mult ...)) as it would interfere
 	 with the umulXi3_highpart patterns.  */
       if (GET_CODE (op) == LSHIFTRT
@@ -1271,9 +1275,6 @@  simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
 	    return temp;
 	}
 
-      /* Check for useless truncation.  */
-      if (GET_MODE (op) == mode)
-	return op;
       break;
 
     case FLOAT_TRUNCATE: