x86: Track converted/skipped registers in STV

Message ID 20221031210618.695953-1-hjl.tools@gmail.com
State New
Headers
Series x86: Track converted/skipped registers in STV |

Commit Message

H.J. Lu Oct. 31, 2022, 9:06 p.m. UTC
  When converting integer computations into vector ones, we build a chain
from an integer definition instruction together with all dependent use
instructions.  The integer computations on the chain are converted to
vector ones if the total vector costs are lower than the integer ones.
Since the same register may appear in multiple chains, if it has been
converted or skipped in one chain, its instances in the other chains
must also be converted or skipped, regardless of whether the total
vector costs are lower than the integer ones.  Otherwise, we will get an
unexpected vector mode in integer instruction patterns.

To track skipped registers, we add a bitmap, skipped_regs, when converting
integer computations into vector ones.  When computing gain for vector
computations, we convert or skip a chain if any register on the chain has
been converted or skipped already.

Note: If two integer registers on the same chain have already been
handled differently, one converted and the other skipped, this will
lead to a compiler error, since we can't undo the conversion.

gcc/

	PR target/106933
	PR target/106959
	* config/i386/i386-features.cc (scalar_chain::skipped_regs): New.
	(scalar_chain::update_skipped_regs): Likewise.
	(scalar_chain::check_convert_gain): Likewise.
	(general_scalar_chain::compute_convert_gain): Return gain if
	check_convert_gain returns non-zero.  Call update_skipped_regs
	if a chain won't be converted.
	(timode_scalar_chain::compute_convert_gain): Likewise.
	(convert_scalars_to_vector): Initialize and release
	scalar_chain::skipped_regs before and after its use.
	* config/i386/i386-features.h (scalar_chain): Add
	skipped_regs, check_convert_gain and update_skipped_regs.

gcc/testsuite/

	* gcc.target/i386/pr106933.c: New test.
	* gcc.target/i386/pr106959.c: Likewise.
---
 gcc/config/i386/i386-features.cc         | 104 ++++++++++++++++++++++-
 gcc/config/i386/i386-features.h          |   5 ++
 gcc/testsuite/gcc.target/i386/pr106933.c |  17 ++++
 gcc/testsuite/gcc.target/i386/pr106959.c |  13 +++
 4 files changed, 137 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106933.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106959.c
  

Patch

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index fd212262f50..d9d63cf8d22 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -273,6 +273,8 @@  xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
 
 unsigned scalar_chain::max_id = 0;
 
+bitmap_head scalar_chain::skipped_regs;
+
 namespace {
 
 /* Initialize new chain.  */
@@ -477,6 +479,72 @@  scalar_chain::build (bitmap candidates, unsigned insn_uid)
   BITMAP_FREE (queue);
 }
 
+/* Add all scalar mode registers that are set by INSN and are not used
+   in both vector and scalar modes to the skipped register map.  */
+
+void
+scalar_chain::update_skipped_regs (rtx_insn *insn)
+{
+  for (df_ref def = DF_INSN_DEFS (insn);
+       def;
+       def = DF_REF_NEXT_LOC (def))
+    {
+      rtx reg = DF_REF_REG (def);
+      if (GET_MODE (reg) == smode
+	  && !bitmap_bit_p (defs_conv, REGNO (reg)))
+	bitmap_set_bit (&skipped_regs, REGNO (reg));
+    }
+}
+
+/* Check convert gain for INSN.  Return 1 if any registers, which are
+   set or used by INSN, have been converted to vector mode.  Return -1
+   if any registers set by INSN are skipped in other chains.  Return 0
+   otherwise.  */
+
+int
+scalar_chain::check_convert_gain (rtx_insn *insn)
+{
+  for (df_ref def = DF_INSN_DEFS (insn);
+       def;
+       def = DF_REF_NEXT_LOC (def))
+    {
+      rtx reg = DF_REF_REG (def);
+      if (GET_MODE (reg) == vmode)
+	{
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "  Gain 1 for converted register r%d\n",
+		     REGNO (reg));
+	  return 1;
+	}
+      else if (bitmap_bit_p (&skipped_regs, REGNO (reg)))
+	{
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "  Gain -1 for skipped register r%d\n",
+		     REGNO (reg));
+	  return -1;
+	}
+    }
+
+  for (df_ref ref = DF_INSN_USES (insn);
+       ref;
+       ref = DF_REF_NEXT_LOC (ref))
+    {
+      rtx reg = DF_REF_REG (ref);
+      if (GET_MODE (reg) == vmode)
+	{
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "  Gain 1 for converted register r%d\n",
+		     REGNO (reg));
+	  return 1;
+	}
+    }
+
+  return 0;
+}
+
 /* Return a cost of building a vector costant
    instead of using a scalar one.  */
 
@@ -515,10 +583,15 @@  general_scalar_chain::compute_convert_gain ()
   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     {
       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
+      /* If check_convert_gain returns non-zero on any INSN, the chain
+	 must be converted or can't be converted since some registers
+	 have been converted or skipped in other chains.  */
+      int igain = check_convert_gain (insn);
+      if (igain)
+	return igain;
       rtx def_set = single_set (insn);
       rtx src = SET_SRC (def_set);
       rtx dst = SET_DEST (def_set);
-      int igain = 0;
 
       if (REG_P (src) && REG_P (dst))
 	igain += 2 * m - ix86_cost->xmm_move;
@@ -656,6 +729,15 @@  general_scalar_chain::compute_convert_gain ()
 
   gain -= cost;
 
+  /* If this chain won't be converted, mark all scalar mode registers
+     in the chain as skipped.  */
+  if (gain < 0)
+    EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
+      {
+	rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
+	update_skipped_regs (insn);
+      }
+
   if (dump_file)
     fprintf (dump_file, "  Total gain: %d\n", gain);
 
@@ -1206,12 +1288,17 @@  timode_scalar_chain::compute_convert_gain ()
   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     {
       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
+      /* If check_convert_gain returns non-zero on any INSN, the chain
+	 must be converted or can't be converted since some registers
+	 have been converted or skipped in other chains.  */
+      int igain = check_convert_gain (insn);
+      if (igain)
+	return igain;
       rtx def_set = single_set (insn);
       rtx src = SET_SRC (def_set);
       rtx dst = SET_DEST (def_set);
       HOST_WIDE_INT op1val;
       int scost, vcost;
-      int igain = 0;
 
       switch (GET_CODE (src))
 	{
@@ -1412,6 +1499,15 @@  timode_scalar_chain::compute_convert_gain ()
       gain += igain;
     }
 
+  /* If this chain won't be converted, mark all scalar mode registers
+     in the chain as skipped.  */
+  if (gain < 0)
+    EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
+      {
+	rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
+	update_skipped_regs (insn);
+      }
+
   if (dump_file)
     fprintf (dump_file, "  Total gain: %d\n", gain);
 
@@ -2159,6 +2255,9 @@  convert_scalars_to_vector (bool timode_p)
   for (unsigned i = 0; i < 3; ++i)
     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
 
+  bitmap_initialize (&scalar_chain::skipped_regs,
+		     &bitmap_default_obstack);
+
   calculate_dominance_info (CDI_DOMINATORS);
   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
@@ -2235,6 +2334,7 @@  convert_scalars_to_vector (bool timode_p)
   if (dump_file)
     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
 
+  bitmap_release (&scalar_chain::skipped_regs);
   for (unsigned i = 0; i <= 2; ++i)
     bitmap_release (&candidates[i]);
   bitmap_obstack_release (NULL);
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index f898e67a108..4c8b00a7e83 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -132,6 +132,9 @@  class scalar_chain
 
   static unsigned max_id;
 
+  /* All registers that have been skipped in any chain.  */
+  static bitmap_head skipped_regs;
+
   /* Scalar mode.  */
   enum machine_mode smode;
   /* Vector mode.  */
@@ -154,6 +157,8 @@  class scalar_chain
   unsigned n_integer_to_sse;
 
   void build (bitmap candidates, unsigned insn_uid);
+  int check_convert_gain (rtx_insn *insn);
+  void update_skipped_regs (rtx_insn *insn);
   virtual int compute_convert_gain () = 0;
   int convert ();
 
diff --git a/gcc/testsuite/gcc.target/i386/pr106933.c b/gcc/testsuite/gcc.target/i386/pr106933.c
new file mode 100644
index 00000000000..a70eca1f0e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106933.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-msse4 -Os -w" } */
+
+__int128 n;
+
+__int128
+empty (void)
+{
+}
+
+int
+foo (void)
+{
+  n = empty ();
+
+  return n == 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr106959.c b/gcc/testsuite/gcc.target/i386/pr106959.c
new file mode 100644
index 00000000000..9994e040c5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106959.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -fpeel-loops" } */
+
+unsigned __int128 m;
+int n;
+
+__attribute__ ((simd)) void
+foo (int x)
+{
+  x = n ? n : (short int) x;
+  if (x)
+    m /= 2;
+}