[RFC/RFA,v2,11/12] Replace the original CRC loops with a faster CRC calculation.

Message ID CAE65F3Obo47HPHf53m3o36ATuTqHDsjEA9bFhoYOVNQnqPtVoA@mail.gmail.com
State New
Headers
Series None |

Commit Message

Mariam Arutunian Aug. 2, 2024, 4:14 p.m. UTC
  After the loop exit an internal function call (CRC, CRC_REV) is added,
and its result is assigned to the output CRC variable (the variable where
the calculated CRC is stored after the loop execution).
The removal of the loop is left to CFG cleanup and DCE.

  gcc/

    * gimple-crc-optimization.cc (get_data): New function.
    (optimize_crc_loop): Likewise.
    (build_polynomial_without_1): Likewise.
    (execute): Add optimize_crc_loop function call.

Signed-off-by: Mariam Arutunian <mariamarutunian@gmail.com>
  

Comments

Richard Biener Aug. 22, 2024, 9:18 a.m. UTC | #1
On Fri, Aug 2, 2024 at 6:15 PM Mariam Arutunian
<mariamarutunian@gmail.com> wrote:
>
> After the loop exit an internal function call (CRC, CRC_REV) is added,
> and its result is assigned to the output CRC variable (the variable where the calculated CRC is stored after the loop execution).
> The removal of the loop is left to CFG cleanup and DCE.
>
>   gcc/
>
>     * gimple-crc-optimization.cc (get_data): New function.
>     (optimize_crc_loop): Likewise.
>     (build_polynomial_without_1): Likewise.
>     (execute): Add optimize_crc_loop function call.

+  /* If we have the data, use it.  */
+  if (m_phi_for_data)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file,
+                "Data and CRC are xor-ed in the for loop.  Initializing data "
+                "with its value.\n");
+      tree data_arg = PHI_ARG_DEF (m_phi_for_data, 1);

how do you know statically that it's the second PHI arg def?  That
seems fragile.
I suppose you are looking for a latch definition?

+      if (TYPE_PRECISION (TREE_TYPE (data_arg)) == data_size)
+       return data_arg;
+      else
+       {
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           fprintf (dump_file,
+                    "Loop iteration number and data's size differ.\n");
+         return nullptr;

that seems to be oddly placed in the transform rather than the analysis phase?

+  gcc_assert (m_phi_for_crc);
+
+  tree crc_arg = PHI_ARG_DEF (m_phi_for_crc, 1);

same issue here - I think you want to use PHI_ARG_DEF_FROM_EDGE?

+  /* We don't support the case where data is larger than the CRC.  */
+  if (TYPE_PRECISION (TREE_TYPE (crc_arg))
+      < TYPE_PRECISION (TREE_TYPE (data_arg)))
+    return false;

you can check that before building any stuff?

+  gimple_stmt_iterator si = gsi_start_bb (output_crc->bb);
+  gsi_insert_before (&si, call, GSI_SAME_STMT);

you want gsi_after_labels (gimple_bb (output_crc))

+  /* Remove phi statement, which was holding CRC result.  */
+  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (output_crc);
+  gsi_remove (&tmp_gsi, true);

Please use remove_phi_node (&tmp_gsi, false).

otherwise looks fine.

Richard.

> Signed-off-by: Mariam Arutunian <mariamarutunian@gmail.com>
  

Patch

diff --git a/gcc/gimple-crc-optimization.cc b/gcc/gimple-crc-optimization.cc
index bd84d553a60..4de383419a0 100644
--- a/gcc/gimple-crc-optimization.cc
+++ b/gcc/gimple-crc-optimization.cc
@@ -216,6 +216,24 @@  class crc_optimization {
   /* Returns phi statement which may hold the calculated CRC.  */
   gphi *get_output_phi ();
 
+  /* Returns data argument to pass to the CRC IFN.
+     If there is data from the code - use it (this is the case,
+     when data isn't xor-ed with CRC before the loop).
+     Otherwise, generate a new variable for the data with 0 value
+     (the case, when data is xor-ed with CRC before the loop).
+     For the CRC calculation, it doesn't matter CRC is calculated for the
+     (CRC^data, 0) or (CRC, data).  */
+  tree get_data ();
+
+  /* Attempts to optimize a CRC calculation loop by replacing it with a call to
+     an internal function (IFN_CRC or IFN_CRC_REV).
+     Returns true if replacement is succeeded, otherwise false.  */
+  bool optimize_crc_loop (value *polynomial, gphi *output_crc);
+
+  /* Build tree for the POLYNOMIAL (from its binary representation)
+     without the leading 1.  */
+  tree build_polynomial_without_1 (tree crc_arg, value *polynomial);
+
  public:
   unsigned int execute (function *fun);
 };
@@ -1094,6 +1112,142 @@  crc_optimization::get_output_phi ()
   return nullptr;
 }
 
+/* Build tree for the POLYNOMIAL (from its binary representation)
+   without the leading 1.  */
+
+tree
+crc_optimization::build_polynomial_without_1 (tree crc_arg, value *polynomial)
+{
+  unsigned HOST_WIDE_INT cst_polynomial = 0;
+  for (unsigned i = 0; i < (*polynomial).length (); i++)
+    {
+      value_bit *const_bit;
+      if (m_is_bit_forward)
+	const_bit = (*polynomial)[(*polynomial).length () - 1 - i];
+      else
+	const_bit = (*polynomial)[i];
+      cst_polynomial <<= 1;
+      cst_polynomial ^= (as_a<bit *> (const_bit))->get_val () ? 1 : 0;
+    }
+  return build_int_cstu (TREE_TYPE (crc_arg), cst_polynomial);
+}
+
+/* Returns data argument to pass to the CRC IFN.
+   If there is data from the code - use it (this is the case,
+   when data isn't xor-ed with CRC before the loop).
+   Otherwise, generate a new variable for the data with 0 value
+   (the case, when data is xor-ed with CRC before the loop).
+   For the CRC calculation, it doesn't matter CRC is calculated for the
+   (CRC^data, 0) or (CRC, data).  */
+
+tree
+crc_optimization::get_data ()
+{
+  unsigned HOST_WIDE_INT
+    data_size = tree_to_uhwi (m_crc_loop->nb_iterations) + 1;
+
+  /* If we have the data, use it.  */
+  if (m_phi_for_data)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file,
+		 "Data and CRC are xor-ed in the for loop.  Initializing data "
+		 "with its value.\n");
+      tree data_arg = PHI_ARG_DEF (m_phi_for_data, 1);
+      if (TYPE_PRECISION (TREE_TYPE (data_arg)) == data_size)
+	return data_arg;
+      else
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file,
+		     "Loop iteration number and data's size differ.\n");
+	  return nullptr;
+	}
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file,
+	     "Data and CRC are xor-ed before for loop.  Initializing data "
+	     "with 0.\n");
+  /* Create a new variable for the data.
+     Determine the data's size with the loop iteration count.  */
+  tree type = build_nonstandard_integer_type (data_size, 1);
+  return build_int_cstu (type, 0);
+}
+
+/* Attempts to optimize a CRC calculation loop by replacing it with a call to
+   an internal function (IFN_CRC or IFN_CRC_REV).
+   Returns true if replacement is succeeded, otherwise false.  */
+
+bool
+crc_optimization::optimize_crc_loop (value *polynomial, gphi *output_crc)
+{
+  if (!output_crc)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Couldn't determine output CRC.\n");
+      return false;
+    }
+
+  gcc_assert (m_phi_for_crc);
+
+  tree crc_arg = PHI_ARG_DEF (m_phi_for_crc, 1);
+  if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (crc_arg))).to_constant ()
+      > GET_MODE_SIZE (word_mode))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "word_mode is less than CRC mode.\n");
+      return false;
+    }
+
+  tree data_arg = get_data ();
+  tree polynomial_arg = build_polynomial_without_1 (crc_arg, polynomial);
+
+  if (!crc_arg || !data_arg || !polynomial_arg)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "crc_arg, data_arg or polynomial_arg is null.\n");
+      return false;
+    }
+
+  /* We don't support the case where data is larger than the CRC.  */
+  if (TYPE_PRECISION (TREE_TYPE (crc_arg))
+      < TYPE_PRECISION (TREE_TYPE (data_arg)))
+    return false;
+
+  internal_fn ifn;
+  if (m_is_bit_forward)
+    ifn = IFN_CRC;
+  else
+    ifn = IFN_CRC_REV;
+
+  tree phi_result = gimple_phi_result (output_crc);
+  location_t loc;
+  loc = EXPR_LOCATION (phi_result);
+
+  /* Add IFN call and write the return value in the phi_result.  */
+  gcall *call
+      = gimple_build_call_internal (ifn, 3,
+				    crc_arg,
+				    data_arg,
+				    polynomial_arg);
+
+  gimple_call_set_lhs (call, phi_result);
+  gimple_set_location (call, loc);
+  gimple_stmt_iterator si = gsi_start_bb (output_crc->bb);
+  gsi_insert_before (&si, call, GSI_SAME_STMT);
+
+  /* Remove phi statement, which was holding CRC result.  */
+  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (output_crc);
+  gsi_remove (&tmp_gsi, true);
+
+  /* Alter the exit condition of the loop to always exit.  */
+  gcond* loop_exit_cond = get_loop_exit_condition (m_crc_loop);
+  gimple_cond_make_false (loop_exit_cond);
+  update_stmt (loop_exit_cond);
+  return true;
+}
+
 unsigned int
 crc_optimization::execute (function *fun)
 {
@@ -1139,6 +1293,12 @@  crc_optimization::execute (function *fun)
 	  if (dump_file)
 	    fprintf (dump_file, "The loop with %d header BB index "
 				"calculates CRC!\n", m_crc_loop->header->index);
+
+	  if (!optimize_crc_loop (polynom_value, output_crc))
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "Couldn't generate faster CRC code.\n");
+	    }
 	}
     }
   return 0;
-- 
2.25.1