[2/8,RFC] Add FP types for atomic builtin overload resolution

Message ID: 20240919131204.3865854-3-mmalcomson@nvidia.com
State: New
Series: Introduce floating point fetch_add builtins

Commit Message

Matthew Malcomson Sept. 19, 2024, 1:11 p.m. UTC
  From: Matthew Malcomson <mmalcomson@nvidia.com>

This adds a somewhat ugly mapping from each floating point type to the
builtin using that type.  I would like to find some code-sharing between
this, the function (in a later patch in this series) that finds the
relevant mode from a given builtin, and the general sync-builtins.def
file.  As yet I don't have a nice way to do that, though I haven't
looked very hard.
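
To make the magic offsets in the patch below concrete: they index
forward from the overloaded `_N' builtin to its typed specialisations,
just as `exact_log2 (n) + 1' does for the integer widths.  A rough
sketch of the assumed layout (the floating point enumerator names here
are invented for illustration; the real ordering comes from
sync-builtins.def):

    /* Sketch only -- assumes the ordering this series sets up in
       sync-builtins.def; the FP enumerator names are hypothetical.  */
    enum fetch_add_layout
    {
      FETCH_ADD_N,            /* Offset 0: overloaded entry point.  */
      FETCH_ADD_1,            /* Offset 1: exact_log2 (1) + 1.  */
      FETCH_ADD_2,            /* Offset 2.  */
      FETCH_ADD_4,            /* Offset 3.  */
      FETCH_ADD_8,            /* Offset 4.  */
      FETCH_ADD_16,           /* Offset 5.  */
      FETCH_ADD_FLOAT,        /* Offset 6: float.  */
      FETCH_ADD_DOUBLE,       /* Offset 7: double.  */
      FETCH_ADD_LONG_DOUBLE,  /* Offset 8: long double.  */
      /* Offsets 9-15: bfloat16, _Float16, _Float32, _Float64,
         _Float128, _Float32x, _Float64x.  */
    };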

Other than that, it seems we can cleanly emit the functions that we need.

N.b. we match which function to use based on the MODE of the type for
two reasons:
1) We can't match directly on the type, as otherwise `typedef float x`
   would mean that `x` could no longer be used with that intrinsic
   (see the example after this list).
2) The MODE (i.e. the type's ABI) is what we need to distinguish
   between when deciding which fundamental operation to apply.
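
As an illustration of point 1, user code like the following (not part
of this patch) is what the series is intended to accept:

    /* Resolution keys off TYPE_MODE, so a typedef of float still
       selects the SFmode specialisation.  */
    typedef float real32;

    float
    bump (real32 *p)
    {
      return __atomic_fetch_add (p, 1.0f, __ATOMIC_RELAXED);
    }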

Signed-off-by: Matthew Malcomson <mmalcomson@nvidia.com>
---
 gcc/c-family/c-common.cc | 88 ++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 18 deletions(-)
  

Patch

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index e7e371fd26f..c0a2b136d67 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -7360,13 +7360,15 @@  speculation_safe_value_resolve_return (tree first_param, tree result)
 
 static int
 sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
-		   bool orig_format)
+		   bool orig_format,
+		   int *fp_specialisation_offset)
 {
   /* Type of the argument.  */
   tree argtype;
   /* Type the argument points to.  */
   tree type;
   int size;
+  bool valid_float = false;
 
   if (vec_safe_is_empty (params))
     {
@@ -7385,7 +7387,8 @@  sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
     goto incompatible;
 
   type = TREE_TYPE (type);
-  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
+  valid_float = fp_specialisation_offset && fetch && SCALAR_FLOAT_TYPE_P (type);
+  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type) && !valid_float)
     goto incompatible;
 
   if (!COMPLETE_TYPE_P (type))
@@ -7402,6 +7405,40 @@  sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
       && !targetm.scalar_mode_supported_p (TImode))
     return -1;
 
+  if (valid_float)
+    {
+      tree fp_type = type;
+      /* TODO Want a better reverse-mapping between an argument type and
+         the builtin enum.  */
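+      /* The offsets below index forward from the overloaded _N builtin
+         to its floating point specialisations; the integer widths sit
+         at offsets 1-5 via exact_log2 (size) + 1.  */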
+      struct type_to_offset { tree type; size_t offset; };
+      static const struct type_to_offset fp_type_mappings[] = {
+        { float_type_node, 6 },
+        { double_type_node, 7 },
+        { long_double_type_node, 8 },
+        { bfloat16_type_node ? bfloat16_type_node : error_mark_node, 9 },
+        { float16_type_node ? float16_type_node : error_mark_node, 10 },
+        { float32_type_node ? float32_type_node : error_mark_node, 11 },
+        { float64_type_node ? float64_type_node : error_mark_node, 12 },
+        { float128_type_node ? float128_type_node : error_mark_node, 13 },
+        { float32x_type_node ? float32x_type_node : error_mark_node, 14 },
+        { float64x_type_node ? float64x_type_node : error_mark_node, 15 }
+      };
+      size_t offset = 0;
+      for (size_t i = 0; i < ARRAY_SIZE (fp_type_mappings); ++i)
+        if (TYPE_MODE (fp_type) == TYPE_MODE (fp_type_mappings[i].type))
+          {
+            offset = fp_type_mappings[i].offset;
+            break;
+          }
+      if (offset == 0)
+        goto incompatible;
+      *fp_specialisation_offset = offset;
+      return -1;
+    }
+
   if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
     return size;
 
@@ -7462,9 +7499,10 @@  sync_resolve_params (location_t loc, tree orig_function, tree function,
 	 arguments (e.g. EXPECTED argument of __atomic_compare_exchange_n),
 	 bool arguments (e.g. WEAK argument) or signed int arguments (memmodel
 	 kinds).  */
-      if (TREE_CODE (arg_type) == INTEGER_TYPE && TYPE_UNSIGNED (arg_type))
+      if ((TREE_CODE (arg_type) == INTEGER_TYPE && TYPE_UNSIGNED (arg_type))
+          || SCALAR_FLOAT_TYPE_P (arg_type))
 	{
 	  /* Ideally for the first conversion we'd use convert_for_assignment
 	     so that we get warnings for anything that doesn't match the pointer
 	     type.  This isn't portable across the C and C++ front ends atm.  */
 	  val = (*params)[parmnum];
@@ -8256,7 +8294,6 @@  atomic_bitint_fetch_using_cas_loop (location_t loc,
 		 NULL_TREE);
 }
 
-
 /* Some builtin functions are placeholders for other expressions.  This
    function should be called immediately after parsing the call expression
    before surrounding code has committed to the type of the expression.
@@ -8277,6 +8314,9 @@  resolve_overloaded_builtin (location_t loc, tree function,
      and so must be rejected.  */
   bool fetch_op = true;
   bool orig_format = true;
+  /* Whether this function is one of the builtins that have floating
+     point specialisations.  */
+  bool fetch_maybe_float = false;
   tree new_return = NULL_TREE;
 
   switch (DECL_BUILT_IN_CLASS (function))
@@ -8406,12 +8446,14 @@  resolve_overloaded_builtin (location_t loc, tree function,
       /* FALLTHRU */
     case BUILT_IN_ATOMIC_ADD_FETCH_N:
     case BUILT_IN_ATOMIC_SUB_FETCH_N:
+    case BUILT_IN_ATOMIC_FETCH_SUB_N:
+    case BUILT_IN_ATOMIC_FETCH_ADD_N:
+      fetch_maybe_float = true;
+      /* FALLTHRU */
     case BUILT_IN_ATOMIC_AND_FETCH_N:
     case BUILT_IN_ATOMIC_NAND_FETCH_N:
     case BUILT_IN_ATOMIC_XOR_FETCH_N:
     case BUILT_IN_ATOMIC_OR_FETCH_N:
-    case BUILT_IN_ATOMIC_FETCH_ADD_N:
-    case BUILT_IN_ATOMIC_FETCH_SUB_N:
     case BUILT_IN_ATOMIC_FETCH_AND_N:
     case BUILT_IN_ATOMIC_FETCH_NAND_N:
     case BUILT_IN_ATOMIC_FETCH_XOR_N:
@@ -8443,23 +8485,33 @@  resolve_overloaded_builtin (location_t loc, tree function,
 		      && orig_code != BUILT_IN_SYNC_LOCK_TEST_AND_SET_N
 		      && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N);
 
-	int n = sync_resolve_size (function, params, fetch_op, orig_format);
-	tree new_function, first_param, result;
+        int fp_specialisation_offset = 0;
+        int n = sync_resolve_size (function, params, fetch_op, orig_format,
+                                   fetch_maybe_float
+                                   ? &fp_specialisation_offset
+                                   : NULL);
+        tree new_function, first_param, result;
 	enum built_in_function fncode;
 
-	if (n == 0)
-	  return error_mark_node;
+        if (n == 0)
+          return error_mark_node;
 
-	if (n == -1)
+        /* If this is a floating point atomic operation with a direct
+           builtin, resolve to that floating point specialisation.
+           Otherwise, if the operation has no backend implementation for
+           this size (n == -1), inline it as a CAS loop.  */
+        if (fp_specialisation_offset != 0)
+          fncode = (enum built_in_function) ((int) orig_code
+                                             + fp_specialisation_offset);
+        else if (n == -1)
 	  return atomic_bitint_fetch_using_cas_loop (loc, orig_code,
 						     function, params);
+        else
+          fncode = (enum built_in_function) ((int) orig_code
+                                             + exact_log2 (n) + 1);
 
-	fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
-	new_function = builtin_decl_explicit (fncode);
-	if (!sync_resolve_params (loc, function, new_function, params,
-				  orig_format))
-	  return error_mark_node;
-
+        new_function = builtin_decl_explicit (fncode);
+        if (!sync_resolve_params (loc, function, new_function, params,
+                                  orig_format))
+          return error_mark_node;
 	first_param = (*params)[0];
 	result = build_function_call_vec (loc, vNULL, new_function, params,
 					  NULL);