@@ -15097,6 +15097,180 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt)
{
return new pass_diagnose_omp_blocks (ctxt);
}
+
+/* Provide transformation required for using unified shared memory
+ by replacing calls to standard memory allocation functions with
+ function provided by the libgomp. */
+
+static tree
+usm_transform (gimple_stmt_iterator *gsi_p, bool *,
+ struct walk_stmt_info *wi)
+{
+ gimple *stmt = gsi_stmt (*gsi_p);
+ /* ompx_unified_shared_mem_alloc is 10. */
+ const unsigned int unified_shared_mem_alloc = 10;
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_CALL:
+ {
+ gcall *gs = as_a <gcall *> (stmt);
+ tree fndecl = gimple_call_fndecl (gs);
+ if (fndecl)
+ {
+ tree allocator = build_int_cst (pointer_sized_int_node,
+ unified_shared_mem_alloc);
+ const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+ if ((strcmp (name, "malloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MALLOC)
+ || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl)
+ || strcmp (name, "omp_target_alloc") == 0)
+ {
+ tree omp_alloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_alloc", omp_alloc_type);
+ tree size = gimple_call_arg (gs, 0);
+ gimple *g = gimple_build_call (repl, 2, size, allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if (strcmp (name, "aligned_alloc") == 0)
+ {
+ /* May be we can also use this for new operator with
+ std::align_val_t parameter. */
+ tree omp_alloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_aligned_alloc",
+ omp_alloc_type);
+ tree align = gimple_call_arg (gs, 0);
+ tree size = gimple_call_arg (gs, 1);
+ gimple *g = gimple_build_call (repl, 3, align, size,
+ allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "calloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CALLOC))
+ {
+ tree omp_calloc_type
+ = build_function_type_list (ptr_type_node, size_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_calloc", omp_calloc_type);
+ tree num = gimple_call_arg (gs, 0);
+ tree size = gimple_call_arg (gs, 1);
+ gimple *g = gimple_build_call (repl, 3, num, size, allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "realloc") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_REALLOC))
+ {
+ tree omp_realloc_type
+ = build_function_type_list (ptr_type_node, ptr_type_node,
+ size_type_node,
+ pointer_sized_int_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_realloc", omp_realloc_type);
+ tree ptr = gimple_call_arg (gs, 0);
+ tree size = gimple_call_arg (gs, 1);
+ gimple *g = gimple_build_call (repl, 4, ptr, size, allocator,
+ allocator);
+ gimple_call_set_lhs (g, gimple_call_lhs (gs));
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ else if ((strcmp (name, "free") == 0)
+ || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+ && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_FREE)
+ || (DECL_IS_OPERATOR_DELETE_P (fndecl)
+ && DECL_IS_REPLACEABLE_OPERATOR (fndecl))
+ || strcmp (name, "omp_target_free") == 0)
+ {
+ tree omp_free_type
+ = build_function_type_list (void_type_node, ptr_type_node,
+ pointer_sized_int_node,
+ NULL_TREE);
+ tree repl = build_fn_decl ("omp_free", omp_free_type);
+ tree ptr = gimple_call_arg (gs, 0);
+ gimple *g = gimple_build_call (repl, 2, ptr, allocator);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi_p, g, true);
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+namespace {
+
+const pass_data pass_data_usm_transform =
+{
+ GIMPLE_PASS, /* type */
+ "usm_transform", /* name */
+ OPTGROUP_OMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_gimple_any, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_usm_transform : public gimple_opt_pass
+{
+public:
+ pass_usm_transform (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_usm_transform, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return (flag_openmp || flag_openmp_simd)
+ && (flag_offload_memory == OFFLOAD_MEMORY_UNIFIED
+ || omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY
+ || omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS);
+ }
+ virtual unsigned int execute (function *)
+ {
+ struct walk_stmt_info wi;
+ gimple_seq body = gimple_body (current_function_decl);
+
+ memset (&wi, 0, sizeof (wi));
+ walk_gimple_seq (body, usm_transform, NULL, &wi);
+
+ return 0;
+ }
+
+}; // class pass_usm_transform
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_usm_transform (gcc::context *ctxt)
+{
+ return new pass_usm_transform (ctxt);
+}
#include "gt-omp-low.h"
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_diagnose_tm_blocks);
NEXT_PASS (pass_omp_oacc_kernels_decompose);
NEXT_PASS (pass_lower_omp);
+ NEXT_PASS (pass_usm_transform);
NEXT_PASS (pass_lower_cf);
NEXT_PASS (pass_lower_tm);
NEXT_PASS (pass_refactor_eh);
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-usm_transform" } */
+
+#pragma omp requires unified_shared_memory
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *malloc (__SIZE_TYPE__);
+void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__);
+void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
+void *realloc(void *, __SIZE_TYPE__);
+void free (void *);
+void *omp_target_alloc (__SIZE_TYPE__, int);
+void omp_target_free (void *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+void
+foo ()
+{
+ void *p1 = malloc(20);
+ void *p2 = realloc(p1, 30);
+ void *p3 = calloc(4, 15);
+ void *p4 = aligned_alloc(16, 40);
+ void *p5 = omp_target_alloc(50, 1);
+ free (p2);
+ free (p3);
+ free (p4);
+ omp_target_free (p5, 1);
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " free" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " aligned_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " malloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_free" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" } */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *malloc (__SIZE_TYPE__);
+void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__);
+void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
+void *realloc(void *, __SIZE_TYPE__);
+void free (void *);
+void *omp_target_alloc (__SIZE_TYPE__, int);
+void omp_target_free (void *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+void
+foo ()
+{
+ void *p1 = malloc(20);
+ void *p2 = realloc(p1, 30);
+ void *p3 = calloc(4, 15);
+ void *p4 = aligned_alloc(16, 40);
+ void *p5 = omp_target_alloc(50, 1);
+ free (p2);
+ free (p3);
+ free (p4);
+ omp_target_free (p5, 1);
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " free" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " aligned_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " malloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not " omp_target_free" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+// { dg-do compile }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+struct t1
+{
+ int a;
+ int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+ uint8_t *p1 = new uint8_t;
+ uint8_t *p2 = new uint8_t[20];
+ t1 *p3 = new t1;
+ t1 *p4 = new t1[y];
+ delete p1;
+ delete p3;
+ delete [] p2;
+ delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,30 @@
+// { dg-do compile }
+// { dg-options "-fopenmp -foffload-memory=unified -fdump-tree-usm_transform" }
+
+struct t1
+{
+ int a;
+ int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+ uint8_t *p1 = new uint8_t;
+ uint8_t *p2 = new uint8_t[20];
+ t1 *p3 = new t1;
+ t1 *p4 = new t1[y];
+ delete p1;
+ delete p3;
+ delete [] p2;
+ delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,38 @@
+// { dg-do compile }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+#include <new>
+
+
+struct X {
+ static void* operator new(std::size_t count)
+ {
+ static char buf[10];
+ return &buf[0];
+ }
+ static void* operator new[](std::size_t count)
+ {
+ static char buf[10];
+ return &buf[0];
+ }
+ static void operator delete(void*)
+ {
+ }
+ static void operator delete[](void*)
+ {
+ }
+};
+void foo() {
+ X* p1 = new X;
+ delete p1;
+ X* p2 = new X[10];
+ delete[] p2;
+ unsigned char buf[24] ;
+ int *p3 = new (buf) int(3);
+ p3[0] = 1;
+}
+
+/* { dg-final { scan-tree-dump-not "omp_alloc" "usm_transform" } } */
+/* { dg-final { scan-tree-dump-not "omp_free" "usm_transform" } } */
new file mode 100644
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-usm_transform" }
+
+!$omp requires unified_shared_memory
+end
+
+subroutine foo()
+ implicit none
+ integer, allocatable :: var1
+
+ allocate(var1)
+
+end subroutine
+
+! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform" } }
+! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform" } }
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" }
+
+subroutine foo()
+ implicit none
+ integer, allocatable :: var1
+
+ allocate(var1)
+
+end subroutine
+
+! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform" } }
+! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform" } }
\ No newline at end of file
@@ -424,6 +424,7 @@ extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_usm_transform (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt);
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+#include <stdint.h>
+
+#pragma omp requires unified_shared_memory
+
+int g1 = 0;
+
+struct s1
+{
+ s1() { a = g1++;}
+ ~s1() { g1--;}
+ int a;
+};
+
+int
+main ()
+{
+ s1 *p1 = new s1;
+ s1 *p2 = new s1[10];
+
+ if (!p1 || !p2 || p1->a != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i < 10; i++)
+ if (p2[i].a != i+1)
+ __builtin_abort ();
+
+ uintptr_t pp1 = (uintptr_t)p1;
+ uintptr_t pp2 = (uintptr_t)p2;
+
+#pragma omp target firstprivate(pp1, pp2)
+ {
+ s1 *t1 = (s1*)pp1;
+ s1 *t2 = (s1*)pp2;
+ if (t1->a != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i < 10; i++)
+ if (t2[i].a != i+1)
+ __builtin_abort ();
+
+ t1->a = 42;
+ }
+
+ if (p1->a != 42)
+ __builtin_abort ();
+
+ delete [] p2;
+ delete p1;
+ if (g1 != 0)
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,92 @@
+/* { dg-do run } */
+/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <omp.h>
+
+/* On old systems, the declaraition may not be present in stdlib.h which
+ will generate a warning. This function is going to be replaced with
+ omp_aligned_alloc so the purpose of this declaration is to avoid that
+ warning. */
+void *aligned_alloc(size_t alignment, size_t size);
+
+#pragma omp requires unified_shared_memory
+
+int
+main ()
+{
+ int *a = (int *) malloc(sizeof(int)*2);
+ int *b = (int *) calloc(sizeof(int), 3);
+ int *c = (int *) realloc(NULL, sizeof(int) * 4);
+ int *d = (int *) aligned_alloc(32, sizeof(int));
+ int *e = (int *) omp_target_alloc(sizeof(int), 1);
+ if (!a || !b || !c || !d || !e)
+ __builtin_abort ();
+
+ a[0] = 42;
+ a[1] = 43;
+ b[0] = 52;
+ b[1] = 53;
+ b[2] = 54;
+ c[0] = 62;
+ c[1] = 63;
+ c[2] = 64;
+ c[3] = 65;
+
+ uintptr_t a_p = (uintptr_t)a;
+ uintptr_t b_p = (uintptr_t)b;
+ uintptr_t c_p = (uintptr_t)c;
+ uintptr_t d_p = (uintptr_t)d;
+ uintptr_t e_p = (uintptr_t)e;
+
+ if (d_p & 31 != 0)
+ __builtin_abort ();
+
+#pragma omp target enter data map(to:a[0:2])
+
+#pragma omp target is_device_ptr(c)
+ {
+ if (a[0] != 42 || a_p != (uintptr_t)a)
+ __builtin_abort ();
+ if (b[0] != 52 || b[2] != 54 || b_p != (uintptr_t)b)
+ __builtin_abort ();
+ if (c[0] != 62 || c[3] != 65 || c_p != (uintptr_t)c)
+ __builtin_abort ();
+ if (d_p != (uintptr_t)d)
+ __builtin_abort ();
+ if (e_p != (uintptr_t)e)
+ __builtin_abort ();
+ a[0] = 72;
+ b[0] = 82;
+ c[0] = 92;
+ e[0] = 102;
+ }
+
+#pragma omp target
+ {
+ if (a[1] != 43 || a_p != (uintptr_t)a)
+ __builtin_abort ();
+ if (b[1] != 53 || b_p != (uintptr_t)b)
+ __builtin_abort ();
+ if (c[1] != 63 || c[2] != 64 || c_p != (uintptr_t)c)
+ __builtin_abort ();
+ a[1] = 73;
+ b[1] = 83;
+ c[1] = 93;
+ }
+
+#pragma omp target exit data map(delete:a[0:2])
+
+ if (a[0] != 72 || a[1] != 73
+ || b[0] != 82 || b[1] != 83
+ || c[0] != 92 || c[1] != 93
+ || e[0] != 102)
+ __builtin_abort ();
+ free(a);
+ free(b);
+ free(c);
+ omp_target_free(e, 1);
+ return 0;
+}