[4/5] openmp: Use libgomp memory allocation functions with unified shared memory.

Message ID 20220308113059.688551-5-abidh@codesourcery.com
State New
Headers
Series openmp: Handle pinned and unified shared memory. |

Commit Message

Abid Qadeer March 8, 2022, 11:30 a.m. UTC
  This patches changes calls to malloc/free/calloc/realloc and operator new to
memory allocation functions in libgomp with
allocator=ompx_unified_shared_mem_alloc.  This helps existing code to benefit
from the unified shared memory.  The libgomp does the correct thing with all
the mapping constructs and there is no memory copies if the pointer is pointing
to unified shared memory.

We only replace replacable new operator and not the class member or placement new.

gcc/ChangeLog:

	* omp-low.cc (usm_transform): New function.
	(make_pass_usm_transform): Likewise.
	(class pass_usm_transform): New.
	* passes.def: Add pass_usm_transform.
	* tree-pass.h (make_pass_usm_transform): New declaration.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/usm-2.c: New test.
	* c-c++-common/gomp/usm-3.c: New test.
	* g++.dg/gomp/usm-1.C: New test.
	* g++.dg/gomp/usm-2.C: New test.
	* g++.dg/gomp/usm-3.C: New test.
	* gfortran.dg/gomp/usm-2.f90: New test.
	* gfortran.dg/gomp/usm-3.f90: New test.

libgomp/ChangeLog:

	* testsuite/libgomp.c/usm-6.c: New test.
	* testsuite/libgomp.c++/usm-1.C: Likewise.
---
 gcc/omp-low.cc                           | 152 +++++++++++++++++++++++
 gcc/passes.def                           |   1 +
 gcc/testsuite/c-c++-common/gomp/usm-2.c  |  34 +++++
 gcc/testsuite/c-c++-common/gomp/usm-3.c  |  32 +++++
 gcc/testsuite/g++.dg/gomp/usm-1.C        |  32 +++++
 gcc/testsuite/g++.dg/gomp/usm-2.C        |  30 +++++
 gcc/testsuite/g++.dg/gomp/usm-3.C        |  38 ++++++
 gcc/testsuite/gfortran.dg/gomp/usm-2.f90 |  16 +++
 gcc/testsuite/gfortran.dg/gomp/usm-3.f90 |  13 ++
 gcc/tree-pass.h                          |   1 +
 libgomp/testsuite/libgomp.c++/usm-1.C    |  54 ++++++++
 libgomp/testsuite/libgomp.c/usm-6.c      |  70 +++++++++++
 12 files changed, 473 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/usm-3.c
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-1.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-2.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/usm-3.C
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/usm-3.f90
 create mode 100644 libgomp/testsuite/libgomp.c++/usm-1.C
 create mode 100644 libgomp/testsuite/libgomp.c/usm-6.c
  

Comments

Andrew Stubbs April 2, 2022, 12:04 p.m. UTC | #1
On 08/03/2022 11:30, Hafiz Abid Qadeer wrote:
> This patches changes calls to malloc/free/calloc/realloc and operator new to
> memory allocation functions in libgomp with
> allocator=ompx_unified_shared_mem_alloc.

This additional patch adds transformation for omp_target_alloc. The 
OpenMP 5.0 document says that addresses allocated this way needs to work 
without is_device_ptr. The easiest way to make that work is to make them 
USM addresses.

I will commit this to OG11 shortly.

Andrew
openmp: Do USM transform for omp_target_alloc

OpenMP 5.0 says that omp_target_alloc should return USM addresses.

gcc/ChangeLog:

	* omp-low.c (usm_transform): Transform omp_target_alloc and
	omp_target_free.

libgomp/ChangeLog:

	* testsuite/libgomp.c/usm-6.c: Add omp_target_alloc.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/usm-2.c: Add omp_target_alloc.
	* c-c++-common/gomp/usm-3.c: Add omp_target_alloc.

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 4e8ab9e4ca0..9235eafd1d7 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -15880,7 +15880,8 @@ usm_transform (gimple_stmt_iterator *gsi_p, bool *,
 	    if ((strcmp (name, "malloc") == 0)
 		 || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
 		     && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MALLOC)
-		 || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl))
+		 || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl)
+		 || strcmp (name, "omp_target_alloc") == 0)
 	      {
 		  tree omp_alloc_type
 		    = build_function_type_list (ptr_type_node, size_type_node,
@@ -15952,7 +15953,8 @@ usm_transform (gimple_stmt_iterator *gsi_p, bool *,
 		       || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
 			   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_FREE)
 		       || (DECL_IS_OPERATOR_DELETE_P (fndecl)
-			   && DECL_IS_REPLACEABLE_OPERATOR (fndecl)))
+			   && DECL_IS_REPLACEABLE_OPERATOR (fndecl))
+		       || strcmp (name, "omp_target_free") == 0)
 	      {
 		tree omp_free_type
 		  = build_function_type_list (void_type_node, ptr_type_node,
diff --git a/gcc/testsuite/c-c++-common/gomp/usm-2.c b/gcc/testsuite/c-c++-common/gomp/usm-2.c
index 64dbb6be131..8c20ef94e69 100644
--- a/gcc/testsuite/c-c++-common/gomp/usm-2.c
+++ b/gcc/testsuite/c-c++-common/gomp/usm-2.c
@@ -12,6 +12,8 @@ void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__);
 void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
 void *realloc(void *, __SIZE_TYPE__);
 void free (void *);
+void *omp_target_alloc (__SIZE_TYPE__, int);
+void omp_target_free (void *, int);
 
 #ifdef __cplusplus
 }
@@ -24,16 +26,21 @@ foo ()
   void *p2 = realloc(p1, 30);
   void *p3 = calloc(4, 15);
   void *p4 = aligned_alloc(16, 40);
+  void *p5 = omp_target_alloc(50, 1);
   free (p2);
   free (p3);
   free (p4);
+  omp_target_free (p5, 1);
 }
 
 /* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 10\\)" 1 "usm_transform"  } } */
-/* { dg-final { scan-tree-dump-times "omp_free" 3 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-not " free"  "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-not " aligned_alloc"  "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-not " malloc"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " omp_target_alloc"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " omp_target_free"  "usm_transform"  } } */
diff --git a/gcc/testsuite/c-c++-common/gomp/usm-3.c b/gcc/testsuite/c-c++-common/gomp/usm-3.c
index 934582ea5fd..2b0cbb45e27 100644
--- a/gcc/testsuite/c-c++-common/gomp/usm-3.c
+++ b/gcc/testsuite/c-c++-common/gomp/usm-3.c
@@ -10,6 +10,8 @@ void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__);
 void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
 void *realloc(void *, __SIZE_TYPE__);
 void free (void *);
+void *omp_target_alloc (__SIZE_TYPE__, int);
+void omp_target_free (void *, int);
 
 #ifdef __cplusplus
 }
@@ -22,16 +24,21 @@ foo ()
   void *p2 = realloc(p1, 30);
   void *p3 = calloc(4, 15);
   void *p4 = aligned_alloc(16, 40);
+  void *p5 = omp_target_alloc(50, 1);
   free (p2);
   free (p3);
   free (p4);
+  omp_target_free (p5, 1);
 }
 
 /* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 10\\)" 1 "usm_transform"  } } */
-/* { dg-final { scan-tree-dump-times "omp_free" 3 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-not " free"  "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-not " aligned_alloc"  "usm_transform"  } } */
 /* { dg-final { scan-tree-dump-not " malloc"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " omp_target_alloc"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " omp_target_free"  "usm_transform"  } } */
diff --git a/libgomp/testsuite/libgomp.c/usm-6.c b/libgomp/testsuite/libgomp.c/usm-6.c
index d2c828fdc9d..c207140092a 100644
--- a/libgomp/testsuite/libgomp.c/usm-6.c
+++ b/libgomp/testsuite/libgomp.c/usm-6.c
@@ -4,6 +4,8 @@
 #include <stdint.h>
 #include <stdlib.h>
 
+#include <omp.h>
+
 /* On old systems, the declaraition may not be present in stdlib.h which
    will generate a warning.  This function is going to be replaced with
    omp_aligned_alloc so the purpose of this declaration is to avoid that
@@ -19,7 +21,8 @@ main ()
   int *b = (int *) calloc(sizeof(int), 3);
   int *c = (int *) realloc(NULL, sizeof(int) * 4);
   int *d = (int *) aligned_alloc(32, sizeof(int));
-  if (!a || !b || !c || !d)
+  int *e = (int *) omp_target_alloc(sizeof(int), 1);
+  if (!a || !b || !c || !d || !e)
     __builtin_abort ();
 
   a[0] = 42;
@@ -36,6 +39,7 @@ main ()
   uintptr_t b_p = (uintptr_t)b;
   uintptr_t c_p = (uintptr_t)c;
   uintptr_t d_p = (uintptr_t)d;
+  uintptr_t e_p = (uintptr_t)e;
 
   if (d_p & 31 != 0)
     __builtin_abort ();
@@ -52,9 +56,12 @@ main ()
 	__builtin_abort ();
       if (d_p != (uintptr_t)d)
 	__builtin_abort ();
+      if (e_p != (uintptr_t)e)
+	__builtin_abort ();
       a[0] = 72;
       b[0] = 82;
       c[0] = 92;
+      e[0] = 102;
     }
 
 #pragma omp target
@@ -74,10 +81,12 @@ main ()
 
   if (a[0] != 72 || a[1] != 73
       || b[0] != 82 || b[1] != 83
-      || c[0] != 92 || c[1] != 93)
+      || c[0] != 92 || c[1] != 93
+      || e[0] != 102)
 	__builtin_abort ();
   free(a);
   free(b);
   free(c);
+  omp_target_free(e, 1);
   return 0;
 }
  
Andrew Stubbs April 2, 2022, 12:42 p.m. UTC | #2
On 02/04/2022 13:04, Andrew Stubbs wrote:
> This additional patch adds transformation for omp_target_alloc. The 
> OpenMP 5.0 document says that addresses allocated this way needs to work 
> without is_device_ptr. The easiest way to make that work is to make them 
> USM addresses.

Actually, reading on, it says "Every device address allocated through 
OpenMP device memory routines is a valid host pointer", so USM is the 
correct answer.

Andrew
  

Patch

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 5ce3a50709a..ec08d59f676 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -14849,6 +14849,158 @@  make_pass_diagnose_omp_blocks (gcc::context *ctxt)
 {
   return new pass_diagnose_omp_blocks (ctxt);
 }
+
+/* Provide transformation required for using unified shared memory
+   by replacing calls to standard memory allocation functions with
+   function provided by the libgomp.  */
+
+static tree
+usm_transform (gimple_stmt_iterator *gsi_p, bool *,
+	       struct walk_stmt_info *wi)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+  /* ompx_unified_shared_mem_alloc is 10.  */
+  const unsigned int unified_shared_mem_alloc = 10;
+
+  switch (gimple_code (stmt))
+    {
+    case GIMPLE_CALL:
+      {
+	gcall *gs = as_a <gcall *> (stmt);
+	tree fndecl = gimple_call_fndecl (gs);
+	if (fndecl)
+	  {
+	    tree allocator = build_int_cst (pointer_sized_int_node,
+					    unified_shared_mem_alloc);
+	    const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+	    if ((strcmp (name, "malloc") == 0)
+		 || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+		     && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MALLOC)
+		 || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl))
+	      {
+		  tree omp_alloc_type
+		    = build_function_type_list (ptr_type_node, size_type_node,
+						pointer_sized_int_node,
+						NULL_TREE);
+		tree repl = build_fn_decl ("omp_alloc", omp_alloc_type);
+		tree size = gimple_call_arg (gs, 0);
+		gimple *g = gimple_build_call (repl, 2, size, allocator);
+		gimple_call_set_lhs (g, gimple_call_lhs (gs));
+		gimple_set_location (g, gimple_location (stmt));
+		gsi_replace (gsi_p, g, true);
+	      }
+	    else if ((strcmp (name, "calloc") == 0)
+		      || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+			  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CALLOC))
+	      {
+		tree omp_calloc_type
+		  = build_function_type_list (ptr_type_node, size_type_node,
+					      size_type_node,
+					      pointer_sized_int_node,
+					      NULL_TREE);
+		tree repl = build_fn_decl ("omp_calloc", omp_calloc_type);
+		tree num = gimple_call_arg (gs, 0);
+		tree size = gimple_call_arg (gs, 1);
+		gimple *g = gimple_build_call (repl, 3, num, size, allocator);
+		gimple_call_set_lhs (g, gimple_call_lhs (gs));
+		gimple_set_location (g, gimple_location (stmt));
+		gsi_replace (gsi_p, g, true);
+	      }
+	    else if ((strcmp (name, "realloc") == 0)
+		      || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+			  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_REALLOC))
+	      {
+		tree omp_realloc_type
+		  = build_function_type_list (ptr_type_node, ptr_type_node,
+					      size_type_node,
+					      pointer_sized_int_node,
+					      pointer_sized_int_node,
+					      NULL_TREE);
+		tree repl = build_fn_decl ("omp_realloc", omp_realloc_type);
+		tree ptr = gimple_call_arg (gs, 0);
+		tree size = gimple_call_arg (gs, 1);
+		gimple *g = gimple_build_call (repl, 4, ptr, size, allocator,
+					       allocator);
+		gimple_call_set_lhs (g, gimple_call_lhs (gs));
+		gimple_set_location (g, gimple_location (stmt));
+		gsi_replace (gsi_p, g, true);
+	      }
+	    else  if ((strcmp (name, "free") == 0)
+		       || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)
+			   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_FREE)
+		       || (DECL_IS_OPERATOR_DELETE_P (fndecl)
+			   && DECL_IS_REPLACEABLE_OPERATOR (fndecl)))
+	      {
+		tree omp_free_type
+		  = build_function_type_list (void_type_node, ptr_type_node,
+					      pointer_sized_int_node,
+					      NULL_TREE);
+		tree repl = build_fn_decl ("omp_free", omp_free_type);
+		tree ptr = gimple_call_arg (gs, 0);
+		gimple *g = gimple_build_call (repl, 2, ptr, allocator);
+		gimple_set_location (g, gimple_location (stmt));
+		gsi_replace (gsi_p, g, true);
+	      }
+	  }
+      }
+      break;
+
+    default:
+      break;
+    }
+
+  return NULL_TREE;
+}
+
+namespace {
+
+const pass_data pass_data_usm_transform =
+{
+  GIMPLE_PASS, /* type */
+  "usm_transform", /* name */
+  OPTGROUP_OMP, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_gimple_any, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_usm_transform : public gimple_opt_pass
+{
+public:
+  pass_usm_transform (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_usm_transform, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+  {
+    return (flag_openmp || flag_openmp_simd)
+	    && (flag_offload_memory == OFFLOAD_MEMORY_UNIFIED
+		|| omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY);
+  }
+  virtual unsigned int execute (function *)
+  {
+    struct walk_stmt_info wi;
+    gimple_seq body = gimple_body (current_function_decl);
+
+    memset (&wi, 0, sizeof (wi));
+    walk_gimple_seq (body, usm_transform, NULL, &wi);
+
+    return 0;
+  }
+
+}; // class pass_usm_transform
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_usm_transform (gcc::context *ctxt)
+{
+  return new pass_usm_transform (ctxt);
+}
 
 
 #include "gt-omp-low.h"
diff --git a/gcc/passes.def b/gcc/passes.def
index f7718181038..98c7736bb8b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -36,6 +36,7 @@  along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_diagnose_tm_blocks);
   NEXT_PASS (pass_omp_oacc_kernels_decompose);
   NEXT_PASS (pass_lower_omp);
+  NEXT_PASS (pass_usm_transform);
   NEXT_PASS (pass_lower_cf);
   NEXT_PASS (pass_lower_tm);
   NEXT_PASS (pass_refactor_eh);
diff --git a/gcc/testsuite/c-c++-common/gomp/usm-2.c b/gcc/testsuite/c-c++-common/gomp/usm-2.c
new file mode 100644
index 00000000000..2f3f986012c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/usm-2.c
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-usm_transform" } */
+
+#pragma omp requires unified_shared_memory
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *malloc (__SIZE_TYPE__);
+void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
+void *realloc(void *, __SIZE_TYPE__);
+void free (void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+void
+foo ()
+{
+  void *p1 = malloc(20);
+  void *p2 = realloc(p1, 30);
+  void *p3 = calloc(4, 15);
+  free (p2);
+  free (p3);
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 2 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " free"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " malloc"  "usm_transform"  } } */
diff --git a/gcc/testsuite/c-c++-common/gomp/usm-3.c b/gcc/testsuite/c-c++-common/gomp/usm-3.c
new file mode 100644
index 00000000000..c8230e7ff7c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/usm-3.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" } */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *malloc (__SIZE_TYPE__);
+void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
+void *realloc(void *, __SIZE_TYPE__);
+void free (void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+void
+foo ()
+{
+  void *p1 = malloc(20);
+  void *p2 = realloc(p1, 30);
+  void *p3 = calloc(4, 15);
+  free (p2);
+  free (p3);
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 2 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " free"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not " malloc"  "usm_transform"  } } */
diff --git a/gcc/testsuite/g++.dg/gomp/usm-1.C b/gcc/testsuite/g++.dg/gomp/usm-1.C
new file mode 100644
index 00000000000..bd70a81b5bb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/usm-1.C
@@ -0,0 +1,32 @@ 
+// { dg-do compile }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+struct t1
+{
+  int a;
+  int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+  uint8_t *p1 = new uint8_t;
+  uint8_t *p2 = new uint8_t[20];
+  t1 *p3 = new t1;
+  t1 *p4 = new t1[y];
+  delete p1;
+  delete p3;
+  delete [] p2;
+  delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not "operator new"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not "operator delete"  "usm_transform"  } } */
diff --git a/gcc/testsuite/g++.dg/gomp/usm-2.C b/gcc/testsuite/g++.dg/gomp/usm-2.C
new file mode 100644
index 00000000000..f6ab155c6de
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/usm-2.C
@@ -0,0 +1,30 @@ 
+// { dg-do compile }
+// { dg-options "-fopenmp -foffload-memory=unified -fdump-tree-usm_transform" }
+
+struct t1
+{
+  int a;
+  int b;
+};
+
+typedef unsigned char uint8_t;
+
+void
+foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y)
+{
+  uint8_t *p1 = new uint8_t;
+  uint8_t *p2 = new uint8_t[20];
+  t1 *p3 = new t1;
+  t1 *p4 = new t1[y];
+  delete p1;
+  delete p3;
+  delete [] p2;
+  delete [] p4;
+}
+
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not "operator new"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not "operator delete"  "usm_transform"  } } */
diff --git a/gcc/testsuite/g++.dg/gomp/usm-3.C b/gcc/testsuite/g++.dg/gomp/usm-3.C
new file mode 100644
index 00000000000..50ac9302c8b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/usm-3.C
@@ -0,0 +1,38 @@ 
+// { dg-do compile }
+// { dg-options "-fopenmp -fdump-tree-usm_transform" }
+
+#pragma omp requires unified_shared_memory
+
+#include <new>
+
+
+struct X {
+    static void* operator new(std::size_t count)
+    {
+      static char buf[10];
+      return &buf[0];
+    }
+    static void* operator new[](std::size_t count)
+    {
+      static char buf[10];
+      return &buf[0];
+    }
+    static void operator delete(void*)
+    {
+    }
+    static void operator delete[](void*)
+    {
+    }
+};
+void foo() {
+  X* p1 = new X;
+  delete p1;
+  X* p2 = new X[10];
+  delete[] p2;
+  unsigned char buf[24] ;
+  int *p3 = new (buf) int(3);
+  p3[0] = 1;
+}
+
+/* { dg-final { scan-tree-dump-not "omp_alloc"  "usm_transform"  } } */
+/* { dg-final { scan-tree-dump-not "omp_free"  "usm_transform"  } } */
diff --git a/gcc/testsuite/gfortran.dg/gomp/usm-2.f90 b/gcc/testsuite/gfortran.dg/gomp/usm-2.f90
new file mode 100644
index 00000000000..dc775260cb7
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/usm-2.f90
@@ -0,0 +1,16 @@ 
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-usm_transform" }
+
+!$omp requires unified_shared_memory
+end
+
+subroutine foo()
+  implicit none
+  integer, allocatable :: var1
+
+  allocate(var1)
+
+end subroutine
+
+! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform"  } } 
+! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform"  } } 
\ No newline at end of file
diff --git a/gcc/testsuite/gfortran.dg/gomp/usm-3.f90 b/gcc/testsuite/gfortran.dg/gomp/usm-3.f90
new file mode 100644
index 00000000000..7983444ebff
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/usm-3.f90
@@ -0,0 +1,13 @@ 
+! { dg-do compile }
+! { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" }
+
+subroutine foo()
+  implicit none
+  integer, allocatable :: var1
+
+  allocate(var1)
+
+end subroutine
+
+! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform"  } } 
+! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform"  } } 
\ No newline at end of file
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 606d1d60b85..494a9662afa 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -424,6 +424,7 @@  extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_usm_transform (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt);
diff --git a/libgomp/testsuite/libgomp.c++/usm-1.C b/libgomp/testsuite/libgomp.c++/usm-1.C
new file mode 100644
index 00000000000..fea25e5f10b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/usm-1.C
@@ -0,0 +1,54 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+#include <stdint.h>
+
+#pragma omp requires unified_shared_memory
+
+int g1 = 0;
+
+struct s1
+{
+  s1() { a = g1++;}
+  ~s1() { g1--;}
+  int a;
+};
+
+int
+main ()
+{
+  s1 *p1 = new s1;
+  s1 *p2 = new s1[10];
+
+  if (!p1 || !p2 || p1->a != 0)
+    __builtin_abort ();
+
+  for (int i = 0; i < 10; i++)
+    if (p2[i].a != i+1)
+      __builtin_abort ();
+
+  uintptr_t pp1 = (uintptr_t)p1;
+  uintptr_t pp2 = (uintptr_t)p2;
+
+#pragma omp target firstprivate(pp1, pp2)
+    {
+      s1 *t1 = (s1*)pp1;
+      s1 *t2 = (s1*)pp2;
+      if (t1->a != 0)
+	__builtin_abort ();
+
+      for (int i = 0; i < 10; i++)
+	if (t2[i].a != i+1)
+	  __builtin_abort ();
+
+      t1->a = 42;
+    }
+
+  if (p1->a != 42)
+    __builtin_abort ();
+
+  delete [] p2;
+  delete p1;
+  if (g1 != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/usm-6.c b/libgomp/testsuite/libgomp.c/usm-6.c
new file mode 100644
index 00000000000..d98da68a1ed
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/usm-6.c
@@ -0,0 +1,70 @@ 
+/* { dg-do run } */
+/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#pragma omp requires unified_shared_memory
+
+int
+main ()
+{
+  int *a = (int *) malloc(sizeof(int)*2);
+  int *b = (int *) calloc(sizeof(int), 3);
+  int *c = (int *) realloc(NULL, sizeof(int) * 4);
+  if (!a || !b || !c)
+    __builtin_abort ();
+
+  a[0] = 42;
+  a[1] = 43;
+  b[0] = 52;
+  b[1] = 53;
+  b[2] = 54;
+  c[0] = 62;
+  c[1] = 63;
+  c[2] = 64;
+  c[3] = 65;
+
+  uintptr_t a_p = (uintptr_t)a;
+  uintptr_t b_p = (uintptr_t)b;
+  uintptr_t c_p = (uintptr_t)c;
+
+#pragma omp target enter data map(to:a[0:2])
+
+#pragma omp target is_device_ptr(c)
+    {
+      if (a[0] != 42 || a_p != (uintptr_t)a)
+	__builtin_abort ();
+      if (b[0] != 52 || b[2] != 54 || b_p != (uintptr_t)b)
+	__builtin_abort ();
+      if (c[0] != 62 || c[3] != 65 || c_p != (uintptr_t)c)
+	__builtin_abort ();
+      a[0] = 72;
+      b[0] = 82;
+      c[0] = 92;
+    }
+
+#pragma omp target
+    {
+      if (a[1] != 43 || a_p != (uintptr_t)a)
+	__builtin_abort ();
+      if (b[1] != 53 || b_p != (uintptr_t)b)
+	__builtin_abort ();
+      if (c[1] != 63 || c[2] != 64 || c_p != (uintptr_t)c)
+	__builtin_abort ();
+      a[1] = 73;
+      b[1] = 83;
+      c[1] = 93;
+    }
+
+#pragma omp target exit data map(delete:a[0:2])
+
+  if (a[0] != 72 || a[1] != 73
+      || b[0] != 82 || b[1] != 83
+      || c[0] != 92 || c[1] != 93)
+	__builtin_abort ();
+  free(a);
+  free(b);
+  free(c);
+  return 0;
+}