From patchwork Fri Dec 16 11:45:59 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 61999 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id BB086385136B for ; Fri, 16 Dec 2022 11:46:25 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from esa1.mentor.iphmx.com (esa1.mentor.iphmx.com [68.232.129.153]) by sourceware.org (Postfix) with ESMTPS id 67193385136B for ; Fri, 16 Dec 2022 11:46:06 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 67193385136B Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=mentor.com X-IronPort-AV: E=Sophos;i="5.96,249,1665475200"; d="scan'208";a="93174357" Received: from orw-gwy-01-in.mentorg.com ([192.94.38.165]) by esa1.mentor.iphmx.com with ESMTP; 16 Dec 2022 03:46:04 -0800 IronPort-SDR: YJQSM8H1hLwsVg49dfqfy+apuXpCMTCIk4hheFToKdKjDVIoeXsQ8jqEPfySwCPVYzkfXES69J dfT3iPEVyRLJfAvJHpmligmh1ruiR6VA/bRXd4qVlsyEd+Ro9EIxPFk2iDSzJ+LLOfjv1c3q7p WqqsWEs1c4RrH8HwWqExoS2nRlMDzd2QJnZvBVf4o2qDBmPbbKR4fAn4CWC5BhZMBKV8wW3sxY zTYUHNaUVSO5r3O8qpiMY6qn4PeZ9fj8Q9YcLeBPb/L6KbrtqscL9x9csVJvBcT+zwfodknW5m o9U= Message-ID: Date: Fri, 16 Dec 2022 11:45:59 +0000 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Thunderbird/102.5.1 Content-Language: en-GB From: Andrew Stubbs Subject: [OG12][committed] libgomp: Fix USM bugs To: "gcc-patches@gcc.gnu.org" X-Originating-IP: [137.202.0.90] X-ClientProxiedBy: svr-ies-mbx-12.mgc.mentorg.com (139.181.222.12) To svr-ies-mbx-11.mgc.mentorg.com (139.181.222.11) X-Spam-Status: No, score=-12.5 required=5.0 tests=BAYES_00, GIT_PATCH_0, HEADER_FROM_DIFFERENT_DOMAINS, KAM_DMARC_STATUS, SPF_HELO_PASS, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" I've committed this patch to the devel/omp/gcc-12 branch. It fixes some missed cases in the Unified Shared Memory implementation that were especially noticeable in Fortran because the size of arrays are known. This patch will have to be folded into the mainline USM patches that were submitted for review. I hope to get to that sometime in the new year. Andrew libgomp: Fix USM bugs Fix up some USM corner cases. libgomp/ChangeLog: * libgomp.h (OFFSET_USM): New macro. * target.c (gomp_map_pointer): Handle USM mappings. (gomp_map_val): Handle OFFSET_USM. (gomp_map_vars_internal): Move USM check earlier, and use OFFSET_USM. Add OFFSET_USM check to the second mapping pass. * testsuite/libgomp.fortran/usm-1.f90: New test. * testsuite/libgomp.fortran/usm-2.f90: New test. diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index e5345ebee53..7d55f3cf825 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1196,6 +1196,7 @@ struct target_mem_desc; #define OFFSET_INLINED (~(uintptr_t) 0) #define OFFSET_POINTER (~(uintptr_t) 1) #define OFFSET_STRUCT (~(uintptr_t) 2) +#define OFFSET_USM (~(uintptr_t) 3) /* Auxiliary structure for infrequently-used or API-specific data. */ diff --git a/libgomp/target.c b/libgomp/target.c index 50709f0677d..19db5a56f4a 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -692,6 +692,9 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, { if (allow_zero_length_array_sections) cur_node.tgt_offset = 0; + else if (devicep->is_usm_ptr_func + && devicep->is_usm_ptr_func ((void*)cur_node.host_start)) + cur_node.tgt_offset = cur_node.host_start; else { gomp_mutex_unlock (&devicep->lock); @@ -928,6 +931,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) switch (tgt->list[i].offset) { case OFFSET_INLINED: + case OFFSET_USM: return (uintptr_t) hostaddrs[i]; case OFFSET_POINTER: @@ -1013,6 +1017,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, { int kind = get_kind (short_mapkind, kinds, i); bool implicit = get_implicit (short_mapkind, kinds, i); + tgt->list[i].offset = 0; if (hostaddrs[i] == NULL || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) { @@ -1020,6 +1025,15 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = OFFSET_INLINED; continue; } + else if (devicep->is_usm_ptr_func + && devicep->is_usm_ptr_func (hostaddrs[i])) + { + /* The memory is visible from both host and target + so nothing needs to be moved. */ + tgt->list[i].key = NULL; + tgt->list[i].offset = OFFSET_USM; + continue; + } else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) { @@ -1063,15 +1077,6 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = 0; continue; } - else if (devicep->is_usm_ptr_func - && devicep->is_usm_ptr_func (hostaddrs[i])) - { - /* The memory is visible from both host and target - so nothing needs to be moved. */ - tgt->list[i].key = NULL; - tgt->list[i].offset = OFFSET_INLINED; - continue; - } else if ((kind & typemask) == GOMP_MAP_STRUCT) { size_t first = i + 1; @@ -1441,6 +1446,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, bool implicit = get_implicit (short_mapkind, kinds, i); if (hostaddrs[i] == NULL) continue; + if (tgt->list[i].offset == OFFSET_USM) + continue; switch (kind & typemask) { size_t align, len, first, last; diff --git a/libgomp/testsuite/libgomp.fortran/usm-1.f90 b/libgomp/testsuite/libgomp.fortran/usm-1.f90 new file mode 100644 index 00000000000..7147e9925a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/usm-1.f90 @@ -0,0 +1,28 @@ +! { dg-do run } +! { dg-require-effective-target omp_usm } + +! Ensure that USM works for implicit mappings. +! This needs to cover both the initial mapping scan and the rescan that +! happens when some of the mappings aren't no-ops (in this cases there are +! some hidden pointers). + +program usm + use iso_fortran_env + use omp_lib + implicit none + + !$omp requires unified_shared_memory + + integer, parameter :: N = 1024 + real(real64), allocatable :: x(:), y(:) + integer :: i + + allocate(x(N), y(N)) + !$omp target teams distribute parallel do simd + do i=1,N + y(i) = x(i) + enddo + + deallocate(x,y) + +end program usm diff --git a/libgomp/testsuite/libgomp.fortran/usm-2.f90 b/libgomp/testsuite/libgomp.fortran/usm-2.f90 new file mode 100644 index 00000000000..94e8284c475 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/usm-2.f90 @@ -0,0 +1,33 @@ +! { dg-do run } +! { dg-require-effective-target omp_usm } + +! Ensure that USM doesn't break the use_device_ptr clause (host pointers and +! target pointers being "unified"). + +program usm + use iso_fortran_env + use omp_lib + implicit none + + !$omp requires unified_shared_memory + + integer, parameter :: N = 1024 + real(real64), allocatable :: x(:), y(:) + integer :: i + + allocate(x(N),y(N)) + + !$omp target data map(x) + ! The "i" variable is not explictly mapped yet, so ensures that both + ! mapping scan passes are tested. + !$omp target data map(i) use_device_ptr(x) + !$omp target teams distribute parallel do simd + do i=1,N + y(i) = x(i) + enddo + !$omp end target data + !$omp end target data + + deallocate(x,y) + +end program usm