@@ -83,6 +83,8 @@ typedef enum {
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
} CUdevice_attribute;
@@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c
+ oacc-target.c target-indirect.c selector.c
include $(top_srcdir)/plugin/Makefrag.am
@@ -219,7 +219,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo target-indirect.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo selector.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +552,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c $(am__append_3)
+ oacc-target.c target-indirect.c selector.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -777,6 +777,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/selector.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
new file mode 100644
@@ -0,0 +1,102 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ an AMD GCN GPU. */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Evaluate the KIND, ARCH and ISA trait selector sets for the AMD GCN
+   device this file is compiled for.  A null selector is not checked;
+   every non-null set must be fully contained in the matching list
+   below.  The ISA list is assembled at build time from the target's
+   predefined macros.  Returns true only if all checked sets match.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  static const char *supported_kinds[] = { "gpu", "nohost", NULL };
+  static const char *supported_archs[] = { "gcn", "amdgcn", NULL };
+  static const char *supported_isas[]
+    = {
+#ifdef __fiji__
+        "fiji", "gfx803",
+#endif
+#ifdef __gfx900__
+        "gfx900",
+#endif
+#ifdef __gfx906__
+        "gfx906",
+#endif
+#ifdef __gfx908__
+        "gfx908",
+#endif
+#ifdef __gfx90a__
+        "gfx90a",
+#endif
+#ifdef __gfx90c__
+        "gfx90c",
+#endif
+#ifdef __gfx1030__
+        "gfx1030",
+#endif
+#ifdef __gfx1036__
+        "gfx1036",
+#endif
+#ifdef __gfx1100__
+        "gfx1100",
+#endif
+#ifdef __gfx1103__
+        "gfx1103",
+#endif
+        NULL };
+
+  /* Short-circuit evaluation keeps the kind -> arch -> isa order.  */
+  return ((kind == NULL || gomp_match_selectors (kind, supported_kinds))
+          && (arch == NULL || gomp_match_selectors (arch, supported_archs))
+          && (isa == NULL || gomp_match_selectors (isa, supported_isas)));
+}
new file mode 100644
@@ -0,0 +1,65 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a generic implementation of
+ GOMP_evaluate_current_device when run on a Linux host. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Generic host implementation: report whether the KIND, ARCH and ISA
+   trait selector sets all match.  KIND, when non-null, may contain only
+   "cpu" and/or "host".  No ARCH or ISA names are known here, so any
+   non-null ARCH or ISA makes the match fail.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  static const char *host_kinds[] = { "cpu", "host", NULL };
+
+  bool kind_ok = kind == NULL || gomp_match_selectors (kind, host_kinds);
+
+  return kind_ok && arch == NULL && isa == NULL;
+}
new file mode 100644
@@ -0,0 +1,406 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ an x86/x64-based Linux host. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Evaluate the KIND, ARCH and ISA trait selector sets for an x86/x64
+   host.  A null selector is not checked; every non-null set must be
+   fully contained in the matching list below.  The ARCH and ISA lists
+   are assembled from the compiler's predefined macros, i.e. they
+   reflect the options this file is built with rather than the hardware
+   the program runs on.  Returns true only if all checked sets match.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  static const char *kind_choices[] = { "cpu", "host", NULL };
+
+  static const char *arch_choices[]
+    = { "x86",
+        "ia32",
+#ifdef __x86_64__
+        "x86_64",
+#endif
+#ifdef __ILP32__
+        "x32",
+#endif
+        "i386",
+#ifdef __i486__
+        "i486",
+#endif
+#ifdef __i586__
+        "i586",
+#endif
+#ifdef __i686__
+        "i686",
+#endif
+        NULL };
+
+  static const char *isa_choices[]
+    = {
+#ifdef __WBNOINVD__
+        "wbnoinvd",
+#endif
+#ifdef __AVX512VP2INTERSECT__
+        "avx512vp2intersect",
+#endif
+#ifdef __MMX__
+        "mmx",
+#endif
+#ifdef __3dNOW__
+        "3dnow",
+#endif
+#ifdef __3dNOW_A__
+        "3dnowa",
+#endif
+#ifdef __SSE__
+        "sse",
+#endif
+#ifdef __SSE2__
+        "sse2",
+#endif
+#ifdef __SSE3__
+        "sse3",
+#endif
+#ifdef __SSSE3__
+        "ssse3",
+#endif
+#ifdef __SSE4_1__
+        "sse4.1",
+#endif
+#ifdef __SSE4_2__
+        "sse4",
+        "sse4.2",
+#endif
+#ifdef __AES__
+        "aes",
+#endif
+#ifdef __SHA__
+        "sha",
+#endif
+#ifdef __PCLMUL__
+        "pclmul",
+#endif
+#ifdef __AVX__
+        "avx",
+#endif
+#ifdef __AVX2__
+        "avx2",
+#endif
+#ifdef __AVX512F__
+        "avx512f",
+#endif
+#ifdef __AVX512ER__
+        "avx512er",
+#endif
+#ifdef __AVX512CD__
+        "avx512cd",
+#endif
+#ifdef __AVX512PF__
+        "avx512pf",
+#endif
+#ifdef __AVX512DQ__
+        "avx512dq",
+#endif
+#ifdef __AVX512BW__
+        "avx512bw",
+#endif
+#ifdef __AVX512VL__
+        "avx512vl",
+#endif
+#ifdef __AVX512VBMI__
+        "avx512vbmi",
+#endif
+#ifdef __AVX512IFMA__
+        "avx512ifma",
+#endif
+#ifdef __AVX5124VNNIW__
+        "avx5124vnniw",
+#endif
+#ifdef __AVX512VBMI2__
+        "avx512vbmi2",
+#endif
+#ifdef __AVX512VNNI__
+        "avx512vnni",
+#endif
+#ifdef __PCONFIG__
+        "pconfig",
+#endif
+#ifdef __SGX__
+        "sgx",
+#endif
+#ifdef __AVX5124FMAPS__
+        "avx5124fmaps",
+#endif
+#ifdef __AVX512BITALG__
+        "avx512bitalg",
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+        "avx512vpopcntdq",
+#endif
+#ifdef __FMA__
+        "fma",
+#endif
+#ifdef __RTM__
+        "rtm",
+#endif
+#ifdef __SSE4A__
+        "sse4a",
+#endif
+#ifdef __FMA4__
+        "fma4",
+#endif
+#ifdef __XOP__
+        "xop",
+#endif
+#ifdef __LWP__
+        "lwp",
+#endif
+#ifdef __ABM__
+        "abm",
+#endif
+#ifdef __BMI__
+        "bmi",
+#endif
+#ifdef __BMI2__
+        "bmi2",
+#endif
+#ifdef __LZCNT__
+        "lzcnt",
+#endif
+#ifdef __TBM__
+        "tbm",
+#endif
+#ifdef __CRC32__
+        "crc32",
+#endif
+#ifdef __POPCNT__
+        "popcnt",
+#endif
+#ifdef __FSGSBASE__
+        "fsgsbase",
+#endif
+#ifdef __RDRND__
+        "rdrnd",
+#endif
+#ifdef __F16C__
+        "f16c",
+#endif
+#ifdef __RDSEED__
+        "rdseed",
+#endif
+#ifdef __PRFCHW__
+        "prfchw",
+#endif
+#ifdef __ADX__
+        "adx",
+#endif
+#ifdef __FXSR__
+        "fxsr",
+#endif
+#ifdef __XSAVE__
+        "xsave",
+#endif
+#ifdef __XSAVEOPT__
+        "xsaveopt",
+#endif
+#ifdef __PREFETCHWT1__
+        "prefetchwt1",
+#endif
+#ifdef __CLFLUSHOPT__
+        "clflushopt",
+#endif
+#ifdef __CLZERO__
+        "clzero",
+#endif
+#ifdef __XSAVEC__
+        "xsavec",
+#endif
+#ifdef __XSAVES__
+        "xsaves",
+#endif
+#ifdef __CLWB__
+        "clwb",
+#endif
+#ifdef __MWAITX__
+        "mwaitx",
+#endif
+#ifdef __PKU__
+        "pku",
+#endif
+#ifdef __RDPID__
+        "rdpid",
+#endif
+#ifdef __GFNI__
+        "gfni",
+#endif
+#ifdef __SHSTK__
+        "shstk",
+#endif
+#ifdef __VAES__
+        "vaes",
+#endif
+#ifdef __VPCLMULQDQ__
+        "vpclmulqdq",
+#endif
+#ifdef __MOVDIRI__
+        "movdiri",
+#endif
+#ifdef __MOVDIR64B__
+        "movdir64b",
+#endif
+#ifdef __WAITPKG__
+        "waitpkg",
+#endif
+#ifdef __CLDEMOTE__
+        "cldemote",
+#endif
+#ifdef __SERIALIZE__
+        "serialize",
+#endif
+#ifdef __PTWRITE__
+        "ptwrite",
+#endif
+#ifdef __AVX512BF16__
+        "avx512bf16",
+#endif
+#ifdef __AVX512FP16__
+        "avx512fp16",
+#endif
+#ifdef __ENQCMD__
+        "enqcmd",
+#endif
+#ifdef __TSXLDTRK__
+        "tsxldtrk",
+#endif
+#ifdef __AMX_TILE__
+        "amx-tile",
+#endif
+#ifdef __AMX_INT8__
+        "amx-int8",
+#endif
+#ifdef __AMX_BF16__
+        "amx-bf16",
+#endif
+#ifdef __LAHF_SAHF__
+        "sahf",
+#endif
+#ifdef __MOVBE__
+        "movbe",
+#endif
+#ifdef __UINTR__
+        "uintr",
+#endif
+#ifdef __HRESET__
+        "hreset",
+#endif
+#ifdef __KL__
+        "kl",
+#endif
+#ifdef __WIDEKL__
+        "widekl",
+#endif
+#ifdef __AVXVNNI__
+        "avxvnni",
+#endif
+#ifdef __AVXIFMA__
+        "avxifma",
+#endif
+#ifdef __AVXVNNIINT8__
+        "avxvnniint8",
+#endif
+#ifdef __AVXNECONVERT__
+        "avxneconvert",
+#endif
+#ifdef __CMPCCXADD__
+        "cmpccxadd",
+#endif
+#ifdef __AMX_FP16__
+        "amx-fp16",
+#endif
+#ifdef __PREFETCHI__
+        "prefetchi",
+#endif
+#ifdef __RAOINT__
+        "raoint",
+#endif
+#ifdef __AMX_COMPLEX__
+        "amx-complex",
+#endif
+#ifdef __AVXVNNIINT16__
+        "avxvnniint16",
+#endif
+#ifdef __SM3__
+        "sm3",
+#endif
+#ifdef __SHA512__
+        "sha512",
+#endif
+#ifdef __SM4__
+        "sm4",
+#endif
+#ifdef __EVEX512__
+        "evex512",
+#endif
+#ifdef __USER_MSR__
+        "usermsr",
+#endif
+#ifdef __AVX10_1_256__
+        "avx10.1-256",
+#endif
+#ifdef __AVX10_1_512__
+        "avx10.1-512",
+#endif
+#ifdef __APX_F__
+        "apxf",
+#endif
+        NULL };
+
+  if (kind && !gomp_match_selectors (kind, kind_choices))
+    return false;
+  if (arch && !gomp_match_selectors (arch, arch_choices))
+    return false;
+  if (isa && !gomp_match_selectors (isa, isa_choices))
+    return false;
+  return true;
+}
new file mode 100644
@@ -0,0 +1,77 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+   an Nvidia GPU.  */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Evaluate the KIND, ARCH and ISA trait selector sets for the Nvidia PTX
+   target this file is compiled for.  A null selector is not checked;
+   every non-null set must be fully contained in the matching list
+   below.  "sm_30" is always accepted; each higher "sm_NN" entry is
+   accepted when __PTX_SM__ is at least NN0.  Returns true only if all
+   checked sets match.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  static const char *supported_kinds[] = { "gpu", "nohost", NULL };
+  static const char *supported_archs[] = { "nvptx", NULL };
+  static const char *supported_isas[]
+    = {
+        "sm_30",
+#if __PTX_SM__ >= 350
+        "sm_35",
+#endif
+#if __PTX_SM__ >= 530
+        "sm_53",
+#endif
+#if __PTX_SM__ >= 750
+        "sm_75",
+#endif
+#if __PTX_SM__ >= 800
+        "sm_80",
+#endif
+        NULL };
+
+  /* Short-circuit evaluation keeps the kind -> arch -> isa order.  */
+  return ((kind == NULL || gomp_match_selectors (kind, supported_kinds))
+          && (arch == NULL || gomp_match_selectors (arch, supported_archs))
+          && (isa == NULL || gomp_match_selectors (isa, supported_isas)));
+}
@@ -152,6 +152,8 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *,
extern bool GOMP_OFFLOAD_can_run (void *);
extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
+extern bool GOMP_OFFLOAD_evaluate_device (int, const char *, const char *,
+ const char *);
extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
void **, unsigned *, void *);
@@ -1417,6 +1417,7 @@ struct gomp_device_descr
__typeof (GOMP_OFFLOAD_can_run) *can_run_func;
__typeof (GOMP_OFFLOAD_run) *run_func;
__typeof (GOMP_OFFLOAD_async_run) *async_run_func;
+ __typeof (GOMP_OFFLOAD_evaluate_device) *evaluate_device_func;
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
@@ -428,6 +428,11 @@ GOMP_5.1.2 {
GOMP_target_map_indirect_ptr;
} GOMP_5.1.1;
+GOMP_5.1.3 {
+ global:
+ GOMP_evaluate_target_device;
+} GOMP_5.1.2;
+
OACC_2.0 {
global:
acc_get_num_devices;
@@ -6188,9 +6188,10 @@ smaller number. On non-host devices, the value of the
@c has to be implemented; cf. also PR target/105640.
@c For offload devices, add *additionally* gcc/config/*/t-omp-device.
-For the host compiler, @code{kind} always matches @code{host}; for the
-offloading architectures AMD GCN and Nvidia PTX, @code{kind} always matches
-@code{gpu}. For the x86 family of computers, AMD GCN and Nvidia PTX
+For the host compiler, @code{kind} always matches @code{host} and @code{cpu};
+for the offloading architectures AMD GCN and Nvidia PTX, @code{kind}
+always matches @code{gpu} and @code{nohost}.
+For the x86 family of computers, AMD GCN and Nvidia PTX
the following traits are supported in addition; while OpenMP is supported
on more architectures, GCC currently does not match any @code{arch} or
@code{isa} traits for those.
@@ -6207,6 +6208,17 @@ on more architectures, GCC currently does not match any @code{arch} or
@tab See @code{-march=} in ``Nvidia PTX Options''
@end multitable
+For x86, note that the set of matching @code{arch} and @code{isa}
+selectors is determined by command-line options rather than the actual
+hardware. This is particularly true of dynamic selectors, which match
+the options used to build libgomp rather than the options used to
+build user programs (which may also differ between compilation units).
+
+For the @code{target_device} selector on AMD GCN and Nvidia PTX,
+the actual hardware is checked at run time. On AMD GCN, an exact match
+of the @code{isa} selector is required, while on Nvidia PTX lower-numbered
+revisions also match.
+
@node Memory allocation
@section Memory allocation
@@ -337,6 +337,11 @@ extern void GOMP_single_copy_end (void *);
extern void GOMP_scope_start (uintptr_t *);
+/* selector.c */
+
+extern bool GOMP_evaluate_current_device (const char *, const char *,
+ const char *);
+
/* target.c */
extern void GOMP_target (int, void (*) (void *), const void *,
@@ -359,6 +364,9 @@ extern void GOMP_teams (unsigned int, unsigned int);
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
extern void *GOMP_target_map_indirect_ptr (void *);
+extern bool GOMP_evaluate_target_device (int, const char *, const char *,
+ const char *);
+
/* teams.c */
extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
@@ -136,6 +136,16 @@ host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
fn (vars);
}
+/* Device-evaluation hook installed in the host dispatch table.  All four
+   arguments are ignored and the body is marked unreachable, i.e. this
+   hook is not expected to be called.
+   NOTE(review): presumably host evaluation goes through
+   GOMP_evaluate_current_device instead -- confirm against the callers of
+   evaluate_device_func.  */
+static bool
+host_evaluate_device (int device_num __attribute__ ((unused)),
+                      const char *kind __attribute__ ((unused)),
+                      const char *arch __attribute__ ((unused)),
+                      const char *isa __attribute__ ((unused)))
+{
+  __builtin_unreachable ();
+  /* Not reached; present so the function has a return statement.  */
+  return false;
+}
+
static void
host_openacc_exec (void (*fn) (void *),
size_t mapnum __attribute__ ((unused)),
@@ -285,6 +295,7 @@ static struct gomp_device_descr host_dispatch =
.memcpy2d_func = NULL,
.memcpy3d_func = NULL,
.run_func = host_run,
+ .evaluate_device_func = host_evaluate_device,
.mem_map = { NULL },
.mem_map_rev = { NULL },
@@ -4412,6 +4412,58 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
GOMP_PLUGIN_target_task_completion, async_data);
}
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Match the selector set SELECTORS against the single value ISA.  Only
+   one match is possible, so the first name must map (via isa_code) to
+   ISA and no second name may follow it.  */
+static bool
+gomp_match_isa (const char *selectors, gcn_isa isa)
+{
+  /* Position just past the first name's terminator; it holds the final
+     '\0' when the set has exactly one name.  It is only read once the
+     first name has been accepted.  */
+  const char *after_first = selectors + strlen (selectors) + 1;
+
+  return isa_code (selectors) == isa && *after_first == '\0';
+}
+
+/* Plugin hook: evaluate the KIND, ARCH and ISA trait selector sets
+   against device DEVICE_NUM.  A null selector is not checked.  KIND and
+   ARCH are matched against fixed lists; ISA must name exactly the ISA
+   recorded for the device's agent.
+   NOTE(review): the agent pointer is dereferenced without a null check
+   -- confirm get_agent_info cannot fail for DEVICE_NUM here.  */
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+                              const char *arch, const char *isa)
+{
+  static const char *supported_kinds[] = { "gpu", "nohost", NULL };
+  static const char *supported_archs[] = { "gcn", "amdgcn", NULL };
+  struct agent_info *agent = get_agent_info (device_num);
+
+  /* Short-circuit evaluation keeps the kind -> arch -> isa order, so the
+     agent is only read when an ISA check is actually needed.  */
+  return ((kind == NULL || gomp_match_selectors (kind, supported_kinds))
+          && (arch == NULL || gomp_match_selectors (arch, supported_archs))
+          && (isa == NULL || gomp_match_isa (isa, agent->device_isa)));
+}
+
/* }}} */
/* {{{ OpenACC Plugin API */
@@ -319,6 +319,7 @@ struct ptx_device
int max_threads_per_block;
int max_threads_per_multiprocessor;
int default_dims[GOMP_DIM_MAX];
+ int compute_major, compute_minor;
/* Length as used by the CUDA Runtime API ('struct cudaDeviceProp'). */
char name[256];
@@ -551,6 +552,14 @@ nvptx_open_device (int n)
for (int i = 0; i != GOMP_DIM_MAX; i++)
ptx_dev->default_dims[i] = 0;
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
+ ptx_dev->compute_major = pi;
+
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
+ ptx_dev->compute_minor = pi;
+
CUDA_CALL_ERET (NULL, cuDeviceGetName, ptx_dev->name, sizeof ptx_dev->name,
dev);
@@ -2489,3 +2498,76 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
}
/* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Match the selector set SELECTORS against the single string CHOICE.
+   Only one match is possible here, so SELECTORS must consist of exactly
+   one name and that name must equal CHOICE.  Returns true on such a
+   match and false otherwise.  */
+static bool
+gomp_match_selector (const char *selectors, const char *choice)
+{
+  /* strcmp returns zero on equality: reject a first name that differs
+     from CHOICE.  */
+  if (strcmp (selectors, choice) != 0)
+    return false;
+  /* The byte after the first name's terminator must be the final '\0';
+     anything else means a second name follows.  */
+  if (*(selectors + strlen (selectors) + 1) != '\0')
+    return false;
+  return true;
+}
+
+/* Return true from the enclosing function if ISA is exactly the single
+   selector "sm_<MAJOR><MINOR>" and DEVICE's compute capability is at
+   least MAJOR.MINOR.  The capability is compared as a (major, minor)
+   pair, so a device with a higher major revision matches regardless of
+   its minor revision.  Expects `device' and `isa' to be in scope.  */
+#define CHECK_ISA(major, minor)                                         \
+  do                                                                    \
+    {                                                                   \
+      if ((device->compute_major > (major)                              \
+           || (device->compute_major == (major)                         \
+               && device->compute_minor >= (minor)))                    \
+          && gomp_match_selector (isa, "sm_" #major #minor))            \
+        return true;                                                    \
+    }                                                                   \
+  while (0)
+
+/* Plugin hook: evaluate the KIND, ARCH and ISA trait selector sets
+   against device DEVICE_NUM.  A null selector is not checked.  KIND and
+   ARCH are matched against fixed lists.  ISA must be a single "sm_NN"
+   name from the list below whose revision does not exceed the compute
+   capability recorded for the device.  */
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+                              const char *arch, const char *isa)
+{
+  static const char *kind_choices[] = { "gpu", "nohost", NULL };
+  static const char *arch_choices[] = { "nvptx", NULL };
+  if (kind && !gomp_match_selectors (kind, kind_choices))
+    return false;
+
+  if (arch && !gomp_match_selectors (arch, arch_choices))
+    return false;
+
+  if (!isa)
+    return true;
+
+  struct ptx_device *device = ptx_devices[device_num];
+
+  /* Each check returns true on the first revision that ISA names and the
+     device supports.  */
+  CHECK_ISA (3, 0);
+  CHECK_ISA (3, 5);
+  CHECK_ISA (3, 7);
+  CHECK_ISA (5, 0);
+  CHECK_ISA (5, 2);
+  CHECK_ISA (5, 3);
+  CHECK_ISA (6, 0);
+  CHECK_ISA (6, 1);
+  CHECK_ISA (6, 2);
+  CHECK_ISA (7, 0);
+  CHECK_ISA (7, 2);
+  CHECK_ISA (7, 5);
+  CHECK_ISA (8, 0);
+  CHECK_ISA (8, 6);
+
+  /* ISA named an unknown revision, one newer than the device, or more
+     than one selector.  */
+  return false;
+}
new file mode 100644
@@ -0,0 +1,64 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a placeholder implementation of
+ GOMP_evaluate_current_device. */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* SELECTORS is a set of trait property names packed into one buffer:
+   each name is terminated by '\0' and one additional '\0' follows the
+   last name.  CHOICES is an array of strings terminated by a null
+   pointer.  Return true if every name in SELECTORS compares equal to
+   some entry of CHOICES (trivially true for an empty set), and false
+   as soon as one name has no equal entry.  */
+static bool
+gomp_match_selectors (const char *selectors, const char **choices)
+{
+  for (const char *name = selectors; *name != '\0';
+       name += strlen (name) + 1)
+    {
+      /* Walk CHOICES until NAME is found or the null sentinel is hit.  */
+      const char **candidate = choices;
+      while (*candidate != NULL && strcmp (name, *candidate) != 0)
+        candidate++;
+
+      if (*candidate == NULL)
+        return false;
+    }
+
+  return true;
+}
+
+/* Placeholder implementation: report whether the KIND, ARCH and ISA
+   trait selector sets all match.  KIND, when non-null, may contain only
+   "cpu" and/or "host".  No ARCH or ISA names are recognized, so any
+   non-null ARCH or ISA makes the match fail.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  static const char *host_kinds[] = { "cpu", "host", NULL };
+
+  bool kind_ok = kind == NULL || gomp_match_selectors (kind, host_kinds);
+
+  return kind_ok && arch == NULL && isa == NULL;
+}
@@ -5109,6 +5109,27 @@ omp_pause_resource_all (omp_pause_resource_t kind)
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
+/* Evaluate the KIND, ARCH and ISA trait selector sets against device
+   DEVICE_NUM and return whether they all match.  A KIND whose first
+   name is "any" is treated as if no KIND had been given.  When
+   resolve_device yields no device descriptor the selectors are
+   evaluated by GOMP_evaluate_current_device; otherwise the descriptor's
+   evaluate_device_func hook decides.  Both the request and the result
+   are reported through gomp_debug.  */
+bool
+GOMP_evaluate_target_device (int device_num, const char *kind,
+                             const char *arch, const char *isa)
+{
+  bool result;
+  if (kind && strcmp (kind, "any") == 0)
+    kind = NULL;
+
+  /* Any selector may be null here (KIND was just cleared for "any"), and
+     passing a null pointer for %s is undefined, so substitute a
+     placeholder.  Only the first name of each set is printed.  No
+     newline: the result is appended to the same line below.  */
+  gomp_debug (1, "%s: device_num = %d, kind=%s, arch=%s, isa=%s",
+              __FUNCTION__, device_num,
+              kind ? kind : "(null)",
+              arch ? arch : "(null)",
+              isa ? isa : "(null)");
+
+  struct gomp_device_descr *devicep = resolve_device (device_num, true);
+  if (devicep == NULL)
+    result = GOMP_evaluate_current_device (kind, arch, isa);
+  else
+    result = devicep->evaluate_device_func (device_num, kind, arch, isa);
+
+  gomp_debug (1, " -> %s\n", result ? "true" : "false");
+  return result;
+}
+
#ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE. Name of plugin is passed
@@ -5161,6 +5182,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (free);
DLSYM (dev2host);
DLSYM (host2dev);
+ DLSYM (evaluate_device);
DLSYM_OPT (memcpy2d, memcpy2d);
DLSYM_OPT (memcpy3d, memcpy3d);
device->capabilities = device->get_caps_func ();