Hi,
In PR target/104364, two problems were reported:
- in -muniform-simt mode, an atom.cas insn is no longer executed in the
"master lane" only.
- in -msoft-stack mode, an __atomic_compare_exchange_n on stack memory is
translated as if it accessed .local memory, which is not the case.
Fix these by:
- ensuring that all insns with atomic attribute are also predicable, such
that the validate_change in nvptx_reorg_uniform_simt will succeed, and
asserting that it does, and
- guarding the local atomics implementation with a new function
nvptx_mem_local_p that correctly handles msoft-stack.
Tested on x86_64 with nvptx accelerator.
Committed to trunk.
Thanks,
- Tom
[nvptx] Fix .local atomic regressions
gcc/ChangeLog:
2022-02-04 Tom de Vries <tdevries@suse.de>
PR target/104364
* config/nvptx/nvptx-protos.h (nvptx_mem_local_p): Declare.
* config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Assert that
change is validated.
(nvptx_mem_local_p): New function.
* config/nvptx/nvptx.md: Use nvptx_mem_local_p.
(define_c_enum "unspecv"): Add UNSPECV_CAS_LOCAL.
(define_insn "atomic_compare_and_swap<mode>_1_local"): New
non-atomic, non-predicable define_insn, factored out of ...
(define_insn "atomic_compare_and_swap<mode>_1"): ... here.
Make predicable again.
(define_expand "atomic_compare_and_swap<mode>"): Use
atomic_compare_and_swap<mode>_1_local.
gcc/testsuite/ChangeLog:
2022-02-04 Tom de Vries <tdevries@suse.de>
PR target/104364
* gcc.target/nvptx/softstack-2.c: New test.
* gcc.target/nvptx/uniform-simt-1.c: New test.
---
gcc/config/nvptx/nvptx-protos.h | 1 +
gcc/config/nvptx/nvptx.cc | 25 +++++++++-
gcc/config/nvptx/nvptx.md | 63 +++++++++++++------------
gcc/testsuite/gcc.target/nvptx/softstack-2.c | 11 +++++
gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c | 18 +++++++
5 files changed, 87 insertions(+), 31 deletions(-)
@@ -59,5 +59,6 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
extern const char *nvptx_output_simt_exit (rtx);
extern const char *nvptx_output_red_partition (rtx, rtx);
extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int);
+extern bool nvptx_mem_local_p (rtx);
#endif
#endif
@@ -3150,7 +3150,8 @@ nvptx_reorg_uniform_simt ()
rtx pred = nvptx_get_unisimt_predicate ();
pred = gen_rtx_NE (BImode, pred, const0_rtx);
pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
- validate_change (insn, &PATTERN (insn), pat, false);
+ bool changed_p = validate_change (insn, &PATTERN (insn), pat, false);
+ gcc_assert (changed_p);
}
}
@@ -6894,6 +6895,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type)
return default_libc_has_function (fn_class, type);
}
+/* Return true if MEM is known to access local memory.  MEM must be a
+   MEM rtx.  Only an address whose base is a frame-related pointer
+   register qualifies, and not when using -msoft-stack, because there
+   frame-related doesn't mean local.  */
+
+bool
+nvptx_mem_local_p (rtx mem)
+{
+  gcc_assert (GET_CODE (mem) == MEM);
+
+  struct address_info addr;
+  decompose_mem_address (&addr, mem);
+
+  /* Without a base register there is nothing to classify.  */
+  if (addr.base == NULL || !REG_P (*addr.base))
+    return false;
+
+  if (!REGNO_PTR_FRAME_P (REGNO (*addr.base)))
+    return false;
+
+  /* Frame-related doesn't mean local when the soft stack is in use.  */
+  return !TARGET_SOFT_STACK;
+}
+
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -54,6 +54,7 @@ (define_c_enum "unspec" [
(define_c_enum "unspecv" [
UNSPECV_LOCK
UNSPECV_CAS
+ UNSPECV_CAS_LOCAL
UNSPECV_XCHG
UNSPECV_BARSYNC
UNSPECV_WARPSYNC
@@ -1771,8 +1772,14 @@ (define_expand "atomic_compare_and_swap<mode>"
(match_operand:SI 7 "const_int_operand")] ;; failure model
""
{
- emit_insn (gen_atomic_compare_and_swap<mode>_1
- (operands[1], operands[2], operands[3], operands[4], operands[6]));
+ if (nvptx_mem_local_p (operands[2]))
+ emit_insn (gen_atomic_compare_and_swap<mode>_1_local
+ (operands[1], operands[2], operands[3], operands[4],
+ operands[6]));
+ else
+ emit_insn (gen_atomic_compare_and_swap<mode>_1
+ (operands[1], operands[2], operands[3], operands[4],
+ operands[6]));
rtx cond = gen_reg_rtx (BImode);
emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
@@ -1780,23 +1787,18 @@ (define_expand "atomic_compare_and_swap<mode>"
DONE;
})
-(define_insn "atomic_compare_and_swap<mode>_1"
+(define_insn "atomic_compare_and_swap<mode>_1_local"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(unspec_volatile:SDIM
[(match_operand:SDIM 1 "memory_operand" "+m")
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
(match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
(match_operand:SI 4 "const_int_operand")]
- UNSPECV_CAS))
+ UNSPECV_CAS_LOCAL))
(set (match_dup 1)
- (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+ (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
- {
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1807,13 +1809,26 @@ (define_insn "atomic_compare_and_swap<mode>_1"
output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
output_asm_insn ("}", NULL);
return "";
- }
+ }
+ [(set_attr "predicable" "false")])
+
+(define_insn "atomic_compare_and_swap<mode>_1"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (unspec_volatile:SDIM
+ [(match_operand:SDIM 1 "memory_operand" "+m")
+ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPECV_CAS))
+ (set (match_dup 1)
+ (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+ ""
+ {
const char *t
- = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
+ = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
return nvptx_output_atomic_insn (t, operands, 1, 4);
}
- [(set_attr "atomic" "true")
- (set_attr "predicable" "false")])
+ [(set_attr "atomic" "true")])
(define_insn "atomic_exchange<mode>"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
@@ -1825,10 +1840,7 @@ (define_insn "atomic_exchange<mode>"
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1855,10 +1867,7 @@ (define_insn "atomic_fetch_add<mode>"
(match_dup 1))]
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1888,10 +1897,7 @@ (define_insn "atomic_fetch_addsf"
(match_dup 1))]
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1924,10 +1930,7 @@ (define_insn "atomic_fetch_<logic><mode>"
(match_dup 1))]
"<MODE>mode == SImode || TARGET_SM35"
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -msoft-stack" } */
+
+int
+f (void)
+{
+ int a = 0;
+ return __sync_lock_test_and_set (&a, 1);
+}
+/* PR target/104364: with -msoft-stack the exchange on the stack variable
+   a must show up as exactly one atom.exch in the assembler output.  */
+/* { dg-final { scan-assembler-times "atom.exch" 1 } } */
+
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -muniform-simt" } */
+
+enum memmodel
+{
+ MEMMODEL_RELAXED = 0,
+};
+
+int a = 0;
+
+int
+f (void)
+{
+ int expected = 1;
+ return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED,
+ MEMMODEL_RELAXED);
+}
+/* PR target/104364: with -muniform-simt the compare-and-swap on the
+   global a must show up exactly once in the assembler output, as an
+   atom.global.cas carrying an @%r<N> predicate.  */
+/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */