[COMMITTED,BPF] bpf: do not emit BPF non-fetching atomic instructions

Message ID 20240805161033.18249-1-jose.marchesi@oracle.com
State Committed
Commit c26534d21159dd4c5d1472f0050b65e148161691
Headers
Series [COMMITTED,BPF] bpf: do not emit BPF non-fetching atomic instructions |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm warning Patch is already merged
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 warning Patch is already merged

Commit Message

Jose E. Marchesi Aug. 5, 2024, 4:10 p.m. UTC
  When GCC finds a call to one of the __atomic_OP_fetch built-ins in
which the return value is not used it optimizes it into the
corresponding non-fetching atomic operation.  Up to now we had
definitions in gcc/config/bpf/atomic.md to implement both atomic_OP
and atomic_fetch_OP sets of insns:

  atomic_add -> aadd (aka xadd)
  atomic_and -> aand
  atomic_or  -> aor
  atomic_xor -> axor

  atomic_fetch_add -> afadd
  atomic_fetch_and -> afand
  atomic_fetch_or  -> afor
  atomic_fetch_xor -> afxor

This was not correct, because as it happens the non-fetching BPF
atomic instructions imply different memory ordering semantics than the
fetching BPF atomic instructions, and they cannot be used
interchangeably, as it would be expected.

This patch modifies config/bpf/atomic.md in order to not define the
atomic_{add,and,or,xor} insns.  This makes GCC to implement them in
terms of the corresponding fetching operations; this is less
efficient, but correct.  It also updates the expected results in the
corresponding tests, which are also updated to cover cases where the
value resulting from the __atomic_fetch_* operations is actually used.

Tested in bpf-unknown-none target in x86_64-linux-gnu host.

gcc/ChangeLog

	* config/bpf/atomic.md ("atomic_add<AMO:mode>"): Remove insn.
	("atomic_and<AMO:mode>"): Likewise
	("atomic_or<AMO:mode>"): Likewise.
	("atomic_xor<AMO:mode>"): Likewise.

gcc/testsuite/ChangeLog

	* gcc.target/bpf/atomic-op-1.c (test_used_atomic_add): New
	function.
	(test_used_atomic_sub): Likewise.
	(test_used_atomic_and): Likewise.
	(test_used_atomic_nand): Likewise.
	(test_used_atomic_or): Likewise.
	(test_used_atomic_xor): Likewise.
	* gcc.target/bpf/atomic-op-2.c (test_used_atomic_add): Likewise.
	(test_used_atomic_sub): Likewise.
	(test_used_atomic_and): Likewise.
	(test_used_atomic_nand): Likewise.
	(test_used_atomic_or): Likewise.
	(test_used_atomic_xor): Likewise.
	* gcc.target/bpf/sync-fetch-and-add.c: Expected results updated.
---
 gcc/config/bpf/atomic.md                      | 59 +++++--------------
 gcc/testsuite/gcc.target/bpf/atomic-op-1.c    | 53 +++++++++++++++--
 gcc/testsuite/gcc.target/bpf/atomic-op-2.c    | 53 +++++++++++++++--
 .../gcc.target/bpf/sync-fetch-and-add.c       |  4 +-
 4 files changed, 111 insertions(+), 58 deletions(-)
  

Patch

diff --git a/gcc/config/bpf/atomic.md b/gcc/config/bpf/atomic.md
index be4511bb51b..4e94c0352fe 100644
--- a/gcc/config/bpf/atomic.md
+++ b/gcc/config/bpf/atomic.md
@@ -22,50 +22,21 @@  (define_mode_iterator AMO [SI DI])
 
 ;;; Plain atomic modify operations.
 
-;; Non-fetching atomic add predates all other BPF atomic insns.
-;; Use xadd{w,dw} for compatibility with older GAS without support
-;; for v3 atomics.  Newer GAS supports "aadd[32]" in line with the
-;; other atomic operations.
-(define_insn "atomic_add<AMO:mode>"
-  [(set (match_operand:AMO 0 "memory_operand" "+m")
-        (unspec_volatile:AMO
-         [(plus:AMO (match_dup 0)
-                    (match_operand:AMO 1 "register_operand" "r"))
-          (match_operand:SI 2 "const_int_operand")] ;; Memory model.
-         UNSPEC_AADD))]
-  ""
-  "{xadd<mop>\t%0,%1|lock *(<smop> *)%w0 += %w1}"
-  [(set_attr "type" "atomic")])
-
-(define_insn "atomic_and<AMO:mode>"
-  [(set (match_operand:AMO 0 "memory_operand" "+m")
-        (unspec_volatile:AMO
-         [(and:AMO (match_dup 0)
-                   (match_operand:AMO 1 "register_operand" "r"))
-          (match_operand:SI 2 "const_int_operand")] ;; Memory model.
-         UNSPEC_AAND))]
-  "bpf_has_v3_atomics"
-  "{aand<msuffix>\t%0,%1|lock *(<smop> *)%w0 &= %w1}")
-
-(define_insn "atomic_or<AMO:mode>"
-  [(set (match_operand:AMO 0 "memory_operand" "+m")
-        (unspec_volatile:AMO
-         [(ior:AMO (match_dup 0)
-                   (match_operand:AMO 1 "register_operand" "r"))
-          (match_operand:SI 2 "const_int_operand")] ;; Memory model.
-         UNSPEC_AOR))]
-  "bpf_has_v3_atomics"
-  "{aor<msuffix>\t%0,%1|lock *(<smop> *)%w0 %|= %w1}")
-
-(define_insn "atomic_xor<AMO:mode>"
-  [(set (match_operand:AMO 0 "memory_operand" "+m")
-        (unspec_volatile:AMO
-         [(xor:AMO (match_dup 0)
-                   (match_operand:AMO 1 "register_operand" "r"))
-          (match_operand:SI 2 "const_int_operand")] ;; Memory model.
-         UNSPEC_AXOR))]
-  "bpf_has_v3_atomics"
-  "{axor<msuffix>\t%0,%1|lock *(<smop> *)%w0 ^= %w1}")
+;; The BPF instruction set provides non-fetching atomic instructions
+;; that could be used to implement the corresponding named insns:
+;;
+;;  atomic_add -> aadd (aka xadd)
+;;  atomic_and -> aand 
+;;  atomic_or  -> aor
+;;  atomic_xor -> axor
+;;
+;; However, we are not including insns for these here because the
+;; non-fetching BPF atomic instruction imply different memory ordering
+;; semantics than the fetching BPF atomic instruction used to
+;; implement the atomic_fetch_* insns below (afadd, afand, afor,
+;; afxor) and they cannot be used interchangeably, as it is expected
+;; by GCC when it uses a non-fetching variant as an optimization of a
+;; fetching operation where the returned value is not used.
 
 ;;; Feching (read-modify-store) versions of atomic operations.
 
diff --git a/gcc/testsuite/gcc.target/bpf/atomic-op-1.c b/gcc/testsuite/gcc.target/bpf/atomic-op-1.c
index 5c87dcbffe6..b7d3d83fea7 100644
--- a/gcc/testsuite/gcc.target/bpf/atomic-op-1.c
+++ b/gcc/testsuite/gcc.target/bpf/atomic-op-1.c
@@ -1,7 +1,12 @@ 
-/* Test 64-bit non-fetch atomic operations.  */
+/* Test 64-bit fetch and non-fetch atomic operations.  */
 /* { dg-do compile } */
 /* { dg-options "-mv3-atomics -O2 -masm=normal" } */
 
+/* Note that GCC optimizes __atomic_add_fetch calls whose return value is not
+   used into non-fetching operations that, in BPF, generate fetching
+   instructions anyway.  See note in gcc/config/bpf/atomic.md on this
+   regard.  */
+
 long val;
 
 void
@@ -10,40 +15,76 @@  test_atomic_add (long x)
   __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_add (long x)
+{
+  return __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_sub (long x)
 {
   __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_sub (long x)
+{
+  return __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_and (long x)
 {
   __atomic_and_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_and (long x)
+{
+  return __atomic_and_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_nand (long x)
 {
   __atomic_nand_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_nand (long x)
+{
+  return __atomic_nand_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_or (long x)
 {
   __atomic_or_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_or (long x)
+{
+  return __atomic_or_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_xor (long x)
 {
   __atomic_xor_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
-/* sub implemented in terms of add, and we output xadd to support older GAS.  */
-/* { dg-final { scan-assembler-times "xadddw\t" 2 } } */
-/* { dg-final { scan-assembler-times "aand\t" 1 } } */
+long
+test_used_atomic_xor (long x)
+{
+  return __atomic_xor_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
+/* sub implemented in terms of add.  */
+/* { dg-final { scan-assembler-times "afadd\t" 4 } } */
+/* { dg-final { scan-assembler-times "afand\t" 2 } } */
 /* nand must use an exchange loop */
 /* { dg-final { scan-assembler "acmp\t" } } */
-/* { dg-final { scan-assembler-times "aor\t" 1 } } */
-/* { dg-final { scan-assembler-times "axor\t" 1 } } */
+/* { dg-final { scan-assembler-times "afor\t" 2 } } */
+/* { dg-final { scan-assembler-times "afxor\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/bpf/atomic-op-2.c b/gcc/testsuite/gcc.target/bpf/atomic-op-2.c
index 8331d33465f..902142db164 100644
--- a/gcc/testsuite/gcc.target/bpf/atomic-op-2.c
+++ b/gcc/testsuite/gcc.target/bpf/atomic-op-2.c
@@ -1,7 +1,12 @@ 
-/* Test 32-bit non-fetch atomic operations.  */
+/* Test 32-bit fetch and non-fetch atomic operations.  */
 /* { dg-do compile } */
 /* { dg-options "-mv3-atomics -O2 -masm=normal" } */
 
+/* Note that GCC optimizes __atomic_add_fetch calls whose return value is not
+   used into non-fetching operations that, in BPF, generate fetching
+   instructions anyway.  See note in gcc/config/bpf/atomic.md on this
+   regard.  */
+
 int val;
 
 void
@@ -10,40 +15,76 @@  test_atomic_add (int x)
   __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_add (int x)
+{
+  return __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_sub (int x)
 {
   __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_sub (int x)
+{
+  return __atomic_add_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_and (int x)
 {
   __atomic_and_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_and (int x)
+{
+  return __atomic_and_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_nand (int x)
 {
   __atomic_nand_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_nand (int x)
+{
+  return __atomic_nand_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_or (int x)
 {
   __atomic_or_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
+long
+test_used_atomic_or (int x)
+{
+  return __atomic_or_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
 void
 test_atomic_xor (int x)
 {
   __atomic_xor_fetch (&val, x, __ATOMIC_ACQUIRE);
 }
 
-/* sub implemented in terms of add, and we output xadd to support older GAS.  */
-/* { dg-final { scan-assembler-times "xaddw\t" 2 } } */
-/* { dg-final { scan-assembler-times "aand32\t" 1 } } */
+long
+test_used_atomic_xor (int x)
+{
+  return __atomic_xor_fetch (&val, x, __ATOMIC_ACQUIRE);
+}
+
+/* sub implemented in terms of add.  */
+/* { dg-final { scan-assembler-times "afadd32" 4 } } */
+/* { dg-final { scan-assembler-times "afand32\t" 2 } } */
 /* nand must use an exchange loop */
 /* { dg-final { scan-assembler "acmp32\t" } } */
-/* { dg-final { scan-assembler-times "aor32\t" 1 } } */
-/* { dg-final { scan-assembler-times "axor32\t" 1 } } */
+/* { dg-final { scan-assembler-times "afor32\t" 2 } } */
+/* { dg-final { scan-assembler-times "afxor32\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/bpf/sync-fetch-and-add.c b/gcc/testsuite/gcc.target/bpf/sync-fetch-and-add.c
index 6902aabd337..fe4f81a3f06 100644
--- a/gcc/testsuite/gcc.target/bpf/sync-fetch-and-add.c
+++ b/gcc/testsuite/gcc.target/bpf/sync-fetch-and-add.c
@@ -11,5 +11,5 @@  foo ()
   __sync_fetch_and_add((int *)val, (int)delta);
 }
 
-/* { dg-final { scan-assembler "xadddw\t.*" } } */
-/* { dg-final { scan-assembler "xaddw\t.*" } } */
+/* { dg-final { scan-assembler "afadd\t.*" } } */
+/* { dg-final { scan-assembler "afadd32\t.*" } } */