[v5] Improve integer bit test on __atomic_fetch_[or|and]_* returns
Commit Message
Sorry for the slow reply:
Here is the update according to the comments:
1. Define a new match function in match.pd.
2. Adjust the code as suggested below:
>> + gsi_remove (gsip, true);
>> + var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
>
>instead of building a GENERIC NOP you could use the
>
>gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
>
>overload.
>You could use
>
> gimple_seq stmts = NULL;
> mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> use_bool ?
>build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
>
>> if (throws)
>> {
>> - gsi_insert_on_edge_immediate (e, g);
>
>gsi_insert_seq_on_edge_immediate (e, stmts);
>
>to simplify this. The conversion will only be generated if necessary.
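Putting the suggestion together, the updated code now builds the statements via a
gimple_seq roughly as follows (a sketch against the surrounding
optimize_atomic_bit_test_and locals lhs, mask, new_lhs, use_bool, throws, e and
gsip, not the exact hunk in the patch):

  gimple_seq stmts = NULL;
  tree mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
  new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
                          use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
                                   : mask_convert);
  if (throws)
    gsi_insert_seq_on_edge_immediate (e, stmts);
  else
    gsi_insert_seq_after (gsip, stmts, GSI_NEW_STMT); /* or whichever
                                                         gsi_insert_seq_* matches
                                                         the original insertion
                                                         point.  */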
Bootstrapped and regtested on x86-64-pc-linux-gnu{-m32,}.
Ok for trunk?
Improve integer bit test on __atomic_fetch_[or|and]_* returns
commit adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek <jakub@redhat.com>
Date: Tue May 3 13:37:25 2016 +0200
re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
with lock bts/btr/btc by turning
mask_2 = 1 << cnt_1;
_4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
_5 = _4 & mask_2;
into
_4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
_5 = _4;
and
mask_6 = 1 << bit_5(D);
_1 = ~mask_6;
_2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
_3 = _2 & mask_6;
_4 = _3 != 0;
into
mask_6 = 1 << bit_5(D);
_1 = ~mask_6;
_11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
_4 = _11 != 0;
But it failed to optimize many equivalent, but slightly different cases:
1.
_1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
_4 = (_Bool) _1;
2.
_1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
_4 = (_Bool) _1;
3.
_1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
_7 = ~_1;
_5 = (_Bool) _7;
4.
_1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
_7 = ~_1;
_5 = (_Bool) _7;
5.
_1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
_2 = (int) _1;
_7 = ~_2;
_5 = (_Bool) _7;
6.
_1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
_2 = (int) _1;
_7 = ~_2;
_5 = (_Bool) _7;
7.
_1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
_5 = (signed int) _1;
_4 = _5 < 0;
8.
_1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
_5 = (signed int) _1;
_4 = _5 < 0;
9.
_1 = 1 << bit_4(D);
mask_5 = (unsigned int) _1;
_2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
_3 = _2 & mask_5;
10.
mask_7 = 1 << bit_6(D);
_1 = ~mask_7;
_2 = (unsigned int) _1;
_3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
_4 = (int) _3;
_5 = _4 & mask_7;
We make
mask_2 = 1 << cnt_1;
_4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
_5 = _4 & mask_2;
and
mask_6 = 1 << bit_5(D);
_1 = ~mask_6;
_2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
_3 = _2 & mask_6;
_4 = _3 != 0;
the canonical forms for this optimization and transform cases 1-8 to the
equivalent canonical form. For cases 9 and 10, we simply remove the cast
before __atomic_fetch_or_4/__atomic_fetch_and_4, yielding
_1 = 1 << bit_4(D);
_2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
_3 = _2 & _1;
and
mask_7 = 1 << bit_6(D);
_1 = ~mask_7;
_3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
_6 = _3 & mask_7;
_5 = (int) _6;
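For illustration (not one of the new tests), cases 1 and 2 correspond to source
like the following, which with this patch compiles to a single lock btsl /
lock btrl instead of a cmpxchg loop:

  #include <stdatomic.h>
  #include <stdbool.h>

  bool
  set_bit0 (_Atomic int *p)
  {
    /* Bit 0: the mask-and-test collapses to case 1 above.  */
    return atomic_fetch_or_explicit (p, 1, memory_order_relaxed) & 1;
  }

  bool
  clear_bit0 (_Atomic int *p)
  {
    /* Bit 0: the mask-and-test collapses to case 2 above.  */
    return atomic_fetch_and_explicit (p, ~1, memory_order_relaxed) & 1;
  }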
2021-11-04 H.J. Lu <hongjiu.lu@intel.com>
Hongtao Liu <hongtao.liu@intel.com>
gcc/
PR middle-end/102566
* match.pd (nop_atomic_bit_test_and_p): New match.
* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
(gimple_nop_atomic_bit_test_and_p): New prototype.
(optimize_atomic_bit_test_and): Transform equivalent, but slightly
different cases to their canonical forms.
gcc/testsuite/
PR middle-end/102566
* g++.target/i386/pr102566-1.C: New test.
* g++.target/i386/pr102566-2.C: Likewise.
* g++.target/i386/pr102566-3.C: Likewise.
* g++.target/i386/pr102566-4.C: Likewise.
* g++.target/i386/pr102566-5a.C: Likewise.
* g++.target/i386/pr102566-5b.C: Likewise.
* g++.target/i386/pr102566-6a.C: Likewise.
* g++.target/i386/pr102566-6b.C: Likewise.
* gcc.target/i386/pr102566-1a.c: Likewise.
* gcc.target/i386/pr102566-1b.c: Likewise.
* gcc.target/i386/pr102566-2.c: Likewise.
* gcc.target/i386/pr102566-3a.c: Likewise.
* gcc.target/i386/pr102566-3b.c: Likewise.
* gcc.target/i386/pr102566-4.c: Likewise.
* gcc.target/i386/pr102566-5.c: Likewise.
* gcc.target/i386/pr102566-6.c: Likewise.
* gcc.target/i386/pr102566-7.c: Likewise.
* gcc.target/i386/pr102566-8a.c: Likewise.
* gcc.target/i386/pr102566-8b.c: Likewise.
* gcc.target/i386/pr102566-9a.c: Likewise.
* gcc.target/i386/pr102566-9b.c: Likewise.
* gcc.target/i386/pr102566-10a.c: Likewise.
* gcc.target/i386/pr102566-10b.c: Likewise.
* gcc.target/i386/pr102566-11.c: Likewise.
* gcc.target/i386/pr102566-12.c: Likewise.
* gcc.target/i386/pr102566-13.c: New test.
* gcc.target/i386/pr102566-14.c: New test.
---
gcc/match.pd | 125 +++++
gcc/testsuite/g++.target/i386/pr102566-1.C | 31 ++
gcc/testsuite/g++.target/i386/pr102566-2.C | 31 ++
gcc/testsuite/g++.target/i386/pr102566-3.C | 31 ++
gcc/testsuite/g++.target/i386/pr102566-4.C | 29 ++
gcc/testsuite/g++.target/i386/pr102566-5a.C | 31 ++
gcc/testsuite/g++.target/i386/pr102566-5b.C | 31 ++
gcc/testsuite/g++.target/i386/pr102566-6a.C | 31 ++
gcc/testsuite/g++.target/i386/pr102566-6b.C | 31 ++
gcc/testsuite/gcc.target/i386/pr102566-10a.c | 15 +
gcc/testsuite/gcc.target/i386/pr102566-10b.c | 15 +
gcc/testsuite/gcc.target/i386/pr102566-11.c | 28 ++
gcc/testsuite/gcc.target/i386/pr102566-12.c | 28 ++
gcc/testsuite/gcc.target/i386/pr102566-13.c | 66 +++
gcc/testsuite/gcc.target/i386/pr102566-14.c | 65 +++
gcc/testsuite/gcc.target/i386/pr102566-1a.c | 188 ++++++++
gcc/testsuite/gcc.target/i386/pr102566-1b.c | 107 +++++
gcc/testsuite/gcc.target/i386/pr102566-2.c | 32 ++
gcc/testsuite/gcc.target/i386/pr102566-3a.c | 15 +
gcc/testsuite/gcc.target/i386/pr102566-3b.c | 15 +
gcc/testsuite/gcc.target/i386/pr102566-4.c | 15 +
gcc/testsuite/gcc.target/i386/pr102566-5.c | 15 +
gcc/testsuite/gcc.target/i386/pr102566-6.c | 32 ++
gcc/testsuite/gcc.target/i386/pr102566-7.c | 30 ++
gcc/testsuite/gcc.target/i386/pr102566-8a.c | 32 ++
gcc/testsuite/gcc.target/i386/pr102566-8b.c | 32 ++
gcc/testsuite/gcc.target/i386/pr102566-9a.c | 32 ++
gcc/testsuite/gcc.target/i386/pr102566-9b.c | 32 ++
gcc/tree-ssa-ccp.c | 452 +++++++++++++++++--
29 files changed, 1575 insertions(+), 42 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
Comments
On Thu, Nov 4, 2021 at 2:28 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Sorry for the slow reply:
Likewise ;)
> Here is the update according to the comments:
> 1. Define a new match function in match.pd.
> 2. Adjust the code as suggested below:
> >> + gsi_remove (gsip, true);
> >> + var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
> >
> >instead of building a GENERIC NOP you could use the
> >
> >gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
> >
> >overload.
> >You could use
> >
> > gimple_seq stmts = NULL;
> > mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> > new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> > use_bool ?
> >build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
> >
> >> if (throws)
> >> {
> >> - gsi_insert_on_edge_immediate (e, g);
> >
> >gsi_insert_seq_on_edge_immediate (e, stmts);
> >
> >to simplify this. The conversion will only be generated if necessary.
>
> Bootstrapped and regtested on x86-64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> Improve integer bit test on __atomic_fetch_[or|and]_* returns
>
> commit adedd5c173388ae505470df152b9cb3947339566
> Author: Jakub Jelinek <jakub@redhat.com>
> Date: Tue May 3 13:37:25 2016 +0200
>
> re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
>
> optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> with lock bts/btr/btc by turning
>
> mask_2 = 1 << cnt_1;
> _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> _5 = _4 & mask_2;
>
> into
>
> _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> _5 = _4;
>
> and
>
> mask_6 = 1 << bit_5(D);
> _1 = ~mask_6;
> _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> _3 = _2 & mask_6;
> _4 = _3 != 0;
>
> into
>
> mask_6 = 1 << bit_5(D);
> _1 = ~mask_6;
> _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
> _4 = _11 != 0;
>
> But it failed to optimize many equivalent, but slightly different cases:
>
> 1.
> _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> _4 = (_Bool) _1;
> 2.
> _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> _4 = (_Bool) _1;
> 3.
> _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> _7 = ~_1;
> _5 = (_Bool) _7;
> 4.
> _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> _7 = ~_1;
> _5 = (_Bool) _7;
> 5.
> _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> _2 = (int) _1;
> _7 = ~_2;
> _5 = (_Bool) _7;
> 6.
> _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> _2 = (int) _1;
> _7 = ~_2;
> _5 = (_Bool) _7;
> 7.
> _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> _5 = (signed int) _1;
> _4 = _5 < 0;
> 8.
> _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> _5 = (signed int) _1;
> _4 = _5 < 0;
> 9.
> _1 = 1 << bit_4(D);
> mask_5 = (unsigned int) _1;
> _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> _3 = _2 & mask_5;
> 10.
> mask_7 = 1 << bit_6(D);
> _1 = ~mask_7;
> _2 = (unsigned int) _1;
> _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
> _4 = (int) _3;
> _5 = _4 & mask_7;
>
> We make
>
> mask_2 = 1 << cnt_1;
> _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> _5 = _4 & mask_2;
>
> and
>
> mask_6 = 1 << bit_5(D);
> _1 = ~mask_6;
> _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> _3 = _2 & mask_6;
> _4 = _3 != 0;
>
> the canonical forms for this optimization and transform cases 1-8 to the
> equivalent canonical form. For cases 9 and 10, we simply remove the cast
> before __atomic_fetch_or_4/__atomic_fetch_and_4, yielding
>
> _1 = 1 << bit_4(D);
> _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
> _3 = _2 & _1;
>
> and
>
> mask_7 = 1 << bit_6(D);
> _1 = ~mask_7;
> _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
> _6 = _3 & mask_7;
> _5 = (int) _6;
>
> 2021-11-04 H.J. Lu <hongjiu.lu@intel.com>
> Hongtao Liu <hongtao.liu@intel.com>
> gcc/
>
> PR middle-end/102566
> * match.pd (nop_atomic_bit_test_and_p): New match.
> * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
> (gimple_nop_atomic_bit_test_and_p): New prototype.
> (optimize_atomic_bit_test_and): Transform equivalent, but slightly
> different cases to their canonical forms.
>
> gcc/testsuite/
>
> PR middle-end/102566
> * g++.target/i386/pr102566-1.C: New test.
> * g++.target/i386/pr102566-2.C: Likewise.
> * g++.target/i386/pr102566-3.C: Likewise.
> * g++.target/i386/pr102566-4.C: Likewise.
> * g++.target/i386/pr102566-5a.C: Likewise.
> * g++.target/i386/pr102566-5b.C: Likewise.
> * g++.target/i386/pr102566-6a.C: Likewise.
> * g++.target/i386/pr102566-6b.C: Likewise.
> * gcc.target/i386/pr102566-1a.c: Likewise.
> * gcc.target/i386/pr102566-1b.c: Likewise.
> * gcc.target/i386/pr102566-2.c: Likewise.
> * gcc.target/i386/pr102566-3a.c: Likewise.
> * gcc.target/i386/pr102566-3b.c: Likewise.
> * gcc.target/i386/pr102566-4.c: Likewise.
> * gcc.target/i386/pr102566-5.c: Likewise.
> * gcc.target/i386/pr102566-6.c: Likewise.
> * gcc.target/i386/pr102566-7.c: Likewise.
> * gcc.target/i386/pr102566-8a.c: Likewise.
> * gcc.target/i386/pr102566-8b.c: Likewise.
> * gcc.target/i386/pr102566-9a.c: Likewise.
> * gcc.target/i386/pr102566-9b.c: Likewise.
> * gcc.target/i386/pr102566-10a.c: Likewise.
> * gcc.target/i386/pr102566-10b.c: Likewise.
> * gcc.target/i386/pr102566-11.c: Likewise.
> * gcc.target/i386/pr102566-12.c: Likewise.
> * gcc.target/i386/pr102566-13.c: New test.
> * gcc.target/i386/pr102566-14.c: New test.
> ---
> gcc/match.pd | 125 +++++
> gcc/testsuite/g++.target/i386/pr102566-1.C | 31 ++
> gcc/testsuite/g++.target/i386/pr102566-2.C | 31 ++
> gcc/testsuite/g++.target/i386/pr102566-3.C | 31 ++
> gcc/testsuite/g++.target/i386/pr102566-4.C | 29 ++
> gcc/testsuite/g++.target/i386/pr102566-5a.C | 31 ++
> gcc/testsuite/g++.target/i386/pr102566-5b.C | 31 ++
> gcc/testsuite/g++.target/i386/pr102566-6a.C | 31 ++
> gcc/testsuite/g++.target/i386/pr102566-6b.C | 31 ++
> gcc/testsuite/gcc.target/i386/pr102566-10a.c | 15 +
> gcc/testsuite/gcc.target/i386/pr102566-10b.c | 15 +
> gcc/testsuite/gcc.target/i386/pr102566-11.c | 28 ++
> gcc/testsuite/gcc.target/i386/pr102566-12.c | 28 ++
> gcc/testsuite/gcc.target/i386/pr102566-13.c | 66 +++
> gcc/testsuite/gcc.target/i386/pr102566-14.c | 65 +++
> gcc/testsuite/gcc.target/i386/pr102566-1a.c | 188 ++++++++
> gcc/testsuite/gcc.target/i386/pr102566-1b.c | 107 +++++
> gcc/testsuite/gcc.target/i386/pr102566-2.c | 32 ++
> gcc/testsuite/gcc.target/i386/pr102566-3a.c | 15 +
> gcc/testsuite/gcc.target/i386/pr102566-3b.c | 15 +
> gcc/testsuite/gcc.target/i386/pr102566-4.c | 15 +
> gcc/testsuite/gcc.target/i386/pr102566-5.c | 15 +
> gcc/testsuite/gcc.target/i386/pr102566-6.c | 32 ++
> gcc/testsuite/gcc.target/i386/pr102566-7.c | 30 ++
> gcc/testsuite/gcc.target/i386/pr102566-8a.c | 32 ++
> gcc/testsuite/gcc.target/i386/pr102566-8b.c | 32 ++
> gcc/testsuite/gcc.target/i386/pr102566-9a.c | 32 ++
> gcc/testsuite/gcc.target/i386/pr102566-9b.c | 32 ++
> gcc/tree-ssa-ccp.c | 452 +++++++++++++++++--
> 29 files changed, 1575 insertions(+), 42 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-13.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-14.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0734c45700c..7888401be02 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -104,6 +104,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (define_operator_list COND_TERNARY
> IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
>
> +/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_* */
> +(define_operator_list ATOMIC_FETCH_OR_XOR_N
> + BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
> + BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
> + BUILT_IN_ATOMIC_FETCH_OR_16
> + BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
> + BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
> + BUILT_IN_ATOMIC_FETCH_XOR_16
> + BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
> + BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
> + BUILT_IN_ATOMIC_XOR_FETCH_16)
> +/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_* */
> +(define_operator_list SYNC_FETCH_OR_XOR_N
> + BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
> + BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
> + BUILT_IN_SYNC_FETCH_AND_OR_16
> + BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
> + BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
> + BUILT_IN_SYNC_FETCH_AND_XOR_16
> + BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
> + BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
> + BUILT_IN_SYNC_XOR_AND_FETCH_16)
> +/* __atomic_fetch_and_*. */
> +(define_operator_list ATOMIC_FETCH_AND_N
> + BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
> + BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
> + BUILT_IN_ATOMIC_FETCH_AND_16)
> +/* __sync_fetch_and_and_*. */
> +(define_operator_list SYNC_FETCH_AND_AND_N
> + BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
> + BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
> + BUILT_IN_SYNC_FETCH_AND_AND_16)
> +
> /* With nop_convert? combine convert? and view_convert? in one pattern
> plus conditionalize on tree_nop_conversion_p conversions. */
> (match (nop_convert @0)
> @@ -3931,6 +3964,98 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (vec_cond @0 (op! @3 @1) (op! @3 @2))))
> #endif
>
> +#if GIMPLE
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
> + INTEGER_CST@1)
no need for the :c on the bit_and when the 2nd operand is an
INTEGER_CST (likewise below)
> + (with {
> + int ibit = tree_log2 (@0);
> + int ibit2 = tree_log2 (@1);
> + }
> + (if (single_use (@4)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
I wonder whether we should handle both of these in the caller to make
this a pure IL structure
match? At your preference.
> + && ibit == ibit2
> + && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
> + INTEGER_CST@1)
> + (with {
> + int ibit = tree_log2 (@0);
> + int ibit2 = tree_log2 (@1);
> + }
> + (if (single_use (@3)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> + && ibit == ibit2
> + && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> + (nop_convert?@4
> + (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
> + @1)
> + (if (single_use (@4)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> + && operand_equal_p (@0, @1))))
usually for the equality you'd write
(ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
@0)
thus use @0 in both @0 and @1 places. Does that not work here? (the
nop_atomic_bit_test_and_p
arguments then would be @0 @0). Likewise below.
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> + (nop_convert?@4
> + (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
> + @1)
> + (if (single_use (@4)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
> + && operand_equal_p (@0, @1))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
> + INTEGER_CST@1)
> + (with {
> + tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
> + mask = fold_convert (TREE_TYPE (@4), mask);
it's preferred to use wide_int for this, so
int ibit = wi::exact_log2 (wi::bit_not (wi::to_wide (@0)));
likewise below.
> + int ibit = tree_log2 (mask);
> + int ibit2 = tree_log2 (@1);
> + }
> + (if (single_use (@3)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> + && ibit == ibit2
> + && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c@4
> + (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
> + INTEGER_CST@1)
> + (with {
> + tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
> + mask = fold_convert (TREE_TYPE (@4), mask);
> + int ibit = tree_log2 (mask);
> + int ibit2 = tree_log2 (@1);
> + }
> + (if (single_use (@3)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> + && ibit == ibit2
> + && ibit >= 0))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> + (nop_convert?@3
> + (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
> + @1)
> + (if (single_use (@3)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> + && operand_equal_p (@0, @1))))
> +
> +(match (nop_atomic_bit_test_and_p @0 @1)
> + (bit_and:c
> + (nop_convert?@3
> + (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
> + @1)
> + (if (single_use (@3)
> + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
> + && operand_equal_p (@0, @1))))
> +
> +#endif
> +
> /* (v ? w : 0) ? a : b is just (v & w) ? a : b
> Currently disabled after pass lvec because ARM understands
> VEC_COND_EXPR<v==w,-1,0> but not a plain v==w fed to BIT_IOR_EXPR. */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> new file mode 100644
> index 00000000000..94a66d717cc
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 0)
> + return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 30)
> + return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 31)
> + return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> new file mode 100644
> index 00000000000..4f2aea961c2
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> + return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> + return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> + return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> new file mode 100644
> index 00000000000..e88921dd155
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> + return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> + return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> + return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> new file mode 100644
> index 00000000000..44d1362ac2e
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> @@ -0,0 +1,29 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +tbit0 (std::atomic<int_type> &i)
> +{
> + return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +tbit30 (std::atomic<int_type> &i)
> +{
> + return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +tbit31 (std::atomic<int_type> &i)
> +{
> + return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> new file mode 100644
> index 00000000000..f9595bee2ab
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> + return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> + return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> + return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> new file mode 100644
> index 00000000000..d917b27a918
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> + return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> + return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> + return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> new file mode 100644
> index 00000000000..01d495eda23
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> + return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> + return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> + return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> new file mode 100644
> index 00000000000..adc11fcbf2d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> + return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> + return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> + return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> new file mode 100644
> index 00000000000..1c1f86a9659
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> + int mask = 1 << bit;
> + return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> new file mode 100644
> index 00000000000..0bf39824ea6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> + long long int mask = 1ll << bit;
> + return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> new file mode 100644
> index 00000000000..2c8f8c4e59a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic int *v)
> +{
> + return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic unsigned int *v, int mask)
> +{
> + return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic unsigned int *v, int mask)
> +{
> + return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> new file mode 100644
> index 00000000000..4603a77612c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic long *v)
> +{
> + return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic long *v, long mask)
> +{
> + return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic long *v, long mask)
> +{
> + return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "btr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-13.c b/gcc/testsuite/gcc.target/i386/pr102566-13.c
> new file mode 100644
> index 00000000000..2657a2f62ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-13.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define FOO(TYPE,MASK) \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __sync_fetch_and_or (a, mask) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __sync_fetch_and_xor (a, mask) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __sync_xor_and_fetch (a, mask) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1 << MASK; \
> + return __sync_fetch_and_and (a, ~mask) & mask; \
> + } \
> +
> +FOO(short, 0);
> +FOO(short, 7);
> +FOO(short, 15);
> +FOO(int, 0);
> +FOO(int, 15);
> +FOO(int, 31);
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 12 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 24 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 12 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-14.c b/gcc/testsuite/gcc.target/i386/pr102566-14.c
> new file mode 100644
> index 00000000000..24681c1da18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-14.c
> @@ -0,0 +1,65 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +typedef long long int64;
> +
> +#define FOO(TYPE,MASK) \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __sync_fetch_and_or (a, mask) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __sync_fetch_and_xor (a, mask) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __sync_xor_and_fetch (a, mask) & mask; \
> + } \
> + __attribute__((noinline,noclone)) TYPE \
> + sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a) \
> + { \
> + TYPE mask = 1ll << MASK; \
> + return __sync_fetch_and_and (a, ~mask) & mask; \
> + } \
> +
> +
> +FOO(int64, 0);
> +FOO(int64, 32);
> +FOO(int64, 63);
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 6 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 12 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> new file mode 100644
> index 00000000000..a915de354e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> @@ -0,0 +1,188 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (void);
> +
> +__attribute__((noinline, noclone)) int
> +f1 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f2 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> + int t2 = t1 & mask;
> + return t2 != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f3 (long int *a, int bit)
> +{
> + long int mask = 1l << bit;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (int *a)
> +{
> + int mask = 1 << 7;
> + return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f5 (int *a)
> +{
> + int mask = 1 << 13;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f6 (int *a)
> +{
> + int mask = 1 << 0;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> + bar ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> + bar ();
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f9 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f10 (int *a)
> +{
> + int mask = 1 << 7;
> + return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f11 (int *a)
> +{
> + int mask = 1 << 13;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f12 (int *a)
> +{
> + int mask = 1 << 0;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f13 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f14 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f15 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f16 (int *a)
> +{
> + int mask = 1 << 7;
> + return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f17 (int *a)
> +{
> + int mask = 1 << 13;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f18 (int *a)
> +{
> + int mask = 1 << 0;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f19 (long int *a, int bit)
> +{
> + long int mask = 1l << bit;
> + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f20 (long int *a)
> +{
> + long int mask = 1l << 7;
> + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f21 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f22 (long int *a)
> +{
> + long int mask = 1l << 7;
> + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f23 (long int *a)
> +{
> + long int mask = 1l << 7;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f24 (short int *a)
> +{
> + short int mask = 1 << 7;
> + return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f25 (short int *a)
> +{
> + short int mask = 1 << 7;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> new file mode 100644
> index 00000000000..c4dab8135c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> @@ -0,0 +1,107 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -g" } */
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +bar (void)
> +{
> + cnt++;
> +}
> +
> +#include "pr102566-1a.c"
> +
> +int a;
> +long int b;
> +unsigned long int c;
> +unsigned short int d;
> +
> +int
> +main ()
> +{
> + __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> + if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> + || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> + __builtin_abort ();
> + if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> + || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> + __builtin_abort ();
> + __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> + if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> + || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> + __builtin_abort ();
> + __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> + if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> + || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> + __builtin_abort ();
> + if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> + || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> + __builtin_abort ();
> + if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> + || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (cnt != 0
> + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> + || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> + || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> + || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> + __builtin_abort ();
> + if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> + || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> + || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> + if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> + __builtin_abort ();
> + if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> + || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> + || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> + || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> + || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> + if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> + || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> + __builtin_abort ();
> + __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> + if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> + || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> + || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> + || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> + __builtin_abort ();
> + __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> + if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> + || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> + || cnt != 2)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> new file mode 100644
> index 00000000000..00a7c349f2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> + return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> + return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> + return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> + int mask = 1 << bit;
> + return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> new file mode 100644
> index 00000000000..d155ed367a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> + long long int mask = 1ll << bit;
> + return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> new file mode 100644
> index 00000000000..2668ccf827c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> + unsigned int mask = 1 << bit;
> + return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> + int mask = 1 << bit;
> + return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> new file mode 100644
> index 00000000000..3dfe55ac683
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> + return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> + return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> + return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> new file mode 100644
> index 00000000000..6bc0ae0f320
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +foo0 (_Atomic int_type *v)
> +{
> + return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +foo1 (_Atomic int_type *v)
> +{
> + return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +foo2 (_Atomic int_type *v)
> +{
> + return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> new file mode 100644
> index 00000000000..168e3db78c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> + return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> + return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> + return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> new file mode 100644
> index 00000000000..392da3098e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> + return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> + return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> + return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> new file mode 100644
> index 00000000000..3fa2a3ef043
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> + return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> + return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> + return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> new file mode 100644
> index 00000000000..38ddbdc630f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> + return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> + return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> + return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> index 70ce6a4d5b8..d14774549b8 100644
> --- a/gcc/tree-ssa-ccp.c
> +++ b/gcc/tree-ssa-ccp.c
> @@ -3243,6 +3243,90 @@ optimize_unreachable (gimple_stmt_iterator i)
> return ret;
> }
>
> +/* Convert
> + _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> + _7 = ~_1;
> + _5 = (_Bool) _7;
> + to
> + _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> + _8 = _1 & 1;
> + _5 = _8 == 0;
> + and convert
> + _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> + _7 = ~_1;
> + _4 = (_Bool) _7;
> + to
> + _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> + _8 = _1 & 1;
> + _4 = (_Bool) _8;
> +
> + USE_STMT is the gimple statement that uses the return value of
> + __atomic_fetch_or_*. LHS is the return value of __atomic_fetch_or_*.
> + MASK is the mask passed to __atomic_fetch_or_*.
> + */
> +
> +static gimple *
> +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> + tree lhs, tree mask)
> +{
> + tree and_mask;
> + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + {
> + /* MASK must be ~1. */
> + if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> + ~HOST_WIDE_INT_1), mask, 0))
> + return nullptr;
> + and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> + }
> + else
> + {
> + /* MASK must be 1. */
> + if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> + return nullptr;
> + and_mask = mask;
> + }
> +
> + tree use_lhs = gimple_assign_lhs (use_stmt);
> +
> + use_operand_p use_p;
> + gimple *use_not_stmt;
> +
> + if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> + || !is_gimple_assign (use_not_stmt))
> + return nullptr;
> +
> + if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
Please use CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt)) here instead of checking for NOP_EXPR only.
Otherwise looks OK.
Thanks,
Richard.
> + return nullptr;
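A minimal sketch of the suggested check, assuming the usual
CONVERT_EXPR_CODE_P macro from tree.h (it accepts both NOP_EXPR and
CONVERT_EXPR):

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_not_stmt)))
    return nullptr;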
> +
> + tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> + if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> + return nullptr;
> +
> + gimple_stmt_iterator gsi;
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_remove (&gsi, true);
> + tree var = make_ssa_name (TREE_TYPE (lhs));
> + use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> + gsi = gsi_for_stmt (use_not_stmt);
> + gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> + lhs = gimple_assign_lhs (use_not_stmt);
> + gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> + build_zero_cst (TREE_TYPE (mask)));
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + gsi = gsi_for_stmt (use_not_stmt);
> + gsi_remove (&gsi, true);
> + return use_stmt;
> +}
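For reference, a user-level sketch of the kind of source this helper is
aimed at (illustrative only; the function name is made up and the exact
GIMPLE depends on earlier folding producing the ~ + (_Bool) form):

  _Bool
  bit0_was_clear (unsigned int *p)
  {
    return !(__atomic_fetch_or (p, 1, __ATOMIC_RELAXED) & 1);
  }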
> +
> +/* match.pd function to match the atomic_bit_test_and pattern which
> + has a nop_convert:
> + _1 = __atomic_fetch_or_4 (&v, 1, 0);
> + _2 = (int) _1;
> + _5 = _2 & 1;
> + */
> +extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
> + tree (*) (tree));
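For context, a sketch of the intended calling convention of the generated
predicate (names are illustrative; the two captured operands are returned
in the array, with the mask tested against the atomic result in element 1):

  tree res_ops[2];
  if (gimple_nop_atomic_bit_test_and_p (and_lhs, &res_ops[0], NULL))
    mask = res_ops[1];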
> +
> /* Optimize
> mask_2 = 1 << cnt_1;
> _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> @@ -3269,7 +3353,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> tree lhs = gimple_call_lhs (call);
> use_operand_p use_p;
> gimple *use_stmt;
> - tree mask, bit;
> + tree mask;
> optab optab;
>
> if (!flag_inline_atomics
> @@ -3279,10 +3363,267 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> || !single_imm_use (lhs, &use_p, &use_stmt)
> || !is_gimple_assign (use_stmt)
> - || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> || !gimple_vdef (call))
> return;
>
> + tree bit = nullptr;
> +
> + mask = gimple_call_arg (call, 1);
> + tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> + if (rhs_code != BIT_AND_EXPR)
> + {
> + if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> + return;
> +
> + tree use_lhs = gimple_assign_lhs (use_stmt);
> + if (TREE_CODE (use_lhs) == SSA_NAME
> + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> + return;
> +
> + tree use_rhs = gimple_assign_rhs1 (use_stmt);
> + if (lhs != use_rhs)
> + return;
> +
> + gimple *g;
> + gimple_stmt_iterator gsi;
> + tree var;
> + int ibit = -1;
> +
> + if (rhs_code == BIT_NOT_EXPR)
> + {
> + g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> + if (!g)
> + return;
> + use_stmt = g;
> + ibit = 0;
> + }
> + else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> + {
> + tree and_mask;
> + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + {
> + /* MASK must be ~1. */
> + if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> + ~HOST_WIDE_INT_1),
> + mask, 0))
> + return;
> +
> + /* Convert
> + _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> + _4 = (_Bool) _1;
> + to
> + _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> + _5 = _1 & 1;
> + _4 = (_Bool) _5;
> + */
> + and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> + }
> + else
> + {
> + and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> + if (!operand_equal_p (and_mask, mask, 0))
> + return;
> +
> + /* Convert
> + _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> + _4 = (_Bool) _1;
> + to
> + _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> + _5 = _1 & 1;
> + _4 = (_Bool) _5;
> + */
> + }
> + var = make_ssa_name (TREE_TYPE (use_rhs));
> + replace_uses_by (use_rhs, var);
> + g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> + and_mask);
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> + use_stmt = g;
> + ibit = 0;
> + }
> + else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> + == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> + {
> + gimple *use_nop_stmt;
> + if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> + || !is_gimple_assign (use_nop_stmt))
> + return;
> + rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> + if (rhs_code != BIT_AND_EXPR)
> + {
> + tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> + if (TREE_CODE (use_nop_lhs) == SSA_NAME
> + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> + return;
> + if (rhs_code == BIT_NOT_EXPR)
> + {
> + g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> + mask);
> + if (!g)
> + return;
> + /* Convert
> + _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> + _2 = (int) _1;
> + _7 = ~_2;
> + _5 = (_Bool) _7;
> + to
> + _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> + _8 = _1 & 1;
> + _5 = _8 == 0;
> + and convert
> + _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> + _2 = (int) _1;
> + _7 = ~_2;
> + _5 = (_Bool) _7;
> + to
> + _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> + _8 = _1 & 1;
> + _5 = _8 == 0;
> + */
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_remove (&gsi, true);
> + use_stmt = g;
> + ibit = 0;
> + }
> + else
> + {
> + if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> + return;
> + if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> + return;
> + tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> + if (use_lhs != cmp_rhs1)
> + return;
> + tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> + if (!integer_zerop (cmp_rhs2))
> + return;
> +
> + tree and_mask;
> +
> + unsigned HOST_WIDE_INT bytes
> + = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> + ibit = bytes * BITS_PER_UNIT - 1;
> + unsigned HOST_WIDE_INT highest
> + = HOST_WIDE_INT_1U << ibit;
> +
> + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + {
> + /* Get the signed maximum of the USE_RHS type. */
> + and_mask = build_int_cst (TREE_TYPE (use_rhs),
> + highest - 1);
> + if (!operand_equal_p (and_mask, mask, 0))
> + return;
> +
> + /* Convert
> + _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> + _5 = (signed int) _1;
> + _4 = _5 < 0 or _5 >= 0;
> + to
> + _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> + _6 = _1 & 0x80000000;
> + _4 = _6 != 0 or _6 == 0;
> + */
> + and_mask = build_int_cst (TREE_TYPE (use_rhs),
> + highest);
> + }
> + else
> + {
> + /* Get the signed minimum of the USE_RHS type. */
> + and_mask = build_int_cst (TREE_TYPE (use_rhs),
> + highest);
> + if (!operand_equal_p (and_mask, mask, 0))
> + return;
> +
> + /* Convert
> + _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> + _5 = (signed int) _1;
> + _4 = _5 < 0 or _5 >= 0;
> + to
> + _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> + _6 = _1 & 0x80000000;
> + _4 = _6 != 0 or _6 == 0;
> + */
> + }
> + var = make_ssa_name (TREE_TYPE (use_rhs));
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_remove (&gsi, true);
> + g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> + and_mask);
> + gsi = gsi_for_stmt (use_nop_stmt);
> + gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> + use_stmt = g;
> + g = gimple_build_assign (use_nop_lhs,
> + (rhs_code == GE_EXPR
> + ? EQ_EXPR : NE_EXPR),
> + var,
> + build_zero_cst (TREE_TYPE (use_rhs)));
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + gsi = gsi_for_stmt (use_nop_stmt);
> + gsi_remove (&gsi, true);
> + }
> + }
> + else
> + {
> + tree and_expr = gimple_assign_lhs (use_nop_stmt);
> + tree res_mask[2];
> + if (!gimple_nop_atomic_bit_test_and_p (and_expr,
> + &res_mask[0], NULL))
> + return;
> + mask = res_mask[1];
> + if (TREE_CODE (mask) == INTEGER_CST)
> + {
> + ibit = tree_log2 (mask);
> + gcc_assert (ibit >= 0);
> + }
> + else
> + {
> + g = SSA_NAME_DEF_STMT (mask);
> + gcc_assert (is_gimple_assign (g));
> + bit = gimple_assign_rhs2 (g);
> + }
> + /* Convert
> + _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> + _2 = (int) _1;
> + _5 = _2 & mask;
> + to
> + _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> + _6 = _1 & mask;
> + _5 = (int) _6;
> + and convert
> + _1 = ~mask_7;
> + _2 = (unsigned int) _1;
> + _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> + _4 = (int) _3;
> + _5 = _4 & mask_7;
> + to
> + _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> + _12 = _3 & mask_7;
> + _5 = (int) _12;
> + */
> + replace_uses_by (use_lhs, lhs);
> + tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> + var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> + gimple_assign_set_lhs (use_nop_stmt, var);
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_remove (&gsi, true);
> + release_defs (use_stmt);
> + gsi_remove (gsip, true);
> + g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
> + gsi = gsi_for_stmt (use_nop_stmt);
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + use_stmt = use_nop_stmt;
> + }
> + }
> +
> + if (!bit)
> + {
> + if (ibit < 0)
> + gcc_unreachable ();
> + bit = build_int_cst (TREE_TYPE (lhs), ibit);
> + }
> + }
> +
> switch (fn)
> {
> case IFN_ATOMIC_BIT_TEST_AND_SET:
> @@ -3301,51 +3642,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
> return;
>
> - mask = gimple_call_arg (call, 1);
> tree use_lhs = gimple_assign_lhs (use_stmt);
> if (!use_lhs)
> return;
>
> - if (TREE_CODE (mask) == INTEGER_CST)
> - {
> - if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> - mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> - mask = fold_convert (TREE_TYPE (lhs), mask);
> - int ibit = tree_log2 (mask);
> - if (ibit < 0)
> - return;
> - bit = build_int_cst (TREE_TYPE (lhs), ibit);
> - }
> - else if (TREE_CODE (mask) == SSA_NAME)
> + if (!bit)
> {
> - gimple *g = SSA_NAME_DEF_STMT (mask);
> - if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + if (TREE_CODE (mask) == INTEGER_CST)
> {
> - if (!is_gimple_assign (g)
> - || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> + mask = fold_convert (TREE_TYPE (lhs), mask);
> + int ibit = tree_log2 (mask);
> + if (ibit < 0)
> return;
> - mask = gimple_assign_rhs1 (g);
> - if (TREE_CODE (mask) != SSA_NAME)
> + bit = build_int_cst (TREE_TYPE (lhs), ibit);
> + }
> + else if (TREE_CODE (mask) == SSA_NAME)
> + {
> + gimple *g = SSA_NAME_DEF_STMT (mask);
> + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + {
> + if (!is_gimple_assign (g)
> + || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> + return;
> + mask = gimple_assign_rhs1 (g);
> + if (TREE_CODE (mask) != SSA_NAME)
> + return;
> + g = SSA_NAME_DEF_STMT (mask);
> + }
> + if (!is_gimple_assign (g))
> return;
> - g = SSA_NAME_DEF_STMT (mask);
> + rhs_code = gimple_assign_rhs_code (g);
> + if (rhs_code != LSHIFT_EXPR)
> + {
> + if (rhs_code != NOP_EXPR)
> + return;
> +
> + /* Handle
> + _1 = 1 << bit_4(D);
> + mask_5 = (unsigned int) _1;
> + _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> + _3 = _2 & mask_5;
> + */
> + tree nop_lhs = gimple_assign_lhs (g);
> + tree nop_rhs = gimple_assign_rhs1 (g);
> + if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> + != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> + return;
> + g = SSA_NAME_DEF_STMT (nop_rhs);
> + if (!is_gimple_assign (g)
> + || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> + return;
> + }
> + if (!integer_onep (gimple_assign_rhs1 (g)))
> + return;
> + bit = gimple_assign_rhs2 (g);
> }
> - if (!is_gimple_assign (g)
> - || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> - || !integer_onep (gimple_assign_rhs1 (g)))
> + else
> return;
> - bit = gimple_assign_rhs2 (g);
> - }
> - else
> - return;
>
> - if (gimple_assign_rhs1 (use_stmt) == lhs)
> - {
> - if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> + if (gimple_assign_rhs1 (use_stmt) == lhs)
> + {
> + if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> + return;
> + }
> + else if (gimple_assign_rhs2 (use_stmt) != lhs
> + || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> + mask, 0))
> return;
> }
> - else if (gimple_assign_rhs2 (use_stmt) != lhs
> - || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> - return;
>
> bool use_bool = true;
> bool has_debug_uses = false;
> @@ -3434,18 +3800,20 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> of the specified bit after the atomic operation (makes only sense
> for xor, otherwise the bit content is compile time known),
> we need to invert the bit. */
> - g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> - BIT_XOR_EXPR, new_lhs,
> - use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> - : mask);
> - new_lhs = gimple_assign_lhs (g);
> + tree mask_convert = mask;
> + gimple_seq stmts = NULL;
> + if (!use_bool)
> + mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> + new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> + use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> + : mask_convert);
> if (throws)
> {
> - gsi_insert_on_edge_immediate (e, g);
> - gsi = gsi_for_stmt (g);
> + gsi_insert_seq_on_edge_immediate (e, stmts);
> + gsi = gsi_for_stmt (gimple_seq_last (stmts));
> }
> else
> - gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
> }
> if (use_bool && has_debug_uses)
> {
> --
> 2.18.1
>
@@ -104,6 +104,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(define_operator_list COND_TERNARY
IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
+/* __atomic_fetch_or_*, __atomic_fetch_xor_*, __atomic_xor_fetch_* */
+(define_operator_list ATOMIC_FETCH_OR_XOR_N
+ BUILT_IN_ATOMIC_FETCH_OR_1 BUILT_IN_ATOMIC_FETCH_OR_2
+ BUILT_IN_ATOMIC_FETCH_OR_4 BUILT_IN_ATOMIC_FETCH_OR_8
+ BUILT_IN_ATOMIC_FETCH_OR_16
+ BUILT_IN_ATOMIC_FETCH_XOR_1 BUILT_IN_ATOMIC_FETCH_XOR_2
+ BUILT_IN_ATOMIC_FETCH_XOR_4 BUILT_IN_ATOMIC_FETCH_XOR_8
+ BUILT_IN_ATOMIC_FETCH_XOR_16
+ BUILT_IN_ATOMIC_XOR_FETCH_1 BUILT_IN_ATOMIC_XOR_FETCH_2
+ BUILT_IN_ATOMIC_XOR_FETCH_4 BUILT_IN_ATOMIC_XOR_FETCH_8
+ BUILT_IN_ATOMIC_XOR_FETCH_16)
+/* __sync_fetch_and_or_*, __sync_fetch_and_xor_*, __sync_xor_and_fetch_* */
+(define_operator_list SYNC_FETCH_OR_XOR_N
+ BUILT_IN_SYNC_FETCH_AND_OR_1 BUILT_IN_SYNC_FETCH_AND_OR_2
+ BUILT_IN_SYNC_FETCH_AND_OR_4 BUILT_IN_SYNC_FETCH_AND_OR_8
+ BUILT_IN_SYNC_FETCH_AND_OR_16
+ BUILT_IN_SYNC_FETCH_AND_XOR_1 BUILT_IN_SYNC_FETCH_AND_XOR_2
+ BUILT_IN_SYNC_FETCH_AND_XOR_4 BUILT_IN_SYNC_FETCH_AND_XOR_8
+ BUILT_IN_SYNC_FETCH_AND_XOR_16
+ BUILT_IN_SYNC_XOR_AND_FETCH_1 BUILT_IN_SYNC_XOR_AND_FETCH_2
+ BUILT_IN_SYNC_XOR_AND_FETCH_4 BUILT_IN_SYNC_XOR_AND_FETCH_8
+ BUILT_IN_SYNC_XOR_AND_FETCH_16)
+/* __atomic_fetch_and_*. */
+(define_operator_list ATOMIC_FETCH_AND_N
+ BUILT_IN_ATOMIC_FETCH_AND_1 BUILT_IN_ATOMIC_FETCH_AND_2
+ BUILT_IN_ATOMIC_FETCH_AND_4 BUILT_IN_ATOMIC_FETCH_AND_8
+ BUILT_IN_ATOMIC_FETCH_AND_16)
+/* __sync_fetch_and_and_*. */
+(define_operator_list SYNC_FETCH_AND_AND_N
+ BUILT_IN_SYNC_FETCH_AND_AND_1 BUILT_IN_SYNC_FETCH_AND_AND_2
+ BUILT_IN_SYNC_FETCH_AND_AND_4 BUILT_IN_SYNC_FETCH_AND_AND_8
+ BUILT_IN_SYNC_FETCH_AND_AND_16)
+
/* With nop_convert? combine convert? and view_convert? in one pattern
plus conditionalize on tree_nop_conversion_p conversions. */
(match (nop_convert @0)
@@ -3931,6 +3964,98 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(vec_cond @0 (op! @3 @1) (op! @3 @2))))
#endif
+#if GIMPLE
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3))
+ INTEGER_CST@1)
+ (with {
+ int ibit = tree_log2 (@0);
+ int ibit2 = tree_log2 (@1);
+ }
+ (if (single_use (@4)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
+ && ibit == ibit2
+ && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0))
+ INTEGER_CST@1)
+ (with {
+ int ibit = tree_log2 (@0);
+ int ibit2 = tree_log2 (@1);
+ }
+ (if (single_use (@3)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+ && ibit == ibit2
+ && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+ (nop_convert?@4
+ (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3))
+ @1)
+ (if (single_use (@4)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
+ && operand_equal_p (@0, @1))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+ (nop_convert?@4
+ (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5))))
+ @1)
+ (if (single_use (@4)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@4)
+ && operand_equal_p (@0, @1))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5))
+ INTEGER_CST@1)
+ (with {
+ tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
+ mask = fold_convert (TREE_TYPE (@4), mask);
+ int ibit = tree_log2 (mask);
+ int ibit2 = tree_log2 (@1);
+ }
+ (if (single_use (@3)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+ && ibit == ibit2
+ && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c@4
+ (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0))
+ INTEGER_CST@1)
+ (with {
+ tree mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (@0), @0);
+ mask = fold_convert (TREE_TYPE (@4), mask);
+ int ibit = tree_log2 (mask);
+ int ibit2 = tree_log2 (@1);
+ }
+ (if (single_use (@3)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+ && ibit == ibit2
+ && ibit >= 0))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+ (nop_convert?@3
+ (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5))
+ @1)
+ (if (single_use (@3)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+ && operand_equal_p (@0, @1))))
+
+(match (nop_atomic_bit_test_and_p @0 @1)
+ (bit_and:c
+ (nop_convert?@3
+ (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7)))))
+ @1)
+ (if (single_use (@3)
+ && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (@3)
+ && operand_equal_p (@0, @1))))
+
+#endif
+
/* (v ? w : 0) ? a : b is just (v & w) ? a : b
Currently disabled after pass lvec because ARM understands
VEC_COND_EXPR<v==w,-1,0> but not a plain v==w fed to BIT_IOR_EXPR. */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<int> &i)
+{
+#define BIT (1 << 0)
+ return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<int> &i)
+{
+#define BIT (1 << 30)
+ return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<int> &i)
+{
+#define BIT (1 << 31)
+ return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+ return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+ return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+ return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+ return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+ return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+ return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+tbit0 (std::atomic<int_type> &i)
+{
+ return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
+}
+
+bool
+tbit30 (std::atomic<int_type> &i)
+{
+ return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
+}
+
+bool
+tbit31 (std::atomic<int_type> &i)
+{
+ return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+ return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+ return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+ return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+ return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+ return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+ return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+ return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+ return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+ return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+ return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+ return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+ return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+ int mask = 1 << bit;
+ return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+ long long int mask = 1ll << bit;
+ return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic int *v)
+{
+ return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic unsigned int *v, int mask)
+{
+ return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic unsigned int *v, int mask)
+{
+ return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic long *v)
+{
+ return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic long *v, long mask)
+{
+ return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic long *v, long mask)
+{
+ return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "btr" } } */
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define FOO(TYPE,MASK) \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __sync_fetch_and_or (a, mask) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __sync_fetch_and_xor (a, mask) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __sync_xor_and_fetch (a, mask) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1 << MASK; \
+ return __sync_fetch_and_and (a, ~mask) & mask; \
+ } \
+
+FOO(short, 0);
+FOO(short, 7);
+FOO(short, 15);
+FOO(int, 0);
+FOO(int, 15);
+FOO(int, 31);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 12 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 24 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 12 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,65 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+#include <stdatomic.h>
+#include <stdbool.h>
+typedef long long int64;
+
+#define FOO(TYPE,MASK) \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __sync_fetch_and_or (a, mask) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __sync_fetch_and_xor (a, mask) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __sync_xor_and_fetch (a, mask) & mask; \
+ } \
+ __attribute__((noinline,noclone)) TYPE \
+ sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a) \
+ { \
+ TYPE mask = 1ll << MASK; \
+ return __sync_fetch_and_and (a, ~mask) & mask; \
+ } \
+
+
+FOO(int64, 0);
+FOO(int64, 32);
+FOO(int64, 63);
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 6 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 12 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+ int t2 = t1 & mask;
+ return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+ long int mask = 1l << bit;
+ return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+ int mask = 1 << 7;
+ return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+ int mask = 1 << 13;
+ return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+ int mask = 1 << 0;
+ return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+ bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+ bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+ int mask = 1 << 7;
+ return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+ int mask = 1 << 13;
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+ int mask = 1 << 0;
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+ int mask = 1 << 7;
+ return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+ int mask = 1 << 13;
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+ int mask = 1 << 0;
+ return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f19 (long int *a, int bit)
+{
+ long int mask = 1l << bit;
+ return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f20 (long int *a)
+{
+ long int mask = 1l << 7;
+ return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+ int mask = 1 << bit;
+ return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f22 (long int *a)
+{
+ long int mask = 1l << 7;
+ return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f23 (long int *a)
+{
+ long int mask = 1l << 7;
+ return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) short int
+f24 (short int *a)
+{
+ short int mask = 1 << 7;
+ return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) short int
+f25 (short int *a)
+{
+ short int mask = 1 << 7;
+ return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+ cnt++;
+}
+
+#include "pr102566-1a.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+ __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+ if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+ || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+ __builtin_abort ();
+ if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+ || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+ __builtin_abort ();
+ __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+ if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+ || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+ __builtin_abort ();
+ __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+ if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+ || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+ __builtin_abort ();
+ if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+ || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+ __builtin_abort ();
+ if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+ || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (cnt != 0
+ || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+ || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+ || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+ || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+ __builtin_abort ();
+ if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+ __builtin_abort ();
+ if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+ || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+ || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+ if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+ || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+ __builtin_abort ();
+ if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+ || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+ || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+ || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+ || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+ __builtin_abort ();
+ __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+ if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+ || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+ __builtin_abort ();
+ __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+ if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+ || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+ || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+ __builtin_abort ();
+ if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+ || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+ __builtin_abort ();
+ __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+ if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+ || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+ || cnt != 2)
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+ return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+ return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+ return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+ int mask = 1 << bit;
+ return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+ long long int mask = 1ll << bit;
+ return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+ unsigned int mask = 1 << bit;
+ return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+ int mask = 1 << bit;
+ return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+ return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+ return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+ return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+foo0 (_Atomic int_type *v)
+{
+ return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
+}
+
+bool
+foo1 (_Atomic int_type *v)
+{
+ return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
+}
+
+bool
+foo2 (_Atomic int_type *v)
+{
+ return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+ return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+ return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+ return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+ return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+ return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+ return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+ return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+ return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+ return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+ return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+ return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+ return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
@@ -3243,6 +3243,90 @@ optimize_unreachable (gimple_stmt_iterator i)
return ret;
}
+/* Convert
+ _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+ _7 = ~_1;
+ _5 = (_Bool) _7;
+ to
+ _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+ _8 = _1 & 1;
+ _5 = _8 == 0;
+ and convert
+ _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+ _7 = ~_1;
+ _4 = (_Bool) _7;
+ to
+ _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+ _8 = _1 & 1;
+ _4 = (_Bool) _8;
+
+ USE_STMT is the gimple statement that uses the return value of
+ __atomic_fetch_or_*. LHS is the return value of __atomic_fetch_or_*.
+ MASK is the mask passed to __atomic_fetch_or_*.
+ */
+
+static gimple *
+convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
+ tree lhs, tree mask)
+{
+ tree and_mask;
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ {
+ /* MASK must be ~1. */
+ if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+ ~HOST_WIDE_INT_1), mask, 0))
+ return nullptr;
+ and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+ }
+ else
+ {
+ /* MASK must be 1. */
+ if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
+ return nullptr;
+ and_mask = mask;
+ }
+
+ tree use_lhs = gimple_assign_lhs (use_stmt);
+
+ use_operand_p use_p;
+ gimple *use_not_stmt;
+
+ if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
+ || !is_gimple_assign (use_not_stmt))
+ return nullptr;
+
+ if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
+ return nullptr;
+
+ tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
+ if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
+ return nullptr;
+
+ gimple_stmt_iterator gsi;
+ gsi = gsi_for_stmt (use_stmt);
+ gsi_remove (&gsi, true);
+ tree var = make_ssa_name (TREE_TYPE (lhs));
+ use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
+ gsi = gsi_for_stmt (use_not_stmt);
+ gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
+ lhs = gimple_assign_lhs (use_not_stmt);
+ gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
+ build_zero_cst (TREE_TYPE (mask)));
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ gsi = gsi_for_stmt (use_not_stmt);
+ gsi_remove (&gsi, true);
+ return use_stmt;
+}
+
+/* match.pd function to match the atomic_bit_test_and pattern which
+ has a nop_convert:
+ _1 = __atomic_fetch_or_4 (&v, 1, 0);
+ _2 = (int) _1;
+ _5 = _2 & 1;
+ */
+extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *,
+ tree (*) (tree));
+
/* Optimize
mask_2 = 1 << cnt_1;
_4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
@@ -3269,7 +3353,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
tree lhs = gimple_call_lhs (call);
use_operand_p use_p;
gimple *use_stmt;
- tree mask, bit;
+ tree mask;
optab optab;
if (!flag_inline_atomics
@@ -3279,10 +3363,267 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
|| SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
|| !single_imm_use (lhs, &use_p, &use_stmt)
|| !is_gimple_assign (use_stmt)
- || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
|| !gimple_vdef (call))
return;
+ tree bit = nullptr;
+
+ mask = gimple_call_arg (call, 1);
+ tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+ if (rhs_code != BIT_AND_EXPR)
+ {
+ if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
+ return;
+
+ tree use_lhs = gimple_assign_lhs (use_stmt);
+ if (TREE_CODE (use_lhs) == SSA_NAME
+ && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+ return;
+
+ tree use_rhs = gimple_assign_rhs1 (use_stmt);
+ if (lhs != use_rhs)
+ return;
+
+ gimple *g;
+ gimple_stmt_iterator gsi;
+ tree var;
+ int ibit = -1;
+
+ if (rhs_code == BIT_NOT_EXPR)
+ {
+ g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
+ if (!g)
+ return;
+ use_stmt = g;
+ ibit = 0;
+ }
+ else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
+ {
+ tree and_mask;
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ {
+ /* MASK must be ~1. */
+ if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+ ~HOST_WIDE_INT_1),
+ mask, 0))
+ return;
+
+ /* Convert
+ _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+ _4 = (_Bool) _1;
+ to
+ _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+ _5 = _1 & 1;
+ _4 = (_Bool) _5;
+ */
+ and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+ }
+ else
+ {
+ and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+ if (!operand_equal_p (and_mask, mask, 0))
+ return;
+
+ /* Convert
+ _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+ _4 = (_Bool) _1;
+ to
+ _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+ _5 = _1 & 1;
+ _4 = (_Bool) _5;
+ */
+ }
+ var = make_ssa_name (TREE_TYPE (use_rhs));
+ replace_uses_by (use_rhs, var);
+ g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+ and_mask);
+ gsi = gsi_for_stmt (use_stmt);
+ gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+ use_stmt = g;
+ ibit = 0;
+ }
+ else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
+ == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+ {
+ gimple *use_nop_stmt;
+ if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
+ || !is_gimple_assign (use_nop_stmt))
+ return;
+ rhs_code = gimple_assign_rhs_code (use_nop_stmt);
+ if (rhs_code != BIT_AND_EXPR)
+ {
+ tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+ if (TREE_CODE (use_nop_lhs) == SSA_NAME
+ && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
+ return;
+ if (rhs_code == BIT_NOT_EXPR)
+ {
+ g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
+ mask);
+ if (!g)
+ return;
+ /* Convert
+ _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+ _2 = (int) _1;
+ _7 = ~_2;
+ _5 = (_Bool) _7;
+ to
+ _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+ _8 = _1 & 1;
+ _5 = _8 == 0;
+ and convert
+ _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+ _2 = (int) _1;
+ _7 = ~_2;
+ _5 = (_Bool) _7;
+ to
+ _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+ _8 = _1 & 1;
+ _5 = _8 == 0;
+ */
+ gsi = gsi_for_stmt (use_stmt);
+ gsi_remove (&gsi, true);
+ use_stmt = g;
+ ibit = 0;
+ }
+ else
+ {
+ if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
+ return;
+ if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
+ return;
+ tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+ if (use_lhs != cmp_rhs1)
+ return;
+ tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+ if (!integer_zerop (cmp_rhs2))
+ return;
+
+ tree and_mask;
+
+ unsigned HOST_WIDE_INT bytes
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
+ ibit = bytes * BITS_PER_UNIT - 1;
+ unsigned HOST_WIDE_INT highest
+ = HOST_WIDE_INT_1U << ibit;
+
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ {
+ /* Get the signed maximum of the USE_RHS type. */
+ and_mask = build_int_cst (TREE_TYPE (use_rhs),
+ highest - 1);
+ if (!operand_equal_p (and_mask, mask, 0))
+ return;
+
+ /* Convert
+ _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+ _5 = (signed int) _1;
+ _4 = _5 < 0 or _5 >= 0;
+ to
+ _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+ _6 = _1 & 0x80000000;
+ _4 = _6 != 0 or _6 == 0;
+ */
+ and_mask = build_int_cst (TREE_TYPE (use_rhs),
+ highest);
+ }
+ else
+ {
+ /* Get the signed minimum of the USE_RHS type. */
+ and_mask = build_int_cst (TREE_TYPE (use_rhs),
+ highest);
+ if (!operand_equal_p (and_mask, mask, 0))
+ return;
+
+ /* Convert
+ _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+ _5 = (signed int) _1;
+ _4 = _5 < 0 or _5 >= 0;
+ to
+ _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+ _6 = _1 & 0x80000000;
+ _4 = _6 != 0 or _6 == 0;
+ */
+ }
+ var = make_ssa_name (TREE_TYPE (use_rhs));
+ gsi = gsi_for_stmt (use_stmt);
+ gsi_remove (&gsi, true);
+ g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+ and_mask);
+ gsi = gsi_for_stmt (use_nop_stmt);
+ gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+ use_stmt = g;
+ g = gimple_build_assign (use_nop_lhs,
+ (rhs_code == GE_EXPR
+ ? EQ_EXPR : NE_EXPR),
+ var,
+ build_zero_cst (TREE_TYPE (use_rhs)));
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ gsi = gsi_for_stmt (use_nop_stmt);
+ gsi_remove (&gsi, true);
+ }
+ }
+ else
+ {
+ tree and_expr = gimple_assign_lhs (use_nop_stmt);
+ tree res_mask[2];
+ if (!gimple_nop_atomic_bit_test_and_p (and_expr,
+ &res_mask[0], NULL))
+ return;
+ mask = res_mask[1];
+ if (TREE_CODE (mask) == INTEGER_CST)
+ {
+ ibit = tree_log2 (mask);
+ gcc_assert (ibit >= 0);
+ }
+ else
+ {
+ g = SSA_NAME_DEF_STMT (mask);
+ gcc_assert (is_gimple_assign (g));
+ bit = gimple_assign_rhs2 (g);
+ }
+ /* Convert
+ _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+ _2 = (int) _1;
+ _5 = _2 & mask;
+ to
+ _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+ _6 = _1 & mask;
+ _5 = (int) _6;
+ and convert
+ _1 = ~mask_7;
+ _2 = (unsigned int) _1;
+ _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
+ _4 = (int) _3;
+ _5 = _4 & mask_7;
+ to
+ _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
+ _12 = _3 & mask_7;
+ _5 = (int) _12;
+ */
+ replace_uses_by (use_lhs, lhs);
+ tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+ var = make_ssa_name (TREE_TYPE (use_nop_lhs));
+ gimple_assign_set_lhs (use_nop_stmt, var);
+ gsi = gsi_for_stmt (use_stmt);
+ gsi_remove (&gsi, true);
+ release_defs (use_stmt);
+ gsi_remove (gsip, true);
+ g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
+ gsi = gsi_for_stmt (use_nop_stmt);
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ use_stmt = use_nop_stmt;
+ }
+ }
+
+ if (!bit)
+ {
+ if (ibit < 0)
+ gcc_unreachable ();
+ bit = build_int_cst (TREE_TYPE (lhs), ibit);
+ }
+ }
+
switch (fn)
{
case IFN_ATOMIC_BIT_TEST_AND_SET:
@@ -3301,51 +3642,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
return;
- mask = gimple_call_arg (call, 1);
tree use_lhs = gimple_assign_lhs (use_stmt);
if (!use_lhs)
return;
- if (TREE_CODE (mask) == INTEGER_CST)
- {
- if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
- mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
- mask = fold_convert (TREE_TYPE (lhs), mask);
- int ibit = tree_log2 (mask);
- if (ibit < 0)
- return;
- bit = build_int_cst (TREE_TYPE (lhs), ibit);
- }
- else if (TREE_CODE (mask) == SSA_NAME)
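+  /* BIT may already have been determined by the canonicalizations above;
+     only derive it from MASK when it was not.  */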
+ if (!bit)
{
- gimple *g = SSA_NAME_DEF_STMT (mask);
- if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ if (TREE_CODE (mask) == INTEGER_CST)
{
- if (!is_gimple_assign (g)
- || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+ mask = fold_convert (TREE_TYPE (lhs), mask);
+ int ibit = tree_log2 (mask);
+ if (ibit < 0)
return;
- mask = gimple_assign_rhs1 (g);
- if (TREE_CODE (mask) != SSA_NAME)
+ bit = build_int_cst (TREE_TYPE (lhs), ibit);
+ }
+ else if (TREE_CODE (mask) == SSA_NAME)
+ {
+ gimple *g = SSA_NAME_DEF_STMT (mask);
+ if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+ {
+ if (!is_gimple_assign (g)
+ || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+ return;
+ mask = gimple_assign_rhs1 (g);
+ if (TREE_CODE (mask) != SSA_NAME)
+ return;
+ g = SSA_NAME_DEF_STMT (mask);
+ }
+ if (!is_gimple_assign (g))
return;
- g = SSA_NAME_DEF_STMT (mask);
+ rhs_code = gimple_assign_rhs_code (g);
+ if (rhs_code != LSHIFT_EXPR)
+ {
+ if (rhs_code != NOP_EXPR)
+ return;
+
+ /* Handle
+ _1 = 1 << bit_4(D);
+ mask_5 = (unsigned int) _1;
+ _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
+ _3 = _2 & mask_5;
+ */
+ tree nop_lhs = gimple_assign_lhs (g);
+ tree nop_rhs = gimple_assign_rhs1 (g);
+ if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
+ != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+ return;
+ g = SSA_NAME_DEF_STMT (nop_rhs);
+ if (!is_gimple_assign (g)
+ || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+ return;
+ }
+ if (!integer_onep (gimple_assign_rhs1 (g)))
+ return;
+ bit = gimple_assign_rhs2 (g);
}
- if (!is_gimple_assign (g)
- || gimple_assign_rhs_code (g) != LSHIFT_EXPR
- || !integer_onep (gimple_assign_rhs1 (g)))
+ else
return;
- bit = gimple_assign_rhs2 (g);
- }
- else
- return;
- if (gimple_assign_rhs1 (use_stmt) == lhs)
- {
- if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+ if (gimple_assign_rhs1 (use_stmt) == lhs)
+ {
+ if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+ return;
+ }
+ else if (gimple_assign_rhs2 (use_stmt) != lhs
+ || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
+ mask, 0))
return;
}
- else if (gimple_assign_rhs2 (use_stmt) != lhs
- || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
- return;
bool use_bool = true;
bool has_debug_uses = false;
@@ -3434,18 +3800,20 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
of the specified bit after the atomic operation (makes only sense
for xor, otherwise the bit content is compile time known),
we need to invert the bit. */
- g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
- BIT_XOR_EXPR, new_lhs,
- use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
- : mask);
- new_lhs = gimple_assign_lhs (g);
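+  /* MASK may have a different type than LHS; gimple_convert emits a
+     conversion only when one is actually needed.  */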
+ tree mask_convert = mask;
+ gimple_seq stmts = NULL;
+ if (!use_bool)
+ mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
+ new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
+ use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
+ : mask_convert);
if (throws)
{
- gsi_insert_on_edge_immediate (e, g);
- gsi = gsi_for_stmt (g);
+ gsi_insert_seq_on_edge_immediate (e, stmts);
+ gsi = gsi_for_stmt (gimple_seq_last (stmts));
}
else
- gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
}
if (use_bool && has_debug_uses)
{