[v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns

Message ID 20211010134931.270871-1-hjl.tools@gmail.com
State New
Series [v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns

Commit Message

H.J. Lu Oct. 10, 2021, 1:49 p.m. UTC
  Changes in v4:

1. Bypass the redundant check when inputs have been transformed to the
equivalent canonical form with a valid bit operation.

Changes in v3:

1.  Check for invalid bit operations.

commit adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue May 3 13:37:25 2016 +0200

    re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
with lock bts/btr/btc by turning

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

into

  _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
  _5 = _4;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

into

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
  _4 = _11 != 0;

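For reference, the first pattern above (the __atomic_fetch_or_* form)
corresponds to C source along these lines (essentially f2 in the new
gcc.target/i386/pr102566-1a.c test below); at -O2 on x86 it is already
compiled to a single lock bts:

  __attribute__((noinline, noclone)) int
  f2 (int *a, int bit)
  {
    int mask = 1 << bit;
    int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
    int t2 = t1 & mask;
    return t2 != 0;
  }
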
But it failed to optimize many equivalent, but slightly different cases
(C sketches of a few of these follow the list):

1.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _4 = (_Bool) _1;
2.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _4 = (_Bool) _1;
3.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
4.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
5.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
6.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
7.
  _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
  _2 = (int) _1;
  _5 = _2 & mask;
8.
  _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
9.
  _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
10.
  _1 = 1 << bit_4(D);
  mask_5 = (unsigned int) _1;
  _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
  _3 = _2 & mask_5;
11.
  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
  _4 = (int) _3;
  _5 = _4 & mask_7;

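For illustration, these forms come from ordinary C.  Case 1 corresponds to
a bit-0 test (compare foo0 in the new gcc.target/i386/pr102566-2.c test)
and case 8 to a sign-bit test; roughly (function names are illustrative
only):

  _Bool
  bit0_test (int *p)
  {
    /* Case 1: after folding, the bit-0 test becomes the (_Bool) form.  */
    return __atomic_fetch_or (p, 1, __ATOMIC_RELAXED) & 1;
  }

  _Bool
  sign_bit_test (unsigned int *p)
  {
    /* Case 8: the signed compare against zero tests the sign bit.  */
    return (int) __atomic_fetch_or (p, 0x80000000u, __ATOMIC_RELAXED) < 0;
  }
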
We make

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

the canonical forms for this optimization and transform cases 1-9 to the
equivalent canonical form.  For cases 10 and 11, we simply remove the cast
before __atomic_fetch_or_4/__atomic_fetch_and_4, so that they become

  _1 = 1 << bit_4(D);
  _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
  _3 = _2 & _1;

and

  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
  _6 = _3 & mask_7;
  _5 = (int) _6;

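For example, case 10 is essentially what the new
gcc.target/i386/pr102566-4.c test exercises; once the cast on the mask is
removed it compiles to a single lock btsl at -O2:

  #include <stdatomic.h>
  #include <stdbool.h>

  bool
  foo (_Atomic int *v, int bit)
  {
    /* The (unsigned int) mask is the cast removed for case 10.  */
    unsigned int mask = 1 << bit;
    return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
  }
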
gcc/

	PR middle-end/102566
	* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
	(optimize_atomic_bit_test_and): Transform equivalent, but slightly
	different cases to their canonical forms.

gcc/testsuite/

	PR middle-end/102566
	* g++.target/i386/pr102566-1.C: New test.
	* g++.target/i386/pr102566-2.C: Likewise.
	* g++.target/i386/pr102566-3.C: Likewise.
	* g++.target/i386/pr102566-4.C: Likewise.
	* g++.target/i386/pr102566-5a.C: Likewise.
	* g++.target/i386/pr102566-5b.C: Likewise.
	* g++.target/i386/pr102566-6a.C: Likewise.
	* g++.target/i386/pr102566-6b.C: Likewise.
	* gcc.target/i386/pr102566-1a.c: Likewise.
	* gcc.target/i386/pr102566-1b.c: Likewise.
	* gcc.target/i386/pr102566-2.c: Likewise.
	* gcc.target/i386/pr102566-3a.c: Likewise.
	* gcc.target/i386/pr102566-3b.c: Likewise.
	* gcc.target/i386/pr102566-4.c: Likewise.
	* gcc.target/i386/pr102566-5.c: Likewise.
	* gcc.target/i386/pr102566-6.c: Likewise.
	* gcc.target/i386/pr102566-7.c: Likewise.
	* gcc.target/i386/pr102566-8a.c: Likewise.
	* gcc.target/i386/pr102566-8b.c: Likewise.
	* gcc.target/i386/pr102566-9a.c: Likewise.
	* gcc.target/i386/pr102566-9b.c: Likewise.
	* gcc.target/i386/pr102566-10a.c: Likewise.
	* gcc.target/i386/pr102566-10b.c: Likewise.
	* gcc.target/i386/pr102566-11.c: Likewise.
	* gcc.target/i386/pr102566-12.c: Likewise.
---
 gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
 gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
 gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
 gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
 gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
 gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
 gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
 gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
 26 files changed, 1375 insertions(+), 37 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
 create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
  

Comments

Richard Biener Oct. 13, 2021, 12:34 p.m. UTC | #1
On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Changes in v4:
>
> 1. Bypass redundant check when inputs have been transformed to the
> equivalent canonical form with valid bit operation.
>
> Changes in v3:
>
> 1.  Check invalid bit operation.
>
> commit adedd5c173388ae505470df152b9cb3947339566
> Author: Jakub Jelinek <jakub@redhat.com>
> Date:   Tue May 3 13:37:25 2016 +0200
>
>     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
>
> optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> with lock bts/btr/btc by turning
>
>   mask_2 = 1 << cnt_1;
>   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>   _5 = _4 & mask_2;
>
> into
>
>   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
>   _5 = _4;
>
> and
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
>   _3 = _2 & mask_6;
>   _4 = _3 != 0;
>
> into
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
>   _4 = _11 != 0;
>
> But it failed to optimize many equivalent, but slightly different cases:
>
> 1.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _4 = (_Bool) _1;
> 2.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _4 = (_Bool) _1;
> 3.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _7 = ~_1;
>   _5 = (_Bool) _7;
> 4.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _7 = ~_1;
>   _5 = (_Bool) _7;
> 5.
>   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
>   _2 = (int) _1;
>   _7 = ~_2;
>   _5 = (_Bool) _7;
> 6.
>   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
>   _2 = (int) _1;
>   _7 = ~_2;
>   _5 = (_Bool) _7;
> 7.
>   _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
>   _2 = (int) _1;
>   _5 = _2 & mask;
> 8.
>   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
>   _5 = (signed int) _1;
>   _4 = _5 < 0;
> 9.
>   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
>   _5 = (signed int) _1;
>   _4 = _5 < 0;
> 10.
>   _1 = 1 << bit_4(D);
>   mask_5 = (unsigned int) _1;
>   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
>   _3 = _2 & mask_5;
> 11.
>   mask_7 = 1 << bit_6(D);
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
>   _4 = (int) _3;
>   _5 = _4 & mask_7;
>
> We make
>
>   mask_2 = 1 << cnt_1;
>   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
>   _5 = _4 & mask_2;
>
> and
>
>   mask_6 = 1 << bit_5(D);
>   _1 = ~mask_6;
>   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
>   _3 = _2 & mask_6;
>   _4 = _3 != 0;
>
> the canonical forms for this optimization and transform cases 1-9 to the
> equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> before __atomic_fetch_or_4/__atomic_fetch_and_4 with
>
>   _1 = 1 << bit_4(D);
>   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
>   _3 = _2 & _1;
>
> and
>
>   mask_7 = 1 << bit_6(D);
>   _1 = ~mask_7;
>   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
>   _6 = _3 & mask_7;
>   _5 = (int) _6;
>
> gcc/
>
>         PR middle-end/102566
>         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
>         (optimize_atomic_bit_test_and): Transform equivalent, but slightly
>         different cases to their canonical forms.
>
> gcc/testsuite/
>
>         PR middle-end/102566
>         * g++.target/i386/pr102566-1.C: New test.
>         * g++.target/i386/pr102566-2.C: Likewise.
>         * g++.target/i386/pr102566-3.C: Likewise.
>         * g++.target/i386/pr102566-4.C: Likewise.
>         * g++.target/i386/pr102566-5a.C: Likewise.
>         * g++.target/i386/pr102566-5b.C: Likewise.
>         * g++.target/i386/pr102566-6a.C: Likewise.
>         * g++.target/i386/pr102566-6b.C: Likewise.
>         * gcc.target/i386/pr102566-1a.c: Likewise.
>         * gcc.target/i386/pr102566-1b.c: Likewise.
>         * gcc.target/i386/pr102566-2.c: Likewise.
>         * gcc.target/i386/pr102566-3a.c: Likewise.
>         * gcc.target/i386/pr102566-3b.c: Likewise.
>         * gcc.target/i386/pr102566-4.c: Likewise.
>         * gcc.target/i386/pr102566-5.c: Likewise.
>         * gcc.target/i386/pr102566-6.c: Likewise.
>         * gcc.target/i386/pr102566-7.c: Likewise.
>         * gcc.target/i386/pr102566-8a.c: Likewise.
>         * gcc.target/i386/pr102566-8b.c: Likewise.
>         * gcc.target/i386/pr102566-9a.c: Likewise.
>         * gcc.target/i386/pr102566-9b.c: Likewise.
>         * gcc.target/i386/pr102566-10a.c: Likewise.
>         * gcc.target/i386/pr102566-10b.c: Likewise.
>         * gcc.target/i386/pr102566-11.c: Likewise.
>         * gcc.target/i386/pr102566-12.c: Likewise.
> ---
>  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
>  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
>  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
>  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
>  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
>  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
>  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
>  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
>  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
>  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
>  26 files changed, 1375 insertions(+), 37 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
>  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
>
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> new file mode 100644
> index 00000000000..94a66d717cc
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> new file mode 100644
> index 00000000000..4f2aea961c2
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> new file mode 100644
> index 00000000000..e88921dd155
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> new file mode 100644
> index 00000000000..44d1362ac2e
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> @@ -0,0 +1,29 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +tbit0 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +tbit30 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +tbit31 (std::atomic<int_type> &i)
> +{
> +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> new file mode 100644
> index 00000000000..f9595bee2ab
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> new file mode 100644
> index 00000000000..d917b27a918
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> new file mode 100644
> index 00000000000..01d495eda23
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned int> &i)
> +{
> +#define BIT (1 << 31)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> new file mode 100644
> index 00000000000..adc11fcbf2d
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> @@ -0,0 +1,31 @@
> +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool
> +tbit0 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 0)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit30 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 30)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +tbit31 (std::atomic<unsigned long long> &i)
> +{
> +#define BIT (1ll << 63)
> +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> new file mode 100644
> index 00000000000..1c1f86a9659
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> new file mode 100644
> index 00000000000..0bf39824ea6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> new file mode 100644
> index 00000000000..2c8f8c4e59a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic int *v)
> +{
> +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic unsigned int *v, int mask)
> +{
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic unsigned int *v, int mask)
> +{
> +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> new file mode 100644
> index 00000000000..4603a77612c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +#define MASK 0x1234
> +
> +bool
> +foo1 (_Atomic long *v)
> +{
> +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> +}
> +
> +bool
> +foo2 (_Atomic long *v, long mask)
> +{
> +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> +}
> +
> +bool
> +foo3 (_Atomic long *v, long mask)
> +{
> +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "btr" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> new file mode 100644
> index 00000000000..a915de354e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> @@ -0,0 +1,188 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (void);
> +
> +__attribute__((noinline, noclone)) int
> +f1 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f2 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> +  int t2 = t1 & mask;
> +  return t2 != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f3 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f5 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f6 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> +    bar ();
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f9 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f10 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f11 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f12 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f13 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f14 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f15 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f16 (int *a)
> +{
> +  int mask = 1 << 7;
> +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f17 (int *a)
> +{
> +  int mask = 1 << 13;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f18 (int *a)
> +{
> +  int mask = 1 << 0;
> +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f19 (long int *a, int bit)
> +{
> +  long int mask = 1l << bit;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f20 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f21 (int *a, int bit)
> +{
> +  int mask = 1 << bit;
> +  return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f22 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f23 (long int *a)
> +{
> +  long int mask = 1l << 7;
> +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f24 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f25 (short int *a)
> +{
> +  short int mask = 1 << 7;
> +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> new file mode 100644
> index 00000000000..c4dab8135c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> @@ -0,0 +1,107 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -g" } */
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +bar (void)
> +{
> +  cnt++;
> +}
> +
> +#include "pr102566-1a.c"
> +
> +int a;
> +long int b;
> +unsigned long int c;
> +unsigned short int d;
> +
> +int
> +main ()
> +{
> +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> +    __builtin_abort ();
> +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> +    __builtin_abort ();
> +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> +    __builtin_abort ();
> +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (cnt != 0
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> +    __builtin_abort ();
> +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> +    __builtin_abort ();
> +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> +    __builtin_abort ();
> +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> +    __builtin_abort ();
> +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> +    __builtin_abort ();
> +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> +    __builtin_abort ();
> +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> +      || cnt != 2)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> new file mode 100644
> index 00000000000..00a7c349f2a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> new file mode 100644
> index 00000000000..d155ed367a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic long long int *v, int bit)
> +{
> +  long long int mask = 1ll << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> new file mode 100644
> index 00000000000..2668ccf827c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  unsigned int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> new file mode 100644
> index 00000000000..8bf1cd6e1bd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v, int bit)
> +{
> +  int mask = 1 << bit;
> +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> new file mode 100644
> index 00000000000..3dfe55ac683
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> new file mode 100644
> index 00000000000..6bc0ae0f320
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +typedef int __attribute__ ((mode (__word__))) int_type;
> +
> +#define BIT (1 << 0)
> +
> +bool
> +foo0 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> +}
> +
> +bool
> +foo1 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> +}
> +
> +bool
> +foo2 (_Atomic int_type *v)
> +{
> +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> +/* { dg-final { scan-assembler-not "bts" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> new file mode 100644
> index 00000000000..168e3db78c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> new file mode 100644
> index 00000000000..392da3098e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> new file mode 100644
> index 00000000000..3fa2a3ef043
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic int *v)
> +{
> +#define BIT (1 << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic int *v)
> +{
> +#define BIT (1 << 30)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic int *v)
> +{
> +#define BIT (1 << 31)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> new file mode 100644
> index 00000000000..38ddbdc630f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo0 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 0)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo30 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 62)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +bool
> +foo31 (_Atomic long long *v)
> +{
> +#define BIT (1ll << 63)
> +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> +#undef BIT
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> index 70ce6a4d5b8..bb70b87aa5e 100644
> --- a/gcc/tree-ssa-ccp.c
> +++ b/gcc/tree-ssa-ccp.c
> @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
>    return ret;
>  }
>
> +/* Convert
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _7 = ~_1;
> +   _5 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +   _8 = _1 & 1;
> +   _5 = _8 == 0;
> +   and convert
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _7 = ~_1;
> +   _4 = (_Bool) _7;
> +   to
> +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +   _8 = _1 & 1;
> +   _4 = (_Bool) _8;
> +
> +   USE_STMT is the gimple statement which uses the return value of
> +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> +   MASK is the mask passed to __atomic_fetch_or_*.
> + */
> +
> +static gimple *
> +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> +                       tree lhs, tree mask)
> +{
> +  tree and_mask;
> +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +    {
> +      /* MASK must be ~1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                          ~HOST_WIDE_INT_1), mask, 0))
> +       return nullptr;
> +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +    }
> +  else
> +    {
> +      /* MASK must be 1.  */
> +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> +       return nullptr;
> +      and_mask = mask;
> +    }
> +
> +  tree use_lhs = gimple_assign_lhs (use_stmt);
> +
> +  use_operand_p use_p;
> +  gimple *use_not_stmt;
> +
> +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> +      || !is_gimple_assign (use_not_stmt))
> +    return nullptr;
> +
> +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> +    return nullptr;
> +
> +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> +    return nullptr;
> +
> +  gimple_stmt_iterator gsi;
> +  gsi = gsi_for_stmt (use_stmt);
> +  gsi_remove (&gsi, true);
> +  tree var = make_ssa_name (TREE_TYPE (lhs));
> +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> +  lhs = gimple_assign_lhs (use_not_stmt);
> +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> +                                  build_zero_cst (TREE_TYPE (mask)));
> +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +  gsi = gsi_for_stmt (use_not_stmt);
> +  gsi_remove (&gsi, true);
> +  return use_stmt;
> +}
> +
>  /* Optimize
>       mask_2 = 1 << cnt_1;
>       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    tree lhs = gimple_call_lhs (call);
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  tree mask, bit;
> +  tree mask;
>    optab optab;
>
>    if (!flag_inline_atomics
> @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
>        || !single_imm_use (lhs, &use_p, &use_stmt)
>        || !is_gimple_assign (use_stmt)
> -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
>        || !gimple_vdef (call))
>      return;
>
> +  tree bit = nullptr;
> +
> +  mask = gimple_call_arg (call, 1);
> +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> +  if (rhs_code != BIT_AND_EXPR)
> +    {
> +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> +       return;
> +
> +      tree use_lhs = gimple_assign_lhs (use_stmt);
> +      if (TREE_CODE (use_lhs) == SSA_NAME
> +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> +       return;
> +
> +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> +      if (lhs != use_rhs)
> +       return;
> +
> +      gimple *g;
> +      gimple_stmt_iterator gsi;
> +      tree var;
> +      int ibit = -1;
> +
> +      if (rhs_code == BIT_NOT_EXPR)
> +       {
> +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> +         if (!g)
> +           return;
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> +       {
> +         tree and_mask;
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             /* MASK must be ~1.  */
> +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> +                                                  ~HOST_WIDE_INT_1),
> +                                   mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +           }
> +         else
> +           {
> +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> +             if (!operand_equal_p (and_mask, mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _4 = (_Bool) _1;
> +                to
> +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> +                _5 = _1 & 1;
> +                _4 = (_Bool) _5;
> +              */
> +           }
> +         var = make_ssa_name (TREE_TYPE (use_rhs));
> +         replace_uses_by (use_rhs, var);
> +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                  and_mask);
> +         gsi = gsi_for_stmt (use_stmt);
> +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +         use_stmt = g;
> +         ibit = 0;
> +       }
> +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> +       {
> +         gimple *use_nop_stmt;
> +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> +             || !is_gimple_assign (use_nop_stmt))
> +           return;
> +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> +         if (rhs_code != BIT_AND_EXPR)
> +           {
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> +               return;
> +             if (rhs_code == BIT_NOT_EXPR)
> +               {
> +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> +                                             mask);
> +                 if (!g)
> +                   return;
> +                 /* Convert
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                    and convert
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _2 = (int) _1;
> +                    _7 = ~_2;
> +                    _5 = (_Bool) _7;
> +                    to
> +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> +                    _8 = _1 & 1;
> +                    _5 = _8 == 0;
> +                  */
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 use_stmt = g;
> +                 ibit = 0;
> +               }
> +             else
> +               {
> +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> +                   return;
> +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> +                   return;
> +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> +                 if (use_lhs != cmp_rhs1)
> +                   return;
> +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> +                 if (!integer_zerop (cmp_rhs2))
> +                   return;
> +
> +                 tree and_mask;
> +
> +                 unsigned HOST_WIDE_INT bytes
> +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> +                 ibit = bytes * BITS_PER_UNIT - 1;
> +                 unsigned HOST_WIDE_INT highest
> +                   = HOST_WIDE_INT_1U << ibit;
> +
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   {
> +                     /* Get the signed maximum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest - 1);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                   }
> +                 else
> +                   {
> +                     /* Get the signed minimum of the USE_RHS type.  */
> +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> +                                               highest);
> +                     if (!operand_equal_p (and_mask, mask, 0))
> +                       return;
> +
> +                     /* Convert
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _5 = (signed int) _1;
> +                        _4 = _5 < 0 or _5 >= 0;
> +                        to
> +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> +                        _6 = _1 & 0x80000000;
> +                        _4 = _6 != 0 or _6 == 0;
> +                      */
> +                   }
> +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> +                 gsi = gsi_for_stmt (use_stmt);
> +                 gsi_remove (&gsi, true);
> +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> +                                          and_mask);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> +                 use_stmt = g;
> +                 g = gimple_build_assign (use_nop_lhs,
> +                                          (rhs_code == GE_EXPR
> +                                           ? EQ_EXPR : NE_EXPR),
> +                                          var,
> +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +                 gsi = gsi_for_stmt (use_nop_stmt);
> +                 gsi_remove (&gsi, true);
> +               }
> +           }
> +         else
> +           {
> +             tree op_mask = mask;
> +             tree check_mask = op_mask;
> +             if (TREE_CODE (op_mask) == SSA_NAME)
> +               {
> +                 g = SSA_NAME_DEF_STMT (op_mask);
> +                 if (!is_gimple_assign (g))
> +                   return;
> +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> +                   {
> +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> +
> +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> +                       return;
> +
> +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> +                       return;
> +                     op_mask = mask_nop_rhs;
> +                     check_mask = op_mask;
> +                     g = SSA_NAME_DEF_STMT (op_mask);
> +                     if (!is_gimple_assign (g))
> +                       return;
> +                   }
> +
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   {
> +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +                       return;
> +                     check_mask = gimple_assign_rhs1 (g);
> +                     if (TREE_CODE (check_mask) != SSA_NAME)
> +                       return;
> +                     g = SSA_NAME_DEF_STMT (check_mask);
> +                     if (!is_gimple_assign (g))
> +                       return;
> +                   }
> +
> +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> +                     || !integer_onep (gimple_assign_rhs1 (g)))
> +                   return;
> +
> +                 bit = gimple_assign_rhs2 (g);
> +               }
> +
> +             if (TREE_CODE (check_mask) == INTEGER_CST)
> +               {
> +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +                   check_mask = const_unop (BIT_NOT_EXPR,
> +                                            TREE_TYPE (check_mask),
> +                                            check_mask);
> +                 check_mask = fold_convert (TREE_TYPE (lhs),
> +                                            check_mask);
> +                 /* Check if CHECK_MASK is a power of two.  */
> +                 ibit = tree_log2 (check_mask);
> +                 if (ibit < 0)
> +                   return;
> +               }
> +
> +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> +               return;
> +
> +             /* Convert
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _2 = (int) _1;
> +                _5 = _2 & mask;

(***)

> +                to
> +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> +                _6 = _1 & mask;
> +                _5 = (int) _6;
> +                and convert
> +                _1 = ~mask_7;
> +                _2 = (unsigned int) _1;
> +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> +                _4 = (int) _3;
> +                _5 = _4 & mask_7;
> +                to
> +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> +                _12 = _1 & mask_7;
> +                _5 = (int) _12;
> +              */

I wonder if it's better, for maintainability, to have the matching part in match.pd

there you could have

(match (atomic_fetch_mask @1 @2 @3 @mask)
 (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))

and here in this code do

extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree (*)(tree));

and call it on the _5 from (***); the function will return true if it
matched and will set res_ops[] to the positional operands @1, @2,
@3 and @mask.

You can add variants and conditions to the same match entry, see match.pd
for examples and also match-and-simplify.texi
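
To make that concrete, a minimal sketch of the caller side (the entry name,
the captures and their order are just the ones sketched above, not an
existing match.pd entry) could look like:

  /* Declaration genmatch would emit for the (match ...) entry above.  */
  extern bool gimple_atomic_fetch_mask (tree, tree *, tree (*)(tree));

  /* In optimize_atomic_bit_test_and, called on the lhs of use_nop_stmt,
     i.e. the _5 from (***), with a NULL valueize callback.  */
  tree res_ops[4];
  if (gimple_atomic_fetch_mask (gimple_assign_lhs (use_nop_stmt),
				res_ops, NULL))
    {
      /* res_ops[] holds the captured @1, @2, @3 and @mask, so the
	 canonicalization can work from those instead of re-walking
	 the IL by hand.  */
    }

The exact capture layout of course depends on how the match.pd entry is
finally written.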

> +             replace_uses_by (use_lhs, lhs);
> +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> +             gimple_assign_set_lhs (use_nop_stmt, var);
> +             gsi = gsi_for_stmt (use_stmt);
> +             gsi_remove (&gsi, true);
> +             release_defs (use_stmt);
> +             gsi_remove (gsip, true);
> +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);

instead of building a GENERIC NOP you could use the

gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);

overload.
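
For instance (a sketch only, reusing the names from the hunk around it),
the build1 + gimple_build_assign pair could collapse into:

	      gsi = gsi_for_stmt (use_nop_stmt);
	      g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);
	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

i.e. the NOP_EXPR goes directly into the assignment as its code instead of
wrapping VAR in a GENERIC tree first.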

> +             gsi = gsi_for_stmt (use_nop_stmt);
> +             g = gimple_build_assign (use_nop_lhs, var);
> +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +             use_stmt = use_nop_stmt;
> +             mask = op_mask;
> +           }
> +       }
> +
> +      if (!bit)
> +       {
> +         if (ibit < 0)
> +           gcc_unreachable ();
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +    }
> +
>    switch (fn)
>      {
>      case IFN_ATOMIC_BIT_TEST_AND_SET:
> @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
>      return;
>
> -  mask = gimple_call_arg (call, 1);
>    tree use_lhs = gimple_assign_lhs (use_stmt);
>    if (!use_lhs)
>      return;
>
> -  if (TREE_CODE (mask) == INTEGER_CST)
> +  if (!bit)
>      {
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> -      mask = fold_convert (TREE_TYPE (lhs), mask);
> -      int ibit = tree_log2 (mask);
> -      if (ibit < 0)
> -       return;
> -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> -    }
> -  else if (TREE_CODE (mask) == SSA_NAME)
> -    {
> -      gimple *g = SSA_NAME_DEF_STMT (mask);
> -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +      if (TREE_CODE (mask) == INTEGER_CST)
>         {
> -         if (!is_gimple_assign (g)
> -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> +         mask = fold_convert (TREE_TYPE (lhs), mask);
> +         int ibit = tree_log2 (mask);
> +         if (ibit < 0)
> +           return;
> +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> +       }
> +      else if (TREE_CODE (mask) == SSA_NAME)
> +       {
> +         gimple *g = SSA_NAME_DEF_STMT (mask);
> +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> +           {
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> +               return;
> +             mask = gimple_assign_rhs1 (g);
> +             if (TREE_CODE (mask) != SSA_NAME)
> +               return;
> +             g = SSA_NAME_DEF_STMT (mask);
> +           }
> +         if (!is_gimple_assign (g))
>             return;
> -         mask = gimple_assign_rhs1 (g);
> -         if (TREE_CODE (mask) != SSA_NAME)
> +         rhs_code = gimple_assign_rhs_code (g);
> +         if (rhs_code != LSHIFT_EXPR)
> +           {
> +             if (rhs_code != NOP_EXPR)
> +               return;
> +
> +             /* Handle
> +                _1 = 1 << bit_4(D);
> +                mask_5 = (unsigned int) _1;
> +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> +                _3 = _2 & mask_5;
> +                */
> +             tree nop_lhs = gimple_assign_lhs (g);
> +             tree nop_rhs = gimple_assign_rhs1 (g);
> +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> +               return;
> +             g = SSA_NAME_DEF_STMT (nop_rhs);
> +             if (!is_gimple_assign (g)
> +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> +               return;
> +           }
> +         if (!integer_onep (gimple_assign_rhs1 (g)))
>             return;
> -         g = SSA_NAME_DEF_STMT (mask);
> +         bit = gimple_assign_rhs2 (g);
>         }
> -      if (!is_gimple_assign (g)
> -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> -         || !integer_onep (gimple_assign_rhs1 (g)))
> +      else
>         return;
> -      bit = gimple_assign_rhs2 (g);
> -    }
> -  else
> -    return;
>
> -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> -    {
> -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> +       {
> +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> +           return;
> +       }
> +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> +                                   mask, 0))
>         return;
>      }
> -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> -    return;
>
>    bool use_bool = true;
>    bool has_debug_uses = false;
> @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
>          of the specified bit after the atomic operation (makes only sense
>          for xor, otherwise the bit content is compile time known),
>          we need to invert the bit.  */
> +      tree mask_convert = mask;
> +      gimple *g_convert = nullptr;
> +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> +       {
> +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> +         g_convert = gimple_build_assign (mask_convert, var);
> +       }
>        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
>                                BIT_XOR_EXPR, new_lhs,
>                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> -                                       : mask);
> +                                       : mask_convert);
>        new_lhs = gimple_assign_lhs (g);

You could use

        gimple_seq stmts = NULL;
        mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
        new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
                                         : mask_convert);

>        if (throws)
>         {
> -         gsi_insert_on_edge_immediate (e, g);

gsi_insert_seq_on_edge_immediate (e, stmts);

to simplify this.  The conversion will only be generated if necessary.
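
Putting the two pieces together, the whole hunk could then be shaped
roughly like this (a sketch: it assumes GSI_NEW_STMT leaves the iterator at
the last statement of the inserted sequence, that gimple_build really emits
the BIT_XOR statement rather than folding it away, and that repositioning
via SSA_NAME_DEF_STMT of the new value is acceptable when inserting on the
edge):

	gimple_seq stmts = NULL;
	tree mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
	new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs),
				new_lhs,
				use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
					 : mask_convert);
	if (throws)
	  {
	    gsi_insert_seq_on_edge_immediate (e, stmts);
	    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (new_lhs));
	  }
	else
	  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

which would drop both the explicit g_convert handling and the duplicated
insertion logic from the patch.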

> +         if (g_convert)
> +           {
> +             gsi_insert_on_edge_immediate (e, g_convert);
> +             gsi = gsi_for_stmt (g_convert);
> +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +           }
> +         else
> +           gsi_insert_on_edge_immediate (e, g);
>           gsi = gsi_for_stmt (g);
>         }
>        else
> -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +       {
> +         if (g_convert)
> +           {
> +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> +             gsi = gsi_for_stmt (g_convert);
> +           }
> +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +       }
>      }
>    if (use_bool && has_debug_uses)
>      {
> --
> 2.31.1
>
  
Hongtao Liu Oct. 21, 2021, 11:15 a.m. UTC | #2

On Wed, Oct 13, 2021 at 8:34 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Changes in v4:
> >
> > 1. Bypass redundant check when inputs have been transformed to the
> > equivalent canonical form with valid bit operation.
> >
> > Changes in v3:
> >
> > 1.  Check invalid bit operation.
> >
> > commit adedd5c173388ae505470df152b9cb3947339566
> > Author: Jakub Jelinek <jakub@redhat.com>
> > Date:   Tue May 3 13:37:25 2016 +0200
> >
> >     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
> >
> > optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> > with lock bts/btr/btc by turning
> >
> >   mask_2 = 1 << cnt_1;
> >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> >   _5 = _4 & mask_2;
> >
> > into
> >
> >   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> >   _5 = _4;
> >
> > and
> >
> >   mask_6 = 1 << bit_5(D);
> >   _1 = ~mask_6;
> >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> >   _3 = _2 & mask_6;
> >   _4 = _3 != 0;
> >
> > into
> >
> >   mask_6 = 1 << bit_5(D);
> >   _1 = ~mask_6;
> >   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
> >   _4 = _11 != 0;
> >
> > But it failed to optimize many equivalent, but slighly different cases:
> >
> > 1.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> >   _4 = (_Bool) _1;
> > 2.
> >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> >   _4 = (_Bool) _1;
> > 3.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> >   _7 = ~_1;
> >   _5 = (_Bool) _7;
> > 4.
> >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> >   _7 = ~_1;
> >   _5 = (_Bool) _7;
> > 5.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> >   _2 = (int) _1;
> >   _7 = ~_2;
> >   _5 = (_Bool) _7;
> > 6.
> >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> >   _2 = (int) _1;
> >   _7 = ~_2;
> >   _5 = (_Bool) _7;
> > 7.
> >   _1 = _atomic_fetch_or_4 (ptr_6, mask, _3);
> >   _2 = (int) _1;
> >   _5 = _2 & mask;
> > 8.
> >   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> >   _5 = (signed int) _1;
> >   _4 = _5 < 0;
> > 9.
> >   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> >   _5 = (signed int) _1;
> >   _4 = _5 < 0;
> > 10.
> >   _1 = 1 << bit_4(D);
> >   mask_5 = (unsigned int) _1;
> >   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> >   _3 = _2 & mask_5;
> > 11.
> >   mask_7 = 1 << bit_6(D);
> >   _1 = ~mask_7;
> >   _2 = (unsigned int) _1;
> >   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
> >   _4 = (int) _3;
> >   _5 = _4 & mask_7;
> >
> > We make
> >
> >   mask_2 = 1 << cnt_1;
> >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> >   _5 = _4 & mask_2;
> >
> > and
> >
> >   mask_6 = 1 << bit_5(D);
> >   _1 = ~mask_6;
> >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> >   _3 = _2 & mask_6;
> >   _4 = _3 != 0;
> >
> > the canonical forms for this optimization and transform cases 1-9 to the
> > equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> > before __atomic_fetch_or_4/__atomic_fetch_and_4 with
> >
> >   _1 = 1 << bit_4(D);
> >   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
> >   _3 = _2 & _1;
> >
> > and
> >
> >   mask_7 = 1 << bit_6(D);
> >   _1 = ~mask_7;
> >   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
> >   _6 = _3 & mask_7;
> >   _5 = (int) _6;
> >
> > gcc/
> >
> >         PR middle-end/102566
> >         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
> >         (optimize_atomic_bit_test_and): Transform equivalent, but slighly
> >         different cases to their canonical forms.
> >
> > gcc/testsuite/
> >
> >         PR middle-end/102566
> >         * g++.target/i386/pr102566-1.C: New test.
> >         * g++.target/i386/pr102566-2.C: Likewise.
> >         * g++.target/i386/pr102566-3.C: Likewise.
> >         * g++.target/i386/pr102566-4.C: Likewise.
> >         * g++.target/i386/pr102566-5a.C: Likewise.
> >         * g++.target/i386/pr102566-5b.C: Likewise.
> >         * g++.target/i386/pr102566-6a.C: Likewise.
> >         * g++.target/i386/pr102566-6b.C: Likewise.
> >         * gcc.target/i386/pr102566-1a.c: Likewise.
> >         * gcc.target/i386/pr102566-1b.c: Likewise.
> >         * gcc.target/i386/pr102566-2.c: Likewise.
> >         * gcc.target/i386/pr102566-3a.c: Likewise.
> >         * gcc.target/i386/pr102566-3b.c: Likewise.
> >         * gcc.target/i386/pr102566-4.c: Likewise.
> >         * gcc.target/i386/pr102566-5.c: Likewise.
> >         * gcc.target/i386/pr102566-6.c: Likewise.
> >         * gcc.target/i386/pr102566-7.c: Likewise.
> >         * gcc.target/i386/pr102566-8a.c: Likewise.
> >         * gcc.target/i386/pr102566-8b.c: Likewise.
> >         * gcc.target/i386/pr102566-9a.c: Likewise.
> >         * gcc.target/i386/pr102566-9b.c: Likewise.
> >         * gcc.target/i386/pr102566-10a.c: Likewise.
> >         * gcc.target/i386/pr102566-10b.c: Likewise.
> >         * gcc.target/i386/pr102566-11.c: Likewise.
> >         * gcc.target/i386/pr102566-12.c: Likewise.
> > ---
> >  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
> >  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
> >  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
> >  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
> >  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
> >  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
> >  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
> >  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
> >  26 files changed, 1375 insertions(+), 37 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
> >
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > new file mode 100644
> > index 00000000000..94a66d717cc
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > new file mode 100644
> > index 00000000000..4f2aea961c2
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > new file mode 100644
> > index 00000000000..e88921dd155
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > new file mode 100644
> > index 00000000000..44d1362ac2e
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > @@ -0,0 +1,29 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +typedef int __attribute__ ((mode (__word__))) int_type;
> > +
> > +#define BIT (1 << 0)
> > +
> > +bool
> > +tbit0 (std::atomic<int_type> &i)
> > +{
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<int_type> &i)
> > +{
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<int_type> &i)
> > +{
> > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "bts" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > new file mode 100644
> > index 00000000000..f9595bee2ab
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > new file mode 100644
> > index 00000000000..d917b27a918
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 0)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 30)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 63)
> > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > new file mode 100644
> > index 00000000000..01d495eda23
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target c++11 } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned int> &i)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > new file mode 100644
> > index 00000000000..adc11fcbf2d
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <atomic>
> > +
> > +bool
> > +tbit0 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 0)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit30 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 30)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +tbit31 (std::atomic<unsigned long long> &i)
> > +{
> > +#define BIT (1ll << 63)
> > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > new file mode 100644
> > index 00000000000..1c1f86a9659
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > new file mode 100644
> > index 00000000000..0bf39824ea6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic long long int *v, int bit)
> > +{
> > +  long long int mask = 1ll << bit;
> > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > new file mode 100644
> > index 00000000000..2c8f8c4e59a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > @@ -0,0 +1,28 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +#define MASK 0x1234
> > +
> > +bool
> > +foo1 (_Atomic int *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> > +}
> > +
> > +bool
> > +foo2 (_Atomic unsigned int *v, int mask)
> > +{
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +bool
> > +foo3 (_Atomic unsigned int *v, int mask)
> > +{
> > +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "bts" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > new file mode 100644
> > index 00000000000..4603a77612c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > @@ -0,0 +1,28 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +#define MASK 0x1234
> > +
> > +bool
> > +foo1 (_Atomic long *v)
> > +{
> > +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> > +}
> > +
> > +bool
> > +foo2 (_Atomic long *v, long mask)
> > +{
> > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +bool
> > +foo3 (_Atomic long *v, long mask)
> > +{
> > +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "btr" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > new file mode 100644
> > index 00000000000..a915de354e5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > @@ -0,0 +1,188 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +void bar (void);
> > +
> > +__attribute__((noinline, noclone)) int
> > +f1 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f2 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> > +  int t2 = t1 & mask;
> > +  return t2 != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f3 (long int *a, int bit)
> > +{
> > +  long int mask = 1l << bit;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f4 (int *a)
> > +{
> > +  int mask = 1 << 7;
> > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f5 (int *a)
> > +{
> > +  int mask = 1 << 13;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f6 (int *a)
> > +{
> > +  int mask = 1 << 0;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) void
> > +f7 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> > +    bar ();
> > +}
> > +
> > +__attribute__((noinline, noclone)) void
> > +f8 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> > +    bar ();
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f9 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f10 (int *a)
> > +{
> > +  int mask = 1 << 7;
> > +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f11 (int *a)
> > +{
> > +  int mask = 1 << 13;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f12 (int *a)
> > +{
> > +  int mask = 1 << 0;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f13 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f14 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f15 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f16 (int *a)
> > +{
> > +  int mask = 1 << 7;
> > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f17 (int *a)
> > +{
> > +  int mask = 1 << 13;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f18 (int *a)
> > +{
> > +  int mask = 1 << 0;
> > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f19 (long int *a, int bit)
> > +{
> > +  long int mask = 1l << bit;
> > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f20 (long int *a)
> > +{
> > +  long int mask = 1l << 7;
> > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) int
> > +f21 (int *a, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return (__sync_fetch_and_or (a, mask) & mask);
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f22 (long int *a)
> > +{
> > +  long int mask = 1l << 7;
> > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> > +}
> > +
> > +__attribute__((noinline, noclone)) long int
> > +f23 (long int *a)
> > +{
> > +  long int mask = 1l << 7;
> > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> > +}
> > +
> > +__attribute__((noinline, noclone)) short int
> > +f24 (short int *a)
> > +{
> > +  short int mask = 1 << 7;
> > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > +}
> > +
> > +__attribute__((noinline, noclone)) short int
> > +f25 (short int *a)
> > +{
> > +  short int mask = 1 << 7;
> > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > new file mode 100644
> > index 00000000000..c4dab8135c7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > @@ -0,0 +1,107 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -g" } */
> > +
> > +int cnt;
> > +
> > +__attribute__((noinline, noclone)) void
> > +bar (void)
> > +{
> > +  cnt++;
> > +}
> > +
> > +#include "pr102566-1a.c"
> > +
> > +int a;
> > +long int b;
> > +unsigned long int c;
> > +unsigned short int d;
> > +
> > +int
> > +main ()
> > +{
> > +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> > +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> > +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> > +    __builtin_abort ();
> > +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> > +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> > +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> > +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> > +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> > +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> > +    __builtin_abort ();
> > +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> > +    __builtin_abort ();
> > +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> > +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (cnt != 0
> > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > +    __builtin_abort ();
> > +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > +    __builtin_abort ();
> > +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> > +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > +    __builtin_abort ();
> > +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> > +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> > +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> > +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > +    __builtin_abort ();
> > +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> > +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> > +    __builtin_abort ();
> > +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> > +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > +      || cnt != 2)
> > +    __builtin_abort ();
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > new file mode 100644
> > index 00000000000..00a7c349f2a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > new file mode 100644
> > index 00000000000..8bf1cd6e1bd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > new file mode 100644
> > index 00000000000..d155ed367a1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic long long int *v, int bit)
> > +{
> > +  long long int mask = 1ll << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > new file mode 100644
> > index 00000000000..2668ccf827c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  unsigned int mask = 1 << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > new file mode 100644
> > index 00000000000..8bf1cd6e1bd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo (_Atomic int *v, int bit)
> > +{
> > +  int mask = 1 << bit;
> > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > new file mode 100644
> > index 00000000000..3dfe55ac683
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > new file mode 100644
> > index 00000000000..6bc0ae0f320
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +typedef int __attribute__ ((mode (__word__))) int_type;
> > +
> > +#define BIT (1 << 0)
> > +
> > +bool
> > +foo0 (_Atomic int_type *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> > +}
> > +
> > +bool
> > +foo1 (_Atomic int_type *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> > +}
> > +
> > +bool
> > +foo2 (_Atomic int_type *v)
> > +{
> > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > +/* { dg-final { scan-assembler-not "bts" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > new file mode 100644
> > index 00000000000..168e3db78c9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > new file mode 100644
> > index 00000000000..392da3098e0
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 0)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 62)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 63)
> > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > new file mode 100644
> > index 00000000000..3fa2a3ef043
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 0)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 30)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic int *v)
> > +{
> > +#define BIT (1 << 31)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > new file mode 100644
> > index 00000000000..38ddbdc630f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include <stdatomic.h>
> > +#include <stdbool.h>
> > +
> > +bool
> > +foo0 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 0)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo30 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 62)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +bool
> > +foo31 (_Atomic long long *v)
> > +{
> > +#define BIT (1ll << 63)
> > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > +#undef BIT
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> > index 70ce6a4d5b8..bb70b87aa5e 100644
> > --- a/gcc/tree-ssa-ccp.c
> > +++ b/gcc/tree-ssa-ccp.c
> > @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
> >    return ret;
> >  }
> >
> > +/* Convert
> > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +   _7 = ~_1;
> > +   _5 = (_Bool) _7;
> > +   to
> > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +   _8 = _1 & 1;
> > +   _5 = _8 == 0;
> > +   and convert
> > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +   _7 = ~_1;
> > +   _4 = (_Bool) _7;
> > +   to
> > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +   _8 = _1 & 1;
> > +   _4 = (_Bool) _8;
> > +
> > +   USE_STMT is the gimplt statement which uses the return value of
> > +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> > +   MASK is the mask passed to __atomic_fetch_or_*.
> > + */
> > +
> > +static gimple *
> > +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> > +                       tree lhs, tree mask)
> > +{
> > +  tree and_mask;
> > +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +    {
> > +      /* MASK must be ~1.  */
> > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > +                                          ~HOST_WIDE_INT_1), mask, 0))
> > +       return nullptr;
> > +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > +    }
> > +  else
> > +    {
> > +      /* MASK must be 1.  */
> > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> > +       return nullptr;
> > +      and_mask = mask;
> > +    }
> > +
> > +  tree use_lhs = gimple_assign_lhs (use_stmt);
> > +
> > +  use_operand_p use_p;
> > +  gimple *use_not_stmt;
> > +
> > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > +      || !is_gimple_assign (use_not_stmt))
> > +    return nullptr;
> > +
> > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> > +    return nullptr;
> > +
> > +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> > +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> > +    return nullptr;
> > +
> > +  gimple_stmt_iterator gsi;
> > +  gsi = gsi_for_stmt (use_stmt);
> > +  gsi_remove (&gsi, true);
> > +  tree var = make_ssa_name (TREE_TYPE (lhs));
> > +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> > +  gsi = gsi_for_stmt (use_not_stmt);
> > +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> > +  lhs = gimple_assign_lhs (use_not_stmt);
> > +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> > +                                  build_zero_cst (TREE_TYPE (mask)));
> > +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +  gsi = gsi_for_stmt (use_not_stmt);
> > +  gsi_remove (&gsi, true);
> > +  return use_stmt;
> > +}
> > +
> >  /* Optimize
> >       mask_2 = 1 << cnt_1;
> >       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >    tree lhs = gimple_call_lhs (call);
> >    use_operand_p use_p;
> >    gimple *use_stmt;
> > -  tree mask, bit;
> > +  tree mask;
> >    optab optab;
> >
> >    if (!flag_inline_atomics
> > @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> >        || !single_imm_use (lhs, &use_p, &use_stmt)
> >        || !is_gimple_assign (use_stmt)
> > -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> >        || !gimple_vdef (call))
> >      return;
> >
> > +  tree bit = nullptr;
> > +
> > +  mask = gimple_call_arg (call, 1);
> > +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> > +  if (rhs_code != BIT_AND_EXPR)
> > +    {
> > +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> > +       return;
> > +
> > +      tree use_lhs = gimple_assign_lhs (use_stmt);
> > +      if (TREE_CODE (use_lhs) == SSA_NAME
> > +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> > +       return;
> > +
> > +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> > +      if (lhs != use_rhs)
> > +       return;
> > +
> > +      gimple *g;
> > +      gimple_stmt_iterator gsi;
> > +      tree var;
> > +      int ibit = -1;
> > +
> > +      if (rhs_code == BIT_NOT_EXPR)
> > +       {
> > +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> > +         if (!g)
> > +           return;
> > +         use_stmt = g;
> > +         ibit = 0;
> > +       }
> > +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> > +       {
> > +         tree and_mask;
> > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +           {
> > +             /* MASK must be ~1.  */
> > +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > +                                                  ~HOST_WIDE_INT_1),
> > +                                   mask, 0))
> > +               return;
> > +
> > +             /* Convert
> > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +                _4 = (_Bool) _1;
> > +                to
> > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > +                _5 = _1 & 1;
> > +                _4 = (_Bool) _5;
> > +              */
> > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > +           }
> > +         else
> > +           {
> > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > +             if (!operand_equal_p (and_mask, mask, 0))
> > +               return;
> > +
> > +             /* Convert
> > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +                _4 = (_Bool) _1;
> > +                to
> > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > +                _5 = _1 & 1;
> > +                _4 = (_Bool) _5;
> > +              */
> > +           }
> > +         var = make_ssa_name (TREE_TYPE (use_rhs));
> > +         replace_uses_by (use_rhs, var);
> > +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > +                                  and_mask);
> > +         gsi = gsi_for_stmt (use_stmt);
> > +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > +         use_stmt = g;
> > +         ibit = 0;
> > +       }
> > +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> > +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> > +       {
> > +         gimple *use_nop_stmt;
> > +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> > +             || !is_gimple_assign (use_nop_stmt))
> > +           return;
> > +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> > +         if (rhs_code != BIT_AND_EXPR)
> > +           {
> > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> > +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> > +               return;
> > +             if (rhs_code == BIT_NOT_EXPR)
> > +               {
> > +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> > +                                             mask);
> > +                 if (!g)
> > +                   return;
> > +                 /* Convert
> > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > +                    _2 = (int) _1;
> > +                    _7 = ~_2;
> > +                    _5 = (_Bool) _7;
> > +                    to
> > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > +                    _8 = _1 & 1;
> > +                    _5 = _8 == 0;
> > +                    and convert
> > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > +                    _2 = (int) _1;
> > +                    _7 = ~_2;
> > +                    _5 = (_Bool) _7;
> > +                    to
> > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > +                    _8 = _1 & 1;
> > +                    _5 = _8 == 0;
> > +                  */
> > +                 gsi = gsi_for_stmt (use_stmt);
> > +                 gsi_remove (&gsi, true);
> > +                 use_stmt = g;
> > +                 ibit = 0;
> > +               }
> > +             else
> > +               {
> > +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> > +                   return;
> > +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> > +                   return;
> > +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > +                 if (use_lhs != cmp_rhs1)
> > +                   return;
> > +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > +                 if (!integer_zerop (cmp_rhs2))
> > +                   return;
> > +
> > +                 tree and_mask;
> > +
> > +                 unsigned HOST_WIDE_INT bytes
> > +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> > +                 ibit = bytes * BITS_PER_UNIT - 1;
> > +                 unsigned HOST_WIDE_INT highest
> > +                   = HOST_WIDE_INT_1U << ibit;
> > +
> > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +                   {
> > +                     /* Get the signed maximum of the USE_RHS type.  */
> > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > +                                               highest - 1);
> > +                     if (!operand_equal_p (and_mask, mask, 0))
> > +                       return;
> > +
> > +                     /* Convert
> > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > +                        _5 = (signed int) _1;
> > +                        _4 = _5 < 0 or _5 >= 0;
> > +                        to
> > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > +                        _6 = _1 & 0x80000000;
> > +                        _4 = _6 != 0 or _6 == 0;
> > +                      */
> > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > +                                               highest);
> > +                   }
> > +                 else
> > +                   {
> > +                     /* Get the signed minimum of the USE_RHS type.  */
> > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > +                                               highest);
> > +                     if (!operand_equal_p (and_mask, mask, 0))
> > +                       return;
> > +
> > +                     /* Convert
> > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > +                        _5 = (signed int) _1;
> > +                        _4 = _5 < 0 or _5 >= 0;
> > +                        to
> > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > +                        _6 = _1 & 0x80000000;
> > +                        _4 = _6 != 0 or _6 == 0;
> > +                      */
> > +                   }
> > +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> > +                 gsi = gsi_for_stmt (use_stmt);
> > +                 gsi_remove (&gsi, true);
> > +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > +                                          and_mask);
> > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > +                 use_stmt = g;
> > +                 g = gimple_build_assign (use_nop_lhs,
> > +                                          (rhs_code == GE_EXPR
> > +                                           ? EQ_EXPR : NE_EXPR),
> > +                                          var,
> > +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> > +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > +                 gsi_remove (&gsi, true);
> > +               }
> > +           }
> > +         else
> > +           {
> > +             tree op_mask = mask;
> > +             tree check_mask = op_mask;
> > +             if (TREE_CODE (op_mask) == SSA_NAME)
> > +               {
> > +                 g = SSA_NAME_DEF_STMT (op_mask);
> > +                 if (!is_gimple_assign (g))
> > +                   return;
> > +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> > +                   {
> > +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> > +
> > +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> > +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> > +                       return;
> > +
> > +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> > +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> > +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> > +                       return;
> > +                     op_mask = mask_nop_rhs;
> > +                     check_mask = op_mask;
> > +                     g = SSA_NAME_DEF_STMT (op_mask);
> > +                     if (!is_gimple_assign (g))
> > +                       return;
> > +                   }
> > +
> > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +                   {
> > +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > +                       return;
> > +                     check_mask = gimple_assign_rhs1 (g);
> > +                     if (TREE_CODE (check_mask) != SSA_NAME)
> > +                       return;
> > +                     g = SSA_NAME_DEF_STMT (check_mask);
> > +                     if (!is_gimple_assign (g))
> > +                       return;
> > +                   }
> > +
> > +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > +                     || !integer_onep (gimple_assign_rhs1 (g)))
> > +                   return;
> > +
> > +                 bit = gimple_assign_rhs2 (g);
> > +               }
> > +
> > +             if (TREE_CODE (check_mask) == INTEGER_CST)
> > +               {
> > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +                   check_mask = const_unop (BIT_NOT_EXPR,
> > +                                            TREE_TYPE (check_mask),
> > +                                            check_mask);
> > +                 check_mask = fold_convert (TREE_TYPE (lhs),
> > +                                            check_mask);
> > +                 /* Check if CHECK_MASK is a power of two.  */
> > +                 ibit = tree_log2 (check_mask);
> > +                 if (ibit < 0)
> > +                   return;
> > +               }
> > +
> > +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> > +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> > +               return;
> > +
> > +             /* Convert
> > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > +                _2 = (int) _1;
> > +                _5 = _2 & mask;
>
> (***)
>
> > +                to
> > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > +                _6 = _1 & mask;
> > +                _5 = (int) _6;
> > +                and convert
> > +                _1 = ~mask_7;
> > +                _2 = (unsigned int) _1;
> > +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> > +                _4 = (int) _3;
> > +                _5 = _4 & mask_7;
> > +                to
> > +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> > +                _12 = _3 & mask_7;
> > +                _5 = (int) _12;
> > +              */
>
> I wonder if it's better, for maintenance, to have the matching part in match.pd
I'm trying to rewrite the matching part in match.pd and find that the
canonicalization is ok when the mask is a constant, but not when it is a
variable, since it will be simplified back by
 /* In GIMPLE, getting rid of 2 conversions for one new results
    in smaller IL.  */
 (simplify
  (convert (bitop:cs@2 (nop_convert:s @0) @1))
  (if (GIMPLE
       && TREE_CODE (@1) != INTEGER_CST
       && tree_nop_conversion_p (type, TREE_TYPE (@2))
       && types_match (type, @0))
   (bitop @0 (convert @1)))))

The canonicalization for a variable mask is like

convert
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
 _4 = (int) _3;
 _5 = _4 & mask_7;

to
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
  _4 = (unsigned int) mask_7;
  _6 = _3 & _4;
  _5 = (int) _6;

and it gets simplified back.

I've also tried another way of simplification, like

convert
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
 _4 = (int) _3;
 _5 = _4 & mask_7;

to
  _1 = (unsigned int) mask_7;
  _2 = ~_1;
  _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
  _6 = _3 & _1;
  _5 = (int) _6;

but it's prevented by the check below, since __atomic_fetch_and_4 is not
ECF_CONST and we would need to regenerate the call with the updated parameter.

  /* We can't and should not emit calls to non-const functions.  */
  if (!(flags_from_decl_or_type (decl) & ECF_CONST))
    return NULL;
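
For reference, here is a minimal, untested sketch of what regenerating
__atomic_fetch_and_4 with the converted mask would need to do by hand; the
helper name and the use of gimple_move_vops/gsi_replace are my assumptions,
not code from the patch:

/* Untested sketch, not from the patch: replace CALL, an
   __atomic_fetch_and_4 whose second argument was (unsigned int) ~mask_7,
   with an equivalent call built from a freshly converted and inverted
   MASK.  GSI must point at CALL.  */
static void
rebuild_atomic_fetch_and (gimple_stmt_iterator *gsi, gcall *call, tree mask)
{
  tree argtype = TREE_TYPE (gimple_call_arg (call, 1));

  /* _1 = (unsigned int) mask_7;  */
  tree cmask = make_ssa_name (argtype);
  gsi_insert_before (gsi, gimple_build_assign (cmask, NOP_EXPR, mask),
                     GSI_SAME_STMT);

  /* _2 = ~_1;  */
  tree nmask = make_ssa_name (argtype);
  gsi_insert_before (gsi, gimple_build_assign (nmask, BIT_NOT_EXPR, cmask),
                     GSI_SAME_STMT);

  /* _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);  rebuilt by hand because
     match.pd cannot re-emit a call that is not ECF_CONST.  */
  gcall *newcall
    = gimple_build_call (gimple_call_fndecl (call), 3,
                         gimple_call_arg (call, 0), nmask,
                         gimple_call_arg (call, 2));
  gimple_call_set_lhs (newcall, gimple_call_lhs (call));
  gimple_move_vops (newcall, call);
  gsi_replace (gsi, newcall, true);
}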

>
> there you could have
>
> (match (atomic_fetch_mask @1 @2 @3 @mask)
>  (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))
>
> and here in this code do
>
> extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree (*)(tree));
>
> and call it on the _5 from (***) where the function will return true if it
> matched and it will set res_ops[] with the positional operands @1 @2
> @3 and @mask.
>
> You can add variants and conditions to the same match entry, see match.pd
> for examples and also match-and-simplify.texi
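
If I understand the suggestion correctly, the consumer in
optimize_atomic_bit_test_and would then look something like this; the
predicate name is taken from the sketch above, and the res_ops layout and
the NULL valueizer are my guesses, untested:

/* Guess at how the match.pd-generated predicate would be consumed;
   res_ops order assumed to be @1, @2, @3, @mask as described above.  */
extern bool gimple_atomic_fetch_mask (tree, tree *, tree (*)(tree));

static bool
match_atomic_fetch_mask (tree use_lhs, tree *ptr, tree *memorder, tree *mask)
{
  tree res_ops[4];
  if (!gimple_atomic_fetch_mask (use_lhs, res_ops, NULL))
    return false;
  *ptr = res_ops[1];      /* @2, the atomic location.  */
  *memorder = res_ops[2]; /* @3, the memory order argument.  */
  *mask = res_ops[3];     /* @mask.  */
  return true;
}
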
>
> > +             replace_uses_by (use_lhs, lhs);
> > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> > +             gimple_assign_set_lhs (use_nop_stmt, var);
> > +             gsi = gsi_for_stmt (use_stmt);
> > +             gsi_remove (&gsi, true);
> > +             release_defs (use_stmt);
> > +             gsi_remove (gsip, true);
> > +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
>
> instead of building a GENERIC NOP you could use the
>
> gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
>
> overload.
>
> > +             gsi = gsi_for_stmt (use_nop_stmt);
> > +             g = gimple_build_assign (use_nop_lhs, var);
> > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +             use_stmt = use_nop_stmt;
> > +             mask = op_mask;
> > +           }
> > +       }
> > +
> > +      if (!bit)
> > +       {
> > +         if (ibit < 0)
> > +           gcc_unreachable ();
> > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > +       }
> > +    }
> > +
> >    switch (fn)
> >      {
> >      case IFN_ATOMIC_BIT_TEST_AND_SET:
> > @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
> >      return;
> >
> > -  mask = gimple_call_arg (call, 1);
> >    tree use_lhs = gimple_assign_lhs (use_stmt);
> >    if (!use_lhs)
> >      return;
> >
> > -  if (TREE_CODE (mask) == INTEGER_CST)
> > +  if (!bit)
> >      {
> > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > -      mask = fold_convert (TREE_TYPE (lhs), mask);
> > -      int ibit = tree_log2 (mask);
> > -      if (ibit < 0)
> > -       return;
> > -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > -    }
> > -  else if (TREE_CODE (mask) == SSA_NAME)
> > -    {
> > -      gimple *g = SSA_NAME_DEF_STMT (mask);
> > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +      if (TREE_CODE (mask) == INTEGER_CST)
> >         {
> > -         if (!is_gimple_assign (g)
> > -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > +         mask = fold_convert (TREE_TYPE (lhs), mask);
> > +         int ibit = tree_log2 (mask);
> > +         if (ibit < 0)
> > +           return;
> > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > +       }
> > +      else if (TREE_CODE (mask) == SSA_NAME)
> > +       {
> > +         gimple *g = SSA_NAME_DEF_STMT (mask);
> > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > +           {
> > +             if (!is_gimple_assign (g)
> > +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > +               return;
> > +             mask = gimple_assign_rhs1 (g);
> > +             if (TREE_CODE (mask) != SSA_NAME)
> > +               return;
> > +             g = SSA_NAME_DEF_STMT (mask);
> > +           }
> > +         if (!is_gimple_assign (g))
> >             return;
> > -         mask = gimple_assign_rhs1 (g);
> > -         if (TREE_CODE (mask) != SSA_NAME)
> > +         rhs_code = gimple_assign_rhs_code (g);
> > +         if (rhs_code != LSHIFT_EXPR)
> > +           {
> > +             if (rhs_code != NOP_EXPR)
> > +               return;
> > +
> > +             /* Handle
> > +                _1 = 1 << bit_4(D);
> > +                mask_5 = (unsigned int) _1;
> > +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > +                _3 = _2 & mask_5;
> > +                */
> > +             tree nop_lhs = gimple_assign_lhs (g);
> > +             tree nop_rhs = gimple_assign_rhs1 (g);
> > +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> > +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> > +               return;
> > +             g = SSA_NAME_DEF_STMT (nop_rhs);
> > +             if (!is_gimple_assign (g)
> > +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> > +               return;
> > +           }
> > +         if (!integer_onep (gimple_assign_rhs1 (g)))
> >             return;
> > -         g = SSA_NAME_DEF_STMT (mask);
> > +         bit = gimple_assign_rhs2 (g);
> >         }
> > -      if (!is_gimple_assign (g)
> > -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > -         || !integer_onep (gimple_assign_rhs1 (g)))
> > +      else
> >         return;
> > -      bit = gimple_assign_rhs2 (g);
> > -    }
> > -  else
> > -    return;
> >
> > -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> > -    {
> > -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> > +       {
> > +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > +           return;
> > +       }
> > +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> > +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> > +                                   mask, 0))
> >         return;
> >      }
> > -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> > -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> > -    return;
> >
> >    bool use_bool = true;
> >    bool has_debug_uses = false;
> > @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> >          of the specified bit after the atomic operation (makes only sense
> >          for xor, otherwise the bit content is compile time known),
> >          we need to invert the bit.  */
> > +      tree mask_convert = mask;
> > +      gimple *g_convert = nullptr;
> > +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> > +       {
> > +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> > +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> > +         g_convert = gimple_build_assign (mask_convert, var);
> > +       }
> >        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> >                                BIT_XOR_EXPR, new_lhs,
> >                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> > -                                       : mask);
> > +                                       : mask_convert);
> >        new_lhs = gimple_assign_lhs (g);
>
> You could use
>
>         gimple_seq stmts = NULL;
>         mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
>         new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
>                                                use_bool ?
> build_int_cst (TREE_TYPE (lhs), 1) : mask_convert);
>
> >        if (throws)
> >         {
> > -         gsi_insert_on_edge_immediate (e, g);
>
> gsi_insert_seq_on_edge_immediate (e, stmts);
>
> to simplify this.  The conversion will be only generated if necessary.
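
Putting that together, I assume the hunk would end up roughly like this
(untested sketch; e, throws, gsi, lhs, mask, new_lhs and use_bool are the
variables already in scope in optimize_atomic_bit_test_and):

      /* Sketch only: build the optional conversion and the XOR in one
         gimple_seq, so the conversion is emitted only when the types
         differ.  */
      gimple_seq stmts = NULL;
      tree conv_mask = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
      new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
                              use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
                                       : conv_mask);
      if (throws)
        gsi_insert_seq_on_edge_immediate (e, stmts);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_CONTINUE_LINKING);
      /* Leave GSI at the statement defining NEW_LHS, as the old code did.  */
      gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (new_lhs));
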
>
> > +         if (g_convert)
> > +           {
> > +             gsi_insert_on_edge_immediate (e, g_convert);
> > +             gsi = gsi_for_stmt (g_convert);
> > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +           }
> > +         else
> > +           gsi_insert_on_edge_immediate (e, g);
> >           gsi = gsi_for_stmt (g);
> >         }
> >        else
> > -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +       {
> > +         if (g_convert)
> > +           {
> > +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> > +             gsi = gsi_for_stmt (g_convert);
> > +           }
> > +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > +       }
> >      }
> >    if (use_bool && has_debug_uses)
> >      {
> > --
> > 2.31.1
> >



--
BR,
Hongtao
  
Richard Biener Oct. 26, 2021, 8:16 a.m. UTC | #3
On Thu, Oct 21, 2021 at 1:09 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
>
> On Wed, Oct 13, 2021 at 8:34 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Sun, Oct 10, 2021 at 3:49 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Changes in v4:
> > >
> > > 1. Bypass redundant check when inputs have been transformed to the
> > > equivalent canonical form with valid bit operation.
> > >
> > > Changes in v3:
> > >
> > > 1.  Check invalid bit operation.
> > >
> > > commit adedd5c173388ae505470df152b9cb3947339566
> > > Author: Jakub Jelinek <jakub@redhat.com>
> > > Date:   Tue May 3 13:37:25 2016 +0200
> > >
> > >     re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
> > >
> > > optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
> > > with lock bts/btr/btc by turning
> > >
> > >   mask_2 = 1 << cnt_1;
> > >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > >   _5 = _4 & mask_2;
> > >
> > > into
> > >
> > >   _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
> > >   _5 = _4;
> > >
> > > and
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> > >   _3 = _2 & mask_6;
> > >   _4 = _3 != 0;
> > >
> > > into
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
> > >   _4 = _11 != 0;
> > >
> > > But it failed to optimize many equivalent, but slighly different cases:
> > >
> > > 1.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _4 = (_Bool) _1;
> > > 2.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _4 = (_Bool) _1;
> > > 3.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _7 = ~_1;
> > >   _5 = (_Bool) _7;
> > > 4.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _7 = ~_1;
> > >   _5 = (_Bool) _7;
> > > 5.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > >   _2 = (int) _1;
> > >   _7 = ~_2;
> > >   _5 = (_Bool) _7;
> > > 6.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > >   _2 = (int) _1;
> > >   _7 = ~_2;
> > >   _5 = (_Bool) _7;
> > > 7.
> > >   _1 = _atomic_fetch_or_4 (ptr_6, mask, _3);
> > >   _2 = (int) _1;
> > >   _5 = _2 & mask;
> > > 8.
> > >   _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > >   _5 = (signed int) _1;
> > >   _4 = _5 < 0;
> > > 9.
> > >   _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > >   _5 = (signed int) _1;
> > >   _4 = _5 < 0;
> > > 10.
> > >   _1 = 1 << bit_4(D);
> > >   mask_5 = (unsigned int) _1;
> > >   _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > >   _3 = _2 & mask_5;
> > > 11.
> > >   mask_7 = 1 << bit_6(D);
> > >   _1 = ~mask_7;
> > >   _2 = (unsigned int) _1;
> > >   _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
> > >   _4 = (int) _3;
> > >   _5 = _4 & mask_7;
> > >
> > > We make
> > >
> > >   mask_2 = 1 << cnt_1;
> > >   _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > >   _5 = _4 & mask_2;
> > >
> > > and
> > >
> > >   mask_6 = 1 << bit_5(D);
> > >   _1 = ~mask_6;
> > >   _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
> > >   _3 = _2 & mask_6;
> > >   _4 = _3 != 0;
> > >
> > > the canonical forms for this optimization and transform cases 1-9 to the
> > > equivalent canonical form.  For cases 10 and 11, we simply remove the cast
> > > before __atomic_fetch_or_4/__atomic_fetch_and_4 with
> > >
> > >   _1 = 1 << bit_4(D);
> > >   _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
> > >   _3 = _2 & _1;
> > >
> > > and
> > >
> > >   mask_7 = 1 << bit_6(D);
> > >   _1 = ~mask_7;
> > >   _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
> > >   _6 = _3 & mask_7;
> > >   _5 = (int) _6;
> > >
> > > gcc/
> > >
> > >         PR middle-end/102566
> > >         * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
> > >         (optimize_atomic_bit_test_and): Transform equivalent, but slighly
> > >         different cases to their canonical forms.
> > >
> > > gcc/testsuite/
> > >
> > >         PR middle-end/102566
> > >         * g++.target/i386/pr102566-1.C: New test.
> > >         * g++.target/i386/pr102566-2.C: Likewise.
> > >         * g++.target/i386/pr102566-3.C: Likewise.
> > >         * g++.target/i386/pr102566-4.C: Likewise.
> > >         * g++.target/i386/pr102566-5a.C: Likewise.
> > >         * g++.target/i386/pr102566-5b.C: Likewise.
> > >         * g++.target/i386/pr102566-6a.C: Likewise.
> > >         * g++.target/i386/pr102566-6b.C: Likewise.
> > >         * gcc.target/i386/pr102566-1a.c: Likewise.
> > >         * gcc.target/i386/pr102566-1b.c: Likewise.
> > >         * gcc.target/i386/pr102566-2.c: Likewise.
> > >         * gcc.target/i386/pr102566-3a.c: Likewise.
> > >         * gcc.target/i386/pr102566-3b.c: Likewise.
> > >         * gcc.target/i386/pr102566-4.c: Likewise.
> > >         * gcc.target/i386/pr102566-5.c: Likewise.
> > >         * gcc.target/i386/pr102566-6.c: Likewise.
> > >         * gcc.target/i386/pr102566-7.c: Likewise.
> > >         * gcc.target/i386/pr102566-8a.c: Likewise.
> > >         * gcc.target/i386/pr102566-8b.c: Likewise.
> > >         * gcc.target/i386/pr102566-9a.c: Likewise.
> > >         * gcc.target/i386/pr102566-9b.c: Likewise.
> > >         * gcc.target/i386/pr102566-10a.c: Likewise.
> > >         * gcc.target/i386/pr102566-10b.c: Likewise.
> > >         * gcc.target/i386/pr102566-11.c: Likewise.
> > >         * gcc.target/i386/pr102566-12.c: Likewise.
> > > ---
> > >  gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
> > >  gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++++++
> > >  gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 ++++
> > >  gcc/testsuite/gcc.target/i386/pr102566-2.c   |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-3a.c  |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-3b.c  |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-4.c   |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-5.c   |  15 +
> > >  gcc/testsuite/gcc.target/i386/pr102566-6.c   |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-7.c   |  30 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-8a.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-8b.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-9a.c  |  32 ++
> > >  gcc/testsuite/gcc.target/i386/pr102566-9b.c  |  32 ++
> > >  gcc/tree-ssa-ccp.c                           | 503 +++++++++++++++++--
> > >  26 files changed, 1375 insertions(+), 37 deletions(-)
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-2.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-3.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-4.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5a.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-5b.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6a.C
> > >  create mode 100644 gcc/testsuite/g++.target/i386/pr102566-6b.C
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-11.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-12.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-4.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-5.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-6.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-7.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > >
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > > new file mode 100644
> > > index 00000000000..94a66d717cc
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > > new file mode 100644
> > > index 00000000000..4f2aea961c2
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > > new file mode 100644
> > > index 00000000000..e88921dd155
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > > new file mode 100644
> > > index 00000000000..44d1362ac2e
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
> > > @@ -0,0 +1,29 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +typedef int __attribute__ ((mode (__word__))) int_type;
> > > +
> > > +#define BIT (1 << 0)
> > > +
> > > +bool
> > > +tbit0 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<int_type> &i)
> > > +{
> > > +  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > > new file mode 100644
> > > index 00000000000..f9595bee2ab
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > > new file mode 100644
> > > index 00000000000..d917b27a918
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 30)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > > new file mode 100644
> > > index 00000000000..01d495eda23
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target c++11 } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned int> &i)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > > new file mode 100644
> > > index 00000000000..adc11fcbf2d
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <atomic>
> > > +
> > > +bool
> > > +tbit0 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit30 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 30)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +tbit31 (std::atomic<unsigned long long> &i)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > > new file mode 100644
> > > index 00000000000..1c1f86a9659
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > > new file mode 100644
> > > index 00000000000..0bf39824ea6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic long long int *v, int bit)
> > > +{
> > > +  long long int mask = 1ll << bit;
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > > new file mode 100644
> > > index 00000000000..2c8f8c4e59a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +#define MASK 0x1234
> > > +
> > > +bool
> > > +foo1 (_Atomic int *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic unsigned int *v, int mask)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +bool
> > > +foo3 (_Atomic unsigned int *v, int mask)
> > > +{
> > > +  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > > new file mode 100644
> > > index 00000000000..4603a77612c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +#define MASK 0x1234
> > > +
> > > +bool
> > > +foo1 (_Atomic long *v)
> > > +{
> > > +  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic long *v, long mask)
> > > +{
> > > +  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +bool
> > > +foo3 (_Atomic long *v, long mask)
> > > +{
> > > +  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "btr" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > > new file mode 100644
> > > index 00000000000..a915de354e5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> > > @@ -0,0 +1,188 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +void bar (void);
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f1 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f2 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> > > +  int t2 = t1 & mask;
> > > +  return t2 != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f3 (long int *a, int bit)
> > > +{
> > > +  long int mask = 1l << bit;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f4 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f5 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f6 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +f7 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> > > +    bar ();
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +f8 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> > > +    bar ();
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f9 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f10 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f11 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f12 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f13 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f14 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f15 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f16 (int *a)
> > > +{
> > > +  int mask = 1 << 7;
> > > +  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f17 (int *a)
> > > +{
> > > +  int mask = 1 << 13;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f18 (int *a)
> > > +{
> > > +  int mask = 1 << 0;
> > > +  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f19 (long int *a, int bit)
> > > +{
> > > +  long int mask = 1l << bit;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f20 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) int
> > > +f21 (int *a, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return (__sync_fetch_and_or (a, mask) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f22 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) long int
> > > +f23 (long int *a)
> > > +{
> > > +  long int mask = 1l << 7;
> > > +  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) short int
> > > +f24 (short int *a)
> > > +{
> > > +  short int mask = 1 << 7;
> > > +  return (__sync_fetch_and_or (a, mask) & mask) != 0;
> > > +}
> > > +
> > > +__attribute__((noinline, noclone)) short int
> > > +f25 (short int *a)
> > > +{
> > > +  short int mask = 1 << 7;
> > > +  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > > new file mode 100644
> > > index 00000000000..c4dab8135c7
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> > > @@ -0,0 +1,107 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O2 -g" } */
> > > +
> > > +int cnt;
> > > +
> > > +__attribute__((noinline, noclone)) void
> > > +bar (void)
> > > +{
> > > +  cnt++;
> > > +}
> > > +
> > > +#include "pr102566-1a.c"
> > > +
> > > +int a;
> > > +long int b;
> > > +unsigned long int c;
> > > +unsigned short int d;
> > > +
> > > +int
> > > +main ()
> > > +{
> > > +  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> > > +  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> > > +      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> > > +    __builtin_abort ();
> > > +  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> > > +      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> > > +  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> > > +      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> > > +  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> > > +      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> > > +    __builtin_abort ();
> > > +  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > > +      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> > > +    __builtin_abort ();
> > > +  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> > > +      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (cnt != 0
> > > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > > +      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> > > +      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> > > +      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> > > +    __builtin_abort ();
> > > +  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > > +    __builtin_abort ();
> > > +  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > > +      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > > +      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> > > +  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> > > +      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> > > +    __builtin_abort ();
> > > +  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> > > +      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> > > +      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > > +      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> > > +      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> > > +  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> > > +      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> > > +  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > > +      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> > > +      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> > > +    __builtin_abort ();
> > > +  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> > > +      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> > > +    __builtin_abort ();
> > > +  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> > > +  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > > +      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> > > +      || cnt != 2)
> > > +    __builtin_abort ();
> > > +  return 0;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > > new file mode 100644
> > > index 00000000000..00a7c349f2a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > > new file mode 100644
> > > index 00000000000..8bf1cd6e1bd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > > new file mode 100644
> > > index 00000000000..d155ed367a1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic long long int *v, int bit)
> > > +{
> > > +  long long int mask = 1ll << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > > new file mode 100644
> > > index 00000000000..2668ccf827c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  unsigned int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > > new file mode 100644
> > > index 00000000000..8bf1cd6e1bd
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
> > > @@ -0,0 +1,15 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo (_Atomic int *v, int bit)
> > > +{
> > > +  int mask = 1 << bit;
> > > +  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > > new file mode 100644
> > > index 00000000000..3dfe55ac683
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > > new file mode 100644
> > > index 00000000000..6bc0ae0f320
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
> > > @@ -0,0 +1,30 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +typedef int __attribute__ ((mode (__word__))) int_type;
> > > +
> > > +#define BIT (1 << 0)
> > > +
> > > +bool
> > > +foo0 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
> > > +}
> > > +
> > > +bool
> > > +foo1 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
> > > +}
> > > +
> > > +bool
> > > +foo2 (_Atomic int_type *v)
> > > +{
> > > +  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
> > > +/* { dg-final { scan-assembler-not "bts" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > > new file mode 100644
> > > index 00000000000..168e3db78c9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > > new file mode 100644
> > > index 00000000000..392da3098e0
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 62)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > > new file mode 100644
> > > index 00000000000..3fa2a3ef043
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 0)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 30)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic int *v)
> > > +{
> > > +#define BIT (1 << 31)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > > new file mode 100644
> > > index 00000000000..38ddbdc630f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
> > > @@ -0,0 +1,32 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +#include <stdatomic.h>
> > > +#include <stdbool.h>
> > > +
> > > +bool
> > > +foo0 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 0)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo30 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 62)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +bool
> > > +foo31 (_Atomic long long *v)
> > > +{
> > > +#define BIT (1ll << 63)
> > > +  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
> > > +#undef BIT
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
> > > +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> > > diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> > > index 70ce6a4d5b8..bb70b87aa5e 100644
> > > --- a/gcc/tree-ssa-ccp.c
> > > +++ b/gcc/tree-ssa-ccp.c
> > > @@ -3243,6 +3243,81 @@ optimize_unreachable (gimple_stmt_iterator i)
> > >    return ret;
> > >  }
> > >
> > > +/* Convert
> > > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +   _7 = ~_1;
> > > +   _5 = (_Bool) _7;
> > > +   to
> > > +   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +   _8 = _1 & 1;
> > > +   _5 = _8 == 0;
> > > +   and convert
> > > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +   _7 = ~_1;
> > > +   _4 = (_Bool) _7;
> > > +   to
> > > +   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +   _8 = _1 & 1;
> > > +   _4 = (_Bool) _8;
> > > +
> > > +   USE_STMT is the gimple statement which uses the return value of
> > > +   __atomic_fetch_or_*.  LHS is the return value of __atomic_fetch_or_*.
> > > +   MASK is the mask passed to __atomic_fetch_or_*.
> > > + */
> > > +
> > > +static gimple *
> > > +convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
> > > +                       tree lhs, tree mask)
> > > +{
> > > +  tree and_mask;
> > > +  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +    {
> > > +      /* MASK must be ~1.  */
> > > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > > +                                          ~HOST_WIDE_INT_1), mask, 0))
> > > +       return nullptr;
> > > +      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +    }
> > > +  else
> > > +    {
> > > +      /* MASK must be 1.  */
> > > +      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
> > > +       return nullptr;
> > > +      and_mask = mask;
> > > +    }
> > > +
> > > +  tree use_lhs = gimple_assign_lhs (use_stmt);
> > > +
> > > +  use_operand_p use_p;
> > > +  gimple *use_not_stmt;
> > > +
> > > +  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
> > > +      || !is_gimple_assign (use_not_stmt))
> > > +    return nullptr;
> > > +
> > > +  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
> > > +    return nullptr;
> > > +
> > > +  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
> > > +  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
> > > +    return nullptr;
> > > +
> > > +  gimple_stmt_iterator gsi;
> > > +  gsi = gsi_for_stmt (use_stmt);
> > > +  gsi_remove (&gsi, true);
> > > +  tree var = make_ssa_name (TREE_TYPE (lhs));
> > > +  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
> > > +  gsi = gsi_for_stmt (use_not_stmt);
> > > +  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
> > > +  lhs = gimple_assign_lhs (use_not_stmt);
> > > +  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
> > > +                                  build_zero_cst (TREE_TYPE (mask)));
> > > +  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +  gsi = gsi_for_stmt (use_not_stmt);
> > > +  gsi_remove (&gsi, true);
> > > +  return use_stmt;
> > > +}
> > > +
> > >  /* Optimize
> > >       mask_2 = 1 << cnt_1;
> > >       _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
> > > @@ -3269,7 +3344,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >    tree lhs = gimple_call_lhs (call);
> > >    use_operand_p use_p;
> > >    gimple *use_stmt;
> > > -  tree mask, bit;
> > > +  tree mask;
> > >    optab optab;
> > >
> > >    if (!flag_inline_atomics
> > > @@ -3279,10 +3354,317 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >        || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> > >        || !single_imm_use (lhs, &use_p, &use_stmt)
> > >        || !is_gimple_assign (use_stmt)
> > > -      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> > >        || !gimple_vdef (call))
> > >      return;
> > >
> > > +  tree bit = nullptr;
> > > +
> > > +  mask = gimple_call_arg (call, 1);
> > > +  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> > > +  if (rhs_code != BIT_AND_EXPR)
> > > +    {
> > > +      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
> > > +       return;
> > > +
> > > +      tree use_lhs = gimple_assign_lhs (use_stmt);
> > > +      if (TREE_CODE (use_lhs) == SSA_NAME
> > > +         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
> > > +       return;
> > > +
> > > +      tree use_rhs = gimple_assign_rhs1 (use_stmt);
> > > +      if (lhs != use_rhs)
> > > +       return;
> > > +
> > > +      gimple *g;
> > > +      gimple_stmt_iterator gsi;
> > > +      tree var;
> > > +      int ibit = -1;
> > > +
> > > +      if (rhs_code == BIT_NOT_EXPR)
> > > +       {
> > > +         g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
> > > +         if (!g)
> > > +           return;
> > > +         use_stmt = g;
> > > +         ibit = 0;
> > > +       }
> > > +      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
> > > +       {
> > > +         tree and_mask;
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           {
> > > +             /* MASK must be ~1.  */
> > > +             if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
> > > +                                                  ~HOST_WIDE_INT_1),
> > > +                                   mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +                _4 = (_Bool) _1;
> > > +                to
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
> > > +                _5 = _1 & 1;
> > > +                _4 = (_Bool) _5;
> > > +              */
> > > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +           }
> > > +         else
> > > +           {
> > > +             and_mask = build_int_cst (TREE_TYPE (lhs), 1);
> > > +             if (!operand_equal_p (and_mask, mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +                _4 = (_Bool) _1;
> > > +                to
> > > +                _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
> > > +                _5 = _1 & 1;
> > > +                _4 = (_Bool) _5;
> > > +              */
> > > +           }
> > > +         var = make_ssa_name (TREE_TYPE (use_rhs));
> > > +         replace_uses_by (use_rhs, var);
> > > +         g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > > +                                  and_mask);
> > > +         gsi = gsi_for_stmt (use_stmt);
> > > +         gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > > +         use_stmt = g;
> > > +         ibit = 0;
> > > +       }
> > > +      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
> > > +              == TYPE_PRECISION (TREE_TYPE (use_rhs)))
> > > +       {
> > > +         gimple *use_nop_stmt;
> > > +         if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
> > > +             || !is_gimple_assign (use_nop_stmt))
> > > +           return;
> > > +         rhs_code = gimple_assign_rhs_code (use_nop_stmt);
> > > +         if (rhs_code != BIT_AND_EXPR)
> > > +           {
> > > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > > +             if (TREE_CODE (use_nop_lhs) == SSA_NAME
> > > +                 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
> > > +               return;
> > > +             if (rhs_code == BIT_NOT_EXPR)
> > > +               {
> > > +                 g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
> > > +                                             mask);
> > > +                 if (!g)
> > > +                   return;
> > > +                 /* Convert
> > > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > > +                    _2 = (int) _1;
> > > +                    _7 = ~_2;
> > > +                    _5 = (_Bool) _7;
> > > +                    to
> > > +                    _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
> > > +                    _8 = _1 & 1;
> > > +                    _5 = _8 == 0;
> > > +                    and convert
> > > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > > +                    _2 = (int) _1;
> > > +                    _7 = ~_2;
> > > +                    _5 = (_Bool) _7;
> > > +                    to
> > > +                    _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
> > > +                    _8 = _1 & 1;
> > > +                    _5 = _8 == 0;
> > > +                  */
> > > +                 gsi = gsi_for_stmt (use_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +                 use_stmt = g;
> > > +                 ibit = 0;
> > > +               }
> > > +             else
> > > +               {
> > > +                 if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
> > > +                   return;
> > > +                 if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
> > > +                   return;
> > > +                 tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > > +                 if (use_lhs != cmp_rhs1)
> > > +                   return;
> > > +                 tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > > +                 if (!integer_zerop (cmp_rhs2))
> > > +                   return;
> > > +
> > > +                 tree and_mask;
> > > +
> > > +                 unsigned HOST_WIDE_INT bytes
> > > +                   = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
> > > +                 ibit = bytes * BITS_PER_UNIT - 1;
> > > +                 unsigned HOST_WIDE_INT highest
> > > +                   = HOST_WIDE_INT_1U << ibit;
> > > +
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   {
> > > +                     /* Get the signed maximum of the USE_RHS type.  */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest - 1);
> > > +                     if (!operand_equal_p (and_mask, mask, 0))
> > > +                       return;
> > > +
> > > +                     /* Convert
> > > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > > +                        _5 = (signed int) _1;
> > > +                        _4 = _5 < 0 or _5 >= 0;
> > > +                        to
> > > +                        _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
> > > +                        _6 = _1 & 0x80000000;
> > > +                        _4 = _6 != 0 or _6 == 0;
> > > +                      */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest);
> > > +                   }
> > > +                 else
> > > +                   {
> > > +                     /* Get the signed minimum of the USE_RHS type.  */
> > > +                     and_mask = build_int_cst (TREE_TYPE (use_rhs),
> > > +                                               highest);
> > > +                     if (!operand_equal_p (and_mask, mask, 0))
> > > +                       return;
> > > +
> > > +                     /* Convert
> > > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > > +                        _5 = (signed int) _1;
> > > +                        _4 = _5 < 0 or _5 >= 0;
> > > +                        to
> > > +                        _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
> > > +                        _6 = _1 & 0x80000000;
> > > +                        _4 = _6 != 0 or _6 == 0;
> > > +                      */
> > > +                   }
> > > +                 var = make_ssa_name (TREE_TYPE (use_rhs));
> > > +                 gsi = gsi_for_stmt (use_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +                 g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
> > > +                                          and_mask);
> > > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > > +                 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> > > +                 use_stmt = g;
> > > +                 g = gimple_build_assign (use_nop_lhs,
> > > +                                          (rhs_code == GE_EXPR
> > > +                                           ? EQ_EXPR : NE_EXPR),
> > > +                                          var,
> > > +                                          build_zero_cst (TREE_TYPE (use_rhs)));
> > > +                 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +                 gsi = gsi_for_stmt (use_nop_stmt);
> > > +                 gsi_remove (&gsi, true);
> > > +               }
> > > +           }
> > > +         else
> > > +           {
> > > +             tree op_mask = mask;
> > > +             tree check_mask = op_mask;
> > > +             if (TREE_CODE (op_mask) == SSA_NAME)
> > > +               {
> > > +                 g = SSA_NAME_DEF_STMT (op_mask);
> > > +                 if (!is_gimple_assign (g))
> > > +                   return;
> > > +                 if (gimple_assign_rhs_code (g) == NOP_EXPR)
> > > +                   {
> > > +                     tree mask_nop_lhs = gimple_assign_lhs (g);
> > > +
> > > +                     if (TREE_CODE (mask_nop_lhs) == SSA_NAME
> > > +                         && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> > > +                       return;
> > > +
> > > +                     tree mask_nop_rhs = gimple_assign_rhs1 (g);
> > > +                     if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> > > +                         != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> > > +                       return;
> > > +                     op_mask = mask_nop_rhs;
> > > +                     check_mask = op_mask;
> > > +                     g = SSA_NAME_DEF_STMT (op_mask);
> > > +                     if (!is_gimple_assign (g))
> > > +                       return;
> > > +                   }
> > > +
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   {
> > > +                     if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +                       return;
> > > +                     check_mask = gimple_assign_rhs1 (g);
> > > +                     if (TREE_CODE (check_mask) != SSA_NAME)
> > > +                       return;
> > > +                     g = SSA_NAME_DEF_STMT (check_mask);
> > > +                     if (!is_gimple_assign (g))
> > > +                       return;
> > > +                   }
> > > +
> > > +                 if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > > +                     || !integer_onep (gimple_assign_rhs1 (g)))
> > > +                   return;
> > > +
> > > +                 bit = gimple_assign_rhs2 (g);
> > > +               }
> > > +
> > > +             if (TREE_CODE (check_mask) == INTEGER_CST)
> > > +               {
> > > +                 if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +                   check_mask = const_unop (BIT_NOT_EXPR,
> > > +                                            TREE_TYPE (check_mask),
> > > +                                            check_mask);
> > > +                 check_mask = fold_convert (TREE_TYPE (lhs),
> > > +                                            check_mask);
> > > +                 /* Check if CHECK_MASK is a power of two.  */
> > > +                 ibit = tree_log2 (check_mask);
> > > +                 if (ibit < 0)
> > > +                   return;
> > > +               }
> > > +
> > > +             tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
> > > +             tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
> > > +             if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
> > > +                 && !operand_equal_p (use_nop_rhs2, check_mask, 0))
> > > +               return;
> > > +
> > > +             /* Convert
> > > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > > +                _2 = (int) _1;
> > > +                _5 = _2 & mask;
> >
> > (***)
> >
> > > +                to
> > > +                _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
> > > +                _6 = _1 & mask;
> > > +                _5 = (int) _6;
> > > +                and convert
> > > +                _1 = ~mask_7;
> > > +                _2 = (unsigned int) _1;
> > > +                _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
> > > +                _4 = (int) _3;
> > > +                _5 = _4 & mask_7;
> > > +                to
> > > +                _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
> > > +                _12 = _3 & mask_7;
> > > +                _5 = (int) _12;
> > > +              */
> >
> > I wonder if it's better, maintenance-wise, to have the matching part in match.pd
> I'm trying to rewrite the match part in match.pd and find that the
> canonicalization is ok when the mask is constant, but not when it is a
> variable, since it will be simplified back by

Note I didn't suggest using (simplify (....)); instead, use

(match (...))

You can look at the ctz_table_index example and how it is used from
tree-ssa-forwprop.c as gimple_ctz_table_index ().  This way you
can replace the boiler-plate for matching expressions.  You can
match multiple related forms (when the "leaves" have the same structure)
with multiple (match ...) instances sharing the same name; see for example
'nop_convert'.
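
For reference, a rough sketch of how ctz_table_index is wired up (quoted
from memory, so not verbatim):  match.pd has something like

  (match (ctz_table_index @1 @2 @3)
    (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))

genmatch turns that into a predicate of the form

  bool gimple_ctz_table_index (tree, tree *res_ops, tree (*valueize)(tree));

and tree-ssa-forwprop.c just calls something along the lines of

  tree res_ops[3];
  if (gimple_ctz_table_index (rhs, res_ops, NULL))
    /* ... transform using res_ops[0..2] ... */;

so only the transform itself stays hand-written in C.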

>  /* In GIMPLE, getting rid of 2 conversions for one new results
>     in smaller IL.  */
>  (simplify
>   (convert (bitop:cs@2 (nop_convert:s @0) @1))
>   (if (GIMPLE
>        && TREE_CODE (@1) != INTEGER_CST
>        && tree_nop_conversion_p (type, TREE_TYPE (@2))
>        && types_match (type, @0))
>    (bitop @0 (convert @1)))))
>
> The canonicalization for a variable mask is like
>
> convert
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>  _4 = (int) _3;
>  _5 = _4 & mask_7;
>
> to
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>   _4 = (unsigned int) mask_7
>   _6 = _3 & _4
>   _5 = (int) _6
>
> and this form gets simplified back.
>
> I've also tried another way of simplification:
>
> convert
>   _1 = ~mask_7;
>   _2 = (unsigned int) _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>  _4 = (int) _3;
>  _5 = _4 & mask_7;
>
> to
>   _1 = (unsigned int)mask_7;
>   _2 = ~ _1;
>   _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
>    _6 = _3 & _1
>   _5 = (int) _6
>
> but that is prevented by the check below, since __atomic_fetch_and_4 is not
> CONST and we would need to regenerate the call with the updated parameter.
>
>   /* We can't and should not emit calls to non-const functions.  */
>   if (!(flags_from_decl_or_type (decl) & ECF_CONST))
>     return NULL;
>
> >
> > there you could have
> >
> > (match (atomic_fetch_mask @1 @2 @3 @mask)
> >  (bit_and (convert (IFN_ATOMIC_BIT_TEST_AND_RESET @2 @mask @3)) @mask))
> >
> > and here in this code do
> >
> > extern bool gimple_atomic_fetch_mask (tree t, tree *res_ops, tree (*)(tree));
> >
> > and call it on the _5 from (***) where the function will return true if it
> > matched and it will set res_ops[] with the positional operands @1 @2
> > @3 and @mask.
> >
> > You can add variants and conditions to the same match entry, see match.pd
> > for examples and also match-and-simplify.texi
> >
> > > +             replace_uses_by (use_lhs, lhs);
> > > +             tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> > > +             var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> > > +             gimple_assign_set_lhs (use_nop_stmt, var);
> > > +             gsi = gsi_for_stmt (use_stmt);
> > > +             gsi_remove (&gsi, true);
> > > +             release_defs (use_stmt);
> > > +             gsi_remove (gsip, true);
> > > +             var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
> >
> > instead of building a GENERIC NOP you could use the
> >
> > gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
> >
> > overload.
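> >
> > e.g., a minimal sketch (untested), reusing VAR and USE_NOP_LHS from the
> > hunk above:
> >
> >   g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var);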
> >
> > > +             gsi = gsi_for_stmt (use_nop_stmt);
> > > +             g = gimple_build_assign (use_nop_lhs, var);
> > > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +             use_stmt = use_nop_stmt;
> > > +             mask = op_mask;
> > > +           }
> > > +       }
> > > +
> > > +      if (!bit)
> > > +       {
> > > +         if (ibit < 0)
> > > +           gcc_unreachable ();
> > > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > +       }
> > > +    }
> > > +
> > >    switch (fn)
> > >      {
> > >      case IFN_ATOMIC_BIT_TEST_AND_SET:
> > > @@ -3301,51 +3683,76 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >    if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
> > >      return;
> > >
> > > -  mask = gimple_call_arg (call, 1);
> > >    tree use_lhs = gimple_assign_lhs (use_stmt);
> > >    if (!use_lhs)
> > >      return;
> > >
> > > -  if (TREE_CODE (mask) == INTEGER_CST)
> > > +  if (!bit)
> > >      {
> > > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > -       mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > > -      mask = fold_convert (TREE_TYPE (lhs), mask);
> > > -      int ibit = tree_log2 (mask);
> > > -      if (ibit < 0)
> > > -       return;
> > > -      bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > -    }
> > > -  else if (TREE_CODE (mask) == SSA_NAME)
> > > -    {
> > > -      gimple *g = SSA_NAME_DEF_STMT (mask);
> > > -      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +      if (TREE_CODE (mask) == INTEGER_CST)
> > >         {
> > > -         if (!is_gimple_assign (g)
> > > -             || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
> > > +         mask = fold_convert (TREE_TYPE (lhs), mask);
> > > +         int ibit = tree_log2 (mask);
> > > +         if (ibit < 0)
> > > +           return;
> > > +         bit = build_int_cst (TREE_TYPE (lhs), ibit);
> > > +       }
> > > +      else if (TREE_CODE (mask) == SSA_NAME)
> > > +       {
> > > +         gimple *g = SSA_NAME_DEF_STMT (mask);
> > > +         if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> > > +           {
> > > +             if (!is_gimple_assign (g)
> > > +                 || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> > > +               return;
> > > +             mask = gimple_assign_rhs1 (g);
> > > +             if (TREE_CODE (mask) != SSA_NAME)
> > > +               return;
> > > +             g = SSA_NAME_DEF_STMT (mask);
> > > +           }
> > > +         if (!is_gimple_assign (g))
> > >             return;
> > > -         mask = gimple_assign_rhs1 (g);
> > > -         if (TREE_CODE (mask) != SSA_NAME)
> > > +         rhs_code = gimple_assign_rhs_code (g);
> > > +         if (rhs_code != LSHIFT_EXPR)
> > > +           {
> > > +             if (rhs_code != NOP_EXPR)
> > > +               return;
> > > +
> > > +             /* Handle
> > > +                _1 = 1 << bit_4(D);
> > > +                mask_5 = (unsigned int) _1;
> > > +                _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
> > > +                _3 = _2 & mask_5;
> > > +                */
> > > +             tree nop_lhs = gimple_assign_lhs (g);
> > > +             tree nop_rhs = gimple_assign_rhs1 (g);
> > > +             if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> > > +                 != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> > > +               return;
> > > +             g = SSA_NAME_DEF_STMT (nop_rhs);
> > > +             if (!is_gimple_assign (g)
> > > +                 || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
> > > +               return;
> > > +           }
> > > +         if (!integer_onep (gimple_assign_rhs1 (g)))
> > >             return;
> > > -         g = SSA_NAME_DEF_STMT (mask);
> > > +         bit = gimple_assign_rhs2 (g);
> > >         }
> > > -      if (!is_gimple_assign (g)
> > > -         || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> > > -         || !integer_onep (gimple_assign_rhs1 (g)))
> > > +      else
> > >         return;
> > > -      bit = gimple_assign_rhs2 (g);
> > > -    }
> > > -  else
> > > -    return;
> > >
> > > -  if (gimple_assign_rhs1 (use_stmt) == lhs)
> > > -    {
> > > -      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > > +      if (gimple_assign_rhs1 (use_stmt) == lhs)
> > > +       {
> > > +         if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
> > > +           return;
> > > +       }
> > > +      else if (gimple_assign_rhs2 (use_stmt) != lhs
> > > +              || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
> > > +                                   mask, 0))
> > >         return;
> > >      }
> > > -  else if (gimple_assign_rhs2 (use_stmt) != lhs
> > > -          || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
> > > -    return;
> > >
> > >    bool use_bool = true;
> > >    bool has_debug_uses = false;
> > > @@ -3434,18 +3841,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> > >          of the specified bit after the atomic operation (makes only sense
> > >          for xor, otherwise the bit content is compile time known),
> > >          we need to invert the bit.  */
> > > +      tree mask_convert = mask;
> > > +      gimple *g_convert = nullptr;
> > > +      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> > > +       {
> > > +         mask_convert = make_ssa_name (TREE_TYPE (lhs));
> > > +         tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> > > +         g_convert = gimple_build_assign (mask_convert, var);
> > > +       }
> > >        g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> > >                                BIT_XOR_EXPR, new_lhs,
> > >                                use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> > > -                                       : mask);
> > > +                                       : mask_convert);
> > >        new_lhs = gimple_assign_lhs (g);
> >
> > You could use
> >
> >         gimple_seq stmts = NULL;
> >         mask_convert = gimple_convert (&stmts, TREE_TYPE (lhs), mask);
> >         new_lhs = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (lhs), new_lhs,
> >                                 use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> >                                          : mask_convert);
> >
> > >        if (throws)
> > >         {
> > > -         gsi_insert_on_edge_immediate (e, g);
> >
> > gsi_insert_seq_on_edge_immediate (e, stmts);
> >
> > to simplify this.  The conversion will only be generated if necessary.
> >
> > > +         if (g_convert)
> > > +           {
> > > +             gsi_insert_on_edge_immediate (e, g_convert);
> > > +             gsi = gsi_for_stmt (g_convert);
> > > +             gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +           }
> > > +         else
> > > +           gsi_insert_on_edge_immediate (e, g);
> > >           gsi = gsi_for_stmt (g);
> > >         }
> > >        else
> > > -       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +       {
> > > +         if (g_convert)
> > > +           {
> > > +             gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> > > +             gsi = gsi_for_stmt (g_convert);
> > > +           }
> > > +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> > > +       }
> > >      }
> > >    if (use_bool && has_debug_uses)
> > >      {
> > > --
> > > 2.31.1
> > >
>
>
>
> --
> BR,
> Hongtao
  

Patch

diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
new file mode 100644
index 00000000000..94a66d717cc
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-2.C b/gcc/testsuite/g++.target/i386/pr102566-2.C
new file mode 100644
index 00000000000..4f2aea961c2
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-2.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_or(BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-3.C b/gcc/testsuite/g++.target/i386/pr102566-3.C
new file mode 100644
index 00000000000..e88921dd155
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-3.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_or(BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-4.C b/gcc/testsuite/g++.target/i386/pr102566-4.C
new file mode 100644
index 00000000000..44d1362ac2e
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-4.C
@@ -0,0 +1,29 @@ 
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+tbit0 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~1;
+}
+
+bool
+tbit30 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~2;
+}
+
+bool
+tbit31 (std::atomic<int_type> &i)
+{
+  return i.fetch_or(BIT, std::memory_order_relaxed) & ~4;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5a.C b/gcc/testsuite/g++.target/i386/pr102566-5a.C
new file mode 100644
index 00000000000..f9595bee2ab
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5a.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-5b.C b/gcc/testsuite/g++.target/i386/pr102566-5b.C
new file mode 100644
index 00000000000..d917b27a918
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-5b.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return i.fetch_and(~BIT, std::memory_order_relaxed) & BIT;
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6a.C b/gcc/testsuite/g++.target/i386/pr102566-6a.C
new file mode 100644
index 00000000000..01d495eda23
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6a.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned int> &i)
+{
+#define BIT (1 << 31)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr102566-6b.C b/gcc/testsuite/g++.target/i386/pr102566-6b.C
new file mode 100644
index 00000000000..adc11fcbf2d
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr102566-6b.C
@@ -0,0 +1,31 @@ 
+/* { dg-do compile { target { c++11 && { ! ia32 } } } } */
+/* { dg-options "-O2" } */
+
+#include <atomic>
+
+bool
+tbit0 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 0)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit30 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 30)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+bool
+tbit31 (std::atomic<unsigned long long> &i)
+{
+#define BIT (1ll << 63)
+  return !(i.fetch_and(~BIT, std::memory_order_relaxed) & BIT);
+#undef BIT 
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10a.c b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
new file mode 100644
index 00000000000..1c1f86a9659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10a.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-10b.c b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
new file mode 100644
index 00000000000..0bf39824ea6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-10b.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-11.c b/gcc/testsuite/gcc.target/i386/pr102566-11.c
new file mode 100644
index 00000000000..2c8f8c4e59a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-11.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic int *v)
+{
+  return atomic_fetch_or_explicit (v, MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic unsigned int *v, int mask)
+{
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic unsigned int *v, int mask)
+{
+  return !(atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-12.c b/gcc/testsuite/gcc.target/i386/pr102566-12.c
new file mode 100644
index 00000000000..4603a77612c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-12.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+#define MASK 0x1234
+
+bool
+foo1 (_Atomic long *v)
+{
+  return atomic_fetch_and_explicit (v, ~MASK, memory_order_relaxed) & MASK;
+}
+
+bool
+foo2 (_Atomic long *v, long mask)
+{
+  return atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask;
+}
+
+bool
+foo3 (_Atomic long *v, long mask)
+{
+  return !(atomic_fetch_and_explicit (v, ~mask, memory_order_relaxed) & mask);
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "btr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
new file mode 100644
index 00000000000..a915de354e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
@@ -0,0 +1,188 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (void);
+
+__attribute__((noinline, noclone)) int
+f1 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f2 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
+  int t2 = t1 & mask;
+  return t2 != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f3 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f6 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
+    bar ();
+}
+
+__attribute__((noinline, noclone)) int
+f9 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f10 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f11 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f12 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f13 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f14 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f15 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f16 (int *a)
+{
+  int mask = 1 << 7;
+  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f17 (int *a)
+{
+  int mask = 1 << 13;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) int
+f18 (int *a)
+{
+  int mask = 1 << 0;
+  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f19 (long int *a, int bit)
+{
+  long int mask = 1l << bit;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) long int
+f20 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
+}
+
+__attribute__((noinline, noclone)) int
+f21 (int *a, int bit)
+{
+  int mask = 1 << bit;
+  return (__sync_fetch_and_or (a, mask) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f22 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) long int
+f23 (long int *a)
+{
+  long int mask = 1l << 7;
+  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
+}
+
+__attribute__((noinline, noclone)) short int
+f24 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__sync_fetch_and_or (a, mask) & mask) != 0;
+}
+
+__attribute__((noinline, noclone)) short int
+f25 (short int *a)
+{
+  short int mask = 1 << 7;
+  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
new file mode 100644
index 00000000000..c4dab8135c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
@@ -0,0 +1,107 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -g" } */
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+bar (void)
+{
+  cnt++;
+}
+
+#include "pr102566-1a.c"
+
+int a;
+long int b;
+unsigned long int c;
+unsigned short int d;
+
+int
+main ()
+{
+  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
+  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
+      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
+    __builtin_abort ();
+  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
+      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
+    __builtin_abort ();
+  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
+  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
+      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
+    __builtin_abort ();
+  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
+  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
+      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
+    __builtin_abort ();
+  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
+      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (cnt != 0
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
+      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
+      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
+    __builtin_abort ();
+  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
+  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
+      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
+    __builtin_abort ();
+  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
+      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
+      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
+      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
+    __builtin_abort ();
+  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
+  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
+      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
+    __builtin_abort ();
+  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
+  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
+      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
+    __builtin_abort ();
+  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
+      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
+    __builtin_abort ();
+  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
+  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
+      || cnt != 2)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
new file mode 100644
index 00000000000..00a7c349f2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
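+/* Bit tests of constant single-bit masks (bits 0, 30 and 31) on the
+   __atomic_fetch_or return value should compile to lock btsl with no
+   cmpxchg loop.  */
+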
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3a.c b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3a.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
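+/* A variable single-bit mask (1 << bit) should likewise be recognized
+   and compile to lock btsl with no cmpxchg loop.  */
+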
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-3b.c b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
new file mode 100644
index 00000000000..d155ed367a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-3b.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic long long int *v, int bit)
+{
+  long long int mask = 1ll << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsq" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-4.c b/gcc/testsuite/gcc.target/i386/pr102566-4.c
new file mode 100644
index 00000000000..2668ccf827c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-4.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  unsigned int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-5.c b/gcc/testsuite/gcc.target/i386/pr102566-5.c
new file mode 100644
index 00000000000..8bf1cd6e1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-5.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo (_Atomic int *v, int bit)
+{
+  int mask = 1 << bit;
+  return atomic_fetch_or_explicit (v, mask, memory_order_relaxed) & mask;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-6.c b/gcc/testsuite/gcc.target/i386/pr102566-6.c
new file mode 100644
index 00000000000..3dfe55ac683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-6.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
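+/* The negated test !(fetch_or (v, BIT) & BIT) should still compile to
+   lock btsl rather than a cmpxchg loop.  */
+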
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-7.c b/gcc/testsuite/gcc.target/i386/pr102566-7.c
new file mode 100644
index 00000000000..6bc0ae0f320
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-7.c
@@ -0,0 +1,30 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
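+/* The masks tested below (~1, ~2 and ~3) are not the single bit set by
+   the fetch_or, so the bit test transformation must not apply; cmpxchg
+   is expected instead of bts.  */
+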
+#include <stdatomic.h>
+#include <stdbool.h>
+
+typedef int __attribute__ ((mode (__word__))) int_type;
+
+#define BIT (1 << 0)
+
+bool
+foo0 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~1;
+}
+
+bool
+foo1 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~2;
+}
+
+bool
+foo2 (_Atomic int_type *v)
+{
+  return atomic_fetch_or_explicit (v, BIT, memory_order_relaxed) & ~3;
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*cmpxchg" 3 } } */
+/* { dg-final { scan-assembler-not "bts" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8a.c b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
new file mode 100644
index 00000000000..168e3db78c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8a.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
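+/* Testing the bit cleared by __atomic_fetch_and (v, ~BIT) should compile
+   to lock btrl with no cmpxchg loop.  */
+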
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-8b.c b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
new file mode 100644
index 00000000000..392da3098e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-8b.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT;
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9a.c b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
new file mode 100644
index 00000000000..3fa2a3ef043
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9a.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic int *v)
+{
+#define BIT (1 << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic int *v)
+{
+#define BIT (1 << 30)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic int *v)
+{
+#define BIT (1 << 31)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrl" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102566-9b.c b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
new file mode 100644
index 00000000000..38ddbdc630f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102566-9b.c
@@ -0,0 +1,32 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+#include <stdatomic.h>
+#include <stdbool.h>
+
+bool
+foo0 (_Atomic long long *v)
+{
+#define BIT (1ll << 0)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo30 (_Atomic long long *v)
+{
+#define BIT (1ll << 62)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+bool
+foo31 (_Atomic long long *v)
+{
+#define BIT (1ll << 63)
+  return !(atomic_fetch_and_explicit (v, ~BIT, memory_order_relaxed) & BIT);
+#undef BIT
+}
+
+/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btrq" 3 } } */
+/* { dg-final { scan-assembler-not "cmpxchg" } } */
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 70ce6a4d5b8..bb70b87aa5e 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3243,6 +3243,81 @@  optimize_unreachable (gimple_stmt_iterator i)
   return ret;
 }
 
+/* Convert
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _7 = ~_1;
+   _5 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+   _8 = _1 & 1;
+   _5 = _8 == 0;
+   and convert
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _7 = ~_1;
+   _4 = (_Bool) _7;
+   to
+   _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+   _8 = _1 & 1;
+   _4 = _8 == 0;
+
+   USE_STMT is the gimple statement which uses the return value of
+   __atomic_fetch_or_* or __atomic_fetch_and_*.  LHS is the return value
+   of that call.  MASK is the mask passed to that call.
+ */
+
+static gimple *
+convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt,
+			tree lhs, tree mask)
+{
+  tree and_mask;
+  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+    {
+      /* MASK must be ~1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+					   ~HOST_WIDE_INT_1), mask, 0))
+	return nullptr;
+      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+    }
+  else
+    {
+      /* MASK must be 1.  */
+      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs), 1), mask, 0))
+	return nullptr;
+      and_mask = mask;
+    }
+
+  tree use_lhs = gimple_assign_lhs (use_stmt);
+
+  use_operand_p use_p;
+  gimple *use_not_stmt;
+
+  if (!single_imm_use (use_lhs, &use_p, &use_not_stmt)
+      || !is_gimple_assign (use_not_stmt))
+    return nullptr;
+
+  if (gimple_assign_rhs_code (use_not_stmt) != NOP_EXPR)
+    return nullptr;
+
+  tree use_not_lhs = gimple_assign_lhs (use_not_stmt);
+  if (TREE_CODE (TREE_TYPE (use_not_lhs)) != BOOLEAN_TYPE)
+    return nullptr;
+
+  gimple_stmt_iterator gsi;
+  gsi = gsi_for_stmt (use_stmt);
+  gsi_remove (&gsi, true);
+  tree var = make_ssa_name (TREE_TYPE (lhs));
+  use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
+  lhs = gimple_assign_lhs (use_not_stmt);
+  gimple *g = gimple_build_assign (lhs, EQ_EXPR, var,
+				   build_zero_cst (TREE_TYPE (mask)));
+  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+  gsi = gsi_for_stmt (use_not_stmt);
+  gsi_remove (&gsi, true);
+  return use_stmt;
+}
+
 /* Optimize
      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
@@ -3269,7 +3344,7 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   tree lhs = gimple_call_lhs (call);
   use_operand_p use_p;
   gimple *use_stmt;
-  tree mask, bit;
+  tree mask;
   optab optab;
 
   if (!flag_inline_atomics
@@ -3279,10 +3354,317 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
       || !single_imm_use (lhs, &use_p, &use_stmt)
       || !is_gimple_assign (use_stmt)
-      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
       || !gimple_vdef (call))
     return;
 
+  tree bit = nullptr;
+
+  mask = gimple_call_arg (call, 1);
+  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
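+  /* If the single use of the call result is not already the canonical
+     BIT_AND_EXPR form, try to rewrite the equivalent cast, negation and
+     sign-test patterns into that form first.  */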
+  if (rhs_code != BIT_AND_EXPR)
+    {
+      if (rhs_code != NOP_EXPR && rhs_code != BIT_NOT_EXPR)
+	return;
+
+      tree use_lhs = gimple_assign_lhs (use_stmt);
+      if (TREE_CODE (use_lhs) == SSA_NAME
+	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+	return;
+
+      tree use_rhs = gimple_assign_rhs1 (use_stmt);
+      if (lhs != use_rhs)
+	return;
+
+      gimple *g;
+      gimple_stmt_iterator gsi;
+      tree var;
+      int ibit = -1;
+
+      if (rhs_code == BIT_NOT_EXPR)
+	{
+	  g = convert_atomic_bit_not (fn, use_stmt, lhs, mask);
+	  if (!g)
+	    return;
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TREE_CODE (TREE_TYPE (use_lhs)) == BOOLEAN_TYPE)
+	{
+	  tree and_mask;
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      /* MASK must be ~1.  */
+	      if (!operand_equal_p (build_int_cst (TREE_TYPE (lhs),
+						   ~HOST_WIDE_INT_1),
+				    mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	    }
+	  else
+	    {
+	      and_mask = build_int_cst (TREE_TYPE (lhs), 1);
+	      if (!operand_equal_p (and_mask, mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _4 = (_Bool) _1;
+		 to
+		 _1 = __atomic_fetch_or_* (ptr_6, 1, _3);
+		 _5 = _1 & 1;
+		 _4 = (_Bool) _5;
+	       */
+	    }
+	  var = make_ssa_name (TREE_TYPE (use_rhs));
+	  replace_uses_by (use_rhs, var);
+	  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+				   and_mask);
+	  gsi = gsi_for_stmt (use_stmt);
+	  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+	  use_stmt = g;
+	  ibit = 0;
+	}
+      else if (TYPE_PRECISION (TREE_TYPE (use_lhs))
+	       == TYPE_PRECISION (TREE_TYPE (use_rhs)))
+	{
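+	  /* The use is a cast of the call result to a type of the same
+	     precision; look at how the cast result itself is used.  */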
+	  gimple *use_nop_stmt;
+	  if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt)
+	      || !is_gimple_assign (use_nop_stmt))
+	    return;
+	  rhs_code = gimple_assign_rhs_code (use_nop_stmt);
+	  if (rhs_code != BIT_AND_EXPR)
+	    {
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      if (TREE_CODE (use_nop_lhs) == SSA_NAME
+		  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs))
+		return;
+	      if (rhs_code == BIT_NOT_EXPR)
+		{
+		  g = convert_atomic_bit_not (fn, use_nop_stmt, lhs,
+					      mask);
+		  if (!g)
+		    return;
+		  /* Convert
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		     and convert
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _2 = (int) _1;
+		     _7 = ~_2;
+		     _5 = (_Bool) _7;
+		     to
+		     _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
+		     _8 = _1 & 1;
+		     _5 = _8 == 0;
+		   */
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  use_stmt = g;
+		  ibit = 0;
+		}
+	      else
+		{
+		  if (TREE_CODE (TREE_TYPE (use_nop_lhs)) != BOOLEAN_TYPE)
+		    return;
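+		  /* Only a sign test (< 0 or >= 0) of the cast result
+		     against zero can be rewritten as a test of the most
+		     significant bit.  */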
+		  if (rhs_code != GE_EXPR && rhs_code != LT_EXPR)
+		    return;
+		  tree cmp_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+		  if (use_lhs != cmp_rhs1)
+		    return;
+		  tree cmp_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+		  if (!integer_zerop (cmp_rhs2))
+		    return;
+
+		  tree and_mask;
+
+		  unsigned HOST_WIDE_INT bytes
+		    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (use_rhs)));
+		  ibit = bytes * BITS_PER_UNIT - 1;
+		  unsigned HOST_WIDE_INT highest
+		    = HOST_WIDE_INT_1U << ibit;
+
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    {
+		      /* Get the signed maximum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest - 1);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		    }
+		  else
+		    {
+		      /* Get the signed minimum of the USE_RHS type.  */
+		      and_mask = build_int_cst (TREE_TYPE (use_rhs),
+						highest);
+		      if (!operand_equal_p (and_mask, mask, 0))
+			return;
+
+		      /* Convert
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _5 = (signed int) _1;
+			 _4 = _5 < 0 or _5 >= 0;
+			 to
+			 _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
+			 _6 = _1 & 0x80000000;
+			 _4 = _6 != 0 or _6 == 0;
+		       */
+		    }
+		  var = make_ssa_name (TREE_TYPE (use_rhs));
+		  gsi = gsi_for_stmt (use_stmt);
+		  gsi_remove (&gsi, true);
+		  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
+					   and_mask);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+		  use_stmt = g;
+		  g = gimple_build_assign (use_nop_lhs,
+					   (rhs_code == GE_EXPR
+					    ? EQ_EXPR : NE_EXPR),
+					   var,
+					   build_zero_cst (TREE_TYPE (use_rhs)));
+		  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+		  gsi = gsi_for_stmt (use_nop_stmt);
+		  gsi_remove (&gsi, true);
+		}
+	    }
+	  else
+	    {
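+	      /* The cast result feeds a BIT_AND: _2 = (TYPE) _1;
+		 _5 = _2 & mask.  Strip the cast and perform the bit test
+		 on the call result directly, casting afterwards.  */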
+	      tree op_mask = mask;
+	      tree check_mask = op_mask;
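+	      /* If the mask is an SSA_NAME, chase its definition through
+		 an optional same-precision cast and, for the and case, a
+		 BIT_NOT, down to a 1 << bit shift.  */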
+	      if (TREE_CODE (op_mask) == SSA_NAME)
+		{
+		  g = SSA_NAME_DEF_STMT (op_mask);
+		  if (!is_gimple_assign (g))
+		    return;
+		  if (gimple_assign_rhs_code (g) == NOP_EXPR)
+		    {
+		      tree mask_nop_lhs = gimple_assign_lhs (g);
+
+		      if (TREE_CODE (mask_nop_lhs) == SSA_NAME
+			  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
+			return;
+
+		      tree mask_nop_rhs = gimple_assign_rhs1 (g);
+		      if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
+			  != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
+			return;
+		      op_mask = mask_nop_rhs;
+		      check_mask = op_mask;
+		      g = SSA_NAME_DEF_STMT (op_mask);
+		      if (!is_gimple_assign (g))
+			return;
+		    }
+
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    {
+		      if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+			return;
+		      check_mask = gimple_assign_rhs1 (g);
+		      if (TREE_CODE (check_mask) != SSA_NAME)
+			return;
+		      g = SSA_NAME_DEF_STMT (check_mask);
+		      if (!is_gimple_assign (g))
+			return;
+		    }
+
+		  if (gimple_assign_rhs_code (g) != LSHIFT_EXPR
+		      || !integer_onep (gimple_assign_rhs1 (g)))
+		    return;
+
+		  bit = gimple_assign_rhs2 (g);
+		}
+
+	      if (TREE_CODE (check_mask) == INTEGER_CST)
+		{
+		  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+		    check_mask = const_unop (BIT_NOT_EXPR,
+					     TREE_TYPE (check_mask),
+					     check_mask);
+		  check_mask = fold_convert (TREE_TYPE (lhs),
+					     check_mask);
+		  /* Check if CHECK_MASK is a power of two.  */
+		  ibit = tree_log2 (check_mask);
+		  if (ibit < 0)
+		    return;
+		}
+
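+	      /* The BIT_AND must test the same single bit that the atomic
+		 operation sets or clears.  */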
+	      tree use_nop_rhs1 = gimple_assign_rhs1 (use_nop_stmt);
+	      tree use_nop_rhs2 = gimple_assign_rhs2 (use_nop_stmt);
+	      if (!operand_equal_p (use_nop_rhs1, check_mask, 0)
+		  && !operand_equal_p (use_nop_rhs2, check_mask, 0))
+		return;
+
+	      /* Convert
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _2 = (int) _1;
+		 _5 = _2 & mask;
+		 to
+		 _1 = __atomic_fetch_or_4 (ptr_6, mask, _3);
+		 _6 = _1 & mask;
+		 _5 = (int) _6;
+		 and convert
+		 _1 = ~mask_7;
+		 _2 = (unsigned int) _1;
+		 _3 = __atomic_fetch_and_4 (ptr_6, _2, 0);
+		 _4 = (int) _3;
+		 _5 = _4 & mask_7;
+		 to
+		 _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3);
+		 _12 = _1 & mask_7;
+		 _5 = (int) _12;
+	       */
+	      replace_uses_by (use_lhs, lhs);
+	      tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
+	      var = make_ssa_name (TREE_TYPE (use_nop_lhs));
+	      gimple_assign_set_lhs (use_nop_stmt, var);
+	      gsi = gsi_for_stmt (use_stmt);
+	      gsi_remove (&gsi, true);
+	      release_defs (use_stmt);
+	      gsi_remove (gsip, true);
+	      var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
+	      gsi = gsi_for_stmt (use_nop_stmt);
+	      g = gimple_build_assign (use_nop_lhs, var);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	      use_stmt = use_nop_stmt;
+	      mask = op_mask;
+	    }
+	}
+
+      if (!bit)
+	{
+	  if (ibit < 0)
+	    gcc_unreachable ();
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+    }
+
   switch (fn)
     {
     case IFN_ATOMIC_BIT_TEST_AND_SET:
@@ -3301,51 +3683,76 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
     return;
 
-  mask = gimple_call_arg (call, 1);
   tree use_lhs = gimple_assign_lhs (use_stmt);
   if (!use_lhs)
     return;
 
-  if (TREE_CODE (mask) == INTEGER_CST)
+  if (!bit)
     {
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
-	mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
-      mask = fold_convert (TREE_TYPE (lhs), mask);
-      int ibit = tree_log2 (mask);
-      if (ibit < 0)
-	return;
-      bit = build_int_cst (TREE_TYPE (lhs), ibit);
-    }
-  else if (TREE_CODE (mask) == SSA_NAME)
-    {
-      gimple *g = SSA_NAME_DEF_STMT (mask);
-      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+      if (TREE_CODE (mask) == INTEGER_CST)
 	{
-	  if (!is_gimple_assign (g)
-	      || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+	  mask = fold_convert (TREE_TYPE (lhs), mask);
+	  int ibit = tree_log2 (mask);
+	  if (ibit < 0)
+	    return;
+	  bit = build_int_cst (TREE_TYPE (lhs), ibit);
+	}
+      else if (TREE_CODE (mask) == SSA_NAME)
+	{
+	  gimple *g = SSA_NAME_DEF_STMT (mask);
+	  if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+	    {
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+		return;
+	      mask = gimple_assign_rhs1 (g);
+	      if (TREE_CODE (mask) != SSA_NAME)
+		return;
+	      g = SSA_NAME_DEF_STMT (mask);
+	    }
+	  if (!is_gimple_assign (g))
 	    return;
-	  mask = gimple_assign_rhs1 (g);
-	  if (TREE_CODE (mask) != SSA_NAME)
+	  rhs_code = gimple_assign_rhs_code (g);
+	  if (rhs_code != LSHIFT_EXPR)
+	    {
+	      if (rhs_code != NOP_EXPR)
+		return;
+
+	      /* Handle
+		 _1 = 1 << bit_4(D);
+		 mask_5 = (unsigned int) _1;
+		 _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
+		 _3 = _2 & mask_5;
+		 */
+	      tree nop_lhs = gimple_assign_lhs (g);
+	      tree nop_rhs = gimple_assign_rhs1 (g);
+	      if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
+		  != TYPE_PRECISION (TREE_TYPE (nop_rhs)))
+		return;
+	      g = SSA_NAME_DEF_STMT (nop_rhs);
+	      if (!is_gimple_assign (g)
+		  || gimple_assign_rhs_code (g) != LSHIFT_EXPR)
+		return;
+	    }
+	  if (!integer_onep (gimple_assign_rhs1 (g)))
 	    return;
-	  g = SSA_NAME_DEF_STMT (mask);
+	  bit = gimple_assign_rhs2 (g);
 	}
-      if (!is_gimple_assign (g)
-	  || gimple_assign_rhs_code (g) != LSHIFT_EXPR
-	  || !integer_onep (gimple_assign_rhs1 (g)))
+      else
 	return;
-      bit = gimple_assign_rhs2 (g);
-    }
-  else
-    return;
 
-  if (gimple_assign_rhs1 (use_stmt) == lhs)
-    {
-      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+      if (gimple_assign_rhs1 (use_stmt) == lhs)
+	{
+	  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+	    return;
+	}
+      else if (gimple_assign_rhs2 (use_stmt) != lhs
+	       || !operand_equal_p (gimple_assign_rhs1 (use_stmt),
+				    mask, 0))
 	return;
     }
-  else if (gimple_assign_rhs2 (use_stmt) != lhs
-	   || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
-    return;
 
   bool use_bool = true;
   bool has_debug_uses = false;
@@ -3434,18 +3841,40 @@  optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
 	 of the specified bit after the atomic operation (makes only sense
 	 for xor, otherwise the bit content is compile time known),
 	 we need to invert the bit.  */
+      tree mask_convert = mask;
+      gimple *g_convert = nullptr;
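+      /* MASK may no longer have the same type as LHS after the cast
+	 handling above; convert it before using it in the BIT_XOR below.  */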
+      if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
+	{
+	  mask_convert = make_ssa_name (TREE_TYPE (lhs));
+	  tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
+	  g_convert = gimple_build_assign (mask_convert, var);
+	}
       g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
 			       BIT_XOR_EXPR, new_lhs,
 			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
-					: mask);
+					: mask_convert);
       new_lhs = gimple_assign_lhs (g);
       if (throws)
 	{
-	  gsi_insert_on_edge_immediate (e, g);
+	  if (g_convert)
+	    {
+	      gsi_insert_on_edge_immediate (e, g_convert);
+	      gsi = gsi_for_stmt (g_convert);
+	      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	    }
+	  else
+	    gsi_insert_on_edge_immediate (e, g);
 	  gsi = gsi_for_stmt (g);
 	}
       else
-	gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	{
+	  if (g_convert)
+	    {
+	      gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
+	      gsi = gsi_for_stmt (g_convert);
+	    }
+	  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	}
     }
   if (use_bool && has_debug_uses)
     {