forwprop, v2: Canonicalize atomic fetch_op op x to op_fetch or vice versa [PR98737]
Commit Message
On Thu, Jan 13, 2022 at 04:07:20PM +0100, Richard Biener wrote:
> I'm mostly concerned about the replace_uses_by use. forwprop
> will go over newly emitted stmts and thus, for the hypothetical added
>
> lhs2 = d;
>
> record the copy and schedule the stmt for removal, substituting 'd'
> in each use as it goes along the function and folding them. It's
> a bit iffy (and maybe has unintended side-effects in odd cases)
> to trample around and fold stuff behind that flows back.
>
> I'd always vote to simplify the folding code so it's easier to
> maintain and not micro-optimize there since it's not going to be
> a hot part of the compiler.
Ok. So like this?
2022-01-13 Jakub Jelinek <jakub@redhat.com>
PR target/98737
* tree-ssa-forwprop.c (simplify_builtin_call): Canonicalize
__atomic_fetch_op (p, x, y) op x into __atomic_op_fetch (p, x, y)
and __atomic_op_fetch (p, x, y) iop x into
__atomic_fetch_op (p, x, y).
* gcc.dg/tree-ssa/pr98737-1.c: New test.
* gcc.dg/tree-ssa/pr98737-2.c: New test.
Jakub
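A minimal sketch of the canonicalization described above (illustrative only;
the function names below are mine, the built-ins are the standard GCC
__atomic ones): forwprop can now rewrite fn1 as if it were fn2, and fn3 as
if it were fn4.

int v;

int
fn1 (int x)
{
  /* __atomic_fetch_add returns the old value; adding x again yields the
     new value, so this is equivalent to __atomic_add_fetch.  */
  return __atomic_fetch_add (&v, x, __ATOMIC_RELAXED) + x;
}

int
fn2 (int x)
{
  return __atomic_add_fetch (&v, x, __ATOMIC_RELAXED);
}

int
fn3 (int x)
{
  /* __atomic_add_fetch returns the new value; subtracting x recovers the
     old value, so this is equivalent to __atomic_fetch_add.  */
  return __atomic_add_fetch (&v, x, __ATOMIC_RELAXED) - x;
}

int
fn4 (int x)
{
  return __atomic_fetch_add (&v, x, __ATOMIC_RELAXED);
}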
Comments
On Thu, 13 Jan 2022, Jakub Jelinek wrote:
> On Thu, Jan 13, 2022 at 04:07:20PM +0100, Richard Biener wrote:
> > I'm mostly concerned about the replace_uses_by use. forwprop
> > will go over newly emitted stmts and thus, for the hypothetical added
> >
> > lhs2 = d;
> >
> > record the copy and schedule the stmt for removal, substituting 'd'
> > in each use as it goes along the function and folding them. It's
> > a bit iffy (and maybe has unintended side-effects in odd cases)
> > to trample around and fold stuff behind that flows back.
> >
> > I'd always vote to simplify the folding code so it's easier to
> > maintain and not micro-optimize there since it's not going to be
> > a hot part of the compiler.
>
> Ok. So like this?
>
> 2022-01-13 Jakub Jelinek <jakub@redhat.com>
>
> PR target/98737
> * tree-ssa-forwprop.c (simplify_builtin_call): Canonicalize
> __atomic_fetch_op (p, x, y) op x into __atomic_op_fetch (p, x, y)
> and __atomic_op_fetch (p, x, y) iop x into
> __atomic_fetch_op (p, x, y).
>
> * gcc.dg/tree-ssa/pr98737-1.c: New test.
> * gcc.dg/tree-ssa/pr98737-2.c: New test.
>
> --- gcc/tree-ssa-forwprop.c.jj 2022-01-11 23:11:23.467275019 +0100
> +++ gcc/tree-ssa-forwprop.c 2022-01-13 18:09:50.318625915 +0100
> @@ -1241,12 +1241,19 @@ constant_pointer_difference (tree p1, tr
> memset (p + 4, ' ', 3);
> into
> memcpy (p, "abcd ", 7);
> - call if the latter can be stored by pieces during expansion. */
> + call if the latter can be stored by pieces during expansion.
> +
> + Also canonicalize __atomic_fetch_op (p, x, y) op x
> + to __atomic_op_fetch (p, x, y) or
> + __atomic_op_fetch (p, x, y) iop x
> + to __atomic_fetch_op (p, x, y) when possible (also __sync). */
>
> static bool
> simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
> {
> gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
> + enum built_in_function other_atomic = END_BUILTINS;
> + enum tree_code atomic_op = ERROR_MARK;
> tree vuse = gimple_vuse (stmt2);
> if (vuse == NULL)
> return false;
> @@ -1448,6 +1455,290 @@ simplify_builtin_call (gimple_stmt_itera
> }
> }
> break;
> +
> + #define CASE_ATOMIC(NAME, OTHER, OP) \
> + case BUILT_IN_##NAME##_1: \
> + case BUILT_IN_##NAME##_2: \
> + case BUILT_IN_##NAME##_4: \
> + case BUILT_IN_##NAME##_8: \
> + case BUILT_IN_##NAME##_16: \
> + atomic_op = OP; \
> + other_atomic \
> + = (enum built_in_function) (BUILT_IN_##OTHER##_1 \
> + + (DECL_FUNCTION_CODE (callee2) \
> + - BUILT_IN_##NAME##_1)); \
> + goto handle_atomic_fetch_op;
> +
> + CASE_ATOMIC (ATOMIC_FETCH_ADD, ATOMIC_ADD_FETCH, PLUS_EXPR)
> + CASE_ATOMIC (ATOMIC_FETCH_SUB, ATOMIC_SUB_FETCH, MINUS_EXPR)
> + CASE_ATOMIC (ATOMIC_FETCH_AND, ATOMIC_AND_FETCH, BIT_AND_EXPR)
> + CASE_ATOMIC (ATOMIC_FETCH_XOR, ATOMIC_XOR_FETCH, BIT_XOR_EXPR)
> + CASE_ATOMIC (ATOMIC_FETCH_OR, ATOMIC_OR_FETCH, BIT_IOR_EXPR)
> +
> + CASE_ATOMIC (SYNC_FETCH_AND_ADD, SYNC_ADD_AND_FETCH, PLUS_EXPR)
> + CASE_ATOMIC (SYNC_FETCH_AND_SUB, SYNC_SUB_AND_FETCH, MINUS_EXPR)
> + CASE_ATOMIC (SYNC_FETCH_AND_AND, SYNC_AND_AND_FETCH, BIT_AND_EXPR)
> + CASE_ATOMIC (SYNC_FETCH_AND_XOR, SYNC_XOR_AND_FETCH, BIT_XOR_EXPR)
> + CASE_ATOMIC (SYNC_FETCH_AND_OR, SYNC_OR_AND_FETCH, BIT_IOR_EXPR)
> +
> + CASE_ATOMIC (ATOMIC_ADD_FETCH, ATOMIC_FETCH_ADD, MINUS_EXPR)
> + CASE_ATOMIC (ATOMIC_SUB_FETCH, ATOMIC_FETCH_SUB, PLUS_EXPR)
> + CASE_ATOMIC (ATOMIC_XOR_FETCH, ATOMIC_FETCH_XOR, BIT_XOR_EXPR)
> +
> + CASE_ATOMIC (SYNC_ADD_AND_FETCH, SYNC_FETCH_AND_ADD, MINUS_EXPR)
> + CASE_ATOMIC (SYNC_SUB_AND_FETCH, SYNC_FETCH_AND_SUB, PLUS_EXPR)
> + CASE_ATOMIC (SYNC_XOR_AND_FETCH, SYNC_FETCH_AND_XOR, BIT_XOR_EXPR)
> +
> +#undef CASE_ATOMIC
> +
> + handle_atomic_fetch_op:
> + if (gimple_call_num_args (stmt2) >= 2 && gimple_call_lhs (stmt2))
> + {
> + tree lhs2 = gimple_call_lhs (stmt2), lhsc = lhs2;
> + tree arg = gimple_call_arg (stmt2, 1);
> + gimple *use_stmt, *cast_stmt = NULL;
> + use_operand_p use_p;
> + tree ndecl = builtin_decl_explicit (other_atomic);
> +
> + if (ndecl == NULL_TREE || !single_imm_use (lhs2, &use_p, &use_stmt))
> + break;
> +
> + if (gimple_assign_cast_p (use_stmt))
> + {
> + cast_stmt = use_stmt;
> + lhsc = gimple_assign_lhs (cast_stmt);
> + if (lhsc == NULL_TREE
> + || !INTEGRAL_TYPE_P (TREE_TYPE (lhsc))
> + || (TYPE_PRECISION (TREE_TYPE (lhsc))
> + != TYPE_PRECISION (TREE_TYPE (lhs2)))
> + || !single_imm_use (lhsc, &use_p, &use_stmt))
> + {
> + use_stmt = cast_stmt;
> + cast_stmt = NULL;
> + lhsc = lhs2;
> + }
> + }
> +
> + bool ok = false;
> + tree oarg = NULL_TREE;
> + enum tree_code ccode = ERROR_MARK;
> + tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
> + if (is_gimple_assign (use_stmt)
> + && gimple_assign_rhs_code (use_stmt) == atomic_op)
> + {
> + if (gimple_assign_rhs1 (use_stmt) == lhsc)
> + oarg = gimple_assign_rhs2 (use_stmt);
> + else if (atomic_op != MINUS_EXPR)
> + oarg = gimple_assign_rhs1 (use_stmt);
> + }
> + else if (atomic_op == MINUS_EXPR
> + && is_gimple_assign (use_stmt)
> + && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR
> + && TREE_CODE (arg) == INTEGER_CST
> + && (TREE_CODE (gimple_assign_rhs2 (use_stmt))
> + == INTEGER_CST))
> + {
> + tree a = fold_convert (TREE_TYPE (lhs2), arg);
> + tree o = fold_convert (TREE_TYPE (lhs2),
> + gimple_assign_rhs2 (use_stmt));
> + if (wi::to_wide (a) == wi::neg (wi::to_wide (o)))
> + ok = true;
> + }
> + else if (atomic_op == BIT_AND_EXPR || atomic_op == BIT_IOR_EXPR)
> + ;
> + else if (gimple_code (use_stmt) == GIMPLE_COND)
> + {
> + ccode = gimple_cond_code (use_stmt);
> + crhs1 = gimple_cond_lhs (use_stmt);
> + crhs2 = gimple_cond_rhs (use_stmt);
> + }
> + else if (is_gimple_assign (use_stmt))
> + {
> + if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
> + {
> + ccode = gimple_assign_rhs_code (use_stmt);
> + crhs1 = gimple_assign_rhs1 (use_stmt);
> + crhs2 = gimple_assign_rhs2 (use_stmt);
> + }
> + else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
> + {
> + tree cond = gimple_assign_rhs1 (use_stmt);
> + if (COMPARISON_CLASS_P (cond))
> + {
> + ccode = TREE_CODE (cond);
> + crhs1 = TREE_OPERAND (cond, 0);
> + crhs2 = TREE_OPERAND (cond, 1);
> + }
> + }
> + }
> + if (ccode == EQ_EXPR || ccode == NE_EXPR)
> + {
> + /* Deal with x - y == 0 or x ^ y == 0
> + being optimized into x == y and x + cst == 0
> + into x == -cst. */
> + tree o = NULL_TREE;
> + if (crhs1 == lhsc)
> + o = crhs2;
> + else if (crhs2 == lhsc)
> + o = crhs1;
> + if (o && atomic_op != PLUS_EXPR)
> + oarg = o;
> + else if (o
> + && TREE_CODE (o) == INTEGER_CST
> + && TREE_CODE (arg) == INTEGER_CST)
> + {
> + tree a = fold_convert (TREE_TYPE (lhs2), arg);
> + o = fold_convert (TREE_TYPE (lhs2), o);
> + if (wi::to_wide (a) == wi::neg (wi::to_wide (o)))
> + ok = true;
> + }
> + }
> + if (oarg && !ok)
> + {
> + if (operand_equal_p (arg, oarg, 0))
> + ok = true;
> + else if (TREE_CODE (arg) == SSA_NAME
> + && TREE_CODE (oarg) == SSA_NAME)
> + {
> + tree oarg2 = oarg;
> + if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (oarg)))
> + {
> + gimple *g = SSA_NAME_DEF_STMT (oarg);
> + oarg2 = gimple_assign_rhs1 (g);
> + if (TREE_CODE (oarg2) != SSA_NAME
> + || !INTEGRAL_TYPE_P (TREE_TYPE (oarg2))
> + || (TYPE_PRECISION (TREE_TYPE (oarg2))
> + != TYPE_PRECISION (TREE_TYPE (oarg))))
> + oarg2 = oarg;
> + }
> + if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (arg)))
> + {
> + gimple *g = SSA_NAME_DEF_STMT (arg);
> + tree rhs1 = gimple_assign_rhs1 (g);
> + /* Handle e.g.
> + x.0_1 = (long unsigned int) x_4(D);
> + _2 = __atomic_fetch_add_8 (&vlong, x.0_1, 0);
> + _3 = (long int) _2;
> + _7 = x_4(D) + _3; */
> + if (rhs1 == oarg || rhs1 == oarg2)
> + ok = true;
> + /* Handle e.g.
> + x.18_1 = (short unsigned int) x_5(D);
> + _2 = (int) x.18_1;
> + _3 = __atomic_fetch_xor_2 (&vshort, _2, 0);
> + _4 = (short int) _3;
> + _8 = x_5(D) ^ _4;
> + This happens only for char/short. */
> + else if (TREE_CODE (rhs1) == SSA_NAME
> + && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
> + && (TYPE_PRECISION (TREE_TYPE (rhs1))
> + == TYPE_PRECISION (TREE_TYPE (lhs2))))
> + {
> + g = SSA_NAME_DEF_STMT (rhs1);
> + if (gimple_assign_cast_p (g)
> + && (gimple_assign_rhs1 (g) == oarg
> + || gimple_assign_rhs1 (g) == oarg2))
> + ok = true;
> + }
> + }
> + if (!ok && arg == oarg2)
> + /* Handle e.g.
> + _1 = __sync_fetch_and_add_4 (&v, x_5(D));
> + _2 = (int) _1;
> + x.0_3 = (int) x_5(D);
> + _7 = _2 + x.0_3; */
> + ok = true;
> + }
> + }
> +
> + if (ok)
> + {
> + tree new_lhs = make_ssa_name (TREE_TYPE (lhs2));
> + gimple_call_set_lhs (stmt2, new_lhs);
> + gimple_call_set_fndecl (stmt2, ndecl);
> + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
> + if (ccode == ERROR_MARK)
> + gimple_assign_set_rhs_with_ops (&gsi, cast_stmt
> + ? NOP_EXPR : SSA_NAME,
> + new_lhs);
> + else
> + {
> + crhs1 = new_lhs;
> + crhs2 = build_zero_cst (TREE_TYPE (lhs2));
> + if (gimple_code (use_stmt) == GIMPLE_COND)
> + {
> + gcond *cond_stmt = as_a <gcond *> (use_stmt);
> + gimple_cond_set_lhs (cond_stmt, crhs1);
> + gimple_cond_set_rhs (cond_stmt, crhs2);
> + }
> + else if (gimple_assign_rhs_class (use_stmt)
> + == GIMPLE_BINARY_RHS)
> + {
> + gimple_assign_set_rhs1 (use_stmt, crhs1);
> + gimple_assign_set_rhs2 (use_stmt, crhs2);
> + }
> + else
> + {
> + gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
> + == COND_EXPR);
> + tree cond = build2 (ccode, boolean_type_node,
> + crhs1, crhs2);
> + gimple_assign_set_rhs1 (use_stmt, cond);
> + }
> + }
> + update_stmt (use_stmt);
> + /* & and | aren't reversible. */
> + if (atomic_op != BIT_AND_EXPR
> + && atomic_op != BIT_IOR_EXPR
> + /* With -fnon-call-exceptions we can't easily
> + add stmts after the call if it ends the basic block. */
> + && !stmt_ends_bb_p (stmt2))
> + {
> + gsi = gsi_for_stmt (stmt2);
> + tree type = TREE_TYPE (lhs2);
> + if (TREE_CODE (arg) == INTEGER_CST)
> + arg = fold_convert (type, arg);
> + else if (!useless_type_conversion_p (type, TREE_TYPE (arg)))
> + {
> + tree narg = make_ssa_name (type);
> + gimple *g = gimple_build_assign (narg, NOP_EXPR, arg);
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + arg = narg;
> + }
> + enum tree_code rcode;
> + switch (atomic_op)
> + {
> + case PLUS_EXPR: rcode = MINUS_EXPR; break;
> + case MINUS_EXPR: rcode = PLUS_EXPR; break;
> + case BIT_XOR_EXPR: rcode = atomic_op; break;
> + default: gcc_unreachable ();
> + }
> + gimple *g = gimple_build_assign (lhs2, rcode, new_lhs, arg);
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + update_stmt (stmt2);
> + }
> + else
> + {
> + imm_use_iterator iter;
> + FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs2)
> + if (use_stmt != cast_stmt)
> + {
> + gcc_assert (is_gimple_debug (use_stmt));
> + gimple_debug_bind_reset_value (use_stmt);
> + update_stmt (use_stmt);
> + }
I don't quite understand this (maybe add a comment), otherwise looks
good to me.
Thanks,
Richard.
> + if (cast_stmt)
> + {
> + gsi = gsi_for_stmt (cast_stmt);
> + gsi_remove (&gsi, true);
> + }
> + update_stmt (stmt2);
> + release_ssa_name (lhs2);
> + }
> + }
> + }
> + break;
> +
> default:
> break;
> }
> --- gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c.jj 2022-01-12 14:48:45.743941426 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr98737-1.c 2022-01-12 16:36:54.228346979 +0100
> @@ -0,0 +1,148 @@
> +/* PR target/98737 */
> +/* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fcompare-debug" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-tree-dump-not "__atomic_fetch_" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "__sync_fetch_and_" "optimized" } } */
> +
> +typedef signed char schar;
> +typedef unsigned long ulong;
> +typedef unsigned int uint;
> +typedef unsigned short ushort;
> +typedef unsigned char uchar;
> +long vlong;
> +int vint;
> +short vshort;
> +schar vschar;
> +ulong vulong;
> +uint vuint;
> +ushort vushort;
> +uchar vuchar;
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (t x) \
> +{ \
> + ut z = f (&v##t, x, ##__VA_ARGS__); \
> + t w = (t) z; \
> + return w o x; \
> +}
> +#define B(n, f, o, ...) \
> + A(n##0, long, ulong, f, o, ##__VA_ARGS__) \
> + A(n##1, int, uint, f, o, ##__VA_ARGS__) \
> + A(n##2, short, ushort, f, o, ##__VA_ARGS__) \
> + A(n##3, schar, uchar, f, o, ##__VA_ARGS__) \
> + A(n##4, ulong, ulong, f, o, ##__VA_ARGS__) \
> + A(n##5, uint, uint, f, o, ##__VA_ARGS__) \
> + A(n##6, ushort, ushort, f, o, ##__VA_ARGS__) \
> + A(n##7, uchar, uchar, f, o, ##__VA_ARGS__)
> +
> +B(00, __atomic_fetch_add, +, __ATOMIC_RELAXED)
> +B(01, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
> +B(02, __atomic_fetch_and, &, __ATOMIC_RELAXED)
> +B(03, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
> +B(04, __atomic_fetch_or, |, __ATOMIC_RELAXED)
> +B(05, __sync_fetch_and_add, +)
> +B(06, __sync_fetch_and_sub, -)
> +B(07, __sync_fetch_and_and, &)
> +B(08, __sync_fetch_and_xor, ^)
> +B(09, __sync_fetch_and_or, |)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (void) \
> +{ \
> + ut z = f (&v##t, 42, ##__VA_ARGS__); \
> + t w = (t) z; \
> + return w o 42; \
> +}
> +
> +B(10, __atomic_fetch_add, +, __ATOMIC_RELAXED)
> +B(11, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
> +B(12, __atomic_fetch_and, &, __ATOMIC_RELAXED)
> +B(13, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
> +B(14, __atomic_fetch_or, |, __ATOMIC_RELAXED)
> +B(15, __sync_fetch_and_add, +)
> +B(16, __sync_fetch_and_sub, -)
> +B(17, __sync_fetch_and_and, &)
> +B(18, __sync_fetch_and_xor, ^)
> +B(19, __sync_fetch_and_or, |)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (t x) \
> +{ \
> + ut z = f (&v##t, x, ##__VA_ARGS__); \
> + t w = (t) z; \
> + t v = w o x; \
> + return v == 0; \
> +}
> +
> +B(20, __atomic_fetch_add, +, __ATOMIC_RELAXED)
> +B(21, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
> +B(22, __atomic_fetch_and, &, __ATOMIC_RELAXED)
> +B(23, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
> +B(24, __atomic_fetch_or, |, __ATOMIC_RELAXED)
> +B(25, __sync_fetch_and_add, +)
> +B(26, __sync_fetch_and_sub, -)
> +B(27, __sync_fetch_and_and, &)
> +B(28, __sync_fetch_and_xor, ^)
> +B(29, __sync_fetch_and_or, |)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (void) \
> +{ \
> + ut z = f (&v##t, 42, ##__VA_ARGS__); \
> + t w = (t) z; \
> + t v = w o 42; \
> + return v != 0; \
> +}
> +
> +B(30, __atomic_fetch_add, +, __ATOMIC_RELAXED)
> +B(31, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
> +B(32, __atomic_fetch_and, &, __ATOMIC_RELAXED)
> +B(33, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
> +B(34, __atomic_fetch_or, |, __ATOMIC_RELAXED)
> +B(35, __sync_fetch_and_add, +)
> +B(36, __sync_fetch_and_sub, -)
> +B(37, __sync_fetch_and_and, &)
> +B(38, __sync_fetch_and_xor, ^)
> +B(39, __sync_fetch_and_or, |)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (t x) \
> +{ \
> + return (t) (((t) f (&v##t, x, ##__VA_ARGS__)) \
> + o x) != 0; \
> +}
> +
> +B(40, __atomic_fetch_add, +, __ATOMIC_RELAXED)
> +B(41, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
> +B(42, __atomic_fetch_and, &, __ATOMIC_RELAXED)
> +B(43, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
> +B(44, __atomic_fetch_or, |, __ATOMIC_RELAXED)
> +B(45, __sync_fetch_and_add, +)
> +B(46, __sync_fetch_and_sub, -)
> +B(47, __sync_fetch_and_and, &)
> +B(48, __sync_fetch_and_xor, ^)
> +B(49, __sync_fetch_and_or, |)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (void) \
> +{ \
> + return (t) (((t) f (&v##t, 42, ##__VA_ARGS__))\
> + o 42) == 0; \
> +}
> +
> +B(50, __atomic_fetch_add, +, __ATOMIC_RELAXED)
> +B(51, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
> +B(52, __atomic_fetch_and, &, __ATOMIC_RELAXED)
> +B(53, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
> +/* (whatever | 42) == 0 is 0, so we can't test this. */
> +/* B(54, __atomic_fetch_or, |, __ATOMIC_RELAXED) */
> +B(55, __sync_fetch_and_add, +)
> +B(56, __sync_fetch_and_sub, -)
> +B(57, __sync_fetch_and_and, &)
> +B(58, __sync_fetch_and_xor, ^)
> +/* B(59, __sync_fetch_and_or, |) */
> --- gcc/testsuite/gcc.dg/tree-ssa/pr98737-2.c.jj 2022-01-12 16:43:29.411766485 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr98737-2.c 2022-01-12 16:41:24.301534958 +0100
> @@ -0,0 +1,123 @@
> +/* PR target/98737 */
> +/* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fcompare-debug" } */
> +/* { dg-additional-options "-march=i686" { target ia32 } } */
> +/* { dg-final { scan-tree-dump-not "__atomic_\[^f]" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "__sync_\[^f]" "optimized" } } */
> +
> +typedef signed char schar;
> +typedef unsigned long ulong;
> +typedef unsigned int uint;
> +typedef unsigned short ushort;
> +typedef unsigned char uchar;
> +long vlong;
> +int vint;
> +short vshort;
> +schar vschar;
> +ulong vulong;
> +uint vuint;
> +ushort vushort;
> +uchar vuchar;
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (t x) \
> +{ \
> + ut z = f (&v##t, x, ##__VA_ARGS__); \
> + t w = (t) z; \
> + return w o x; \
> +}
> +#define B(n, f, o, ...) \
> + A(n##0, long, ulong, f, o, ##__VA_ARGS__) \
> + A(n##1, int, uint, f, o, ##__VA_ARGS__) \
> + A(n##2, short, ushort, f, o, ##__VA_ARGS__) \
> + A(n##3, schar, uchar, f, o, ##__VA_ARGS__) \
> + A(n##4, ulong, ulong, f, o, ##__VA_ARGS__) \
> + A(n##5, uint, uint, f, o, ##__VA_ARGS__) \
> + A(n##6, ushort, ushort, f, o, ##__VA_ARGS__) \
> + A(n##7, uchar, uchar, f, o, ##__VA_ARGS__)
> +
> +B(00, __atomic_add_fetch, -, __ATOMIC_RELAXED)
> +B(01, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
> +B(03, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
> +B(05, __sync_add_and_fetch, -)
> +B(06, __sync_sub_and_fetch, +)
> +B(08, __sync_xor_and_fetch, ^)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (void) \
> +{ \
> + ut z = f (&v##t, 42, ##__VA_ARGS__); \
> + t w = (t) z; \
> + return w o 42; \
> +}
> +
> +B(10, __atomic_add_fetch, -, __ATOMIC_RELAXED)
> +B(11, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
> +B(13, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
> +B(15, __sync_add_and_fetch, -)
> +B(16, __sync_sub_and_fetch, +)
> +B(18, __sync_xor_and_fetch, ^)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (t x) \
> +{ \
> + ut z = f (&v##t, x, ##__VA_ARGS__); \
> + t w = (t) z; \
> + t v = w o x; \
> + return v == 0; \
> +}
> +
> +B(20, __atomic_add_fetch, -, __ATOMIC_RELAXED)
> +B(21, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
> +B(23, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
> +B(25, __sync_add_and_fetch, -)
> +B(26, __sync_sub_and_fetch, +)
> +B(28, __sync_xor_and_fetch, ^)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (void) \
> +{ \
> + ut z = f (&v##t, 42, ##__VA_ARGS__); \
> + t w = (t) z; \
> + t v = w o 42; \
> + return v != 0; \
> +}
> +
> +B(30, __atomic_add_fetch, -, __ATOMIC_RELAXED)
> +B(31, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
> +B(33, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
> +B(35, __sync_add_and_fetch, -)
> +B(36, __sync_sub_and_fetch, +)
> +B(38, __sync_xor_and_fetch, ^)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (t x) \
> +{ \
> + return (t) (((t) f (&v##t, x, ##__VA_ARGS__)) \
> + o x) != 0; \
> +}
> +
> +B(40, __atomic_add_fetch, -, __ATOMIC_RELAXED)
> +B(41, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
> +B(43, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
> +B(45, __sync_add_and_fetch, -)
> +B(46, __sync_sub_and_fetch, +)
> +B(48, __sync_xor_and_fetch, ^)
> +
> +#undef A
> +#define A(n, t, ut, f, o, ...) \
> +t fn##n (void) \
> +{ \
> + return (t) (((t) f (&v##t, 42, ##__VA_ARGS__))\
> + o 42) == 0; \
> +}
> +
> +B(50, __atomic_add_fetch, -, __ATOMIC_RELAXED)
> +B(51, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
> +B(53, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
> +B(55, __sync_add_and_fetch, -)
> +B(56, __sync_sub_and_fetch, +)
> +B(58, __sync_xor_and_fetch, ^)
>
>
> Jakub
>
>
@@ -1241,12 +1241,19 @@ constant_pointer_difference (tree p1, tr
memset (p + 4, ' ', 3);
into
memcpy (p, "abcd ", 7);
- call if the latter can be stored by pieces during expansion. */
+ call if the latter can be stored by pieces during expansion.
+
+ Also canonicalize __atomic_fetch_op (p, x, y) op x
+ to __atomic_op_fetch (p, x, y) or
+ __atomic_op_fetch (p, x, y) iop x
+ to __atomic_fetch_op (p, x, y) when possible (also __sync). */
static bool
simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
{
gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
+ enum built_in_function other_atomic = END_BUILTINS;
+ enum tree_code atomic_op = ERROR_MARK;
tree vuse = gimple_vuse (stmt2);
if (vuse == NULL)
return false;
@@ -1448,6 +1455,290 @@ simplify_builtin_call (gimple_stmt_itera
}
}
break;
+
+ #define CASE_ATOMIC(NAME, OTHER, OP) \
+ case BUILT_IN_##NAME##_1: \
+ case BUILT_IN_##NAME##_2: \
+ case BUILT_IN_##NAME##_4: \
+ case BUILT_IN_##NAME##_8: \
+ case BUILT_IN_##NAME##_16: \
+ atomic_op = OP; \
+ other_atomic \
+ = (enum built_in_function) (BUILT_IN_##OTHER##_1 \
+ + (DECL_FUNCTION_CODE (callee2) \
+ - BUILT_IN_##NAME##_1)); \
+ goto handle_atomic_fetch_op;
+
+ CASE_ATOMIC (ATOMIC_FETCH_ADD, ATOMIC_ADD_FETCH, PLUS_EXPR)
+ CASE_ATOMIC (ATOMIC_FETCH_SUB, ATOMIC_SUB_FETCH, MINUS_EXPR)
+ CASE_ATOMIC (ATOMIC_FETCH_AND, ATOMIC_AND_FETCH, BIT_AND_EXPR)
+ CASE_ATOMIC (ATOMIC_FETCH_XOR, ATOMIC_XOR_FETCH, BIT_XOR_EXPR)
+ CASE_ATOMIC (ATOMIC_FETCH_OR, ATOMIC_OR_FETCH, BIT_IOR_EXPR)
+
+ CASE_ATOMIC (SYNC_FETCH_AND_ADD, SYNC_ADD_AND_FETCH, PLUS_EXPR)
+ CASE_ATOMIC (SYNC_FETCH_AND_SUB, SYNC_SUB_AND_FETCH, MINUS_EXPR)
+ CASE_ATOMIC (SYNC_FETCH_AND_AND, SYNC_AND_AND_FETCH, BIT_AND_EXPR)
+ CASE_ATOMIC (SYNC_FETCH_AND_XOR, SYNC_XOR_AND_FETCH, BIT_XOR_EXPR)
+ CASE_ATOMIC (SYNC_FETCH_AND_OR, SYNC_OR_AND_FETCH, BIT_IOR_EXPR)
+
+ CASE_ATOMIC (ATOMIC_ADD_FETCH, ATOMIC_FETCH_ADD, MINUS_EXPR)
+ CASE_ATOMIC (ATOMIC_SUB_FETCH, ATOMIC_FETCH_SUB, PLUS_EXPR)
+ CASE_ATOMIC (ATOMIC_XOR_FETCH, ATOMIC_FETCH_XOR, BIT_XOR_EXPR)
+
+ CASE_ATOMIC (SYNC_ADD_AND_FETCH, SYNC_FETCH_AND_ADD, MINUS_EXPR)
+ CASE_ATOMIC (SYNC_SUB_AND_FETCH, SYNC_FETCH_AND_SUB, PLUS_EXPR)
+ CASE_ATOMIC (SYNC_XOR_AND_FETCH, SYNC_FETCH_AND_XOR, BIT_XOR_EXPR)
+
+#undef CASE_ATOMIC
+
+ handle_atomic_fetch_op:
+ if (gimple_call_num_args (stmt2) >= 2 && gimple_call_lhs (stmt2))
+ {
+ tree lhs2 = gimple_call_lhs (stmt2), lhsc = lhs2;
+ tree arg = gimple_call_arg (stmt2, 1);
+ gimple *use_stmt, *cast_stmt = NULL;
+ use_operand_p use_p;
+ tree ndecl = builtin_decl_explicit (other_atomic);
+
+ if (ndecl == NULL_TREE || !single_imm_use (lhs2, &use_p, &use_stmt))
+ break;
+
+ if (gimple_assign_cast_p (use_stmt))
+ {
+ cast_stmt = use_stmt;
+ lhsc = gimple_assign_lhs (cast_stmt);
+ if (lhsc == NULL_TREE
+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhsc))
+ || (TYPE_PRECISION (TREE_TYPE (lhsc))
+ != TYPE_PRECISION (TREE_TYPE (lhs2)))
+ || !single_imm_use (lhsc, &use_p, &use_stmt))
+ {
+ use_stmt = cast_stmt;
+ cast_stmt = NULL;
+ lhsc = lhs2;
+ }
+ }
+
+ bool ok = false;
+ tree oarg = NULL_TREE;
+ enum tree_code ccode = ERROR_MARK;
+ tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
+ if (is_gimple_assign (use_stmt)
+ && gimple_assign_rhs_code (use_stmt) == atomic_op)
+ {
+ if (gimple_assign_rhs1 (use_stmt) == lhsc)
+ oarg = gimple_assign_rhs2 (use_stmt);
+ else if (atomic_op != MINUS_EXPR)
+ oarg = gimple_assign_rhs1 (use_stmt);
+ }
+ else if (atomic_op == MINUS_EXPR
+ && is_gimple_assign (use_stmt)
+ && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR
+ && TREE_CODE (arg) == INTEGER_CST
+ && (TREE_CODE (gimple_assign_rhs2 (use_stmt))
+ == INTEGER_CST))
+ {
+ tree a = fold_convert (TREE_TYPE (lhs2), arg);
+ tree o = fold_convert (TREE_TYPE (lhs2),
+ gimple_assign_rhs2 (use_stmt));
+ if (wi::to_wide (a) == wi::neg (wi::to_wide (o)))
+ ok = true;
+ }
+ else if (atomic_op == BIT_AND_EXPR || atomic_op == BIT_IOR_EXPR)
+ ;
+ else if (gimple_code (use_stmt) == GIMPLE_COND)
+ {
+ ccode = gimple_cond_code (use_stmt);
+ crhs1 = gimple_cond_lhs (use_stmt);
+ crhs2 = gimple_cond_rhs (use_stmt);
+ }
+ else if (is_gimple_assign (use_stmt))
+ {
+ if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
+ {
+ ccode = gimple_assign_rhs_code (use_stmt);
+ crhs1 = gimple_assign_rhs1 (use_stmt);
+ crhs2 = gimple_assign_rhs2 (use_stmt);
+ }
+ else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
+ {
+ tree cond = gimple_assign_rhs1 (use_stmt);
+ if (COMPARISON_CLASS_P (cond))
+ {
+ ccode = TREE_CODE (cond);
+ crhs1 = TREE_OPERAND (cond, 0);
+ crhs2 = TREE_OPERAND (cond, 1);
+ }
+ }
+ }
+ if (ccode == EQ_EXPR || ccode == NE_EXPR)
+ {
+ /* Deal with x - y == 0 or x ^ y == 0
+ being optimized into x == y and x + cst == 0
+ into x == -cst. */
+ tree o = NULL_TREE;
+ if (crhs1 == lhsc)
+ o = crhs2;
+ else if (crhs2 == lhsc)
+ o = crhs1;
+ if (o && atomic_op != PLUS_EXPR)
+ oarg = o;
+ else if (o
+ && TREE_CODE (o) == INTEGER_CST
+ && TREE_CODE (arg) == INTEGER_CST)
+ {
+ tree a = fold_convert (TREE_TYPE (lhs2), arg);
+ o = fold_convert (TREE_TYPE (lhs2), o);
+ if (wi::to_wide (a) == wi::neg (wi::to_wide (o)))
+ ok = true;
+ }
+ }
+ if (oarg && !ok)
+ {
+ if (operand_equal_p (arg, oarg, 0))
+ ok = true;
+ else if (TREE_CODE (arg) == SSA_NAME
+ && TREE_CODE (oarg) == SSA_NAME)
+ {
+ tree oarg2 = oarg;
+ if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (oarg)))
+ {
+ gimple *g = SSA_NAME_DEF_STMT (oarg);
+ oarg2 = gimple_assign_rhs1 (g);
+ if (TREE_CODE (oarg2) != SSA_NAME
+ || !INTEGRAL_TYPE_P (TREE_TYPE (oarg2))
+ || (TYPE_PRECISION (TREE_TYPE (oarg2))
+ != TYPE_PRECISION (TREE_TYPE (oarg))))
+ oarg2 = oarg;
+ }
+ if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (arg)))
+ {
+ gimple *g = SSA_NAME_DEF_STMT (arg);
+ tree rhs1 = gimple_assign_rhs1 (g);
+ /* Handle e.g.
+ x.0_1 = (long unsigned int) x_4(D);
+ _2 = __atomic_fetch_add_8 (&vlong, x.0_1, 0);
+ _3 = (long int) _2;
+ _7 = x_4(D) + _3; */
+ if (rhs1 == oarg || rhs1 == oarg2)
+ ok = true;
+ /* Handle e.g.
+ x.18_1 = (short unsigned int) x_5(D);
+ _2 = (int) x.18_1;
+ _3 = __atomic_fetch_xor_2 (&vshort, _2, 0);
+ _4 = (short int) _3;
+ _8 = x_5(D) ^ _4;
+ This happens only for char/short. */
+ else if (TREE_CODE (rhs1) == SSA_NAME
+ && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ && (TYPE_PRECISION (TREE_TYPE (rhs1))
+ == TYPE_PRECISION (TREE_TYPE (lhs2))))
+ {
+ g = SSA_NAME_DEF_STMT (rhs1);
+ if (gimple_assign_cast_p (g)
+ && (gimple_assign_rhs1 (g) == oarg
+ || gimple_assign_rhs1 (g) == oarg2))
+ ok = true;
+ }
+ }
+ if (!ok && arg == oarg2)
+ /* Handle e.g.
+ _1 = __sync_fetch_and_add_4 (&v, x_5(D));
+ _2 = (int) _1;
+ x.0_3 = (int) x_5(D);
+ _7 = _2 + x.0_3; */
+ ok = true;
+ }
+ }
+
+ if (ok)
+ {
+ tree new_lhs = make_ssa_name (TREE_TYPE (lhs2));
+ gimple_call_set_lhs (stmt2, new_lhs);
+ gimple_call_set_fndecl (stmt2, ndecl);
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+ if (ccode == ERROR_MARK)
+ gimple_assign_set_rhs_with_ops (&gsi, cast_stmt
+ ? NOP_EXPR : SSA_NAME,
+ new_lhs);
+ else
+ {
+ crhs1 = new_lhs;
+ crhs2 = build_zero_cst (TREE_TYPE (lhs2));
+ if (gimple_code (use_stmt) == GIMPLE_COND)
+ {
+ gcond *cond_stmt = as_a <gcond *> (use_stmt);
+ gimple_cond_set_lhs (cond_stmt, crhs1);
+ gimple_cond_set_rhs (cond_stmt, crhs2);
+ }
+ else if (gimple_assign_rhs_class (use_stmt)
+ == GIMPLE_BINARY_RHS)
+ {
+ gimple_assign_set_rhs1 (use_stmt, crhs1);
+ gimple_assign_set_rhs2 (use_stmt, crhs2);
+ }
+ else
+ {
+ gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
+ == COND_EXPR);
+ tree cond = build2 (ccode, boolean_type_node,
+ crhs1, crhs2);
+ gimple_assign_set_rhs1 (use_stmt, cond);
+ }
+ }
+ update_stmt (use_stmt);
+ /* & and | aren't reversible. */
+ if (atomic_op != BIT_AND_EXPR
+ && atomic_op != BIT_IOR_EXPR
+ /* With -fnon-call-exceptions if we can't
+ add stmts after the call easily. */
+ && !stmt_ends_bb_p (stmt2))
+ {
+ gsi = gsi_for_stmt (stmt2);
+ tree type = TREE_TYPE (lhs2);
+ if (TREE_CODE (arg) == INTEGER_CST)
+ arg = fold_convert (type, arg);
+ else if (!useless_type_conversion_p (type, TREE_TYPE (arg)))
+ {
+ tree narg = make_ssa_name (type);
+ gimple *g = gimple_build_assign (narg, NOP_EXPR, arg);
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ arg = narg;
+ }
+ enum tree_code rcode;
+ switch (atomic_op)
+ {
+ case PLUS_EXPR: rcode = MINUS_EXPR; break;
+ case MINUS_EXPR: rcode = PLUS_EXPR; break;
+ case BIT_XOR_EXPR: rcode = atomic_op; break;
+ default: gcc_unreachable ();
+ }
+ gimple *g = gimple_build_assign (lhs2, rcode, new_lhs, arg);
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+ update_stmt (stmt2);
+ }
+ else
+ {
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs2)
+ if (use_stmt != cast_stmt)
+ {
+ gcc_assert (is_gimple_debug (use_stmt));
+ gimple_debug_bind_reset_value (use_stmt);
+ update_stmt (use_stmt);
+ }
+ if (cast_stmt)
+ {
+ gsi = gsi_for_stmt (cast_stmt);
+ gsi_remove (&gsi, true);
+ }
+ update_stmt (stmt2);
+ release_ssa_name (lhs2);
+ }
+ }
+ }
+ break;
+
default:
break;
}
@@ -0,0 +1,148 @@
+/* PR target/98737 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } */
+/* { dg-options "-O2 -fdump-tree-optimized -fcompare-debug" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-tree-dump-not "__atomic_fetch_" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__sync_fetch_and_" "optimized" } } */
+
+typedef signed char schar;
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+long vlong;
+int vint;
+short vshort;
+schar vschar;
+ulong vulong;
+uint vuint;
+ushort vushort;
+uchar vuchar;
+#define A(n, t, ut, f, o, ...) \
+t fn##n (t x) \
+{ \
+ ut z = f (&v##t, x, ##__VA_ARGS__); \
+ t w = (t) z; \
+ return w o x; \
+}
+#define B(n, f, o, ...) \
+ A(n##0, long, ulong, f, o, ##__VA_ARGS__) \
+ A(n##1, int, uint, f, o, ##__VA_ARGS__) \
+ A(n##2, short, ushort, f, o, ##__VA_ARGS__) \
+ A(n##3, schar, uchar, f, o, ##__VA_ARGS__) \
+ A(n##4, ulong, ulong, f, o, ##__VA_ARGS__) \
+ A(n##5, uint, uint, f, o, ##__VA_ARGS__) \
+ A(n##6, ushort, ushort, f, o, ##__VA_ARGS__) \
+ A(n##7, uchar, uchar, f, o, ##__VA_ARGS__)
+
+B(00, __atomic_fetch_add, +, __ATOMIC_RELAXED)
+B(01, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
+B(02, __atomic_fetch_and, &, __ATOMIC_RELAXED)
+B(03, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
+B(04, __atomic_fetch_or, |, __ATOMIC_RELAXED)
+B(05, __sync_fetch_and_add, +)
+B(06, __sync_fetch_and_sub, -)
+B(07, __sync_fetch_and_and, &)
+B(08, __sync_fetch_and_xor, ^)
+B(09, __sync_fetch_and_or, |)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (void) \
+{ \
+ ut z = f (&v##t, 42, ##__VA_ARGS__); \
+ t w = (t) z; \
+ return w o 42; \
+}
+
+B(10, __atomic_fetch_add, +, __ATOMIC_RELAXED)
+B(11, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
+B(12, __atomic_fetch_and, &, __ATOMIC_RELAXED)
+B(13, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
+B(14, __atomic_fetch_or, |, __ATOMIC_RELAXED)
+B(15, __sync_fetch_and_add, +)
+B(16, __sync_fetch_and_sub, -)
+B(17, __sync_fetch_and_and, &)
+B(18, __sync_fetch_and_xor, ^)
+B(19, __sync_fetch_and_or, |)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (t x) \
+{ \
+ ut z = f (&v##t, x, ##__VA_ARGS__); \
+ t w = (t) z; \
+ t v = w o x; \
+ return v == 0; \
+}
+
+B(20, __atomic_fetch_add, +, __ATOMIC_RELAXED)
+B(21, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
+B(22, __atomic_fetch_and, &, __ATOMIC_RELAXED)
+B(23, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
+B(24, __atomic_fetch_or, |, __ATOMIC_RELAXED)
+B(25, __sync_fetch_and_add, +)
+B(26, __sync_fetch_and_sub, -)
+B(27, __sync_fetch_and_and, &)
+B(28, __sync_fetch_and_xor, ^)
+B(29, __sync_fetch_and_or, |)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (void) \
+{ \
+ ut z = f (&v##t, 42, ##__VA_ARGS__); \
+ t w = (t) z; \
+ t v = w o 42; \
+ return v != 0; \
+}
+
+B(30, __atomic_fetch_add, +, __ATOMIC_RELAXED)
+B(31, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
+B(32, __atomic_fetch_and, &, __ATOMIC_RELAXED)
+B(33, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
+B(34, __atomic_fetch_or, |, __ATOMIC_RELAXED)
+B(35, __sync_fetch_and_add, +)
+B(36, __sync_fetch_and_sub, -)
+B(37, __sync_fetch_and_and, &)
+B(38, __sync_fetch_and_xor, ^)
+B(39, __sync_fetch_and_or, |)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (t x) \
+{ \
+ return (t) (((t) f (&v##t, x, ##__VA_ARGS__)) \
+ o x) != 0; \
+}
+
+B(40, __atomic_fetch_add, +, __ATOMIC_RELAXED)
+B(41, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
+B(42, __atomic_fetch_and, &, __ATOMIC_RELAXED)
+B(43, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
+B(44, __atomic_fetch_or, |, __ATOMIC_RELAXED)
+B(45, __sync_fetch_and_add, +)
+B(46, __sync_fetch_and_sub, -)
+B(47, __sync_fetch_and_and, &)
+B(48, __sync_fetch_and_xor, ^)
+B(49, __sync_fetch_and_or, |)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (void) \
+{ \
+ return (t) (((t) f (&v##t, 42, ##__VA_ARGS__))\
+ o 42) == 0; \
+}
+
+B(50, __atomic_fetch_add, +, __ATOMIC_RELAXED)
+B(51, __atomic_fetch_sub, -, __ATOMIC_RELAXED)
+B(52, __atomic_fetch_and, &, __ATOMIC_RELAXED)
+B(53, __atomic_fetch_xor, ^, __ATOMIC_RELAXED)
+/* (whatever | 42) == 0 is 0, so we can't test this. */
+/* B(54, __atomic_fetch_or, |, __ATOMIC_RELAXED) */
+B(55, __sync_fetch_and_add, +)
+B(56, __sync_fetch_and_sub, -)
+B(57, __sync_fetch_and_and, &)
+B(58, __sync_fetch_and_xor, ^)
+/* B(59, __sync_fetch_and_or, |) */
@@ -0,0 +1,123 @@
+/* PR target/98737 */
+/* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } */
+/* { dg-options "-O2 -fdump-tree-optimized -fcompare-debug" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-tree-dump-not "__atomic_\[^f]" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__sync_\[^f]" "optimized" } } */
+
+typedef signed char schar;
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+long vlong;
+int vint;
+short vshort;
+schar vschar;
+ulong vulong;
+uint vuint;
+ushort vushort;
+uchar vuchar;
+#define A(n, t, ut, f, o, ...) \
+t fn##n (t x) \
+{ \
+ ut z = f (&v##t, x, ##__VA_ARGS__); \
+ t w = (t) z; \
+ return w o x; \
+}
+#define B(n, f, o, ...) \
+ A(n##0, long, ulong, f, o, ##__VA_ARGS__) \
+ A(n##1, int, uint, f, o, ##__VA_ARGS__) \
+ A(n##2, short, ushort, f, o, ##__VA_ARGS__) \
+ A(n##3, schar, uchar, f, o, ##__VA_ARGS__) \
+ A(n##4, ulong, ulong, f, o, ##__VA_ARGS__) \
+ A(n##5, uint, uint, f, o, ##__VA_ARGS__) \
+ A(n##6, ushort, ushort, f, o, ##__VA_ARGS__) \
+ A(n##7, uchar, uchar, f, o, ##__VA_ARGS__)
+
+B(00, __atomic_add_fetch, -, __ATOMIC_RELAXED)
+B(01, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
+B(03, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
+B(05, __sync_add_and_fetch, -)
+B(06, __sync_sub_and_fetch, +)
+B(08, __sync_xor_and_fetch, ^)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (void) \
+{ \
+ ut z = f (&v##t, 42, ##__VA_ARGS__); \
+ t w = (t) z; \
+ return w o 42; \
+}
+
+B(10, __atomic_add_fetch, -, __ATOMIC_RELAXED)
+B(11, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
+B(13, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
+B(15, __sync_add_and_fetch, -)
+B(16, __sync_sub_and_fetch, +)
+B(18, __sync_xor_and_fetch, ^)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (t x) \
+{ \
+ ut z = f (&v##t, x, ##__VA_ARGS__); \
+ t w = (t) z; \
+ t v = w o x; \
+ return v == 0; \
+}
+
+B(20, __atomic_add_fetch, -, __ATOMIC_RELAXED)
+B(21, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
+B(23, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
+B(25, __sync_add_and_fetch, -)
+B(26, __sync_sub_and_fetch, +)
+B(28, __sync_xor_and_fetch, ^)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (void) \
+{ \
+ ut z = f (&v##t, 42, ##__VA_ARGS__); \
+ t w = (t) z; \
+ t v = w o 42; \
+ return v != 0; \
+}
+
+B(30, __atomic_add_fetch, -, __ATOMIC_RELAXED)
+B(31, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
+B(33, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
+B(35, __sync_add_and_fetch, -)
+B(36, __sync_sub_and_fetch, +)
+B(38, __sync_xor_and_fetch, ^)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (t x) \
+{ \
+ return (t) (((t) f (&v##t, x, ##__VA_ARGS__)) \
+ o x) != 0; \
+}
+
+B(40, __atomic_add_fetch, -, __ATOMIC_RELAXED)
+B(41, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
+B(43, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
+B(45, __sync_add_and_fetch, -)
+B(46, __sync_sub_and_fetch, +)
+B(48, __sync_xor_and_fetch, ^)
+
+#undef A
+#define A(n, t, ut, f, o, ...) \
+t fn##n (void) \
+{ \
+ return (t) (((t) f (&v##t, 42, ##__VA_ARGS__))\
+ o 42) == 0; \
+}
+
+B(50, __atomic_add_fetch, -, __ATOMIC_RELAXED)
+B(51, __atomic_sub_fetch, +, __ATOMIC_RELAXED)
+B(53, __atomic_xor_fetch, ^, __ATOMIC_RELAXED)
+B(55, __sync_add_and_fetch, -)
+B(56, __sync_sub_and_fetch, +)
+B(58, __sync_xor_and_fetch, ^)