rs6000: Adding missed ISA 3.0 atomic memory operation instructions.

Message ID ef94f459-971a-4fa0-b843-40843136211d@linux.ibm.com
State New
Headers
Series rs6000: Adding missed ISA 3.0 atomic memory operation instructions. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed

Commit Message

jeevitha Jan. 8, 2025, 9 a.m. UTC
  Hi All,

The following patch has been bootstrapped and regtested on powerpc64le-linux.

Changes to amo.h include the addition of the following load atomic operations:
Compare and Swap Not Equal, Fetch and Increment Bounded, Fetch and Increment
Equal, and Fetch and Decrement Bounded. Additionally, Store Twin is added for
store atomic operations.

2025-01-08 Peter Bergner <bergner@linux.ibm.com>

gcc/:
	* config/rs6000/amo.h: Add missing atomic memory operations.
	* doc/extend.texi (PowerPC Atomic Memory Operation Functions):
        Document new functions.

gcc/testsuite/:
	* gcc.target/powerpc/amo3.c: New test.
	* gcc.target/powerpc/amo4.c: Likewise.
	* gcc.target/powerpc/amo5.c: Likewise.
	* gcc.target/powerpc/amo6.c: Likewise.
	* gcc.target/powerpc/amo7.c: Likewise.

Co-authored-by: Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
  

Comments

Surya Kumari Jangala Jan. 13, 2025, 5:57 p.m. UTC | #1
On 08/01/25 2:30 pm, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> Changes to amo.h include the addition of the following load atomic operations:
> Compare and Swap Not Equal, Fetch and Increment Bounded, Fetch and Increment
> Equal, and Fetch and Decrement Bounded. Additionally, Store Twin is added for
> store atomic operations.
> 
> 2024-01-08 Peter Bergner <bergner@linux.ibm.com>
> 
> gcc/:
> 	* config/rs6000/amo.h: Add missing atomic memory operations.
> 	* doc/extend.texi (PowerPC Atomic Memory Operation Functions):
>         Document new functions.
> 
> gcc/testsuite/:
> 	* gcc.target/powerpc/amo3.c: New test.
> 	* gcc.target/powerpc/amo4.c: Likewise.
> 	* gcc.target/powerpc/amo5.c: Likewise.
> 	* gcc.target/powerpc/amo6.c: Likewise.
> 	* gcc.target/powerpc/amo7.c: Likewise.
> 
> Co-authored-by: Jeevitha Palanisamy  <jeevitha@linux.ibm.com>
> 
> diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
> index 25ab1c7b4c4..10960208d31 100644
> --- a/gcc/config/rs6000/amo.h
> +++ b/gcc/config/rs6000/amo.h
> @@ -71,6 +71,64 @@ NAME (TYPE *_PTR, TYPE _VALUE)						\
>    return _RET;								\
>  }
>  
> +/* Implementation of the LWAT/LDAT operations that take two input registers
> +   and modify one word or double-word of memory and return the value that was
> +   previously in the memory location.  The destination and two source
> +   registers are encoded with only one register number, so we need three
> +   consecutive GPR registers and there is no C/C++ type that will give
> +   us that, so we have to use register asm variables to achieve that.
> +
> +   The LWAT/LDAT opcode requires the address to be a single register,
> +   and that points to a suitably aligned memory location.  Asm volatile
> +   is used to prevent the optimizer from moving the operation.  */
> +
> +#define _AMO_LD_CMPSWP(NAME, TYPE, OPCODE, FC)				\
> +static __inline__ TYPE							\
> +NAME (TYPE *_PTR, TYPE _COND, TYPE _VALUE)				\
> +{									\
> +  register TYPE _ret asm ("r8");					\
> +  register TYPE _cond asm ("r9") = _COND;				\
> +  register TYPE _value asm ("r10") = _VALUE;				\
> +  __asm__ __volatile__ (OPCODE " %[ret],%P[addr],%[code]"		\
> +			: [addr] "+Q" (_PTR[0]), [ret] "=r" (_ret)	\
> +			: "r" (_cond), "r" (_value), [code] "n" (FC));	\
> +  return _ret;								\
> +}
> +
> +/* Implementation of the LWAT/LDAT fetch and increment operations.
> +
> +   The LWAT/LDAT opcode requires the address to be a single register that
> +   points to a suitably aligned memory location.  Asm volatile is used to
> +   prevent the optimizer from moving the operation.  */
> +
> +#define _AMO_LD_INCREMENT(NAME, TYPE, OPCODE, FC)			\
> +static __inline__ TYPE							\
> +NAME (TYPE *_PTR)							\
> +{									\
> +  TYPE _RET;								\
> +  __asm__ volatile (OPCODE " %[ret],%P[addr],%[code]\n"			\
> +		    : [addr] "+Q" (_PTR[0]), [ret] "=r" (_RET)		\
> +		    : "Q" (*(TYPE (*)[2]) _PTR), [code] "n" (FC));	\
> +  return _RET;								\
> +}
> +
> +/* Implementation of the LWAT/LDAT fetch and decrement operations.
> +
> +   The LWAT/LDAT opcode requires the address to be a single register that
> +   points to a suitably aligned memory location.  Asm volatile is used to
> +   prevent the optimizer from moving the operation.  */
> +
> +#define _AMO_LD_DECREMENT(NAME, TYPE, OPCODE, FC)			\
> +static __inline__ TYPE							\
> +NAME (TYPE *_PTR)							\
> +{									\
> +  TYPE _RET;								\
> +  __asm__ volatile (OPCODE " %[ret],%P[addr],%[code]\n"			\
> +		    : [addr] "+Q" (_PTR[1]), [ret] "=r" (_RET)		\
> +		    : "Q" (*(TYPE (*)[2]) (_PTR)), [code] "n" (FC));	\
> +  return _RET;								\
> +}
> +
>  _AMO_LD_SIMPLE (amo_lwat_add,   uint32_t, "lwat", _AMO_LD_ADD)
>  _AMO_LD_SIMPLE (amo_lwat_xor,   uint32_t, "lwat", _AMO_LD_XOR)
>  _AMO_LD_SIMPLE (amo_lwat_ior,   uint32_t, "lwat", _AMO_LD_IOR)
> @@ -78,11 +136,19 @@ _AMO_LD_SIMPLE (amo_lwat_and,   uint32_t, "lwat", _AMO_LD_AND)
>  _AMO_LD_SIMPLE (amo_lwat_umax,  uint32_t, "lwat", _AMO_LD_UMAX)
>  _AMO_LD_SIMPLE (amo_lwat_umin,  uint32_t, "lwat", _AMO_LD_UMIN)
>  _AMO_LD_SIMPLE (amo_lwat_swap,  uint32_t, "lwat", _AMO_LD_SWAP)
> +_AMO_LD_CMPSWP    (amo_lwat_cas_neq,     uint32_t, "lwat", _AMO_LD_CS_NE)
> +_AMO_LD_INCREMENT (amo_lwat_inc_eq,      uint32_t, "lwat", _AMO_LD_INC_EQUAL)
> +_AMO_LD_INCREMENT (amo_lwat_inc_bounded, uint32_t, "lwat", _AMO_LD_INC_BOUNDED)
> +_AMO_LD_DECREMENT (amo_lwat_dec_bounded, uint32_t, "lwat", _AMO_LD_DEC_BOUNDED)
>  
>  _AMO_LD_SIMPLE (amo_lwat_sadd,  int32_t,  "lwat", _AMO_LD_ADD)
>  _AMO_LD_SIMPLE (amo_lwat_smax,  int32_t,  "lwat", _AMO_LD_SMAX)
>  _AMO_LD_SIMPLE (amo_lwat_smin,  int32_t,  "lwat", _AMO_LD_SMIN)
>  _AMO_LD_SIMPLE (amo_lwat_sswap, int32_t,  "lwat", _AMO_LD_SWAP)
> +_AMO_LD_CMPSWP    (amo_lwat_scas_neq,     int32_t, "lwat", _AMO_LD_CS_NE)
> +_AMO_LD_INCREMENT (amo_lwat_sinc_eq,      int32_t, "lwat", _AMO_LD_INC_EQUAL)
> +_AMO_LD_INCREMENT (amo_lwat_sinc_bounded, int32_t, "lwat", _AMO_LD_INC_BOUNDED)
> +_AMO_LD_DECREMENT (amo_lwat_sdec_bounded, int32_t, "lwat", _AMO_LD_DEC_BOUNDED)
>  
>  _AMO_LD_SIMPLE (amo_ldat_add,   uint64_t, "ldat", _AMO_LD_ADD)
>  _AMO_LD_SIMPLE (amo_ldat_xor,   uint64_t, "ldat", _AMO_LD_XOR)
> @@ -91,12 +157,19 @@ _AMO_LD_SIMPLE (amo_ldat_and,   uint64_t, "ldat", _AMO_LD_AND)
>  _AMO_LD_SIMPLE (amo_ldat_umax,  uint64_t, "ldat", _AMO_LD_UMAX)
>  _AMO_LD_SIMPLE (amo_ldat_umin,  uint64_t, "ldat", _AMO_LD_UMIN)
>  _AMO_LD_SIMPLE (amo_ldat_swap,  uint64_t, "ldat", _AMO_LD_SWAP)
> +_AMO_LD_CMPSWP    (amo_ldat_cas_neq,     uint64_t, "ldat", _AMO_LD_CS_NE)
> +_AMO_LD_INCREMENT (amo_ldat_inc_eq,      uint64_t, "ldat", _AMO_LD_INC_EQUAL)
> +_AMO_LD_INCREMENT (amo_ldat_inc_bounded, uint64_t, "ldat", _AMO_LD_INC_BOUNDED)
> +_AMO_LD_DECREMENT (amo_ldat_dec_bounded, uint64_t, "ldat", _AMO_LD_DEC_BOUNDED)
>  
>  _AMO_LD_SIMPLE (amo_ldat_sadd,  int64_t,  "ldat", _AMO_LD_ADD)
>  _AMO_LD_SIMPLE (amo_ldat_smax,  int64_t,  "ldat", _AMO_LD_SMAX)
>  _AMO_LD_SIMPLE (amo_ldat_smin,  int64_t,  "ldat", _AMO_LD_SMIN)
>  _AMO_LD_SIMPLE (amo_ldat_sswap, int64_t,  "ldat", _AMO_LD_SWAP)
> -
> +_AMO_LD_CMPSWP    (amo_ldat_scas_neq,     int64_t, "ldat", _AMO_LD_CS_NE)
> +_AMO_LD_INCREMENT (amo_ldat_sinc_eq,      int64_t, "ldat", _AMO_LD_INC_EQUAL)
> +_AMO_LD_INCREMENT (amo_ldat_sinc_bounded, int64_t, "ldat", _AMO_LD_INC_BOUNDED)
> +_AMO_LD_DECREMENT (amo_ldat_sdec_bounded, int64_t, "ldat", _AMO_LD_DEC_BOUNDED)
>  /* Enumeration of the STWAT/STDAT sub-opcodes.  */
>  enum _AMO_ST {
>    _AMO_ST_ADD		= 0x00,		/* Store Add.  */
> @@ -127,16 +200,35 @@ NAME (TYPE *_PTR, TYPE _VALUE)						\
>    return;								\
>  }
>  
> +/* Implementation of the STWAT/STDAT store twin operation that takes
> +   one register and modifies two words or double-words of memory.
> +   No value is returned.
> +
> +   The STWAT/STDAT opcode requires the address to be a single register
> +   that points to a suitably aligned memory location.  Asm volatile is
> +   used to prevent the optimizer from moving the operation.  */
> +
> +#define _AMO_ST_TWIN(NAME, TYPE, OPCODE, FC)			\
> +static __inline__ void							\
> +NAME (TYPE *_PTR, TYPE _VALUE)						\
> +{									\
> +  __asm__ volatile (OPCODE " %[src],%P[addr],%[code]"			\
> +		    : [addr] "+Q" (*(TYPE (*)[2]) _PTR)			\
> +		    : [src] "r" (_VALUE), [code] "n" (FC));		\
> +}
> +
>  _AMO_ST_SIMPLE (amo_stwat_add,  uint32_t, "stwat", _AMO_ST_ADD)
>  _AMO_ST_SIMPLE (amo_stwat_xor,  uint32_t, "stwat", _AMO_ST_XOR)
>  _AMO_ST_SIMPLE (amo_stwat_ior,  uint32_t, "stwat", _AMO_ST_IOR)
>  _AMO_ST_SIMPLE (amo_stwat_and,  uint32_t, "stwat", _AMO_ST_AND)
>  _AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
>  _AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
> +_AMO_ST_TWIN   (amo_stwat_twin, uint32_t, "stwat", _AMO_ST_TWIN)
>  
>  _AMO_ST_SIMPLE (amo_stwat_sadd, int32_t,  "stwat", _AMO_ST_ADD)
>  _AMO_ST_SIMPLE (amo_stwat_smax, int32_t,  "stwat", _AMO_ST_SMAX)
>  _AMO_ST_SIMPLE (amo_stwat_smin, int32_t,  "stwat", _AMO_ST_SMIN)
> +_AMO_ST_TWIN   (amo_stwat_stwin, int32_t, "stwat", _AMO_ST_TWIN)
>  
>  _AMO_ST_SIMPLE (amo_stdat_add,  uint64_t, "stdat", _AMO_ST_ADD)
>  _AMO_ST_SIMPLE (amo_stdat_xor,  uint64_t, "stdat", _AMO_ST_XOR)
> @@ -144,9 +236,11 @@ _AMO_ST_SIMPLE (amo_stdat_ior,  uint64_t, "stdat", _AMO_ST_IOR)
>  _AMO_ST_SIMPLE (amo_stdat_and,  uint64_t, "stdat", _AMO_ST_AND)
>  _AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
>  _AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
> +_AMO_ST_TWIN   (amo_stdat_twin, uint64_t, "stdat", _AMO_ST_TWIN)
>  
>  _AMO_ST_SIMPLE (amo_stdat_sadd, int64_t,  "stdat", _AMO_ST_ADD)
>  _AMO_ST_SIMPLE (amo_stdat_smax, int64_t,  "stdat", _AMO_ST_SMAX)
>  _AMO_ST_SIMPLE (amo_stdat_smin, int64_t,  "stdat", _AMO_ST_SMIN)
> +_AMO_ST_TWIN   (amo_stdat_stwin, int64_t, "stdat", _AMO_ST_TWIN)
>  #endif	/* _ARCH_PWR9 && _ARCH_PPC64.  */
>  #endif	/* _POWERPC_AMO_H.  */
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 1e1b4cc837d..4c2db70880d 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -24707,11 +24707,19 @@ uint32_t amo_lwat_and (uint32_t *, uint32_t);
>  uint32_t amo_lwat_umax (uint32_t *, uint32_t);
>  uint32_t amo_lwat_umin (uint32_t *, uint32_t);
>  uint32_t amo_lwat_swap (uint32_t *, uint32_t);
> +uint32_t amo_lwat_cas_neq (uint32_t *, uint32_t, uint32_t);
> +uint32_t amo_lwat_inc_eq (uint32_t *);
> +uint32_t amo_lwat_inc_bounded (uint32_t *);
> +uint32_t amo_lwat_dec_bounded (uint32_t *);
>  
>  int32_t amo_lwat_sadd (int32_t *, int32_t);
>  int32_t amo_lwat_smax (int32_t *, int32_t);
>  int32_t amo_lwat_smin (int32_t *, int32_t);
>  int32_t amo_lwat_sswap (int32_t *, int32_t);
> +int32_t amo_lwat_scas_neq (int32_t *, int32_t, int32_t);
> +int32_t amo_lwat_sinc_eq (int32_t *);
> +int32_t amo_lwat_sinc_bounded (int32_t *);
> +int32_t amo_lwat_sdec_bounded (int32_t *);
>  
>  uint64_t amo_ldat_add (uint64_t *, uint64_t);
>  uint64_t amo_ldat_xor (uint64_t *, uint64_t);
> @@ -24720,11 +24728,19 @@ uint64_t amo_ldat_and (uint64_t *, uint64_t);
>  uint64_t amo_ldat_umax (uint64_t *, uint64_t);
>  uint64_t amo_ldat_umin (uint64_t *, uint64_t);
>  uint64_t amo_ldat_swap (uint64_t *, uint64_t);
> +uint64_t amo_ldat_cas_neq (uint64_t *, uint64_t, uint64_t);
> +uint64_t amo_ldat_inc_eq (uint64_t *);
> +uint64_t amo_ldat_inc_bounded (uint64_t *);
> +uint64_t amo_ldat_dec_bounded (uint64_t *);
>  
>  int64_t amo_ldat_sadd (int64_t *, int64_t);
>  int64_t amo_ldat_smax (int64_t *, int64_t);
>  int64_t amo_ldat_smin (int64_t *, int64_t);
>  int64_t amo_ldat_sswap (int64_t *, int64_t);
> +int64_t amo_ldat_scas_neq (int64_t *, int64_t, int64_t);
> +int64_t amo_ldat_sinc_eq (int64_t *);
> +int64_t amo_ldat_sinc_bounded (int64_t *);
> +int64_t amo_ldat_sdec_bounded (int64_t *);
>  
>  void amo_stwat_add (uint32_t *, uint32_t);
>  void amo_stwat_xor (uint32_t *, uint32_t);
> @@ -24732,10 +24748,12 @@ void amo_stwat_ior (uint32_t *, uint32_t);
>  void amo_stwat_and (uint32_t *, uint32_t);
>  void amo_stwat_umax (uint32_t *, uint32_t);
>  void amo_stwat_umin (uint32_t *, uint32_t);
> +void amo_stwat_twin (uint32_t *, uint32_t);
>  
>  void amo_stwat_sadd (int32_t *, int32_t);
>  void amo_stwat_smax (int32_t *, int32_t);
>  void amo_stwat_smin (int32_t *, int32_t);
> +void amo_stwat_stwin (int32_t *, int32_t);
>  
>  void amo_stdat_add (uint64_t *, uint64_t);
>  void amo_stdat_xor (uint64_t *, uint64_t);
> @@ -24743,10 +24761,12 @@ void amo_stdat_ior (uint64_t *, uint64_t);
>  void amo_stdat_and (uint64_t *, uint64_t);
>  void amo_stdat_umax (uint64_t *, uint64_t);
>  void amo_stdat_umin (uint64_t *, uint64_t);
> +void amo_stdat_twin (uint64_t *, uint64_t);
>  
>  void amo_stdat_sadd (int64_t *, int64_t);
>  void amo_stdat_smax (int64_t *, int64_t);
>  void amo_stdat_smin (int64_t *, int64_t);
> +void amo_stdat_stwin (int64_t *, int64_t);
>  @end smallexample
>  
>  @node PowerPC Matrix-Multiply Assist Built-in Functions
> diff --git a/gcc/testsuite/gcc.target/powerpc/amo3.c b/gcc/testsuite/gcc.target/powerpc/amo3.c
> new file mode 100644
> index 00000000000..27fb962fdec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/amo3.c
> @@ -0,0 +1,131 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
> +/* { dg-require-effective-target powerpc_vsx } */
> +
> +/* Verify P9 atomic memory operations.  */

The comments in the other tests mention the ISA level, perhaps we should change here too for consistency. That is, mention the ISA level instead of P9.

> +
> +#include <amo.h>
> +#include <stdint.h>
> +
> +uint32_t
> +do_lw_cs_ne (uint32_t *mem, uint32_t cond, uint32_t value)
> +{
> +  return amo_lwat_cas_neq (mem, cond, value);
> +}
> +
> +int32_t
> +do_lw_scs_ne (int32_t *mem, int32_t cond, int32_t value)
> +{
> +  return amo_lwat_scas_neq (mem, cond, value);
> +}
> +
> +uint32_t
> +do_lw_inc_equal (uint32_t *mem)
> +{
> +  return amo_lwat_inc_eq (mem);
> +}
> +
> +int32_t
> +do_lw_sinc_equal (int32_t *mem)
> +{
> +  return amo_lwat_sinc_eq (mem);
> +}
> +
> +uint32_t
> +do_lw_inc_bounded (uint32_t *mem)
> +{
> +  return amo_lwat_inc_bounded (mem);
> +}
> +
> +int32_t
> +do_lw_sinc_bounded (int32_t *mem)
> +{
> +  return amo_lwat_sinc_bounded (mem);
> +}
> +uint32_t
> +do_lw_dec_bounded (uint32_t *mem)
> +{
> +  return amo_lwat_dec_bounded (mem);
> +}
> +
> +int32_t
> +do_lw_sdec_bounded (int32_t *mem)
> +{
> +  return amo_lwat_sdec_bounded (mem);
> +}
> +
> +uint64_t
> +do_ld_cs_ne (uint64_t *mem, uint64_t cond, uint64_t value)
> +{
> +  return amo_ldat_cas_neq (mem, cond, value);
> +}
> +
> +int64_t
> +do_ld_scs_ne (int64_t *mem, int64_t cond, int64_t value)
> +{
> +  return amo_ldat_scas_neq (mem, cond, value);
> +}
> +
> +uint64_t
> +do_ld_inc_equal (uint64_t *mem)
> +{
> +  return amo_ldat_inc_eq (mem);
> +}
> +
> +int64_t
> +do_ld_sinc_equal (int64_t *mem)
> +{
> +  return amo_ldat_sinc_eq (mem);
> +}
> +
> +uint64_t
> +do_ld_inc_bounded (uint64_t *mem)
> +{
> +  return amo_ldat_inc_bounded (mem);
> +}
> +
> +int64_t
> +do_ld_sinc_bounded (int64_t *mem)
> +{
> +  return amo_ldat_sinc_bounded (mem);
> +}
> +uint64_t
> +do_ld_dec_bounded (uint64_t *mem)
> +{
> +  return amo_ldat_dec_bounded (mem);
> +}
> +
> +int64_t
> +do_ld_sdec_bounded (int64_t *mem)
> +{
> +  return amo_ldat_sdec_bounded (mem);
> +}
> +
> +void
> +do_sw_twin (uint32_t *mem, uint32_t value)
> +{
> +  amo_stwat_twin (mem, value);
> +}
> +
> +void
> +do_sw_stwin (int32_t *mem, int32_t value)
> +{
> +  amo_stwat_stwin (mem, value);
> +}
> +
> +void
> +do_sd_twin (uint64_t *mem, uint64_t value)
> +{
> +  amo_stdat_twin (mem, value);
> +}
> +
> +void
> +do_sd_stwin (int64_t *mem, int64_t value)
> +{
> +  amo_stdat_stwin (mem, value);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mldat\M}  8 } } */
> +/* { dg-final { scan-assembler-times {\mlwat\M}  8 } } */
> +/* { dg-final { scan-assembler-times {\mstdat\M}  2 } } */
> +/* { dg-final { scan-assembler-times {\mstwat\M}  2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/amo4.c b/gcc/testsuite/gcc.target/powerpc/amo4.c
> new file mode 100644
> index 00000000000..f354f2caac4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/amo4.c
> @@ -0,0 +1,92 @@
> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
> +/* { dg-require-effective-target powerpc_vsx } */
> +
> +#include <amo.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +
> +/* Test whether the ISA 3.1 amo (atomic memory operations) functions perform as
> +   expected.  */

It should be "ISA 3.0" and not "ISA 3.1". Ditto for other tests.

Regards,
Surya
  

Patch

diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index 25ab1c7b4c4..10960208d31 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -71,6 +71,64 @@  NAME (TYPE *_PTR, TYPE _VALUE)						\
   return _RET;								\
 }
 
+/* Implementation of the LWAT/LDAT operations that take two input registers
+   and modify one word or double-word of memory and return the value that was
+   previously in the memory location.  The destination and two source
+   registers are encoded with only one register number, so we need three
+   consecutive GPR registers.  No C/C++ type provides that, so we use
+   register asm variables to pin the operands to r8, r9 and r10.
+
+   The LWAT/LDAT opcode requires the address to be a single register
+   that points to a suitably aligned memory location.  Asm volatile
+   is used to prevent the optimizer from moving the operation.  */
+
+#define _AMO_LD_CMPSWP(NAME, TYPE, OPCODE, FC)				\
+static __inline__ TYPE							\
+NAME (TYPE *_PTR, TYPE _COND, TYPE _VALUE)				\
+{									\
+  register TYPE _ret asm ("r8");					\
+  register TYPE _cond asm ("r9") = _COND;				\
+  register TYPE _value asm ("r10") = _VALUE;				\
+  __asm__ __volatile__ (OPCODE " %[ret],%P[addr],%[code]"		\
+			: [addr] "+Q" (_PTR[0]), [ret] "=r" (_ret)	\
+			: "r" (_cond), "r" (_value), [code] "n" (FC));	\
+  return _ret;								\
+}
+
+/* Implementation of the LWAT/LDAT fetch and increment operations.
+
+   The LWAT/LDAT opcode requires the address to be a single register that
+   points to a suitably aligned memory location.  Asm volatile is used to
+   prevent the optimizer from moving the operation.  */
+
+#define _AMO_LD_INCREMENT(NAME, TYPE, OPCODE, FC)			\
+static __inline__ TYPE							\
+NAME (TYPE *_PTR)							\
+{									\
+  TYPE _RET;								\
+  __asm__ volatile (OPCODE " %[ret],%P[addr],%[code]\n"			\
+		    : [addr] "+Q" (_PTR[0]), [ret] "=r" (_RET)		\
+		    : "Q" (*(TYPE (*)[2]) _PTR), [code] "n" (FC));	\
+  return _RET;								\
+}
+
+/* Implementation of the LWAT/LDAT fetch and decrement operations.
+
+   The LWAT/LDAT opcode requires the address to be a single register that
+   points to a suitably aligned memory location.  Asm volatile is used to
+   prevent the optimizer from moving the operation.  */
+
+#define _AMO_LD_DECREMENT(NAME, TYPE, OPCODE, FC)			\
+static __inline__ TYPE							\
+NAME (TYPE *_PTR)							\
+{									\
+  TYPE _RET;								\
+  __asm__ volatile (OPCODE " %[ret],%P[addr],%[code]\n"			\
+		    : [addr] "+Q" (_PTR[1]), [ret] "=r" (_RET)		\
+		    : "Q" (*(TYPE (*)[2]) (_PTR)), [code] "n" (FC));	\
+  return _RET;								\
+}
+
 _AMO_LD_SIMPLE (amo_lwat_add,   uint32_t, "lwat", _AMO_LD_ADD)
 _AMO_LD_SIMPLE (amo_lwat_xor,   uint32_t, "lwat", _AMO_LD_XOR)
 _AMO_LD_SIMPLE (amo_lwat_ior,   uint32_t, "lwat", _AMO_LD_IOR)
@@ -78,11 +136,19 @@  _AMO_LD_SIMPLE (amo_lwat_and,   uint32_t, "lwat", _AMO_LD_AND)
 _AMO_LD_SIMPLE (amo_lwat_umax,  uint32_t, "lwat", _AMO_LD_UMAX)
 _AMO_LD_SIMPLE (amo_lwat_umin,  uint32_t, "lwat", _AMO_LD_UMIN)
 _AMO_LD_SIMPLE (amo_lwat_swap,  uint32_t, "lwat", _AMO_LD_SWAP)
+_AMO_LD_CMPSWP    (amo_lwat_cas_neq,     uint32_t, "lwat", _AMO_LD_CS_NE)
+_AMO_LD_INCREMENT (amo_lwat_inc_eq,      uint32_t, "lwat", _AMO_LD_INC_EQUAL)
+_AMO_LD_INCREMENT (amo_lwat_inc_bounded, uint32_t, "lwat", _AMO_LD_INC_BOUNDED)
+_AMO_LD_DECREMENT (amo_lwat_dec_bounded, uint32_t, "lwat", _AMO_LD_DEC_BOUNDED)
 
 _AMO_LD_SIMPLE (amo_lwat_sadd,  int32_t,  "lwat", _AMO_LD_ADD)
 _AMO_LD_SIMPLE (amo_lwat_smax,  int32_t,  "lwat", _AMO_LD_SMAX)
 _AMO_LD_SIMPLE (amo_lwat_smin,  int32_t,  "lwat", _AMO_LD_SMIN)
 _AMO_LD_SIMPLE (amo_lwat_sswap, int32_t,  "lwat", _AMO_LD_SWAP)
+_AMO_LD_CMPSWP    (amo_lwat_scas_neq,     int32_t, "lwat", _AMO_LD_CS_NE)
+_AMO_LD_INCREMENT (amo_lwat_sinc_eq,      int32_t, "lwat", _AMO_LD_INC_EQUAL)
+_AMO_LD_INCREMENT (amo_lwat_sinc_bounded, int32_t, "lwat", _AMO_LD_INC_BOUNDED)
+_AMO_LD_DECREMENT (amo_lwat_sdec_bounded, int32_t, "lwat", _AMO_LD_DEC_BOUNDED)
 
 _AMO_LD_SIMPLE (amo_ldat_add,   uint64_t, "ldat", _AMO_LD_ADD)
 _AMO_LD_SIMPLE (amo_ldat_xor,   uint64_t, "ldat", _AMO_LD_XOR)
@@ -91,12 +157,19 @@  _AMO_LD_SIMPLE (amo_ldat_and,   uint64_t, "ldat", _AMO_LD_AND)
 _AMO_LD_SIMPLE (amo_ldat_umax,  uint64_t, "ldat", _AMO_LD_UMAX)
 _AMO_LD_SIMPLE (amo_ldat_umin,  uint64_t, "ldat", _AMO_LD_UMIN)
 _AMO_LD_SIMPLE (amo_ldat_swap,  uint64_t, "ldat", _AMO_LD_SWAP)
+_AMO_LD_CMPSWP    (amo_ldat_cas_neq,     uint64_t, "ldat", _AMO_LD_CS_NE)
+_AMO_LD_INCREMENT (amo_ldat_inc_eq,      uint64_t, "ldat", _AMO_LD_INC_EQUAL)
+_AMO_LD_INCREMENT (amo_ldat_inc_bounded, uint64_t, "ldat", _AMO_LD_INC_BOUNDED)
+_AMO_LD_DECREMENT (amo_ldat_dec_bounded, uint64_t, "ldat", _AMO_LD_DEC_BOUNDED)
 
 _AMO_LD_SIMPLE (amo_ldat_sadd,  int64_t,  "ldat", _AMO_LD_ADD)
 _AMO_LD_SIMPLE (amo_ldat_smax,  int64_t,  "ldat", _AMO_LD_SMAX)
 _AMO_LD_SIMPLE (amo_ldat_smin,  int64_t,  "ldat", _AMO_LD_SMIN)
 _AMO_LD_SIMPLE (amo_ldat_sswap, int64_t,  "ldat", _AMO_LD_SWAP)
-
+_AMO_LD_CMPSWP    (amo_ldat_scas_neq,     int64_t, "ldat", _AMO_LD_CS_NE)
+_AMO_LD_INCREMENT (amo_ldat_sinc_eq,      int64_t, "ldat", _AMO_LD_INC_EQUAL)
+_AMO_LD_INCREMENT (amo_ldat_sinc_bounded, int64_t, "ldat", _AMO_LD_INC_BOUNDED)
+_AMO_LD_DECREMENT (amo_ldat_sdec_bounded, int64_t, "ldat", _AMO_LD_DEC_BOUNDED)
 /* Enumeration of the STWAT/STDAT sub-opcodes.  */
 enum _AMO_ST {
   _AMO_ST_ADD		= 0x00,		/* Store Add.  */
@@ -127,16 +200,35 @@  NAME (TYPE *_PTR, TYPE _VALUE)						\
   return;								\
 }
 
+/* Implementation of the STWAT/STDAT store twin operation that takes
+   one register and modifies two words or double-words of memory.
+   No value is returned.
+
+   The STWAT/STDAT opcode requires the address to be a single register
+   that points to a suitably aligned memory location.  Asm volatile is
+   used to prevent the optimizer from moving the operation.  */
+
+#define _AMO_ST_TWIN(NAME, TYPE, OPCODE, FC)			\
+static __inline__ void							\
+NAME (TYPE *_PTR, TYPE _VALUE)						\
+{									\
+  __asm__ volatile (OPCODE " %[src],%P[addr],%[code]"			\
+		    : [addr] "+Q" (*(TYPE (*)[2]) _PTR)			\
+		    : [src] "r" (_VALUE), [code] "n" (FC));		\
+}
+
 _AMO_ST_SIMPLE (amo_stwat_add,  uint32_t, "stwat", _AMO_ST_ADD)
 _AMO_ST_SIMPLE (amo_stwat_xor,  uint32_t, "stwat", _AMO_ST_XOR)
 _AMO_ST_SIMPLE (amo_stwat_ior,  uint32_t, "stwat", _AMO_ST_IOR)
 _AMO_ST_SIMPLE (amo_stwat_and,  uint32_t, "stwat", _AMO_ST_AND)
 _AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
 _AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
+_AMO_ST_TWIN   (amo_stwat_twin, uint32_t, "stwat", _AMO_ST_TWIN)
 
 _AMO_ST_SIMPLE (amo_stwat_sadd, int32_t,  "stwat", _AMO_ST_ADD)
 _AMO_ST_SIMPLE (amo_stwat_smax, int32_t,  "stwat", _AMO_ST_SMAX)
 _AMO_ST_SIMPLE (amo_stwat_smin, int32_t,  "stwat", _AMO_ST_SMIN)
+_AMO_ST_TWIN   (amo_stwat_stwin, int32_t, "stwat", _AMO_ST_TWIN)
 
 _AMO_ST_SIMPLE (amo_stdat_add,  uint64_t, "stdat", _AMO_ST_ADD)
 _AMO_ST_SIMPLE (amo_stdat_xor,  uint64_t, "stdat", _AMO_ST_XOR)
@@ -144,9 +236,11 @@  _AMO_ST_SIMPLE (amo_stdat_ior,  uint64_t, "stdat", _AMO_ST_IOR)
 _AMO_ST_SIMPLE (amo_stdat_and,  uint64_t, "stdat", _AMO_ST_AND)
 _AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
 _AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
+_AMO_ST_TWIN   (amo_stdat_twin, uint64_t, "stdat", _AMO_ST_TWIN)
 
 _AMO_ST_SIMPLE (amo_stdat_sadd, int64_t,  "stdat", _AMO_ST_ADD)
 _AMO_ST_SIMPLE (amo_stdat_smax, int64_t,  "stdat", _AMO_ST_SMAX)
 _AMO_ST_SIMPLE (amo_stdat_smin, int64_t,  "stdat", _AMO_ST_SMIN)
+_AMO_ST_TWIN   (amo_stdat_stwin, int64_t, "stdat", _AMO_ST_TWIN)
 #endif	/* _ARCH_PWR9 && _ARCH_PPC64.  */
 #endif	/* _POWERPC_AMO_H.  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1e1b4cc837d..4c2db70880d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -24707,11 +24707,19 @@  uint32_t amo_lwat_and (uint32_t *, uint32_t);
 uint32_t amo_lwat_umax (uint32_t *, uint32_t);
 uint32_t amo_lwat_umin (uint32_t *, uint32_t);
 uint32_t amo_lwat_swap (uint32_t *, uint32_t);
+uint32_t amo_lwat_cas_neq (uint32_t *, uint32_t, uint32_t);
+uint32_t amo_lwat_inc_eq (uint32_t *);
+uint32_t amo_lwat_inc_bounded (uint32_t *);
+uint32_t amo_lwat_dec_bounded (uint32_t *);
 
 int32_t amo_lwat_sadd (int32_t *, int32_t);
 int32_t amo_lwat_smax (int32_t *, int32_t);
 int32_t amo_lwat_smin (int32_t *, int32_t);
 int32_t amo_lwat_sswap (int32_t *, int32_t);
+int32_t amo_lwat_scas_neq (int32_t *, int32_t, int32_t);
+int32_t amo_lwat_sinc_eq (int32_t *);
+int32_t amo_lwat_sinc_bounded (int32_t *);
+int32_t amo_lwat_sdec_bounded (int32_t *);
 
 uint64_t amo_ldat_add (uint64_t *, uint64_t);
 uint64_t amo_ldat_xor (uint64_t *, uint64_t);
@@ -24720,11 +24728,19 @@  uint64_t amo_ldat_and (uint64_t *, uint64_t);
 uint64_t amo_ldat_umax (uint64_t *, uint64_t);
 uint64_t amo_ldat_umin (uint64_t *, uint64_t);
 uint64_t amo_ldat_swap (uint64_t *, uint64_t);
+uint64_t amo_ldat_cas_neq (uint64_t *, uint64_t, uint64_t);
+uint64_t amo_ldat_inc_eq (uint64_t *);
+uint64_t amo_ldat_inc_bounded (uint64_t *);
+uint64_t amo_ldat_dec_bounded (uint64_t *);
 
 int64_t amo_ldat_sadd (int64_t *, int64_t);
 int64_t amo_ldat_smax (int64_t *, int64_t);
 int64_t amo_ldat_smin (int64_t *, int64_t);
 int64_t amo_ldat_sswap (int64_t *, int64_t);
+int64_t amo_ldat_scas_neq (int64_t *, int64_t, int64_t);
+int64_t amo_ldat_sinc_eq (int64_t *);
+int64_t amo_ldat_sinc_bounded (int64_t *);
+int64_t amo_ldat_sdec_bounded (int64_t *);
 
 void amo_stwat_add (uint32_t *, uint32_t);
 void amo_stwat_xor (uint32_t *, uint32_t);
@@ -24732,10 +24748,12 @@  void amo_stwat_ior (uint32_t *, uint32_t);
 void amo_stwat_and (uint32_t *, uint32_t);
 void amo_stwat_umax (uint32_t *, uint32_t);
 void amo_stwat_umin (uint32_t *, uint32_t);
+void amo_stwat_twin (uint32_t *, uint32_t);
 
 void amo_stwat_sadd (int32_t *, int32_t);
 void amo_stwat_smax (int32_t *, int32_t);
 void amo_stwat_smin (int32_t *, int32_t);
+void amo_stwat_stwin (int32_t *, int32_t);
 
 void amo_stdat_add (uint64_t *, uint64_t);
 void amo_stdat_xor (uint64_t *, uint64_t);
@@ -24743,10 +24761,12 @@  void amo_stdat_ior (uint64_t *, uint64_t);
 void amo_stdat_and (uint64_t *, uint64_t);
 void amo_stdat_umax (uint64_t *, uint64_t);
 void amo_stdat_umin (uint64_t *, uint64_t);
+void amo_stdat_twin (uint64_t *, uint64_t);
 
 void amo_stdat_sadd (int64_t *, int64_t);
 void amo_stdat_smax (int64_t *, int64_t);
 void amo_stdat_smin (int64_t *, int64_t);
+void amo_stdat_stwin (int64_t *, int64_t);
 @end smallexample
 
 @node PowerPC Matrix-Multiply Assist Built-in Functions
diff --git a/gcc/testsuite/gcc.target/powerpc/amo3.c b/gcc/testsuite/gcc.target/powerpc/amo3.c
new file mode 100644
index 00000000000..27fb962fdec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/amo3.c
@@ -0,0 +1,131 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* Verify ISA 3.0 (Power9) atomic memory operations.  */
+
+#include <amo.h>
+#include <stdint.h>
+
+uint32_t
+do_lw_cs_ne (uint32_t *mem, uint32_t cond, uint32_t value)
+{
+  return amo_lwat_cas_neq (mem, cond, value);
+}
+
+int32_t
+do_lw_scs_ne (int32_t *mem, int32_t cond, int32_t value)
+{
+  return amo_lwat_scas_neq (mem, cond, value);
+}
+
+uint32_t
+do_lw_inc_equal (uint32_t *mem)
+{
+  return amo_lwat_inc_eq (mem);
+}
+
+int32_t
+do_lw_sinc_equal (int32_t *mem)
+{
+  return amo_lwat_sinc_eq (mem);
+}
+
+uint32_t
+do_lw_inc_bounded (uint32_t *mem)
+{
+  return amo_lwat_inc_bounded (mem);
+}
+
+int32_t
+do_lw_sinc_bounded (int32_t *mem)
+{
+  return amo_lwat_sinc_bounded (mem);
+}
+uint32_t
+do_lw_dec_bounded (uint32_t *mem)
+{
+  return amo_lwat_dec_bounded (mem);
+}
+
+int32_t
+do_lw_sdec_bounded (int32_t *mem)
+{
+  return amo_lwat_sdec_bounded (mem);
+}
+
+uint64_t
+do_ld_cs_ne (uint64_t *mem, uint64_t cond, uint64_t value)
+{
+  return amo_ldat_cas_neq (mem, cond, value);
+}
+
+int64_t
+do_ld_scs_ne (int64_t *mem, int64_t cond, int64_t value)
+{
+  return amo_ldat_scas_neq (mem, cond, value);
+}
+
+uint64_t
+do_ld_inc_equal (uint64_t *mem)
+{
+  return amo_ldat_inc_eq (mem);
+}
+
+int64_t
+do_ld_sinc_equal (int64_t *mem)
+{
+  return amo_ldat_sinc_eq (mem);
+}
+
+uint64_t
+do_ld_inc_bounded (uint64_t *mem)
+{
+  return amo_ldat_inc_bounded (mem);
+}
+
+int64_t
+do_ld_sinc_bounded (int64_t *mem)
+{
+  return amo_ldat_sinc_bounded (mem);
+}
+uint64_t
+do_ld_dec_bounded (uint64_t *mem)
+{
+  return amo_ldat_dec_bounded (mem);
+}
+
+int64_t
+do_ld_sdec_bounded (int64_t *mem)
+{
+  return amo_ldat_sdec_bounded (mem);
+}
+
+void
+do_sw_twin (uint32_t *mem, uint32_t value)
+{
+  amo_stwat_twin (mem, value);
+}
+
+void
+do_sw_stwin (int32_t *mem, int32_t value)
+{
+  amo_stwat_stwin (mem, value);
+}
+
+void
+do_sd_twin (uint64_t *mem, uint64_t value)
+{
+  amo_stdat_twin (mem, value);
+}
+
+void
+do_sd_stwin (int64_t *mem, int64_t value)
+{
+  amo_stdat_stwin (mem, value);
+}
+
+/* { dg-final { scan-assembler-times {\mldat\M}  8 } } */
+/* { dg-final { scan-assembler-times {\mlwat\M}  8 } } */
+/* { dg-final { scan-assembler-times {\mstdat\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mstwat\M}  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/amo4.c b/gcc/testsuite/gcc.target/powerpc/amo4.c
new file mode 100644
index 00000000000..f354f2caac4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/amo4.c
@@ -0,0 +1,92 @@ 
+/* { dg-do run { target { lp64 && p9vector_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+#include <amo.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits.h>
+
+/* Test whether the ISA 3.0 amo (atomic memory operations) functions perform as
+   expected.  */
+
+/* 32-bit tests.  */
+static uint32_t u32_ld[4][2] = {
+  { 10, 15 },			/* Increment Bounded */
+  { 10, 10 },			/* Increment Bounded */
+  { 10, 10 },			/* Increment Equal */
+  { 10, 15 }			/* Increment Equal */
+};
+
+static uint32_t u32_result[4];
+
+static uint32_t u32_update[4] = {
+  10 + 1,			/* Increment Bounded */
+  10,				/* Increment Bounded */
+  10 + 1,			/* Increment Equal */
+  10				/* Increment Equal */
+};
+
+static uint32_t u32_prev[4] = {
+  10,				/* Increment Bounded */
+  INT_MIN,			/* Increment Bounded */
+  10,				/* Increment Equal */
+  INT_MIN			/* Increment Equal */
+};
+
+/* 64-bit tests.  */
+static uint64_t u64_ld[4][2] = {
+  { 10, 15 },			/* Increment Bounded */
+  { 10, 10 },			/* Increment Bounded */
+  { 10, 10 },			/* Increment Equal */
+  { 10, 15 }			/* Increment Equal */
+};
+
+static uint64_t u64_result[4];
+
+static uint64_t u64_update[4] = {
+  10 + 1,			/* Increment Bounded */
+  10,				/* Increment Bounded */
+  10 + 1,			/* Increment Equal */
+  10				/* Increment Equal */
+};
+
+static uint64_t u64_prev[4] = {
+  10,				/* Increment Bounded */
+  INT64_MIN,			/* Increment Bounded */
+  10,				/* Increment Equal */
+  INT64_MIN			/* Increment Equal */
+};
+
+int
+main (void)
+{
+  size_t i;
+
+  u32_result[0] = amo_lwat_inc_bounded (&u32_ld[0][0]);
+  u32_result[1] = amo_lwat_inc_bounded (&u32_ld[1][0]);
+  u32_result[2] = amo_lwat_inc_eq (&u32_ld[2][0]);
+  u32_result[3] = amo_lwat_inc_eq (&u32_ld[3][0]);
+
+  u64_result[0] = amo_ldat_inc_bounded (&u64_ld[0][0]);
+  u64_result[1] = amo_ldat_inc_bounded (&u64_ld[1][0]);
+  u64_result[2] = amo_ldat_inc_eq (&u64_ld[2][0]);
+  u64_result[3] = amo_ldat_inc_eq (&u64_ld[3][0]);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (u32_result[i] != u32_prev[i])
+	abort ();
+
+      if (u32_ld[i][0] != u32_update[i])
+	abort ();
+
+      if (u64_result[i] != u64_prev[i])
+	abort ();
+
+      if (u64_ld[i][0] != u64_update[i])
+	abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/amo5.c b/gcc/testsuite/gcc.target/powerpc/amo5.c
new file mode 100644
index 00000000000..80e85260b91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/amo5.c
@@ -0,0 +1,44 @@ 
+/* { dg-do run { target { lp64 && p9vector_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+#include <amo.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits.h>
+
+/* Test whether the ISA 3.0 amo (atomic memory operations) functions perform as
+   expected.  */
+
+int
+main (void)
+{
+  /* Word case: memory (100) differs from the comparand (200), so the
+     test expects the new value to be stored and 100 to be returned.  */
+  static uint32_t u32_mem = 100;
+  static uint32_t u32_cond = 200;
+  static uint32_t u32_value = 250;
+  static uint32_t u32_prev = 100;
+  static uint32_t u32_result;
+
+  /* Doubleword case: memory (200) differs from the comparand (300).  */
+  static uint64_t u64_mem = 200;
+  static uint64_t u64_cond = 300;
+  static uint64_t u64_value = 250;
+  static uint64_t u64_prev = 200;
+  static uint64_t u64_result;
+
+  u32_result = amo_lwat_cas_neq (&u32_mem, u32_cond, u32_value);
+  u64_result = amo_ldat_cas_neq (&u64_mem, u64_cond, u64_value);
+
+  /* Memory must now hold the new value, and each call must have
+     returned what was in memory beforehand.  Only the "not equal"
+     path is exercised here.  */
+  if (u32_mem != u32_value || u32_result != u32_prev)
+    abort ();
+
+  if (u64_mem != u64_value || u64_result != u64_prev)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/amo6.c b/gcc/testsuite/gcc.target/powerpc/amo6.c
new file mode 100644
index 00000000000..347768e08e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/amo6.c
@@ -0,0 +1,40 @@ 
+/* { dg-do run { target { lp64 && p9vector_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+#include <amo.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits.h>
+
+/* Test whether the ISA 3.0 amo (atomic memory operations) functions perform as
+   expected.  */
+
+int
+main (void)
+{
+  size_t i;
+
+  /* Both elements of each pair start out equal; the test expects the
+     twin store to replace both of them with the new value.  */
+  static uint32_t u32_mem[2] = { 3, 3 };
+  static uint32_t u32_value = 5;
+
+  static uint64_t u64_mem[2] = { 7, 7 };
+  static uint64_t u64_value = 9;
+
+  amo_stwat_twin (u32_mem, u32_value);
+  amo_stdat_twin (u64_mem, u64_value);
+
+  /* Every element of both pairs must now hold the stored value.  */
+  for (i = 0; i < 2; i++)
+    {
+      if (u32_mem[i] != u32_value)
+	abort ();
+
+      if (u64_mem[i] != u64_value)
+	abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/amo7.c b/gcc/testsuite/gcc.target/powerpc/amo7.c
new file mode 100644
index 00000000000..19580fa8a0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/amo7.c
@@ -0,0 +1,76 @@ 
+/* { dg-do run { target { lp64 && p9vector_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+#include <amo.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits.h>
+
+/* Test whether the ISA 3.0 amo (atomic memory operations) functions perform as
+   expected.  */
+
+/* 32-bit tests.  Each row of u32_ld is the pair handed to one call.  */
+static uint32_t u32_ld[2][2] = {
+  { 10, 15 },			/* Decrement Bounded, elements differ */
+  { 10, 10 },			/* Decrement Bounded, elements equal */
+};
+
+static uint32_t u32_result[2];  /* Values returned by the amo calls.  */
+
+static uint32_t u32_update[2] = {  /* Expected u32_ld[i][1] afterwards.  */
+  15 - 1,			/* Decrement Bounded, decremented case */
+  10,				/* Decrement Bounded, unchanged case */
+};
+
+static uint32_t u32_prev[2] = {  /* Expected return value of each call.  */
+  15,				/* Decrement Bounded, old second element */
+  INT_MIN,			/* Decrement Bounded, unchanged case */
+};
+
+/* 64-bit tests.  Each row of u64_ld is the pair handed to one call.  */
+static uint64_t u64_ld[2][2] = {
+  { 10, 15 },                   /* Decrement Bounded, elements differ */
+  { 10, 10 },                   /* Decrement Bounded, elements equal */
+};
+
+static uint64_t u64_result[2];  /* Values returned by the amo calls.  */
+
+static uint64_t u64_update[2] = {  /* Expected u64_ld[i][1] afterwards.  */
+  15 - 1,                       /* Decrement Bounded, decremented case */
+  10,                           /* Decrement Bounded, unchanged case */
+};
+
+static uint64_t u64_prev[2] = {  /* Expected return value of each call.  */
+  15,                           /* Decrement Bounded, old second element */
+  INT64_MIN,                    /* Decrement Bounded, unchanged case */
+};
+
+int
+main (void)
+{
+  size_t idx;
+
+  /* Each call is handed the address of the first element of a pair,
+     once for the word form and once for the doubleword form.  */
+  u32_result[0] = amo_lwat_dec_bounded (&u32_ld[0][0]);
+  u32_result[1] = amo_lwat_dec_bounded (&u32_ld[1][0]);
+
+  u64_result[0] = amo_ldat_dec_bounded (&u64_ld[0][0]);
+  u64_result[1] = amo_ldat_dec_bounded (&u64_ld[1][0]);
+
+  /* The expectations are checked against the second element of each
+     pair, together with the value each call returned.  */
+  for (idx = 0; idx < 2; idx++)
+    {
+      if (u32_result[idx] != u32_prev[idx]
+	  || u32_ld[idx][1] != u32_update[idx])
+	abort ();
+
+      if (u64_result[idx] != u64_prev[idx]
+	  || u64_ld[idx][1] != u64_update[idx])
+	abort ();
+    }
+
+  return 0;
+}