Add ABI test for __bf16 type

Message ID 20220818073405.5527-1-haochen.jiang@intel.com
State Committed
Commit 86c0d98620ee3a9610a3f9d725e943e1d2d4c7ec
Headers
Series Add ABI test for __bf16 type |

Commit Message

Haochen Jiang Aug. 18, 2022, 7:34 a.m. UTC
  Hi all,

This patch aims to add bf16 abi test after the whole __bf16 type is added.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/testsuite/ChangeLog:

	* gcc.target/x86_64/abi/bf16/abi-bf16.exp: New test.
	* gcc.target/x86_64/abi/bf16/args.h: Ditto.
	* gcc.target/x86_64/abi/bf16/asm-support.S: Ditto.
	* gcc.target/x86_64/abi/bf16/bf16-check.h: Ditto.
	* gcc.target/x86_64/abi/bf16/bf16-helper.h: Ditto.
	* gcc.target/x86_64/abi/bf16/defines.h: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/args.h: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/args.h: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c: Ditto.
	* gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c: Ditto.
	* gcc.target/x86_64/abi/bf16/macros.h: Ditto.
	* gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_basic_alignment.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_basic_returning.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_basic_sizes.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_m128_returning.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_passing_floats.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_passing_m128.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_passing_structs.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_passing_unions.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_struct_returning.c: Ditto.
	* gcc.target/x86_64/abi/bf16/test_varargs-m128.c: Ditto.
---
 .../gcc.target/x86_64/abi/bf16/abi-bf16.exp   |  46 +++
 .../gcc.target/x86_64/abi/bf16/args.h         | 164 +++++++++
 .../gcc.target/x86_64/abi/bf16/asm-support.S  |  84 +++++
 .../gcc.target/x86_64/abi/bf16/bf16-check.h   |  24 ++
 .../gcc.target/x86_64/abi/bf16/bf16-helper.h  |  41 +++
 .../gcc.target/x86_64/abi/bf16/defines.h      | 163 +++++++++
 .../x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp |  46 +++
 .../x86_64/abi/bf16/m256bf16/args.h           | 152 +++++++++
 .../x86_64/abi/bf16/m256bf16/asm-support.S    |  84 +++++
 .../x86_64/abi/bf16/m256bf16/bf16-ymm-check.h |  24 ++
 .../abi/bf16/m256bf16/test_m256_returning.c   |  38 +++
 .../abi/bf16/m256bf16/test_passing_m256.c     | 235 +++++++++++++
 .../abi/bf16/m256bf16/test_passing_structs.c  |  69 ++++
 .../abi/bf16/m256bf16/test_passing_unions.c   | 179 ++++++++++
 .../abi/bf16/m256bf16/test_varargs-m256.c     | 107 ++++++
 .../x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp |  46 +++
 .../x86_64/abi/bf16/m512bf16/args.h           | 155 +++++++++
 .../x86_64/abi/bf16/m512bf16/asm-support.S    | 100 ++++++
 .../x86_64/abi/bf16/m512bf16/bf16-zmm-check.h |  23 ++
 .../abi/bf16/m512bf16/test_m512_returning.c   |  44 +++
 .../abi/bf16/m512bf16/test_passing_m512.c     | 243 ++++++++++++++
 .../abi/bf16/m512bf16/test_passing_structs.c  |  77 +++++
 .../abi/bf16/m512bf16/test_passing_unions.c   | 222 +++++++++++++
 .../abi/bf16/m512bf16/test_varargs-m512.c     | 111 +++++++
 .../gcc.target/x86_64/abi/bf16/macros.h       |  53 +++
 .../bf16/test_3_element_struct_and_unions.c   | 214 ++++++++++++
 .../x86_64/abi/bf16/test_basic_alignment.c    |  14 +
 .../bf16/test_basic_array_size_and_align.c    |  13 +
 .../x86_64/abi/bf16/test_basic_returning.c    |  20 ++
 .../x86_64/abi/bf16/test_basic_sizes.c        |  14 +
 .../bf16/test_basic_struct_size_and_align.c   |  14 +
 .../bf16/test_basic_union_size_and_align.c    |  12 +
 .../x86_64/abi/bf16/test_m128_returning.c     |  38 +++
 .../x86_64/abi/bf16/test_passing_floats.c     | 312 ++++++++++++++++++
 .../x86_64/abi/bf16/test_passing_m128.c       | 238 +++++++++++++
 .../x86_64/abi/bf16/test_passing_structs.c    |  67 ++++
 .../x86_64/abi/bf16/test_passing_unions.c     | 160 +++++++++
 .../x86_64/abi/bf16/test_struct_returning.c   | 176 ++++++++++
 .../x86_64/abi/bf16/test_varargs-m128.c       | 111 +++++++
 39 files changed, 3933 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c
 create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c
  

Comments

Hongtao Liu Aug. 19, 2022, 12:58 a.m. UTC | #1
On Thu, Aug 18, 2022 at 3:36 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi all,
>
> This patch aims to add bf16 abi test after the whole __bf16 type is added.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
Ok.
>
> BRs,
> Haochen
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/x86_64/abi/bf16/abi-bf16.exp: New test.
>         * gcc.target/x86_64/abi/bf16/args.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/asm-support.S: Ditto.
>         * gcc.target/x86_64/abi/bf16/bf16-check.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/bf16-helper.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/defines.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/macros.h: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_basic_alignment.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_basic_returning.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_basic_sizes.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_m128_returning.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_passing_floats.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_passing_m128.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_passing_structs.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_passing_unions.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_struct_returning.c: Ditto.
>         * gcc.target/x86_64/abi/bf16/test_varargs-m128.c: Ditto.
> ---
>  .../gcc.target/x86_64/abi/bf16/abi-bf16.exp   |  46 +++
>  .../gcc.target/x86_64/abi/bf16/args.h         | 164 +++++++++
>  .../gcc.target/x86_64/abi/bf16/asm-support.S  |  84 +++++
>  .../gcc.target/x86_64/abi/bf16/bf16-check.h   |  24 ++
>  .../gcc.target/x86_64/abi/bf16/bf16-helper.h  |  41 +++
>  .../gcc.target/x86_64/abi/bf16/defines.h      | 163 +++++++++
>  .../x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp |  46 +++
>  .../x86_64/abi/bf16/m256bf16/args.h           | 152 +++++++++
>  .../x86_64/abi/bf16/m256bf16/asm-support.S    |  84 +++++
>  .../x86_64/abi/bf16/m256bf16/bf16-ymm-check.h |  24 ++
>  .../abi/bf16/m256bf16/test_m256_returning.c   |  38 +++
>  .../abi/bf16/m256bf16/test_passing_m256.c     | 235 +++++++++++++
>  .../abi/bf16/m256bf16/test_passing_structs.c  |  69 ++++
>  .../abi/bf16/m256bf16/test_passing_unions.c   | 179 ++++++++++
>  .../abi/bf16/m256bf16/test_varargs-m256.c     | 107 ++++++
>  .../x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp |  46 +++
>  .../x86_64/abi/bf16/m512bf16/args.h           | 155 +++++++++
>  .../x86_64/abi/bf16/m512bf16/asm-support.S    | 100 ++++++
>  .../x86_64/abi/bf16/m512bf16/bf16-zmm-check.h |  23 ++
>  .../abi/bf16/m512bf16/test_m512_returning.c   |  44 +++
>  .../abi/bf16/m512bf16/test_passing_m512.c     | 243 ++++++++++++++
>  .../abi/bf16/m512bf16/test_passing_structs.c  |  77 +++++
>  .../abi/bf16/m512bf16/test_passing_unions.c   | 222 +++++++++++++
>  .../abi/bf16/m512bf16/test_varargs-m512.c     | 111 +++++++
>  .../gcc.target/x86_64/abi/bf16/macros.h       |  53 +++
>  .../bf16/test_3_element_struct_and_unions.c   | 214 ++++++++++++
>  .../x86_64/abi/bf16/test_basic_alignment.c    |  14 +
>  .../bf16/test_basic_array_size_and_align.c    |  13 +
>  .../x86_64/abi/bf16/test_basic_returning.c    |  20 ++
>  .../x86_64/abi/bf16/test_basic_sizes.c        |  14 +
>  .../bf16/test_basic_struct_size_and_align.c   |  14 +
>  .../bf16/test_basic_union_size_and_align.c    |  12 +
>  .../x86_64/abi/bf16/test_m128_returning.c     |  38 +++
>  .../x86_64/abi/bf16/test_passing_floats.c     | 312 ++++++++++++++++++
>  .../x86_64/abi/bf16/test_passing_m128.c       | 238 +++++++++++++
>  .../x86_64/abi/bf16/test_passing_structs.c    |  67 ++++
>  .../x86_64/abi/bf16/test_passing_unions.c     | 160 +++++++++
>  .../x86_64/abi/bf16/test_struct_returning.c   | 176 ++++++++++
>  .../x86_64/abi/bf16/test_varargs-m128.c       | 111 +++++++
>  39 files changed, 3933 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c
>  create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c
>
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
> new file mode 100644
> index 00000000000..bd386f2a560
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
> @@ -0,0 +1,46 @@
> +# Copyright (C) 2022 Free Software Foundation, Inc.
> +
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with GCC; see the file COPYING3.  If not see
> +# <http://www.gnu.org/licenses/>.
> +
> +# The x86-64 ABI testsuite needs one additional assembler file for most
> +# testcases.  For simplicity we will just link it into each test.
> +
> +load_lib c-torture.exp
> +load_lib target-supports.exp
> +load_lib torture-options.exp
> +load_lib clearcap.exp
> +
> +if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
> +     || ![is-effective-target lp64]
> +     || ![is-effective-target sse2] } then {
> +  return
> +}
> +
> +
> +torture-init
> +clearcap-init
> +set-torture-options $C_TORTURE_OPTIONS
> +set additional_flags "-W -Wall -msse2"
> +
> +foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
> +    if {[runtest_file_p $runtests $src]} {
> +        c-torture-execute [list $src \
> +                                $srcdir/$subdir/asm-support.S] \
> +                                $additional_flags
> +    }
> +}
> +
> +clearcap-finish
> +torture-finish
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
> new file mode 100644
> index 00000000000..11d7e2b3a1c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
> @@ -0,0 +1,164 @@
> +#ifndef INCLUDED_ARGS_H
> +#define INCLUDED_ARGS_H
> +
> +#include <string.h>
> +
> +/* This defines the calling sequences for integers and floats.  */
> +#define I0 rdi
> +#define I1 rsi
> +#define I2 rdx
> +#define I3 rcx
> +#define I4 r8
> +#define I5 r9
> +#define F0 xmm0
> +#define F1 xmm1
> +#define F2 xmm2
> +#define F3 xmm3
> +#define F4 xmm4
> +#define F5 xmm5
> +#define F6 xmm6
> +#define F7 xmm7
> +
> +typedef union {
> +  __bf16 ___bf16[8];
> +  float _float[4];
> +  double _double[2];
> +  long long _longlong[2];
> +  int _int[4];
> +  ulonglong _ulonglong[2];
> +#ifdef CHECK_M64_M128
> +  __m64 _m64[2];
> +  __m128 _m128[1];
> +  __m128bf16 _m128bf16[1];
> +#endif
> +} XMM_T;
> +
> +typedef union {
> +  __bf16 ___bf16;
> +  float _float;
> +  double _double;
> +  ldouble _ldouble;
> +  ulonglong _ulonglong[2];
> +} X87_T;
> +extern void (*callthis)(void);
> +extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> +XMM_T xmm_regs[16];
> +X87_T x87_regs[8];
> +extern volatile unsigned long long volatile_var;
> +extern void snapshot (void);
> +extern void snapshot_ret (void);
> +#define WRAP_CALL(N) \
> +  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
> +#define WRAP_RET(N) \
> +  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
> +
> +/* Clear all integer registers.  */
> +#define clear_int_hardware_registers \
> +  asm __volatile__ ("xor %%rax, %%rax\n\t" \
> +                   "xor %%rbx, %%rbx\n\t" \
> +                   "xor %%rcx, %%rcx\n\t" \
> +                   "xor %%rdx, %%rdx\n\t" \
> +                   "xor %%rsi, %%rsi\n\t" \
> +                   "xor %%rdi, %%rdi\n\t" \
> +                   "xor %%r8, %%r8\n\t" \
> +                   "xor %%r9, %%r9\n\t" \
> +                   "xor %%r10, %%r10\n\t" \
> +                   "xor %%r11, %%r11\n\t" \
> +                   "xor %%r12, %%r12\n\t" \
> +                   "xor %%r13, %%r13\n\t" \
> +                   "xor %%r14, %%r14\n\t" \
> +                   "xor %%r15, %%r15\n\t" \
> +                   ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
> +                   "r9", "r10", "r11", "r12", "r13", "r14", "r15");
> +
> +/* This is the list of registers available for passing arguments. Not all of
> +   these are used or even really available.  */
> +struct IntegerRegisters
> +{
> +  unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
> +};
> +struct FloatRegisters
> +{
> +  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
> +  ldouble st0, st1, st2, st3, st4, st5, st6, st7;
> +  XMM_T xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9,
> +        xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
> +};
> +
> +/* Implemented in scalarargs.c  */
> +extern struct IntegerRegisters iregs;
> +extern struct FloatRegisters fregs;
> +extern unsigned int num_iregs, num_fregs;
> +
> +/* Clear register struct.  */
> +#define clear_struct_registers \
> +  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
> +    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
> +  memset (&iregs, 0, sizeof (iregs)); \
> +  memset (&fregs, 0, sizeof (fregs)); \
> +  memset (xmm_regs, 0, sizeof (xmm_regs)); \
> +  memset (x87_regs, 0, sizeof (x87_regs));
> +
> +/* Clear both hardware and register structs for integers.  */
> +#define clear_int_registers \
> +  clear_struct_registers \
> +  clear_int_hardware_registers
> +
> +/* Do the checking.  */
> +#define check_f_arguments(T) do { \
> +  assert (num_fregs <= 0 || check_bf16 (fregs.xmm0._ ## T [0], xmm_regs[0]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 1 || check_bf16 (fregs.xmm1._ ## T [0], xmm_regs[1]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 2 || check_bf16 (fregs.xmm2._ ## T [0], xmm_regs[2]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 3 || check_bf16 (fregs.xmm3._ ## T [0], xmm_regs[3]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 4 || check_bf16 (fregs.xmm4._ ## T [0], xmm_regs[4]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 5 || check_bf16 (fregs.xmm5._ ## T [0], xmm_regs[5]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 6 || check_bf16 (fregs.xmm6._ ## T [0], xmm_regs[6]._ ## T [0]) == 1); \
> +  assert (num_fregs <= 7 || check_bf16 (fregs.xmm7._ ## T [0], xmm_regs[7]._ ## T [0]) == 1); \
> +  } while (0)
> +
> +#define check_bf16_arguments check_f_arguments(__bf16)
> +
> +#define check_vector_arguments(T,O) do { \
> +  assert (num_fregs <= 0 \
> +         || memcmp (((char *) &fregs.xmm0) + (O), \
> +                    &xmm_regs[0], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 1 \
> +         || memcmp (((char *) &fregs.xmm1) + (O), \
> +                    &xmm_regs[1], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 2 \
> +         || memcmp (((char *) &fregs.xmm2) + (O), \
> +                    &xmm_regs[2], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 3 \
> +         || memcmp (((char *) &fregs.xmm3) + (O), \
> +                    &xmm_regs[3], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 4 \
> +         || memcmp (((char *) &fregs.xmm4) + (O), \
> +                    &xmm_regs[4], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 5 \
> +         || memcmp (((char *) &fregs.xmm5) + (O), \
> +                    &xmm_regs[5], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 6 \
> +         || memcmp (((char *) &fregs.xmm6) + (O), \
> +                    &xmm_regs[6], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 7 \
> +         || memcmp (((char *) &fregs.xmm7) + (O), \
> +                    &xmm_regs[7], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  } while (0)
> +
> +#define check_m128_arguments check_vector_arguments(m128, 0)
> +
> +#define clear_float_registers \
> +  clear_struct_registers
> +
> +#define clear_x87_registers \
> +  clear_struct_registers
> +
> +#endif /* INCLUDED_ARGS_H  */
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
> new file mode 100644
> index 00000000000..a8165d86317
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
> @@ -0,0 +1,84 @@
> +       .text
> +       .p2align 4,,15
> +.globl snapshot
> +       .type   snapshot, @function
> +snapshot:
> +.LFB3:
> +       movq    %rax, rax(%rip)
> +       movq    %rbx, rbx(%rip)
> +       movq    %rcx, rcx(%rip)
> +       movq    %rdx, rdx(%rip)
> +       movq    %rdi, rdi(%rip)
> +       movq    %rsi, rsi(%rip)
> +       movq    %rbp, rbp(%rip)
> +       movq    %rsp, rsp(%rip)
> +       movq    %r8, r8(%rip)
> +       movq    %r9, r9(%rip)
> +       movq    %r10, r10(%rip)
> +       movq    %r11, r11(%rip)
> +       movq    %r12, r12(%rip)
> +       movq    %r13, r13(%rip)
> +       movq    %r14, r14(%rip)
> +       movq    %r15, r15(%rip)
> +       vmovdqu %xmm0, xmm_regs+0(%rip)
> +       vmovdqu %xmm1, xmm_regs+16(%rip)
> +       vmovdqu %xmm2, xmm_regs+32(%rip)
> +       vmovdqu %xmm3, xmm_regs+48(%rip)
> +       vmovdqu %xmm4, xmm_regs+64(%rip)
> +       vmovdqu %xmm5, xmm_regs+80(%rip)
> +       vmovdqu %xmm6, xmm_regs+96(%rip)
> +       vmovdqu %xmm7, xmm_regs+112(%rip)
> +       vmovdqu %xmm8, xmm_regs+128(%rip)
> +       vmovdqu %xmm9, xmm_regs+144(%rip)
> +       vmovdqu %xmm10, xmm_regs+160(%rip)
> +       vmovdqu %xmm11, xmm_regs+176(%rip)
> +       vmovdqu %xmm12, xmm_regs+192(%rip)
> +       vmovdqu %xmm13, xmm_regs+208(%rip)
> +       vmovdqu %xmm14, xmm_regs+224(%rip)
> +       vmovdqu %xmm15, xmm_regs+240(%rip)
> +       jmp     *callthis(%rip)
> +.LFE3:
> +       .size   snapshot, .-snapshot
> +
> +       .p2align 4,,15
> +.globl snapshot_ret
> +       .type   snapshot_ret, @function
> +snapshot_ret:
> +       movq    %rdi, rdi(%rip)
> +       subq    $8, %rsp
> +       call    *callthis(%rip)
> +       addq    $8, %rsp
> +       movq    %rax, rax(%rip)
> +       movq    %rdx, rdx(%rip)
> +       vmovdqu %xmm0, xmm_regs+0(%rip)
> +       vmovdqu %xmm1, xmm_regs+16(%rip)
> +       fstpt   x87_regs(%rip)
> +       fstpt   x87_regs+16(%rip)
> +       fldt    x87_regs+16(%rip)
> +       fldt    x87_regs(%rip)
> +       ret
> +       .size   snapshot_ret, .-snapshot_ret
> +
> +       .comm   callthis,8,8
> +       .comm   rax,8,8
> +       .comm   rbx,8,8
> +       .comm   rcx,8,8
> +       .comm   rdx,8,8
> +       .comm   rsi,8,8
> +       .comm   rdi,8,8
> +       .comm   rsp,8,8
> +       .comm   rbp,8,8
> +       .comm   r8,8,8
> +       .comm   r9,8,8
> +       .comm   r10,8,8
> +       .comm   r11,8,8
> +       .comm   r12,8,8
> +       .comm   r13,8,8
> +       .comm   r14,8,8
> +       .comm   r15,8,8
> +       .comm   xmm_regs,256,32
> +       .comm   x87_regs,128,32
> +       .comm   volatile_var,8,8
> +#ifdef __linux__
> +       .section        .note.GNU-stack,"",@progbits
> +#endif
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h
> new file mode 100644
> index 00000000000..25448fc6863
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h
> @@ -0,0 +1,24 @@
> +#include <stdlib.h>
> +#include "bf16-helper.h"
> +
> +static void do_test (void);
> +
> +int
> +main ()
> +{
> +
> +  if (__builtin_cpu_supports ("sse2"))
> +    {
> +      do_test ();
> +#ifdef DEBUG
> +      printf ("PASSED\n");
> +#endif
> +      return 0;
> +    }
> +
> +#ifdef DEBUG
> +  printf ("SKIPPED\n");
> +#endif
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
> new file mode 100644
> index 00000000000..83d89fcf62c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
> @@ -0,0 +1,41 @@
> +typedef union
> +{
> +  float f;
> +  unsigned int u;
> +  __bf16 b[2];
> +} unionf_b;
> +
> +static __bf16 make_f32_bf16 (float f)
> +{
> +  unionf_b tmp;
> +  tmp.f = f;
> +  return tmp.b[1];
> +}
> +
> +static float make_bf16_f32 (__bf16 bf)
> +{
> +  unionf_b tmp;
> +  tmp.u = 0;
> +  tmp.b[1] = bf;
> +  return tmp.f;
> +}
> +
> +static int check_bf16 (__bf16 bf1, __bf16 bf2)
> +{
> +  unionf_b tmp1, tmp2;
> +  tmp1.u = 0;
> +  tmp2.u = 0;
> +  tmp1.b[1] = bf1;
> +  tmp2.b[1] = bf2;
> +  return (tmp1.u == tmp2.u);
> +}
> +
> +static int check_bf16_float (__bf16 bf, float f)
> +{
> +  unionf_b tmp1, tmp2;
> +  tmp1.u = 0;
> +  tmp1.b[0] = bf;
> +  tmp2.f = f;
> +  tmp2.u >>= 16;
> +  return (tmp1.u == tmp2.u);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h
> new file mode 100644
> index 00000000000..a4df0b0528d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h
> @@ -0,0 +1,163 @@
> +#ifndef DEFINED_DEFINES_H
> +#define DEFINED_DEFINES_H
> +
> +/* Get __m64 and __m128. */
> +#include <immintrin.h>
> +
> +typedef unsigned long long ulonglong;
> +typedef long double ldouble;
> +
> +/* These defines determines what part of the test should be run.  When
> +   GCC implements these parts, the defines should be uncommented to
> +   enable testing.  */
> +
> +/* Scalar type __int128.  */
> +/* #define CHECK_INT128 */
> +
> +/* Scalar type long double.  */
> +#define CHECK_LONG_DOUBLE
> +
> +/* Scalar type __float128.  */
> +/* #define CHECK_FLOAT128 */
> +
> +/* Scalar types __m64 and __m128.  */
> +#define CHECK_M64_M128
> +
> +/* Structs with size >= 16.  */
> +#define CHECK_LARGER_STRUCTS
> +
> +/* Checks for passing floats and doubles.  */
> +#define CHECK_FLOAT_DOUBLE_PASSING
> +
> +/* Union passing with not-extremely-simple unions.  */
> +#define CHECK_LARGER_UNION_PASSING
> +
> +/* Variable args.  */
> +#define CHECK_VARARGS
> +
> +/* Check argument passing and returning for scalar types with sizeof = 16.  */
> +/* TODO: Implement these tests. Don't activate them for now.  */
> +#define CHECK_LARGE_SCALAR_PASSING
> +
> +/* Defines for sizing and alignment.  */
> +
> +#define TYPE_SIZE_CHAR         1
> +#define TYPE_SIZE_SHORT        2
> +#define TYPE_SIZE_INT          4
> +#ifdef __ILP32__
> +# define TYPE_SIZE_LONG        4
> +#else
> +# define TYPE_SIZE_LONG        8
> +#endif
> +#define TYPE_SIZE_LONG_LONG    8
> +#define TYPE_SIZE_INT128       16
> +#define TYPE_SIZE_BF16        2
> +#define TYPE_SIZE_FLOAT        4
> +#define TYPE_SIZE_DOUBLE       8
> +#define TYPE_SIZE_LONG_DOUBLE  16
> +#define TYPE_SIZE_FLOAT128     16
> +#define TYPE_SIZE_M64          8
> +#define TYPE_SIZE_M128         16
> +#define TYPE_SIZE_ENUM         4
> +#ifdef __ILP32__
> +# define TYPE_SIZE_POINTER     4
> +#else
> +# define TYPE_SIZE_POINTER     8
> +#endif
> +
> +#define TYPE_ALIGN_CHAR        1
> +#define TYPE_ALIGN_SHORT       2
> +#define TYPE_ALIGN_INT         4
> +#ifdef __ILP32__
> +# define TYPE_ALIGN_LONG       4
> +#else
> +# define TYPE_ALIGN_LONG       8
> +#endif
> +#define TYPE_ALIGN_LONG_LONG   8
> +#define TYPE_ALIGN_INT128      16
> +#define TYPE_ALIGN_BF16               2
> +#define TYPE_ALIGN_FLOAT       4
> +#define TYPE_ALIGN_DOUBLE      8
> +#define TYPE_ALIGN_LONG_DOUBLE 16
> +#define TYPE_ALIGN_FLOAT128    16
> +#define TYPE_ALIGN_M64         8
> +#define TYPE_ALIGN_M128        16
> +#define TYPE_ALIGN_ENUM        4
> +#ifdef __ILP32__
> +# define TYPE_ALIGN_POINTER    4
> +#else
> +# define TYPE_ALIGN_POINTER    8
> +#endif
> +
> +/* These defines control the building of the list of types to check. There
> +   is a string identifying the type (with a comma after), a size of the type
> +   (also with a comma and an integer for adding to the total amount of types)
> +   and an alignment of the type (which is currently not really needed since
> +   the abi specifies that alignof == sizeof for all scalar types).  */
> +#ifdef CHECK_INT128
> +#define CI128_STR "__int128",
> +#define CI128_SIZ TYPE_SIZE_INT128,
> +#define CI128_ALI TYPE_ALIGN_INT128,
> +#define CI128_RET "???",
> +#else
> +#define CI128_STR
> +#define CI128_SIZ
> +#define CI128_ALI
> +#define CI128_RET
> +#endif
> +#ifdef CHECK_LONG_DOUBLE
> +#define CLD_STR "long double",
> +#define CLD_SIZ TYPE_SIZE_LONG_DOUBLE,
> +#define CLD_ALI TYPE_ALIGN_LONG_DOUBLE,
> +#define CLD_RET "x87_regs[0]._ldouble",
> +#else
> +#define CLD_STR
> +#define CLD_SIZ
> +#define CLD_ALI
> +#define CLD_RET
> +#endif
> +#ifdef CHECK_FLOAT128
> +#define CF128_STR "__float128",
> +#define CF128_SIZ TYPE_SIZE_FLOAT128,
> +#define CF128_ALI TYPE_ALIGN_FLOAT128,
> +#define CF128_RET "???",
> +#else
> +#define CF128_STR
> +#define CF128_SIZ
> +#define CF128_ALI
> +#define CF128_RET
> +#endif
> +#ifdef CHECK_M64_M128
> +#define CMM_STR "__m64", "__m128",
> +#define CMM_SIZ TYPE_SIZE_M64, TYPE_SIZE_M128,
> +#define CMM_ALI TYPE_ALIGN_M64, TYPE_ALIGN_M128,
> +#define CMM_RET "???", "???",
> +#else
> +#define CMM_STR
> +#define CMM_SIZ
> +#define CMM_ALI
> +#define CMM_RET
> +#endif
> +
> +/* Used in size and alignment tests.  */
> +enum dummytype { enumtype };
> +
> +extern void abort (void);
> +
> +/* Assertion macro.  */
> +#define assert(test) if (!(test)) abort()
> +
> +#ifdef __GNUC__
> +#define ATTRIBUTE_UNUSED __attribute__((__unused__))
> +#else
> +#define ATTRIBUTE_UNUSED
> +#endif
> +
> +#ifdef __GNUC__
> +#define PACKED __attribute__((__packed__))
> +#else
> +#warning Some tests will fail due to missing __packed__ support
> +#define PACKED
> +#endif
> +
> +#endif /* DEFINED_DEFINES_H */
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
> new file mode 100644
> index 00000000000..309db8ff12e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
> @@ -0,0 +1,46 @@
> +# Copyright (C) 2022 Free Software Foundation, Inc.
> +
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with GCC; see the file COPYING3.  If not see
> +# <http://www.gnu.org/licenses/>.
> +
> +# The x86-64 ABI testsuite needs one additional assembler file for most
> +# testcases.  For simplicity we will just link it into each test.
> +
> +load_lib c-torture.exp
> +load_lib target-supports.exp
> +load_lib torture-options.exp
> +load_lib clearcap.exp
> +
> +if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
> +     || ![is-effective-target lp64]
> +     || ![is-effective-target avx2] } then {
> +  return
> +}
> +
> +
> +torture-init
> +clearcap-init
> +set-torture-options $C_TORTURE_OPTIONS
> +set additional_flags "-W -Wall -mavx2"
> +
> +foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
> +    if {[runtest_file_p $runtests $src]} {
> +        c-torture-execute [list $src \
> +                                $srcdir/$subdir/asm-support.S] \
> +                                $additional_flags
> +    }
> +}
> +
> +clearcap-finish
> +torture-finish
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
> new file mode 100644
> index 00000000000..94627ffbd44
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
> @@ -0,0 +1,152 @@
> +#ifndef INCLUDED_ARGS_H
> +#define INCLUDED_ARGS_H
> +
> +#include <immintrin.h>
> +#include <string.h>
> +
> +/* Assertion macro.  */
> +#define assert(test) if (!(test)) abort()
> +
> +#ifdef __GNUC__
> +#define ATTRIBUTE_UNUSED __attribute__((__unused__))
> +#else
> +#define ATTRIBUTE_UNUSED
> +#endif
> +
> +/* This defines the calling sequences for integers and floats.  */
> +#define I0 rdi
> +#define I1 rsi
> +#define I2 rdx
> +#define I3 rcx
> +#define I4 r8
> +#define I5 r9
> +#define F0 ymm0
> +#define F1 ymm1
> +#define F2 ymm2
> +#define F3 ymm3
> +#define F4 ymm4
> +#define F5 ymm5
> +#define F6 ymm6
> +#define F7 ymm7
> +
> +typedef union {
> +  __bf16 ___bf16[16];
> +  float _float[8];
> +  double _double[4];
> +  long long _longlong[4];
> +  int _int[8];
> +  unsigned long long _ulonglong[4];
> +  __m64 _m64[4];
> +  __m128 _m128[2];
> +  __m256 _m256[1];
> +  __m256bf16 _m256bf16[1];
> +} YMM_T;
> +
> +typedef union {
> +  float _float;
> +  double _double;
> +  long double _ldouble;
> +  unsigned long long _ulonglong[2];
> +} X87_T;
> +extern void (*callthis)(void);
> +extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> +YMM_T ymm_regs[16];
> +X87_T x87_regs[8];
> +extern volatile unsigned long long volatile_var;
> +extern void snapshot (void);
> +extern void snapshot_ret (void);
> +#define WRAP_CALL(N) \
> +  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
> +#define WRAP_RET(N) \
> +  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
> +
> +/* Clear all integer registers.  */
> +#define clear_int_hardware_registers \
> +  asm __volatile__ ("xor %%rax, %%rax\n\t" \
> +                   "xor %%rbx, %%rbx\n\t" \
> +                   "xor %%rcx, %%rcx\n\t" \
> +                   "xor %%rdx, %%rdx\n\t" \
> +                   "xor %%rsi, %%rsi\n\t" \
> +                   "xor %%rdi, %%rdi\n\t" \
> +                   "xor %%r8, %%r8\n\t" \
> +                   "xor %%r9, %%r9\n\t" \
> +                   "xor %%r10, %%r10\n\t" \
> +                   "xor %%r11, %%r11\n\t" \
> +                   "xor %%r12, %%r12\n\t" \
> +                   "xor %%r13, %%r13\n\t" \
> +                   "xor %%r14, %%r14\n\t" \
> +                   "xor %%r15, %%r15\n\t" \
> +                   ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
> +                   "r9", "r10", "r11", "r12", "r13", "r14", "r15");
> +
> +/* This is the list of registers available for passing arguments. Not all of
> +   these are used or even really available.  */
> +struct IntegerRegisters
> +{
> +  unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
> +};
> +struct FloatRegisters
> +{
> +  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
> +  long double st0, st1, st2, st3, st4, st5, st6, st7;
> +  YMM_T ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9,
> +        ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
> +};
> +
> +/* Implemented in scalarargs.c  */
> +extern struct IntegerRegisters iregs;
> +extern struct FloatRegisters fregs;
> +extern unsigned int num_iregs, num_fregs;
> +
> +/* Clear register struct.  */
> +#define clear_struct_registers \
> +  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
> +    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
> +  memset (&iregs, 0, sizeof (iregs)); \
> +  memset (&fregs, 0, sizeof (fregs)); \
> +  memset (ymm_regs, 0, sizeof (ymm_regs)); \
> +  memset (x87_regs, 0, sizeof (x87_regs));
> +
> +/* Clear both hardware and register structs for integers.  */
> +#define clear_int_registers \
> +  clear_struct_registers \
> +  clear_int_hardware_registers
> +
> +#define check_vector_arguments(T,O) do { \
> +  assert (num_fregs <= 0 \
> +         || memcmp (((char *) &fregs.ymm0) + (O), \
> +                    &ymm_regs[0], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 1 \
> +         || memcmp (((char *) &fregs.ymm1) + (O), \
> +                    &ymm_regs[1], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 2 \
> +         || memcmp (((char *) &fregs.ymm2) + (O), \
> +                    &ymm_regs[2], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 3 \
> +         || memcmp (((char *) &fregs.ymm3) + (O), \
> +                    &ymm_regs[3], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 4 \
> +         || memcmp (((char *) &fregs.ymm4) + (O), \
> +                    &ymm_regs[4], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 5 \
> +         || memcmp (((char *) &fregs.ymm5) + (O), \
> +                    &ymm_regs[5], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 6 \
> +         || memcmp (((char *) &fregs.ymm6) + (O), \
> +                    &ymm_regs[6], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 7 \
> +         || memcmp (((char *) &fregs.ymm7) + (O), \
> +                    &ymm_regs[7], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  } while (0)
> +
> +#define check_m256_arguments check_vector_arguments(m256, 0)
> +
> +#endif /* INCLUDED_ARGS_H  */
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
> new file mode 100644
> index 00000000000..24c8b3c9023
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
> @@ -0,0 +1,84 @@
> +       .text
> +       .p2align 4,,15
> +.globl snapshot
> +       .type   snapshot, @function
> +snapshot:
> +.LFB3:
> +       movq    %rax, rax(%rip)
> +       movq    %rbx, rbx(%rip)
> +       movq    %rcx, rcx(%rip)
> +       movq    %rdx, rdx(%rip)
> +       movq    %rdi, rdi(%rip)
> +       movq    %rsi, rsi(%rip)
> +       movq    %rbp, rbp(%rip)
> +       movq    %rsp, rsp(%rip)
> +       movq    %r8, r8(%rip)
> +       movq    %r9, r9(%rip)
> +       movq    %r10, r10(%rip)
> +       movq    %r11, r11(%rip)
> +       movq    %r12, r12(%rip)
> +       movq    %r13, r13(%rip)
> +       movq    %r14, r14(%rip)
> +       movq    %r15, r15(%rip)
> +       vmovdqu %ymm0, ymm_regs+0(%rip)
> +       vmovdqu %ymm1, ymm_regs+32(%rip)
> +       vmovdqu %ymm2, ymm_regs+64(%rip)
> +       vmovdqu %ymm3, ymm_regs+96(%rip)
> +       vmovdqu %ymm4, ymm_regs+128(%rip)
> +       vmovdqu %ymm5, ymm_regs+160(%rip)
> +       vmovdqu %ymm6, ymm_regs+192(%rip)
> +       vmovdqu %ymm7, ymm_regs+224(%rip)
> +       vmovdqu %ymm8, ymm_regs+256(%rip)
> +       vmovdqu %ymm9, ymm_regs+288(%rip)
> +       vmovdqu %ymm10, ymm_regs+320(%rip)
> +       vmovdqu %ymm11, ymm_regs+352(%rip)
> +       vmovdqu %ymm12, ymm_regs+384(%rip)
> +       vmovdqu %ymm13, ymm_regs+416(%rip)
> +       vmovdqu %ymm14, ymm_regs+448(%rip)
> +       vmovdqu %ymm15, ymm_regs+480(%rip)
> +       jmp     *callthis(%rip)
> +.LFE3:
> +       .size   snapshot, .-snapshot
> +
> +       .p2align 4,,15
> +.globl snapshot_ret
> +       .type   snapshot_ret, @function
> +snapshot_ret:
> +       movq    %rdi, rdi(%rip)
> +       subq    $8, %rsp
> +       call    *callthis(%rip)
> +       addq    $8, %rsp
> +       movq    %rax, rax(%rip)
> +       movq    %rdx, rdx(%rip)
> +       vmovdqu %ymm0, ymm_regs+0(%rip)
> +       vmovdqu %ymm1, ymm_regs+32(%rip)
> +       fstpt   x87_regs(%rip)
> +       fstpt   x87_regs+16(%rip)
> +       fldt    x87_regs+16(%rip)
> +       fldt    x87_regs(%rip)
> +       ret
> +       .size   snapshot_ret, .-snapshot_ret
> +
> +       .comm   callthis,8,8
> +       .comm   rax,8,8
> +       .comm   rbx,8,8
> +       .comm   rcx,8,8
> +       .comm   rdx,8,8
> +       .comm   rsi,8,8
> +       .comm   rdi,8,8
> +       .comm   rsp,8,8
> +       .comm   rbp,8,8
> +       .comm   r8,8,8
> +       .comm   r9,8,8
> +       .comm   r10,8,8
> +       .comm   r11,8,8
> +       .comm   r12,8,8
> +       .comm   r13,8,8
> +       .comm   r14,8,8
> +       .comm   r15,8,8
> +       .comm   ymm_regs,512,32
> +       .comm   x87_regs,128,32
> +       .comm   volatile_var,8,8
> +#ifdef __linux__
> +       .section        .note.GNU-stack,"",@progbits
> +#endif
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h
> new file mode 100644
> index 00000000000..479ebc3ec3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h
> @@ -0,0 +1,24 @@
> +#include <stdlib.h>
> +#include "../bf16-helper.h"
> +
> +static void do_test (void);
> +
> +int
> +main ()
> +{
> +
> +  if (__builtin_cpu_supports ("avx2"))
> +    {
> +      do_test ();
> +#ifdef DEBUG
> +      printf ("PASSED\n");
> +#endif
> +      return 0;
> +    }
> +
> +#ifdef DEBUG
> +  printf ("SKIPPED\n");
> +#endif
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c
> new file mode 100644
> index 00000000000..ea7512850ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c
> @@ -0,0 +1,38 @@
> +#include <stdio.h>
> +#include "bf16-ymm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
> +
> +__m256bf16
> +fun_test_returning___m256bf16 (void)
> +{
> +  volatile_var++;
> +  return (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                       bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
> +}
> +
> +__m256bf16 test_256bf16;
> +
> +static void
> +do_test (void)
> +{
> +  unsigned failed = 0;
> +  YMM_T ymmt1, ymmt2;
> +
> +  clear_struct_registers;
> +  test_256bf16 = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
> +  ymmt1._m256bf16[0] = test_256bf16;
> +  ymmt2._m256bf16[0] = WRAP_RET (fun_test_returning___m256bf16) ();
> +  if (memcmp (&ymmt1, &ymmt2, sizeof (ymmt2)) != 0)
> +    printf ("fail m256bf16\n"), failed++;
> +
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c
> new file mode 100644
> index 00000000000..3fb2d7d20f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c
> @@ -0,0 +1,235 @@
> +#include <stdio.h>
> +#include "bf16-ymm-check.h"
> +#include "args.h"
> +
> +struct IntegerRegisters iregs;
> +struct FloatRegisters fregs;
> +unsigned int num_iregs, num_fregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  YMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
> +    i16, i17, i18, i19, i20, i21, i22, i23;
> +} values;
> +
> +char *pass;
> +int failed = 0;
> +
> +#undef assert
> +#define assert(c) do { \
> +  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
> +} while (0)
> +
> +#define compare(X1,X2,T) do { \
> +  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
> +} while (0)
> +
> +fun_check_passing_m256bf16_8_values (__m256bf16 i0 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i1 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i2 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i3 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i4 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i5 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i6 ATTRIBUTE_UNUSED,
> +                                    __m256bf16 i7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  compare (values.i0, i0, __m256bf16);
> +  compare (values.i1, i1, __m256bf16);
> +  compare (values.i2, i2, __m256bf16);
> +  compare (values.i3, i3, __m256bf16);
> +  compare (values.i4, i4, __m256bf16);
> +  compare (values.i5, i5, __m256bf16);
> +  compare (values.i6, i6, __m256bf16);
> +  compare (values.i7, i7, __m256bf16);
> +}
> +
> +void
> +fun_check_passing_m256bf16_8_regs (__m256bf16 i0 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i1 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i2 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i3 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i4 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i5 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i6 ATTRIBUTE_UNUSED,
> +                                  __m256bf16 i7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m256_arguments;
> +}
> +
> +void
> +fun_check_passing_m256bf16_20_values (__m256bf16 i0 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i1 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i2 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i3 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i4 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i5 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i6 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i7 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i8 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i9 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i10 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i11 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i12 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i13 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i14 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i15 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i16 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i17 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i18 ATTRIBUTE_UNUSED,
> +                                     __m256bf16 i19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  compare (values.i0, i0, __m256bf16);
> +  compare (values.i1, i1, __m256bf16);
> +  compare (values.i2, i2, __m256bf16);
> +  compare (values.i3, i3, __m256bf16);
> +  compare (values.i4, i4, __m256bf16);
> +  compare (values.i5, i5, __m256bf16);
> +  compare (values.i6, i6, __m256bf16);
> +  compare (values.i7, i7, __m256bf16);
> +  compare (values.i8, i8, __m256bf16);
> +  compare (values.i9, i9, __m256bf16);
> +  compare (values.i10, i10, __m256bf16);
> +  compare (values.i11, i11, __m256bf16);
> +  compare (values.i12, i12, __m256bf16);
> +  compare (values.i13, i13, __m256bf16);
> +  compare (values.i14, i14, __m256bf16);
> +  compare (values.i15, i15, __m256bf16);
> +  compare (values.i16, i16, __m256bf16);
> +  compare (values.i17, i17, __m256bf16);
> +  compare (values.i18, i18, __m256bf16);
> +  compare (values.i19, i19, __m256bf16);
> +}
> +
> +void
> +fun_check_passing_m256bf16_20_regs (__m256bf16 i0 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i1 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i2 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i3 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i4 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i5 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i6 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i7 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i8 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i9 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i10 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i11 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i12 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i13 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i14 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i15 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i16 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i17 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i18 ATTRIBUTE_UNUSED,
> +                                   __m256bf16 i19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m256_arguments;
> +}
> +
> +#define def_check_passing8(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _func1, _func2, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
> +  clear_struct_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
> +
> +#define def_check_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, \
> +                           _i8, _i9, _i10, _i11, _i12, _i13, _i14, \
> +                           _i15, _i16, _i17, _i18, _i19, _func1, \
> +                           _func2, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  values.i8.TYPE[0] = _i8; \
> +  values.i9.TYPE[0] = _i9; \
> +  values.i10.TYPE[0] = _i10; \
> +  values.i11.TYPE[0] = _i11; \
> +  values.i12.TYPE[0] = _i12; \
> +  values.i13.TYPE[0] = _i13; \
> +  values.i14.TYPE[0] = _i14; \
> +  values.i15.TYPE[0] = _i15; \
> +  values.i16.TYPE[0] = _i16; \
> +  values.i17.TYPE[0] = _i17; \
> +  values.i18.TYPE[0] = _i18; \
> +  values.i19.TYPE[0] = _i19; \
> +  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
> +                    _i9, _i10, _i11, _i12, _i13, _i14, _i15, \
> +                    _i16, _i17, _i18, _i19); \
> +  clear_struct_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
> +                    _i9, _i10, _i11, _i12, _i13, _i14, _i15, \
> +                    _i16, _i17, _i18, _i19);
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
> +
> +void
> +test_m256bf16_on_stack ()
> +{
> +  __m256bf16 x[8];
> +  int i;
> +  for (i = 0; i < 8; i++)
> +    x[i] = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
> +  pass = "m256bf16-8";
> +  def_check_passing8 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
> +                     fun_check_passing_m256bf16_8_values,
> +                     fun_check_passing_m256bf16_8_regs, _m256bf16);
> +}
> +
> +void
> +test_too_many_m256bf16 ()
> +{
> +  __m256bf16 x[20];
> +  int i;
> +  for (i = 0; i < 20; i++)
> +    x[i] = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
> +  pass = "m256bf16-20";
> +  def_check_passing20 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8],
> +                      x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16],
> +                      x[17], x[18], x[19], fun_check_passing_m256bf16_20_values,
> +                      fun_check_passing_m256bf16_20_regs, _m256bf16);
> +}
> +
> +static void
> +do_test (void)
> +{
> +  test_m256bf16_on_stack ();
> +  test_too_many_m256bf16 ();
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c
> new file mode 100644
> index 00000000000..e06350ed493
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c
> @@ -0,0 +1,69 @@
> +#include "bf16-ymm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +struct m256bf16_struct
> +{
> +  __m256bf16 x;
> +};
> +
> +struct m256bf16_2_struct
> +{
> +  __m256bf16 x1, x2;
> +};
> +
> +/* Check that the struct is passed as the individual members in fregs.  */
> +void
> +check_struct_passing1bf16 (struct m256bf16_struct ms1 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms2 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms3 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms4 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms5 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms6 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms7 ATTRIBUTE_UNUSED,
> +                          struct m256bf16_struct ms8 ATTRIBUTE_UNUSED)
> +{
> +  check_m256_arguments;
> +}
> +
> +void
> +check_struct_passing2bf16 (struct m256bf16_2_struct ms ATTRIBUTE_UNUSED)
> +{
> +  /* Check the passing on the stack by comparing the address of the
> +     stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&ms.x1 == rsp+8);
> +  assert ((unsigned long)&ms.x2 == rsp+40);
> +}
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
> +
> +static void
> +do_test (void)
> +{
> +  struct m256bf16_struct m256bf16s [8];
> +  struct m256bf16_2_struct m256bf16_2s = {
> +    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16},
> +    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16},
> +  };
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      m256bf16s[i].x = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                                     bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
> +    }
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    (&fregs.ymm0)[i]._m256bf16[0] = m256bf16s[i].x;
> +  num_fregs = 8;
> +  WRAP_CALL (check_struct_passing1bf16) (m256bf16s[0], m256bf16s[1], m256bf16s[2], m256bf16s[3],
> +                                        m256bf16s[4], m256bf16s[5], m256bf16s[6], m256bf16s[7]);
> +  WRAP_CALL (check_struct_passing2bf16) (m256bf16_2s);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c
> new file mode 100644
> index 00000000000..6d663b88b1a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c
> @@ -0,0 +1,179 @@
> +#include "bf16-ymm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +union un1b
> +{
> +  __m256bf16 x;
> +  float f;
> +};
> +
> +union un1bb
> +{
> +  __m256bf16 x;
> +  __bf16 f;
> +};
> +
> +union un2b
> +{
> +  __m256bf16 x;
> +  double d;
> +};
> +
> +union un3b
> +{
> +  __m256bf16 x;
> +  __m128 v;
> +};
> +
> +union un4b
> +{
> +  __m256bf16 x;
> +  long double ld;
> +};
> +
> +union un5b
> +{
> +  __m256bf16 x;
> +  int i;
> +};
> +
> +void
> +check_union_passing1b (union un1b u1 ATTRIBUTE_UNUSED,
> +                      union un1b u2 ATTRIBUTE_UNUSED,
> +                      union un1b u3 ATTRIBUTE_UNUSED,
> +                      union un1b u4 ATTRIBUTE_UNUSED,
> +                      union un1b u5 ATTRIBUTE_UNUSED,
> +                      union un1b u6 ATTRIBUTE_UNUSED,
> +                      union un1b u7 ATTRIBUTE_UNUSED,
> +                      union un1b u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m256_arguments;
> +}
> +
> +void
> +check_union_passing1bb (union un1bb u1 ATTRIBUTE_UNUSED,
> +                       union un1bb u2 ATTRIBUTE_UNUSED,
> +                       union un1bb u3 ATTRIBUTE_UNUSED,
> +                       union un1bb u4 ATTRIBUTE_UNUSED,
> +                       union un1bb u5 ATTRIBUTE_UNUSED,
> +                       union un1bb u6 ATTRIBUTE_UNUSED,
> +                       union un1bb u7 ATTRIBUTE_UNUSED,
> +                       union un1bb u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m256_arguments;
> +}
> +
> +void
> +check_union_passing2b (union un2b u1 ATTRIBUTE_UNUSED,
> +                      union un2b u2 ATTRIBUTE_UNUSED,
> +                      union un2b u3 ATTRIBUTE_UNUSED,
> +                      union un2b u4 ATTRIBUTE_UNUSED,
> +                      union un2b u5 ATTRIBUTE_UNUSED,
> +                      union un2b u6 ATTRIBUTE_UNUSED,
> +                      union un2b u7 ATTRIBUTE_UNUSED,
> +                      union un2b u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m256_arguments;
> +}
> +
> +void
> +check_union_passing3b (union un3b u1 ATTRIBUTE_UNUSED,
> +                      union un3b u2 ATTRIBUTE_UNUSED,
> +                      union un3b u3 ATTRIBUTE_UNUSED,
> +                      union un3b u4 ATTRIBUTE_UNUSED,
> +                      union un3b u5 ATTRIBUTE_UNUSED,
> +                      union un3b u6 ATTRIBUTE_UNUSED,
> +                      union un3b u7 ATTRIBUTE_UNUSED,
> +                      union un3b u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m256_arguments;
> +}
> +
> +void
> +check_union_passing4b (union un4b u ATTRIBUTE_UNUSED)
> +{
> +   /* Check the passing on the stack by comparing the address of the
> +      stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&u.x == rsp+8);
> +  assert ((unsigned long)&u.ld == rsp+8);
> +}
> +
> +void
> +check_union_passing5b (union un5b u ATTRIBUTE_UNUSED)
> +{
> +   /* Check the passing on the stack by comparing the address of the
> +      stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&u.x == rsp+8);
> +  assert ((unsigned long)&u.i == rsp+8);
> +}
> +
> +#define check_union_passing1b WRAP_CALL(check_union_passing1b)
> +#define check_union_passing1bb WRAP_CALL(check_union_passing1bb)
> +#define check_union_passing2b WRAP_CALL(check_union_passing2b)
> +#define check_union_passing3b WRAP_CALL(check_union_passing3b)
> +#define check_union_passing4b WRAP_CALL(check_union_passing4b)
> +#define check_union_passing5b WRAP_CALL(check_union_passing5b)
> +
> +static void
> +do_test (void)
> +{
> +  union un1b u1b[8];
> +  union un1bb u1bb[8];
> +  union un2b u2b[8];
> +  union un3b u3b[8];
> +  union un4b u4b;
> +  union un5b u5b;
> +  int i;
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +        bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      u1b[i].x = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16 };
> +    }
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    (&fregs.ymm0)[i]._m256bf16[0] = u1b[i].x;
> +  num_fregs = 8;
> +  check_union_passing1b (u1b[0], u1b[1], u1b[2], u1b[3],
> +                        u1b[4], u1b[5], u1b[6], u1b[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u1bb[i].x = u1b[i].x;
> +      (&fregs.ymm0)[i]._m256bf16[0] = u1bb[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing1bb (u1bb[0], u1bb[1], u1bb[2], u1bb[3],
> +                         u1bb[4], u1bb[5], u1bb[6], u1bb[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u2b[i].x = u1b[i].x;
> +      (&fregs.ymm0)[i]._m256bf16[0] = u2b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing2b (u2b[0], u2b[1], u2b[2], u2b[3],
> +                        u2b[4], u2b[5], u2b[6], u2b[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u3b[i].x = u1b[i].x;
> +      (&fregs.ymm0)[i]._m256bf16[0] = u3b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing3b (u3b[0], u3b[1], u3b[2], u3b[3],
> +                        u3b[4], u3b[5], u3b[6], u3b[7]);
> +
> +  check_union_passing4b (u4b);
> +  check_union_passing5b (u5b);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c
> new file mode 100644
> index 00000000000..b69e095d808
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c
> @@ -0,0 +1,107 @@
> +/* Test variable number of 256-bit vector arguments passed to functions.  */
> +
> +#include <stdio.h>
> +#include "bf16-ymm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  YMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9;
> +} values;
> +
> +char *pass;
> +int failed = 0;
> +
> +#undef assert
> +#define assert(c) do { \
> +  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
> +} while (0)
> +
> +#define compare(X1,X2,T) do { \
> +  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
> +} while (0)
> +
> +void
> +fun_check_passing_m256bf16_varargs (__m256bf16 i0, __m256bf16 i1, __m256bf16 i2,
> +                                __m256bf16 i3, ...)
> +{
> +  /* Check argument values.  */
> +  void **fp = __builtin_frame_address (0);
> +  void *ra = __builtin_return_address (0);
> +  __m256bf16 *argp;
> +
> +  compare (values.i0, i0, __m256bf16);
> +  compare (values.i1, i1, __m256bf16);
> +  compare (values.i2, i2, __m256bf16);
> +  compare (values.i3, i3, __m256bf16);
> +
> +  /* Get the pointer to the return address on stack.  */
> +  while (*fp != ra)
> +    fp++;
> +
> +  /* Skip the return address stack slot.  */
> +  argp = (__m256bf16 *)(((char *) fp) + 8);
> +
> +  /* Check __m256bf16 arguments passed on stack.  */
> +  compare (values.i4, argp[0], __m256bf16);
> +  compare (values.i5, argp[1], __m256bf16);
> +  compare (values.i6, argp[2], __m256bf16);
> +  compare (values.i7, argp[3], __m256bf16);
> +  compare (values.i8, argp[4], __m256bf16);
> +  compare (values.i9, argp[5], __m256bf16);
> +
> +  /* Check register contents.  */
> +  compare (fregs.ymm0, ymm_regs[0], __m256bf16);
> +  compare (fregs.ymm1, ymm_regs[1], __m256bf16);
> +  compare (fregs.ymm2, ymm_regs[2], __m256bf16);
> +  compare (fregs.ymm3, ymm_regs[3], __m256bf16);
> +}
> +
> +#define def_check_int_passing_varargs(_i0, _i1, _i2, _i3, _i4, _i5, \
> +                                     _i6, _i7, _i8, _i9, \
> +                                     _func, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  values.i8.TYPE[0] = _i8; \
> +  values.i9.TYPE[0] = _i9; \
> +  clear_struct_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  WRAP_CALL(_func) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9);
> +
> +void
> +test_m256bf16_varargs (void)
> +{
> +  __m256bf16 x[10];
> +  int i;
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
> +  for (i = 0; i < 10; i++)
> +    x[i] = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16 };
> +  pass = "m256bf16-varargs";
> +  def_check_int_passing_varargs (x[0], x[1], x[2], x[3], x[4], x[5],
> +                                x[6], x[7], x[8], x[9],
> +                                fun_check_passing_m256bf16_varargs,
> +                                _m256bf16);
> +}
> +
> +void
> +do_test (void)
> +{
> +  test_m256bf16_varargs ();
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
> new file mode 100644
> index 00000000000..b6e0fed4cb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
> @@ -0,0 +1,46 @@
> +# Copyright (C) 2022 Free Software Foundation, Inc.
> +
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with GCC; see the file COPYING3.  If not see
> +# <http://www.gnu.org/licenses/>.
> +
> +# The x86-64 ABI testsuite needs one additional assembler file for most
> +# testcases.  For simplicity we will just link it into each test.
> +
> +load_lib c-torture.exp
> +load_lib target-supports.exp
> +load_lib torture-options.exp
> +load_lib clearcap.exp
> +
> +if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
> +     || ![is-effective-target lp64]
> +     || ![is-effective-target avx512f] } then {
> +  return
> +}
> +
> +
> +torture-init
> +clearcap-init
> +set-torture-options $C_TORTURE_OPTIONS
> +set additional_flags "-W -Wall -mavx512f"
> +
> +foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
> +    if {[runtest_file_p $runtests $src]} {
> +        c-torture-execute [list $src \
> +                                $srcdir/$subdir/asm-support.S] \
> +                                $additional_flags
> +    }
> +}
> +
> +clearcap-finish
> +torture-finish
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
> new file mode 100644
> index 00000000000..64b24783833
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
> @@ -0,0 +1,155 @@
> +#ifndef INCLUDED_ARGS_H
> +#define INCLUDED_ARGS_H
> +
> +#include <immintrin.h>
> +#include <string.h>
> +
> +/* Assertion macro.  */
> +#define assert(test) if (!(test)) abort()
> +
> +#ifdef __GNUC__
> +#define ATTRIBUTE_UNUSED __attribute__((__unused__))
> +#else
> +#define ATTRIBUTE_UNUSED
> +#endif
> +
> +/* This defines the calling sequences for integers and floats.  */
> +#define I0 rdi
> +#define I1 rsi
> +#define I2 rdx
> +#define I3 rcx
> +#define I4 r8
> +#define I5 r9
> +#define F0 zmm0
> +#define F1 zmm1
> +#define F2 zmm2
> +#define F3 zmm3
> +#define F4 zmm4
> +#define F5 zmm5
> +#define F6 zmm6
> +#define F7 zmm7
> +
> +typedef union {
> +  __bf16 ___bf16[32];
> +  float _float[16];
> +  double _double[8];
> +  long long _longlong[8];
> +  int _int[16];
> +  unsigned long long _ulonglong[8];
> +  __m64 _m64[8];
> +  __m128 _m128[4];
> +  __m256 _m256[2];
> +  __m512 _m512[1];
> +  __m512bf16 _m512bf16[1];
> +} ZMM_T;
> +
> +typedef union {
> +  float _float;
> +  double _double;
> +  long double _ldouble;
> +  unsigned long long _ulonglong[2];
> +} X87_T;
> +extern void (*callthis)(void);
> +extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
> +ZMM_T zmm_regs[32];
> +X87_T x87_regs[8];
> +extern volatile unsigned long long volatile_var;
> +extern void snapshot (void);
> +extern void snapshot_ret (void);
> +#define WRAP_CALL(N) \
> +  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
> +#define WRAP_RET(N) \
> +  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
> +
> +/* Clear all integer registers.  */
> +#define clear_int_hardware_registers \
> +  asm __volatile__ ("xor %%rax, %%rax\n\t" \
> +                   "xor %%rbx, %%rbx\n\t" \
> +                   "xor %%rcx, %%rcx\n\t" \
> +                   "xor %%rdx, %%rdx\n\t" \
> +                   "xor %%rsi, %%rsi\n\t" \
> +                   "xor %%rdi, %%rdi\n\t" \
> +                   "xor %%r8, %%r8\n\t" \
> +                   "xor %%r9, %%r9\n\t" \
> +                   "xor %%r10, %%r10\n\t" \
> +                   "xor %%r11, %%r11\n\t" \
> +                   "xor %%r12, %%r12\n\t" \
> +                   "xor %%r13, %%r13\n\t" \
> +                   "xor %%r14, %%r14\n\t" \
> +                   "xor %%r15, %%r15\n\t" \
> +                   ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
> +                   "r9", "r10", "r11", "r12", "r13", "r14", "r15");
> +
> +/* This is the list of registers available for passing arguments. Not all of
> +   these are used or even really available.  */
> +struct IntegerRegisters
> +{
> +  unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
> +};
> +struct FloatRegisters
> +{
> +  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
> +  long double st0, st1, st2, st3, st4, st5, st6, st7;
> +  ZMM_T zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7, zmm8, zmm9,
> +        zmm10, zmm11, zmm12, zmm13, zmm14, zmm15, zmm16, zmm17, zmm18,
> +       zmm19, zmm20, zmm21, zmm22, zmm23, zmm24, zmm25, zmm26, zmm27,
> +       zmm28, zmm29, zmm30, zmm31;
> +};
> +
> +/* Implemented in scalarargs.c  */
> +extern struct IntegerRegisters iregs;
> +extern struct FloatRegisters fregs;
> +extern unsigned int num_iregs, num_fregs;
> +
> +/* Clear register struct.  */
> +#define clear_struct_registers \
> +  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
> +    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
> +  memset (&iregs, 0, sizeof (iregs)); \
> +  memset (&fregs, 0, sizeof (fregs)); \
> +  memset (zmm_regs, 0, sizeof (zmm_regs)); \
> +  memset (x87_regs, 0, sizeof (x87_regs));
> +
> +/* Clear both hardware and register structs for integers.  */
> +#define clear_int_registers \
> +  clear_struct_registers \
> +  clear_int_hardware_registers
> +
> +#define check_vector_arguments(T,O) do { \
> +  assert (num_fregs <= 0 \
> +         || memcmp (((char *) &fregs.zmm0) + (O), \
> +                    &zmm_regs[0], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 1 \
> +         || memcmp (((char *) &fregs.zmm1) + (O), \
> +                    &zmm_regs[1], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 2 \
> +         || memcmp (((char *) &fregs.zmm2) + (O), \
> +                    &zmm_regs[2], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 3 \
> +         || memcmp (((char *) &fregs.zmm3) + (O), \
> +                    &zmm_regs[3], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 4 \
> +         || memcmp (((char *) &fregs.zmm4) + (O), \
> +                    &zmm_regs[4], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 5 \
> +         || memcmp (((char *) &fregs.zmm5) + (O), \
> +                    &zmm_regs[5], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 6 \
> +         || memcmp (((char *) &fregs.zmm6) + (O), \
> +                    &zmm_regs[6], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  assert (num_fregs <= 7 \
> +         || memcmp (((char *) &fregs.zmm7) + (O), \
> +                    &zmm_regs[7], \
> +                    sizeof (__ ## T) - (O)) == 0); \
> +  } while (0)
> +
> +#define check_m512_arguments check_vector_arguments(m512, 0)
> +
> +#endif /* INCLUDED_ARGS_H  */
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
> new file mode 100644
> index 00000000000..86d54d11c58
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
> @@ -0,0 +1,100 @@
> +       .text
> +       .p2align 4,,15
> +.globl snapshot
> +       .type   snapshot, @function
> +snapshot:
> +.LFB3:
> +       movq    %rax, rax(%rip)
> +       movq    %rbx, rbx(%rip)
> +       movq    %rcx, rcx(%rip)
> +       movq    %rdx, rdx(%rip)
> +       movq    %rdi, rdi(%rip)
> +       movq    %rsi, rsi(%rip)
> +       movq    %rbp, rbp(%rip)
> +       movq    %rsp, rsp(%rip)
> +       movq    %r8, r8(%rip)
> +       movq    %r9, r9(%rip)
> +       movq    %r10, r10(%rip)
> +       movq    %r11, r11(%rip)
> +       movq    %r12, r12(%rip)
> +       movq    %r13, r13(%rip)
> +       movq    %r14, r14(%rip)
> +       movq    %r15, r15(%rip)
> +       vmovdqu32 %zmm0, zmm_regs+0(%rip)
> +       vmovdqu32 %zmm1, zmm_regs+64(%rip)
> +       vmovdqu32 %zmm2, zmm_regs+128(%rip)
> +       vmovdqu32 %zmm3, zmm_regs+192(%rip)
> +       vmovdqu32 %zmm4, zmm_regs+256(%rip)
> +       vmovdqu32 %zmm5, zmm_regs+320(%rip)
> +       vmovdqu32 %zmm6, zmm_regs+384(%rip)
> +       vmovdqu32 %zmm7, zmm_regs+448(%rip)
> +       vmovdqu32 %zmm8, zmm_regs+512(%rip)
> +       vmovdqu32 %zmm9, zmm_regs+576(%rip)
> +       vmovdqu32 %zmm10, zmm_regs+640(%rip)
> +       vmovdqu32 %zmm11, zmm_regs+704(%rip)
> +       vmovdqu32 %zmm12, zmm_regs+768(%rip)
> +       vmovdqu32 %zmm13, zmm_regs+832(%rip)
> +       vmovdqu32 %zmm14, zmm_regs+896(%rip)
> +       vmovdqu32 %zmm15, zmm_regs+960(%rip)
> +       vmovdqu32 %zmm16, zmm_regs+1024(%rip)
> +       vmovdqu32 %zmm17, zmm_regs+1088(%rip)
> +       vmovdqu32 %zmm18, zmm_regs+1152(%rip)
> +       vmovdqu32 %zmm19, zmm_regs+1216(%rip)
> +       vmovdqu32 %zmm20, zmm_regs+1280(%rip)
> +       vmovdqu32 %zmm21, zmm_regs+1344(%rip)
> +       vmovdqu32 %zmm22, zmm_regs+1408(%rip)
> +       vmovdqu32 %zmm23, zmm_regs+1472(%rip)
> +       vmovdqu32 %zmm24, zmm_regs+1536(%rip)
> +       vmovdqu32 %zmm25, zmm_regs+1600(%rip)
> +       vmovdqu32 %zmm26, zmm_regs+1664(%rip)
> +       vmovdqu32 %zmm27, zmm_regs+1728(%rip)
> +       vmovdqu32 %zmm28, zmm_regs+1792(%rip)
> +       vmovdqu32 %zmm29, zmm_regs+1856(%rip)
> +       vmovdqu32 %zmm30, zmm_regs+1920(%rip)
> +       vmovdqu32 %zmm31, zmm_regs+1984(%rip)
> +       jmp     *callthis(%rip)
> +.LFE3:
> +       .size   snapshot, .-snapshot
> +
> +       .p2align 4,,15
> +.globl snapshot_ret
> +       .type   snapshot_ret, @function
> +snapshot_ret:
> +       movq    %rdi, rdi(%rip)
> +       subq    $8, %rsp
> +       call    *callthis(%rip)
> +       addq    $8, %rsp
> +       movq    %rax, rax(%rip)
> +       movq    %rdx, rdx(%rip)
> +       vmovdqu32       %zmm0, zmm_regs+0(%rip)
> +       vmovdqu32       %zmm1, zmm_regs+64(%rip)
> +       fstpt   x87_regs(%rip)
> +       fstpt   x87_regs+16(%rip)
> +       fldt    x87_regs+16(%rip)
> +       fldt    x87_regs(%rip)
> +       ret
> +       .size   snapshot_ret, .-snapshot_ret
> +
> +       .comm   callthis,8,8
> +       .comm   rax,8,8
> +       .comm   rbx,8,8
> +       .comm   rcx,8,8
> +       .comm   rdx,8,8
> +       .comm   rsi,8,8
> +       .comm   rdi,8,8
> +       .comm   rsp,8,8
> +       .comm   rbp,8,8
> +       .comm   r8,8,8
> +       .comm   r9,8,8
> +       .comm   r10,8,8
> +       .comm   r11,8,8
> +       .comm   r12,8,8
> +       .comm   r13,8,8
> +       .comm   r14,8,8
> +       .comm   r15,8,8
> +       .comm   zmm_regs,2048,64
> +       .comm   x87_regs,128,32
> +       .comm   volatile_var,8,8
> +#ifdef __linux__
> +       .section        .note.GNU-stack,"",@progbits
> +#endif
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
> new file mode 100644
> index 00000000000..8379fcfaf8c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
> @@ -0,0 +1,23 @@
> +#include <stdlib.h>
> +
> +static void do_test (void);
> +
> +int
> +main ()
> +{
> +
> +  if (__builtin_cpu_supports ("avx512f"))
> +    {
> +      do_test ();
> +#ifdef DEBUG
> +      printf ("PASSED\n");
> +#endif
> +      return 0;
> +    }
> +
> +#ifdef DEBUG
> +  printf ("SKIPPED\n");
> +#endif
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c
> new file mode 100644
> index 00000000000..1a2500bd883
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c
> @@ -0,0 +1,44 @@
> +#include <stdio.h>
> +#include "bf16-zmm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +               bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +               bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
> +
> +__m512bf16
> +fun_test_returning___m512bf16 (void)
> +{
> +  volatile_var++;
> +  return (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                       bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                       bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                       bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +}
> +
> +__m512bf16 test_512bf16;
> +
> +static void
> +do_test (void)
> +{
> +  unsigned failed = 0;
> +  ZMM_T zmmt1, zmmt2;
> +
> +  clear_struct_registers;
> +  test_512bf16 = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                               bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                               bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +  zmmt1._m512bf16[0] = test_512bf16;
> +  zmmt2._m512bf16[0] = WRAP_RET (fun_test_returning___m512bf16)();
> +  if (memcmp (&zmmt1, &zmmt2, sizeof (zmmt2)) != 0)
> +    printf ("fail m512bf16\n"), failed++;
> +
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c
> new file mode 100644
> index 00000000000..1c5c407efee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c
> @@ -0,0 +1,243 @@
> +#include <stdio.h>
> +#include "bf16-zmm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  ZMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
> +    i16, i17, i18, i19, i20, i21, i22, i23;
> +} values;
> +
> +char *pass;
> +int failed = 0;
> +
> +#undef assert
> +#define assert(c) do { \
> +  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
> +} while (0)
> +
> +#define compare(X1,X2,T) do { \
> +  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
> +} while (0)
> +
> +fun_check_passing_m512bf16_8_values (__m512bf16 i0 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i1 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i2 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i3 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i4 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i5 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i6 ATTRIBUTE_UNUSED,
> +                                    __m512bf16 i7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  compare (values.i0, i0, __m512bf16);
> +  compare (values.i1, i1, __m512bf16);
> +  compare (values.i2, i2, __m512bf16);
> +  compare (values.i3, i3, __m512bf16);
> +  compare (values.i4, i4, __m512bf16);
> +  compare (values.i5, i5, __m512bf16);
> +  compare (values.i6, i6, __m512bf16);
> +  compare (values.i7, i7, __m512bf16);
> +}
> +
> +void
> +fun_check_passing_m512bf16_8_regs (__m512bf16 i0 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i1 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i2 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i3 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i4 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i5 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i6 ATTRIBUTE_UNUSED,
> +                                  __m512bf16 i7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +void
> +fun_check_passing_m512bf16_20_values (__m512bf16 i0 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i1 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i2 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i3 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i4 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i5 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i6 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i7 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i8 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i9 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i10 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i11 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i12 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i13 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i14 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i15 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i16 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i17 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i18 ATTRIBUTE_UNUSED,
> +                                     __m512bf16 i19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  compare (values.i0, i0, __m512bf16);
> +  compare (values.i1, i1, __m512bf16);
> +  compare (values.i2, i2, __m512bf16);
> +  compare (values.i3, i3, __m512bf16);
> +  compare (values.i4, i4, __m512bf16);
> +  compare (values.i5, i5, __m512bf16);
> +  compare (values.i6, i6, __m512bf16);
> +  compare (values.i7, i7, __m512bf16);
> +  compare (values.i8, i8, __m512bf16);
> +  compare (values.i9, i9, __m512bf16);
> +  compare (values.i10, i10, __m512bf16);
> +  compare (values.i11, i11, __m512bf16);
> +  compare (values.i12, i12, __m512bf16);
> +  compare (values.i13, i13, __m512bf16);
> +  compare (values.i14, i14, __m512bf16);
> +  compare (values.i15, i15, __m512bf16);
> +  compare (values.i16, i16, __m512bf16);
> +  compare (values.i17, i17, __m512bf16);
> +  compare (values.i18, i18, __m512bf16);
> +  compare (values.i19, i19, __m512bf16);
> +}
> +
> +void
> +fun_check_passing_m512bf16_20_regs (__m512bf16 i0 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i1 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i2 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i3 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i4 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i5 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i6 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i7 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i8 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i9 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i10 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i11 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i12 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i13 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i14 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i15 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i16 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i17 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i18 ATTRIBUTE_UNUSED,
> +                                   __m512bf16 i19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +#define def_check_passing8(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _func1, _func2, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
> +  \
> +  clear_struct_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
> +
> +#define def_check_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, \
> +                           _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, \
> +                           _i18, _i19, _func1, _func2, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  values.i8.TYPE[0] = _i8; \
> +  values.i9.TYPE[0] = _i9; \
> +  values.i10.TYPE[0] = _i10; \
> +  values.i11.TYPE[0] = _i11; \
> +  values.i12.TYPE[0] = _i12; \
> +  values.i13.TYPE[0] = _i13; \
> +  values.i14.TYPE[0] = _i14; \
> +  values.i15.TYPE[0] = _i15; \
> +  values.i16.TYPE[0] = _i16; \
> +  values.i17.TYPE[0] = _i17; \
> +  values.i18.TYPE[0] = _i18; \
> +  values.i19.TYPE[0] = _i19; \
> +  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, \
> +                    _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, \
> +                    _i18, _i19); \
> +  \
> +  clear_struct_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, \
> +                    _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, \
> +                    _i18, _i19);
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +               bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +               bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
> +
> +void
> +test_m512bf16_on_stack ()
> +{
> +  __m512bf16 x[8];
> +  int i;
> +  for (i = 0; i < 8; i++)
> +    x[i] = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                         bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                         bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +
> +  pass = "m512bf16-8";
> +  def_check_passing8 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
> +                     fun_check_passing_m512bf16_8_values,
> +                     fun_check_passing_m512bf16_8_regs, _m512bf16);
> +}
> +
> +void
> +test_too_many_m512bf16 ()
> +{
> +  __m512bf16 x[20];
> +  int i;
> +  for (i = 0; i < 20; i++)
> +    x[i] = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                         bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                         bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +  pass = "m512bf16-20";
> +  def_check_passing20 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8],
> +                      x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16],
> +                      x[17], x[18], x[19], fun_check_passing_m512bf16_20_values,
> +                      fun_check_passing_m512bf16_20_regs, _m512bf16);
> +}
> +
> +static void
> +do_test (void)
> +{
> +  test_m512bf16_on_stack ();
> +  test_too_many_m512bf16 ();
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c
> new file mode 100644
> index 00000000000..f93a2b81086
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c
> @@ -0,0 +1,77 @@
> +#include "bf16-zmm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +struct m512bf16_struct
> +{
> +  __m512bf16 x;
> +};
> +
> +struct m512bf16_2_struct
> +{
> +  __m512bf16 x1, x2;
> +};
> +
> +/* Check that the struct is passed as the individual members in fregs.  */
> +void
> +check_struct_passing1bf16 (struct m512bf16_struct ms1 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms2 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms3 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms4 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms5 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms6 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms7 ATTRIBUTE_UNUSED,
> +                          struct m512bf16_struct ms8 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +void
> +check_struct_passing2bf16 (struct m512bf16_2_struct ms ATTRIBUTE_UNUSED)
> +{
> +  /* Check the passing on the stack by comparing the address of the
> +     stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&ms.x1 == rsp+8);
> +  assert ((unsigned long)&ms.x2 == rsp+72);
> +}
> +
> +static void
> +do_test (void)
> +{
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +        bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +        bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +        bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
> +  struct m512bf16_struct m512bf16s [8];
> +  struct m512bf16_2_struct m512bf16_2s = {
> +    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +      bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +      bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 },
> +    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +      bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +      bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 }
> +  };
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      m512bf16s[i].x = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                                     bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                                     bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                                     bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +    }
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    (&fregs.zmm0)[i]._m512bf16[0] = m512bf16s[i].x;
> +  num_fregs = 8;
> +  WRAP_CALL (check_struct_passing1bf16) (m512bf16s[0], m512bf16s[1], m512bf16s[2], m512bf16s[3],
> +                                        m512bf16s[4], m512bf16s[5], m512bf16s[6], m512bf16s[7]);
> +  WRAP_CALL (check_struct_passing2bf16) (m512bf16_2s);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c
> new file mode 100644
> index 00000000000..3769b38aeb7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c
> @@ -0,0 +1,222 @@
> +#include "bf16-zmm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +union un1b
> +{
> +  __m512bf16 x;
> +  float f;
> +};
> +
> +union un1bb
> +{
> +  __m512bf16 x;
> +  __bf16 f;
> +};
> +
> +union un2b
> +{
> +  __m512bf16 x;
> +  double d;
> +};
> +
> +union un3b
> +{
> +  __m512bf16 x;
> +  __m128 v;
> +};
> +
> +union un4b
> +{
> +  __m512bf16 x;
> +  long double ld;
> +};
> +
> +union un5b
> +{
> +  __m512bf16 x;
> +  int i;
> +};
> +
> +union un6b
> +{
> +  __m512bf16 x;
> +  __m256 v;
> +};
> +
> +void
> +check_union_passing1b (union un1b u1 ATTRIBUTE_UNUSED,
> +                      union un1b u2 ATTRIBUTE_UNUSED,
> +                      union un1b u3 ATTRIBUTE_UNUSED,
> +                      union un1b u4 ATTRIBUTE_UNUSED,
> +                      union un1b u5 ATTRIBUTE_UNUSED,
> +                      union un1b u6 ATTRIBUTE_UNUSED,
> +                      union un1b u7 ATTRIBUTE_UNUSED,
> +                      union un1b u8 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +void
> +check_union_passing1bb (union un1bb u1 ATTRIBUTE_UNUSED,
> +                       union un1bb u2 ATTRIBUTE_UNUSED,
> +                       union un1bb u3 ATTRIBUTE_UNUSED,
> +                       union un1bb u4 ATTRIBUTE_UNUSED,
> +                       union un1bb u5 ATTRIBUTE_UNUSED,
> +                       union un1bb u6 ATTRIBUTE_UNUSED,
> +                       union un1bb u7 ATTRIBUTE_UNUSED,
> +                       union un1bb u8 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +
> +void
> +check_union_passing2b (union un2b u1 ATTRIBUTE_UNUSED,
> +                      union un2b u2 ATTRIBUTE_UNUSED,
> +                      union un2b u3 ATTRIBUTE_UNUSED,
> +                      union un2b u4 ATTRIBUTE_UNUSED,
> +                      union un2b u5 ATTRIBUTE_UNUSED,
> +                      union un2b u6 ATTRIBUTE_UNUSED,
> +                      union un2b u7 ATTRIBUTE_UNUSED,
> +                      union un2b u8 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +void
> +check_union_passing3b (union un3b u1 ATTRIBUTE_UNUSED,
> +                      union un3b u2 ATTRIBUTE_UNUSED,
> +                      union un3b u3 ATTRIBUTE_UNUSED,
> +                      union un3b u4 ATTRIBUTE_UNUSED,
> +                      union un3b u5 ATTRIBUTE_UNUSED,
> +                      union un3b u6 ATTRIBUTE_UNUSED,
> +                      union un3b u7 ATTRIBUTE_UNUSED,
> +                      union un3b u8 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +void
> +check_union_passing4b (union un4b u ATTRIBUTE_UNUSED)
> +{
> +   /* Check the passing on the stack by comparing the address of the
> +      stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&u.x == rsp+8);
> +  assert ((unsigned long)&u.ld == rsp+8);
> +}
> +
> +void
> +check_union_passing5b (union un5b u ATTRIBUTE_UNUSED)
> +{
> +   /* Check the passing on the stack by comparing the address of the
> +      stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&u.x == rsp+8);
> +  assert ((unsigned long)&u.i == rsp+8);
> +}
> +
> +void
> +check_union_passing6b (union un6b u1 ATTRIBUTE_UNUSED,
> +                      union un6b u2 ATTRIBUTE_UNUSED,
> +                      union un6b u3 ATTRIBUTE_UNUSED,
> +                      union un6b u4 ATTRIBUTE_UNUSED,
> +                      union un6b u5 ATTRIBUTE_UNUSED,
> +                      union un6b u6 ATTRIBUTE_UNUSED,
> +                      union un6b u7 ATTRIBUTE_UNUSED,
> +                      union un6b u8 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m512_arguments;
> +}
> +
> +#define check_union_passing1b WRAP_CALL(check_union_passing1b)
> +#define check_union_passing1bf WRAP_CALL(check_union_passing1bf)
> +#define check_union_passing1bb WRAP_CALL(check_union_passing1bb)
> +#define check_union_passing2b WRAP_CALL(check_union_passing2b)
> +#define check_union_passing3b WRAP_CALL(check_union_passing3b)
> +#define check_union_passing4b WRAP_CALL(check_union_passing4b)
> +#define check_union_passing5b WRAP_CALL(check_union_passing5b)
> +#define check_union_passing6b WRAP_CALL(check_union_passing6b)
> +
> +
> +static void
> +do_test (void)
> +{
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +        bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +        bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +        bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
> +  union un1b u1b[8];
> +  union un1bb u1bb[8];
> +  union un2b u2b[8];
> +  union un3b u3b[8];
> +  union un4b u4b;
> +  union un5b u5b;
> +  union un6b u6b[8];
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      u1b[i].x =  (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                                bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                                bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                                bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +    }
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    (&fregs.zmm0)[i]._m512bf16[0] = u1b[i].x;
> +  num_fregs = 8;
> +  check_union_passing1b (u1b[0], u1b[1], u1b[2], u1b[3],
> +                        u1b[4], u1b[5], u1b[6], u1b[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u1bb[i].x = u1b[i].x;
> +      (&fregs.zmm0)[i]._m512bf16[0] = u1bb[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing1bb (u1bb[0], u1bb[1], u1bb[2], u1bb[3],
> +                         u1bb[4], u1bb[5], u1bb[6], u1bb[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u2b[i].x = u1bb[i].x;
> +      (&fregs.zmm0)[i]._m512bf16[0] = u2b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing2b (u2b[0], u2b[1], u2b[2], u2b[3],
> +                        u2b[4], u2b[5], u2b[6], u2b[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u3b[i].x = u1b[i].x;
> +      (&fregs.zmm0)[i]._m512bf16[0] = u3b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing3b (u3b[0], u3b[1], u3b[2], u3b[3],
> +                        u3b[4], u3b[5], u3b[6], u3b[7]);
> +
> +  check_union_passing4b (u4b);
> +  check_union_passing5b (u5b);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u6b[i].x = u1b[i].x;
> +      (&fregs.zmm0)[i]._m512bf16[0] = u6b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing6b (u6b[0], u6b[1], u6b[2], u6b[3],
> +                        u6b[4], u6b[5], u6b[6], u6b[7]);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c
> new file mode 100644
> index 00000000000..2be57b8b5fb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c
> @@ -0,0 +1,111 @@
> +/* Test variable number of 512-bit vector arguments passed to functions.  */
> +
> +#include <stdio.h>
> +#include "bf16-zmm-check.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  ZMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9;
> +} values;
> +
> +char *pass;
> +int failed = 0;
> +
> +#undef assert
> +#define assert(c) do { \
> +  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
> +} while (0)
> +
> +#define compare(X1,X2,T) do { \
> +  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
> +} while (0)
> +
> +void
> +fun_check_passing_m512bf16_varargs (__m512bf16 i0, __m512bf16 i1, __m512bf16 i2,
> +                                __m512bf16 i3, ...)
> +{
> +  /* Check argument values.  */
> +  void **fp = __builtin_frame_address (0);
> +  void *ra = __builtin_return_address (0);
> +  __m512bf16 *argp;
> +
> +  compare (values.i0, i0, __m512bf16);
> +  compare (values.i1, i1, __m512bf16);
> +  compare (values.i2, i2, __m512bf16);
> +  compare (values.i3, i3, __m512bf16);
> +
> +  /* Get the pointer to the return address on stack.  */
> +  while (*fp != ra)
> +    fp++;
> +
> +  /* Skip the return address stack slot.  */
> +  argp = (__m512bf16 *)(((char *) fp) + 8);
> +
> +  /* Check __m512bf16 arguments passed on stack.  */
> +  compare (values.i4, argp[0], __m512bf16);
> +  compare (values.i5, argp[1], __m512bf16);
> +  compare (values.i6, argp[2], __m512bf16);
> +  compare (values.i7, argp[3], __m512bf16);
> +  compare (values.i8, argp[4], __m512bf16);
> +  compare (values.i9, argp[5], __m512bf16);
> +
> +  /* Check register contents.  */
> +  compare (fregs.zmm0, zmm_regs[0], __m512bf16);
> +  compare (fregs.zmm1, zmm_regs[1], __m512bf16);
> +  compare (fregs.zmm2, zmm_regs[2], __m512bf16);
> +  compare (fregs.zmm3, zmm_regs[3], __m512bf16);
> +}
> +
> +#define def_check_int_passing_varargs(_i0, _i1, _i2, _i3, _i4, _i5, \
> +                                     _i6, _i7, _i8, _i9, \
> +                                     _func, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  values.i8.TYPE[0] = _i8; \
> +  values.i9.TYPE[0] = _i9; \
> +  clear_struct_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  WRAP_CALL(_func) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9);
> +
> +void
> +test_m512bf16_varargs (void)
> +{
> +  __m512bf16 x[10];
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +        bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +        bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +        bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
> +  int i;
> +  for (i = 0; i < 10; i++)
> +    x[i] = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                         bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
> +                         bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
> +  pass = "m512bf16-varargs";
> +  def_check_int_passing_varargs (x[0], x[1], x[2], x[3], x[4], x[5],
> +                                x[6], x[7], x[8], x[9],
> +                                fun_check_passing_m512bf16_varargs,
> +                                _m512bf16);
> +}
> +
> +void
> +do_test (void)
> +{
> +  test_m512bf16_varargs ();
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h
> new file mode 100644
> index 00000000000..98fbc660f27
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h
> @@ -0,0 +1,53 @@
> +#ifndef MACROS_H
> +
> +#define check_size(_t, _size) assert(sizeof(_t) == (_size))
> +
> +#define check_align(_t, _align) assert(__alignof__(_t) == (_align))
> +
> +#define check_align_lv(_t, _align) assert(__alignof__(_t) == (_align) \
> +                                         && (((unsigned long)&(_t)) & ((_align) - 1) ) == 0)
> +
> +#define check_basic_struct_size_and_align(_type, _size, _align) { \
> +  struct _str { _type dummy; } _t; \
> +  check_size(_t, _size); \
> +  check_align_lv(_t, _align); \
> +}
> +
> +#define check_array_size_and_align(_type, _size, _align) { \
> +  _type _a[1]; _type _b[2]; _type _c[16]; \
> +  struct _str { _type _a[1]; } _s; \
> +  check_align_lv(_a[0], _align); \
> +  check_size(_a, _size); \
> +  check_size(_b, (_size*2)); \
> +  check_size(_c, (_size*16)); \
> +  check_size(_s, _size); \
> +  check_align_lv(_s._a[0], _align); \
> +}
> +
> +#define check_basic_union_size_and_align(_type, _size, _align) { \
> +  union _union { _type dummy; } _u; \
> +  check_size(_u, _size); \
> +  check_align_lv(_u, _align); \
> +}
> +
> +#define run_signed_tests2(_function, _arg1, _arg2) \
> +  _function(_arg1, _arg2); \
> +  _function(signed _arg1, _arg2); \
> +  _function(unsigned _arg1, _arg2);
> +
> +#define run_signed_tests3(_function, _arg1, _arg2, _arg3) \
> +  _function(_arg1, _arg2, _arg3); \
> +  _function(signed _arg1, _arg2, _arg3); \
> +  _function(unsigned _arg1, _arg2, _arg3);
> +
> +/* Check size of a struct and a union of three types.  */
> +
> +#define check_struct_and_union3(type1, type2, type3, struct_size, align_size) \
> +{ \
> +  struct _str { type1 t1; type2 t2; type3 t3; } _t; \
> +  union _uni { type1 t1; type2 t2; type3 t3; } _u; \
> +  check_size(_t, struct_size); \
> +  check_size(_u, align_size); \
> +}
> +
> +#endif // MACROS_H
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c
> new file mode 100644
> index 00000000000..0c58db101e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c
> @@ -0,0 +1,214 @@
> +/* This is an autogenerated file. Do not edit.  */
> +
> +#include "defines.h"
> +#include "macros.h"
> +
> +/* Check structs and unions of all permutations of 3 basic types.  */
> +int
> +main (void)
> +{
> +  check_struct_and_union3(char, char, __bf16, 4, 2);
> +  check_struct_and_union3(char, __bf16, char, 6, 2);
> +  check_struct_and_union3(char, __bf16, __bf16, 6, 2);
> +  check_struct_and_union3(char, __bf16, int, 8, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(char, __bf16, long, 16, 8);
> +#endif
> +  check_struct_and_union3(char, __bf16, long long, 16, 8);
> +  check_struct_and_union3(char, __bf16, float, 8, 4);
> +  check_struct_and_union3(char, __bf16, double, 16, 8);
> +  check_struct_and_union3(char, __bf16, long double, 32, 16);
> +  check_struct_and_union3(char, int, __bf16, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(char, long, __bf16, 24, 8);
> +#endif
> +  check_struct_and_union3(char, long long, __bf16, 24, 8);
> +  check_struct_and_union3(char, float, __bf16, 12, 4);
> +  check_struct_and_union3(char, double, __bf16, 24, 8);
> +  check_struct_and_union3(char, long double, __bf16, 48, 16);
> +  check_struct_and_union3(__bf16, char, char, 4, 2);
> +  check_struct_and_union3(__bf16, char, __bf16, 6, 2);
> +  check_struct_and_union3(__bf16, char, int, 8, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(__bf16, char, long, 16, 8);
> +#endif
> +  check_struct_and_union3(__bf16, char, long long, 16, 8);
> +  check_struct_and_union3(__bf16, char, float, 8, 4);
> +  check_struct_and_union3(__bf16, char, double, 16, 8);
> +  check_struct_and_union3(__bf16, char, long double, 32, 16);
> +  check_struct_and_union3(__bf16, __bf16, char, 6, 2);
> +  check_struct_and_union3(__bf16, __bf16, __bf16, 6, 2);
> +  check_struct_and_union3(__bf16, __bf16, int, 8, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(__bf16, __bf16, long, 16, 8);
> +#endif
> +  check_struct_and_union3(__bf16, __bf16, long long, 16, 8);
> +  check_struct_and_union3(__bf16, __bf16, float, 8, 4);
> +  check_struct_and_union3(__bf16, __bf16, double, 16, 8);
> +  check_struct_and_union3(__bf16, __bf16, long double, 32, 16);
> +  check_struct_and_union3(__bf16, int, char, 12, 4);
> +  check_struct_and_union3(__bf16, int, __bf16, 12, 4);
> +  check_struct_and_union3(__bf16, int, int, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(__bf16, int, long, 16, 8);
> +#endif
> +  check_struct_and_union3(__bf16, int, long long, 16, 8);
> +  check_struct_and_union3(__bf16, int, float, 12, 4);
> +  check_struct_and_union3(__bf16, int, double, 16, 8);
> +  check_struct_and_union3(__bf16, int, long double, 32, 16);
> +#ifndef __ILP32__
> +  check_struct_and_union3(__bf16, long, char, 24, 8);
> +  check_struct_and_union3(__bf16, long, __bf16, 24, 8);
> +  check_struct_and_union3(__bf16, long, int, 24, 8);
> +  check_struct_and_union3(__bf16, long, long, 24, 8);
> +  check_struct_and_union3(__bf16, long, long long, 24, 8);
> +  check_struct_and_union3(__bf16, long, float, 24, 8);
> +  check_struct_and_union3(__bf16, long, double, 24, 8);
> +#endif
> +  check_struct_and_union3(__bf16, long, long double, 32, 16);
> +  check_struct_and_union3(__bf16, long long, char, 24, 8);
> +  check_struct_and_union3(__bf16, long long, __bf16, 24, 8);
> +  check_struct_and_union3(__bf16, long long, int, 24, 8);
> +  check_struct_and_union3(__bf16, long long, long, 24, 8);
> +  check_struct_and_union3(__bf16, long long, long long, 24, 8);
> +  check_struct_and_union3(__bf16, long long, float, 24, 8);
> +  check_struct_and_union3(__bf16, long long, double, 24, 8);
> +  check_struct_and_union3(__bf16, long long, long double, 32, 16);
> +  check_struct_and_union3(__bf16, float, char, 12, 4);
> +  check_struct_and_union3(__bf16, float, __bf16, 12, 4);
> +  check_struct_and_union3(__bf16, float, int, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(__bf16, float, long, 16, 8);
> +#endif
> +  check_struct_and_union3(__bf16, float, long long, 16, 8);
> +  check_struct_and_union3(__bf16, float, float, 12, 4);
> +  check_struct_and_union3(__bf16, float, double, 16, 8);
> +  check_struct_and_union3(__bf16, float, long double, 32, 16);
> +  check_struct_and_union3(__bf16, double, char, 24, 8);
> +  check_struct_and_union3(__bf16, double, __bf16, 24, 8);
> +  check_struct_and_union3(__bf16, double, int, 24, 8);
> +  check_struct_and_union3(__bf16, double, long, 24, 8);
> +  check_struct_and_union3(__bf16, double, long long, 24, 8);
> +  check_struct_and_union3(__bf16, double, float, 24, 8);
> +  check_struct_and_union3(__bf16, double, double, 24, 8);
> +  check_struct_and_union3(__bf16, double, long double, 32, 16);
> +  check_struct_and_union3(__bf16, long double, char, 48, 16);
> +  check_struct_and_union3(__bf16, long double, __bf16, 48, 16);
> +  check_struct_and_union3(__bf16, long double, int, 48, 16);
> +  check_struct_and_union3(__bf16, long double, long, 48, 16);
> +  check_struct_and_union3(__bf16, long double, long long, 48, 16);
> +  check_struct_and_union3(__bf16, long double, float, 48, 16);
> +  check_struct_and_union3(__bf16, long double, double, 48, 16);
> +  check_struct_and_union3(__bf16, long double, long double, 48, 16);
> +  check_struct_and_union3(int, char, __bf16, 8, 4);
> +  check_struct_and_union3(int, __bf16, char, 8, 4);
> +  check_struct_and_union3(int, __bf16, __bf16, 8, 4);
> +  check_struct_and_union3(int, __bf16, int, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(int, __bf16, long, 16, 8);
> +#endif
> +  check_struct_and_union3(int, __bf16, long long, 16, 8);
> +  check_struct_and_union3(int, __bf16, float, 12, 4);
> +  check_struct_and_union3(int, __bf16, double, 16, 8);
> +  check_struct_and_union3(int, __bf16, long double, 32, 16);
> +  check_struct_and_union3(int, int, __bf16, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(int, long, __bf16, 24, 8);
> +#endif
> +  check_struct_and_union3(int, long long, __bf16, 24, 8);
> +  check_struct_and_union3(int, float, __bf16, 12, 4);
> +  check_struct_and_union3(int, double, __bf16, 24, 8);
> +  check_struct_and_union3(int, long double, __bf16, 48, 16);
> +#ifndef __ILP32__
> +  check_struct_and_union3(long, char, __bf16, 16, 8);
> +  check_struct_and_union3(long, __bf16, char, 16, 8);
> +  check_struct_and_union3(long, __bf16, __bf16, 16, 8);
> +  check_struct_and_union3(long, __bf16, int, 16, 8);
> +  check_struct_and_union3(long, __bf16, long, 24, 8);
> +  check_struct_and_union3(long, __bf16, long long, 24, 8);
> +  check_struct_and_union3(long, __bf16, float, 16, 8);
> +  check_struct_and_union3(long, __bf16, double, 24, 8);
> +#endif
> +  check_struct_and_union3(long, __bf16, long double, 32, 16);
> +#ifndef __ILP32__
> +  check_struct_and_union3(long, int, __bf16, 16, 8);
> +  check_struct_and_union3(long, long, __bf16, 24, 8);
> +  check_struct_and_union3(long, long long, __bf16, 24, 8);
> +  check_struct_and_union3(long, float, __bf16, 16, 8);
> +  check_struct_and_union3(long, double, __bf16, 24, 8);
> +#endif
> +  check_struct_and_union3(long, long double, __bf16, 48, 16);
> +  check_struct_and_union3(long long, char, __bf16, 16, 8);
> +  check_struct_and_union3(long long, __bf16, char, 16, 8);
> +  check_struct_and_union3(long long, __bf16, __bf16, 16, 8);
> +  check_struct_and_union3(long long, __bf16, int, 16, 8);
> +#ifndef __ILP32__
> +  check_struct_and_union3(long long, __bf16, long, 24, 8);
> +#endif
> +  check_struct_and_union3(long long, __bf16, long long, 24, 8);
> +  check_struct_and_union3(long long, __bf16, float, 16, 8);
> +  check_struct_and_union3(long long, __bf16, double, 24, 8);
> +  check_struct_and_union3(long long, __bf16, long double, 32, 16);
> +  check_struct_and_union3(long long, int, __bf16, 16, 8);
> +#ifndef __ILP32__
> +  check_struct_and_union3(long long, long, __bf16, 24, 8);
> +#endif
> +  check_struct_and_union3(long long, long long, __bf16, 24, 8);
> +  check_struct_and_union3(long long, float, __bf16, 16, 8);
> +  check_struct_and_union3(long long, double, __bf16, 24, 8);
> +  check_struct_and_union3(long long, long double, __bf16, 48, 16);
> +  check_struct_and_union3(float, char, __bf16, 8, 4);
> +  check_struct_and_union3(float, __bf16, char, 8, 4);
> +  check_struct_and_union3(float, __bf16, __bf16, 8, 4);
> +  check_struct_and_union3(float, __bf16, int, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(float, __bf16, long, 16, 8);
> +#endif
> +  check_struct_and_union3(float, __bf16, long long, 16, 8);
> +  check_struct_and_union3(float, __bf16, float, 12, 4);
> +  check_struct_and_union3(float, __bf16, double, 16, 8);
> +  check_struct_and_union3(float, __bf16, long double, 32, 16);
> +  check_struct_and_union3(float, int, __bf16, 12, 4);
> +#ifndef __ILP32__
> +  check_struct_and_union3(float, long, __bf16, 24, 8);
> +#endif
> +  check_struct_and_union3(float, long long, __bf16, 24, 8);
> +  check_struct_and_union3(float, float, __bf16, 12, 4);
> +  check_struct_and_union3(float, double, __bf16, 24, 8);
> +  check_struct_and_union3(float, long double, __bf16, 48, 16);
> +  check_struct_and_union3(double, char, __bf16, 16, 8);
> +  check_struct_and_union3(double, __bf16, char, 16, 8);
> +  check_struct_and_union3(double, __bf16, __bf16, 16, 8);
> +  check_struct_and_union3(double, __bf16, int, 16, 8);
> +#ifndef __ILP32__
> +  check_struct_and_union3(double, __bf16, long, 24, 8);
> +#endif
> +  check_struct_and_union3(double, __bf16, long long, 24, 8);
> +  check_struct_and_union3(double, __bf16, float, 16, 8);
> +  check_struct_and_union3(double, __bf16, double, 24, 8);
> +  check_struct_and_union3(double, __bf16, long double, 32, 16);
> +  check_struct_and_union3(double, int, __bf16, 16, 8);
> +#ifndef __ILP32__
> +  check_struct_and_union3(double, long, __bf16, 24, 8);
> +#endif
> +  check_struct_and_union3(double, long long, __bf16, 24, 8);
> +  check_struct_and_union3(double, float, __bf16, 16, 8);
> +  check_struct_and_union3(double, double, __bf16, 24, 8);
> +  check_struct_and_union3(double, long double, __bf16, 48, 16);
> +  check_struct_and_union3(long double, char, __bf16, 32, 16);
> +  check_struct_and_union3(long double, __bf16, char, 32, 16);
> +  check_struct_and_union3(long double, __bf16, __bf16, 32, 16);
> +  check_struct_and_union3(long double, __bf16, int, 32, 16);
> +  check_struct_and_union3(long double, __bf16, long, 32, 16);
> +  check_struct_and_union3(long double, __bf16, long long, 32, 16);
> +  check_struct_and_union3(long double, __bf16, float, 32, 16);
> +  check_struct_and_union3(long double, __bf16, double, 32, 16);
> +  check_struct_and_union3(long double, __bf16, long double, 48, 16);
> +  check_struct_and_union3(long double, int, __bf16, 32, 16);
> +  check_struct_and_union3(long double, long, __bf16, 32, 16);
> +  check_struct_and_union3(long double, long long, __bf16, 32, 16);
> +  check_struct_and_union3(long double, float, __bf16, 32, 16);
> +  check_struct_and_union3(long double, double, __bf16, 32, 16);
> +  check_struct_and_union3(long double, long double, __bf16, 48, 16);
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c
> new file mode 100644
> index 00000000000..6490a5228ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c
> @@ -0,0 +1,14 @@
> +/* This checks alignment of basic types.  */
> +
> +#include "defines.h"
> +#include "macros.h"
> +
> +
> +int
> +main (void)
> +{
> +  /* __bf16 point types.  */
> +  check_align(__bf16, TYPE_ALIGN_BF16);
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c
> new file mode 100644
> index 00000000000..c004c35bb83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c
> @@ -0,0 +1,13 @@
> +/* This checks .  */
> +
> +#include "defines.h"
> +#include "macros.h"
> +
> +
> +int
> +main (void)
> +{
> +  check_array_size_and_align(__bf16, TYPE_SIZE_BF16, TYPE_ALIGN_BF16);
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c
> new file mode 100644
> index 00000000000..cfea2224733
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c
> @@ -0,0 +1,20 @@
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +#include "args.h"
> +
> +__bf16
> +fun_test_returning_bf16 (void)
> +{
> +  __bf16 b = make_f32_bf16 (72.0f);
> +  volatile_var++;
> +  return b;
> +}
> +
> +static void
> +do_test (void)
> +{
> +  __bf16 var = WRAP_RET (fun_test_returning_bf16) ();
> +  assert (check_bf16_float (xmm_regs[0].___bf16[0], 72.0f) == 1);
> +  assert (check_bf16_float (var, 72.0f) == 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c
> new file mode 100644
> index 00000000000..b81a8d971b5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c
> @@ -0,0 +1,14 @@
> +/* This checks sizes of basic types.  */
> +
> +#include "defines.h"
> +#include "macros.h"
> +
> +
> +int
> +main (void)
> +{
> +  /* Floating point types.  */
> +  check_size(__bf16, TYPE_SIZE_BF16);
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c
> new file mode 100644
> index 00000000000..f282506703c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c
> @@ -0,0 +1,14 @@
> +/* This checks size and alignment of structs with a single basic type
> +   element. All basic types are checked.  */
> +
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +
> +
> +static void
> +do_test (void)
> +{
> +  /* Floating point types.  */
> +  check_basic_struct_size_and_align(__bf16, TYPE_SIZE_BF16, TYPE_ALIGN_BF16);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c
> new file mode 100644
> index 00000000000..03afa68c0e4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c
> @@ -0,0 +1,12 @@
> +/* Test of simple unions, size and alignment.  */
> +
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +
> +static void
> +do_test (void)
> +{
> +  /* Floating point types.  */
> +  check_basic_union_size_and_align(__bf16, TYPE_SIZE_BF16, TYPE_ALIGN_BF16);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c
> new file mode 100644
> index 00000000000..64857ce7b71
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c
> @@ -0,0 +1,38 @@
> +#include <stdio.h>
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
> +
> +__m128bf16
> +fun_test_returning___m128bf16 (void)
> +{
> +  volatile_var++;
> +  return (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +}
> +
> +__m128bf16 test_128bf16;
> +
> +static void
> +do_test (void)
> +{
> +  unsigned failed = 0;
> +  XMM_T xmmt1, xmmt2;
> +
> +  clear_struct_registers;
> +  test_128bf16 = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +  xmmt1._m128bf16[0] = test_128bf16;
> +  xmmt2._m128bf16[0] = WRAP_RET (fun_test_returning___m128bf16)();
> +  if (xmmt1._longlong[0] != xmmt2._longlong[0]
> +      || xmmt1._longlong[0] != xmm_regs[0]._longlong[0])
> +    printf ("fail m128bf16\n"), failed++;
> +
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c
> new file mode 100644
> index 00000000000..fe08042286b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c
> @@ -0,0 +1,312 @@
> +/* This is an autogenerated file. Do not edit.  */
> +
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +#include "args.h"
> +
> +struct IntegerRegisters iregs;
> +struct FloatRegisters fregs;
> +unsigned int num_iregs, num_fregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  __bf16 f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14,
> +    f15, f16, f17, f18, f19, f20, f21, f22, f23;
> +} values___bf16;
> +
> +void
> +fun_check_bf16_passing_8_values (__bf16 f0 ATTRIBUTE_UNUSED,
> +                                __bf16 f1 ATTRIBUTE_UNUSED,
> +                                __bf16 f2 ATTRIBUTE_UNUSED,
> +                                __bf16 f3 ATTRIBUTE_UNUSED,
> +                                __bf16 f4 ATTRIBUTE_UNUSED,
> +                                __bf16 f5 ATTRIBUTE_UNUSED,
> +                                __bf16 f6 ATTRIBUTE_UNUSED,
> +                                __bf16 f7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  check_bf16 (values___bf16.f0, f0);
> +  check_bf16 (values___bf16.f1, f1);
> +  check_bf16 (values___bf16.f2, f2);
> +  check_bf16 (values___bf16.f3, f3);
> +  check_bf16 (values___bf16.f4, f4);
> +  check_bf16 (values___bf16.f5, f5);
> +  check_bf16 (values___bf16.f6, f6);
> +  check_bf16 (values___bf16.f7, f7);
> +}
> +
> +void
> +fun_check_bf16_passing_8_regs (__bf16 f0 ATTRIBUTE_UNUSED,
> +                              __bf16 f1 ATTRIBUTE_UNUSED,
> +                              __bf16 f2 ATTRIBUTE_UNUSED,
> +                              __bf16 f3 ATTRIBUTE_UNUSED,
> +                              __bf16 f4 ATTRIBUTE_UNUSED,
> +                              __bf16 f5 ATTRIBUTE_UNUSED,
> +                              __bf16 f6 ATTRIBUTE_UNUSED,
> +                              __bf16 f7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_bf16_arguments;
> +}
> +
> +void
> +fun_check_bf16_passing_16_values (__bf16 f0 ATTRIBUTE_UNUSED,
> +                                 __bf16 f1 ATTRIBUTE_UNUSED,
> +                                 __bf16 f2 ATTRIBUTE_UNUSED,
> +                                 __bf16 f3 ATTRIBUTE_UNUSED,
> +                                 __bf16 f4 ATTRIBUTE_UNUSED,
> +                                 __bf16 f5 ATTRIBUTE_UNUSED,
> +                                 __bf16 f6 ATTRIBUTE_UNUSED,
> +                                 __bf16 f7 ATTRIBUTE_UNUSED,
> +                                 __bf16 f8 ATTRIBUTE_UNUSED,
> +                                 __bf16 f9 ATTRIBUTE_UNUSED,
> +                                 __bf16 f10 ATTRIBUTE_UNUSED,
> +                                 __bf16 f11 ATTRIBUTE_UNUSED,
> +                                 __bf16 f12 ATTRIBUTE_UNUSED,
> +                                 __bf16 f13 ATTRIBUTE_UNUSED,
> +                                 __bf16 f14 ATTRIBUTE_UNUSED,
> +                                 __bf16 f15 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  check_bf16 (values___bf16.f0, f0);
> +  check_bf16 (values___bf16.f1, f1);
> +  check_bf16 (values___bf16.f2, f2);
> +  check_bf16 (values___bf16.f3, f3);
> +  check_bf16 (values___bf16.f4, f4);
> +  check_bf16 (values___bf16.f5, f5);
> +  check_bf16 (values___bf16.f6, f6);
> +  check_bf16 (values___bf16.f7, f7);
> +  check_bf16 (values___bf16.f8, f8);
> +  check_bf16 (values___bf16.f9, f9);
> +  check_bf16 (values___bf16.f10, f10);
> +  check_bf16 (values___bf16.f11, f11);
> +  check_bf16 (values___bf16.f12, f12);
> +  check_bf16 (values___bf16.f13, f13);
> +  check_bf16 (values___bf16.f14, f14);
> +  check_bf16 (values___bf16.f15, f15);
> +}
> +
> +void
> +fun_check_bf16_passing_16_regs (__bf16 f0 ATTRIBUTE_UNUSED,
> +                               __bf16 f1 ATTRIBUTE_UNUSED,
> +                               __bf16 f2 ATTRIBUTE_UNUSED,
> +                               __bf16 f3 ATTRIBUTE_UNUSED,
> +                               __bf16 f4 ATTRIBUTE_UNUSED,
> +                               __bf16 f5 ATTRIBUTE_UNUSED,
> +                               __bf16 f6 ATTRIBUTE_UNUSED,
> +                               __bf16 f7 ATTRIBUTE_UNUSED,
> +                               __bf16 f8 ATTRIBUTE_UNUSED,
> +                               __bf16 f9 ATTRIBUTE_UNUSED,
> +                               __bf16 f10 ATTRIBUTE_UNUSED,
> +                               __bf16 f11 ATTRIBUTE_UNUSED,
> +                               __bf16 f12 ATTRIBUTE_UNUSED,
> +                               __bf16 f13 ATTRIBUTE_UNUSED,
> +                               __bf16 f14 ATTRIBUTE_UNUSED,
> +                               __bf16 f15 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_bf16_arguments;
> +}
> +
> +void
> +fun_check_bf16_passing_20_values (__bf16 f0 ATTRIBUTE_UNUSED,
> +                                 __bf16 f1 ATTRIBUTE_UNUSED,
> +                                 __bf16 f2 ATTRIBUTE_UNUSED,
> +                                 __bf16 f3 ATTRIBUTE_UNUSED,
> +                                 __bf16 f4 ATTRIBUTE_UNUSED,
> +                                 __bf16 f5 ATTRIBUTE_UNUSED,
> +                                 __bf16 f6 ATTRIBUTE_UNUSED,
> +                                 __bf16 f7 ATTRIBUTE_UNUSED,
> +                                 __bf16 f8 ATTRIBUTE_UNUSED,
> +                                 __bf16 f9 ATTRIBUTE_UNUSED,
> +                                 __bf16 f10 ATTRIBUTE_UNUSED,
> +                                 __bf16 f11 ATTRIBUTE_UNUSED,
> +                                 __bf16 f12 ATTRIBUTE_UNUSED,
> +                                 __bf16 f13 ATTRIBUTE_UNUSED,
> +                                 __bf16 f14 ATTRIBUTE_UNUSED,
> +                                 __bf16 f15 ATTRIBUTE_UNUSED,
> +                                 __bf16 f16 ATTRIBUTE_UNUSED,
> +                                 __bf16 f17 ATTRIBUTE_UNUSED,
> +                                 __bf16 f18 ATTRIBUTE_UNUSED,
> +                                 __bf16 f19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  check_bf16 (values___bf16.f0, f0);
> +  check_bf16 (values___bf16.f1, f1);
> +  check_bf16 (values___bf16.f2, f2);
> +  check_bf16 (values___bf16.f3, f3);
> +  check_bf16 (values___bf16.f4, f4);
> +  check_bf16 (values___bf16.f5, f5);
> +  check_bf16 (values___bf16.f6, f6);
> +  check_bf16 (values___bf16.f7, f7);
> +  check_bf16 (values___bf16.f8, f8);
> +  check_bf16 (values___bf16.f9, f9);
> +  check_bf16 (values___bf16.f10, f10);
> +  check_bf16 (values___bf16.f11, f11);
> +  check_bf16 (values___bf16.f12, f12);
> +  check_bf16 (values___bf16.f13, f13);
> +  check_bf16 (values___bf16.f14, f14);
> +  check_bf16 (values___bf16.f15, f15);
> +  check_bf16 (values___bf16.f16, f16);
> +  check_bf16 (values___bf16.f17, f17);
> +  check_bf16 (values___bf16.f18, f18);
> +  check_bf16 (values___bf16.f19, f19);
> +}
> +
> +void
> +fun_check_bf16_passing_20_regs (__bf16 f0 ATTRIBUTE_UNUSED,
> +                               __bf16 f1 ATTRIBUTE_UNUSED,
> +                               __bf16 f2 ATTRIBUTE_UNUSED,
> +                               __bf16 f3 ATTRIBUTE_UNUSED,
> +                               __bf16 f4 ATTRIBUTE_UNUSED,
> +                               __bf16 f5 ATTRIBUTE_UNUSED,
> +                               __bf16 f6 ATTRIBUTE_UNUSED,
> +                               __bf16 f7 ATTRIBUTE_UNUSED,
> +                               __bf16 f8 ATTRIBUTE_UNUSED,
> +                               __bf16 f9 ATTRIBUTE_UNUSED,
> +                               __bf16 f10 ATTRIBUTE_UNUSED,
> +                               __bf16 f11 ATTRIBUTE_UNUSED,
> +                               __bf16 f12 ATTRIBUTE_UNUSED,
> +                               __bf16 f13 ATTRIBUTE_UNUSED,
> +                               __bf16 f14 ATTRIBUTE_UNUSED,
> +                               __bf16 f15 ATTRIBUTE_UNUSED,
> +                               __bf16 f16 ATTRIBUTE_UNUSED,
> +                               __bf16 f17 ATTRIBUTE_UNUSED,
> +                               __bf16 f18 ATTRIBUTE_UNUSED,
> +                               __bf16 f19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_bf16_arguments;
> +}
> +
> +#define def_check_bf16_passing8(_f0, _f1, _f2, _f3, _f4, _f5, _f6,\
> +                                  _f7, _func1, _func2, TYPE) \
> +  values_ ## TYPE .f0 = _f0; \
> +  values_ ## TYPE .f1 = _f1; \
> +  values_ ## TYPE .f2 = _f2; \
> +  values_ ## TYPE .f3 = _f3; \
> +  values_ ## TYPE .f4 = _f4; \
> +  values_ ## TYPE .f5 = _f5; \
> +  values_ ## TYPE .f6 = _f6; \
> +  values_ ## TYPE .f7 = _f7; \
> +  WRAP_CALL(_func1) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7); \
> +  clear_float_registers; \
> +  fregs.F0._ ## TYPE [0] = _f0; \
> +  fregs.F1._ ## TYPE [0] = _f1; \
> +  fregs.F2._ ## TYPE [0] = _f2; \
> +  fregs.F3._ ## TYPE [0] = _f3; \
> +  fregs.F4._ ## TYPE [0] = _f4; \
> +  fregs.F5._ ## TYPE [0] = _f5; \
> +  fregs.F6._ ## TYPE [0] = _f6; \
> +  fregs.F7._ ## TYPE [0] = _f7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7);
> +
> +#define def_check_bf16_passing16(_f0, _f1, _f2, _f3, _f4, _f5, _f6, \
> +                                   _f7, _f8, _f9, _f10, _f11, _f12, _f13, \
> +                                   _f14, _f15, _func1, _func2, TYPE) \
> +  values_ ## TYPE .f0 = _f0; \
> +  values_ ## TYPE .f1 = _f1; \
> +  values_ ## TYPE .f2 = _f2; \
> +  values_ ## TYPE .f3 = _f3; \
> +  values_ ## TYPE .f4 = _f4; \
> +  values_ ## TYPE .f5 = _f5; \
> +  values_ ## TYPE .f6 = _f6; \
> +  values_ ## TYPE .f7 = _f7; \
> +  values_ ## TYPE .f8 = _f8; \
> +  values_ ## TYPE .f9 = _f9; \
> +  values_ ## TYPE .f10 = _f10; \
> +  values_ ## TYPE .f11 = _f11; \
> +  values_ ## TYPE .f12 = _f12; \
> +  values_ ## TYPE .f13 = _f13; \
> +  values_ ## TYPE .f14 = _f14; \
> +  values_ ## TYPE .f15 = _f15; \
> +  WRAP_CALL(_func1) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, _f9, \
> +                    _f10, _f11, _f12, _f13, _f14, _f15); \
> +  clear_float_registers; \
> +  fregs.F0._ ## TYPE [0] = _f0; \
> +  fregs.F1._ ## TYPE [0] = _f1; \
> +  fregs.F2._ ## TYPE [0] = _f2; \
> +  fregs.F3._ ## TYPE [0] = _f3; \
> +  fregs.F4._ ## TYPE [0] = _f4; \
> +  fregs.F5._ ## TYPE [0] = _f5; \
> +  fregs.F6._ ## TYPE [0] = _f6; \
> +  fregs.F7._ ## TYPE [0] = _f7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, _f9, \
> +                    _f10, _f11, _f12, _f13, _f14, _f15);
> +
> +#define def_check_bf16_passing20(_f0, _f1, _f2, _f3, _f4, _f5, _f6, \
> +                                   _f7, _f8, _f9, _f10, _f11, _f12, \
> +                                   _f13, _f14, _f15, _f16, _f17, \
> +                                   _f18, _f19, _func1, _func2, TYPE) \
> +  values_ ## TYPE .f0 = _f0; \
> +  values_ ## TYPE .f1 = _f1; \
> +  values_ ## TYPE .f2 = _f2; \
> +  values_ ## TYPE .f3 = _f3; \
> +  values_ ## TYPE .f4 = _f4; \
> +  values_ ## TYPE .f5 = _f5; \
> +  values_ ## TYPE .f6 = _f6; \
> +  values_ ## TYPE .f7 = _f7; \
> +  values_ ## TYPE .f8 = _f8; \
> +  values_ ## TYPE .f9 = _f9; \
> +  values_ ## TYPE .f10 = _f10; \
> +  values_ ## TYPE .f11 = _f11; \
> +  values_ ## TYPE .f12 = _f12; \
> +  values_ ## TYPE .f13 = _f13; \
> +  values_ ## TYPE .f14 = _f14; \
> +  values_ ## TYPE .f15 = _f15; \
> +  values_ ## TYPE .f16 = _f16; \
> +  values_ ## TYPE .f17 = _f17; \
> +  values_ ## TYPE .f18 = _f18; \
> +  values_ ## TYPE .f19 = _f19; \
> +  WRAP_CALL(_func1) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, \
> +                    _f9, _f10, _f11, _f12, _f13, _f14, _f15, _f16, \
> +                    _f17, _f18, _f19); \
> +  clear_float_registers; \
> +  fregs.F0._ ## TYPE [0] = _f0; \
> +  fregs.F1._ ## TYPE [0] = _f1; \
> +  fregs.F2._ ## TYPE [0] = _f2; \
> +  fregs.F3._ ## TYPE [0] = _f3; \
> +  fregs.F4._ ## TYPE [0] = _f4; \
> +  fregs.F5._ ## TYPE [0] = _f5; \
> +  fregs.F6._ ## TYPE [0] = _f6; \
> +  fregs.F7._ ## TYPE [0] = _f7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, _f9, \
> +                    _f10, _f11, _f12, _f13, _f14, _f15, _f16, _f17, \
> +                    _f18, _f19);
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10,
> +               bf11,bf12,bf13,bf14,bf15,bf16,bf17,bf18,bf19,bf20;
> +
> +void
> +test_bf16_on_stack ()
> +{
> +  def_check_bf16_passing8 (bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                          fun_check_bf16_passing_8_values,
> +                          fun_check_bf16_passing_8_regs, __bf16);
> +
> +  def_check_bf16_passing16 (bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +                           bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
> +                           fun_check_bf16_passing_16_values,
> +                           fun_check_bf16_passing_16_regs, __bf16);
> +}
> +
> +void
> +test_too_many_bf16 ()
> +{
> +  def_check_bf16_passing20 (bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10,
> +                           bf11,bf12,bf13,bf14,bf15,bf16,bf17,bf18,bf19,bf20,
> +                           fun_check_bf16_passing_20_values,
> +                           fun_check_bf16_passing_20_regs, __bf16);
> +}
> +
> +static void
> +do_test (void)
> +{
> +  test_bf16_on_stack ();
> +  test_too_many_bf16 ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c
> new file mode 100644
> index 00000000000..298b644e93d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c
> @@ -0,0 +1,238 @@
> +#include <stdio.h>
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  XMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
> +    i16, i17, i18, i19, i20, i21, i22, i23;
> +} values;
> +
> +char *pass;
> +int failed = 0;
> +
> +#undef assert
> +#define assert(c) do { \
> +  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
> +} while (0)
> +
> +#define compare(X1,X2,T) do { \
> +  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
> +} while (0)
> +
> +void
> +fun_check_passing_m128bf16_8_values (__m128bf16 i0 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i1 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i2 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i3 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i4 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i5 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i6 ATTRIBUTE_UNUSED,
> +                                    __m128bf16 i7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  compare (values.i0, i0, __m128bf16);
> +  compare (values.i1, i1, __m128bf16);
> +  compare (values.i2, i2, __m128bf16);
> +  compare (values.i3, i3, __m128bf16);
> +  compare (values.i4, i4, __m128bf16);
> +  compare (values.i5, i5, __m128bf16);
> +  compare (values.i6, i6, __m128bf16);
> +  compare (values.i7, i7, __m128bf16);
> +}
> +
> +void
> +fun_check_passing_m128bf16_8_regs (__m128bf16 i0 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i1 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i2 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i3 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i4 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i5 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i6 ATTRIBUTE_UNUSED,
> +                                  __m128bf16 i7 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m128_arguments;
> +}
> +
> +void
> +fun_check_passing_m128bf16_20_values (__m128bf16 i0 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i1 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i2 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i3 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i4 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i5 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i6 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i7 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i8 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i9 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i10 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i11 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i12 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i13 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i14 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i15 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i16 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i17 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i18 ATTRIBUTE_UNUSED,
> +                                     __m128bf16 i19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check argument values.  */
> +  compare (values.i0, i0, __m128bf16);
> +  compare (values.i1, i1, __m128bf16);
> +  compare (values.i2, i2, __m128bf16);
> +  compare (values.i3, i3, __m128bf16);
> +  compare (values.i4, i4, __m128bf16);
> +  compare (values.i5, i5, __m128bf16);
> +  compare (values.i6, i6, __m128bf16);
> +  compare (values.i7, i7, __m128bf16);
> +  compare (values.i8, i8, __m128bf16);
> +  compare (values.i9, i9, __m128bf16);
> +  compare (values.i10, i10, __m128bf16);
> +  compare (values.i11, i11, __m128bf16);
> +  compare (values.i12, i12, __m128bf16);
> +  compare (values.i13, i13, __m128bf16);
> +  compare (values.i14, i14, __m128bf16);
> +  compare (values.i15, i15, __m128bf16);
> +  compare (values.i16, i16, __m128bf16);
> +  compare (values.i17, i17, __m128bf16);
> +  compare (values.i18, i18, __m128bf16);
> +  compare (values.i19, i19, __m128bf16);
> +}
> +
> +void
> +fun_check_passing_m128bf16_20_regs (__m128bf16 i0 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i1 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i2 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i3 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i4 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i5 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i6 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i7 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i8 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i9 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i10 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i11 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i12 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i13 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i14 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i15 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i16 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i17 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i18 ATTRIBUTE_UNUSED,
> +                                   __m128bf16 i19 ATTRIBUTE_UNUSED)
> +{
> +  /* Check register contents.  */
> +  check_m128_arguments;
> +}
> +
> +#define def_check_int_passing8(_i0, _i1, _i2, _i3, \
> +                              _i4, _i5, _i6, _i7, \
> +                              _func1, _func2, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
> +  clear_float_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
> +
> +#define def_check_int_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, \
> +                               _i7, _i8, _i9, _i10, _i11, _i12, _i13, \
> +                               _i14, _i15, _i16, _i17, _i18, _i19, \
> +                               _func1, _func2, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  values.i8.TYPE[0] = _i8; \
> +  values.i9.TYPE[0] = _i9; \
> +  values.i10.TYPE[0] = _i10; \
> +  values.i11.TYPE[0] = _i11; \
> +  values.i12.TYPE[0] = _i12; \
> +  values.i13.TYPE[0] = _i13; \
> +  values.i14.TYPE[0] = _i14; \
> +  values.i15.TYPE[0] = _i15; \
> +  values.i16.TYPE[0] = _i16; \
> +  values.i17.TYPE[0] = _i17; \
> +  values.i18.TYPE[0] = _i18; \
> +  values.i19.TYPE[0] = _i19; \
> +  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
> +                    _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, \
> +                    _i17, _i18, _i19); \
> +  clear_float_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  num_fregs = 8; \
> +  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
> +                    _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, \
> +                    _i17, _i18, _i19);
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
> +
> +void
> +test_m128bf16_on_stack ()
> +{
> +  __m128bf16 x[8];
> +  int i;
> +  for (i = 0; i < 8; i++)
> +    x[i] = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +  pass = "m128bf16-8";
> +  def_check_int_passing8 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
> +                         fun_check_passing_m128bf16_8_values,
> +                         fun_check_passing_m128bf16_8_regs, _m128bf16);
> +}
> +
> +void
> +test_too_many_m128bf16 ()
> +{
> +  __m128bf16 x[20];
> +  int i;
> +  for (i = 0; i < 20; i++)
> +    x[i] = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +  pass = "m128bf16-20";
> +  def_check_int_passing20 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
> +                          x[8], x[9], x[10], x[11], x[12], x[13], x[14],
> +                          x[15], x[16], x[17], x[18], x[19],
> +                          fun_check_passing_m128bf16_20_values,
> +                          fun_check_passing_m128bf16_20_regs, _m128bf16);
> +}
> +
> +static void
> +do_test (void)
> +{
> +  test_m128bf16_on_stack ();
> +  test_too_many_m128bf16 ();
> +  if (failed)
> +    abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c
> new file mode 100644
> index 00000000000..8d966005741
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c
> @@ -0,0 +1,67 @@
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +struct m128bf16_struct
> +{
> +  __m128bf16 x;
> +};
> +
> +struct m128bf16_2_struct
> +{
> +  __m128bf16 x1, x2;
> +};
> +
> +/* Check that the struct is passed as the individual members in fregs.  */
> +void
> +check_struct_passing1bf16 (struct m128bf16_struct ms1 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms2 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms3 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms4 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms5 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms6 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms7 ATTRIBUTE_UNUSED,
> +                          struct m128bf16_struct ms8 ATTRIBUTE_UNUSED)
> +{
> +  check_m128_arguments;
> +}
> +
> +void
> +check_struct_passing2bf16 (struct m128bf16_2_struct ms ATTRIBUTE_UNUSED)
> +{
> +  /* Check the passing on the stack by comparing the address of the
> +     stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&ms.x1 == rsp+8);
> +  assert ((unsigned long)&ms.x2 == rsp+24);
> +}
> +
> +volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
> +               bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
> +
> +static void
> +do_test (void)
> +{
> +  struct m128bf16_struct m128bf16s [8];
> +  struct m128bf16_2_struct m128bf16_2s = {
> +    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 },
> +    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 },
> +  };
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      m128bf16s[i].x = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +    }
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    (&fregs.xmm0)[i]._m128bf16[0] = m128bf16s[i].x;
> +  num_fregs = 8;
> +  WRAP_CALL (check_struct_passing1bf16) (m128bf16s[0], m128bf16s[1], m128bf16s[2], m128bf16s[3],
> +                                        m128bf16s[4], m128bf16s[5], m128bf16s[6], m128bf16s[7]);
> +  WRAP_CALL (check_struct_passing2bf16) (m128bf16_2s);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c
> new file mode 100644
> index 00000000000..83e4380512b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c
> @@ -0,0 +1,160 @@
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +unsigned int num_fregs, num_iregs;
> +
> +union un1b
> +{
> +  __m128bf16 x;
> +  float f;
> +};
> +
> +union un1bb
> +{
> +  __m128bf16 x;
> +  __bf16 f;
> +};
> +
> +union un2b
> +{
> +  __m128bf16 x;
> +  double d;
> +};
> +
> +union un3b
> +{
> +  __m128bf16 x;
> +  __m128 v;
> +};
> +
> +union un4b
> +{
> +  __m128bf16 x;
> +  long double ld;
> +};
> +
> +void
> +check_union_passing1b (union un1b u1 ATTRIBUTE_UNUSED,
> +                      union un1b u2 ATTRIBUTE_UNUSED,
> +                      union un1b u3 ATTRIBUTE_UNUSED,
> +                      union un1b u4 ATTRIBUTE_UNUSED,
> +                      union un1b u5 ATTRIBUTE_UNUSED,
> +                      union un1b u6 ATTRIBUTE_UNUSED,
> +                      union un1b u7 ATTRIBUTE_UNUSED,
> +                      union un1b u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m128_arguments;
> +}
> +
> +void
> +check_union_passing1bb (union un1bb u1 ATTRIBUTE_UNUSED,
> +                       union un1bb u2 ATTRIBUTE_UNUSED,
> +                       union un1bb u3 ATTRIBUTE_UNUSED,
> +                       union un1bb u4 ATTRIBUTE_UNUSED,
> +                       union un1bb u5 ATTRIBUTE_UNUSED,
> +                       union un1bb u6 ATTRIBUTE_UNUSED,
> +                       union un1bb u7 ATTRIBUTE_UNUSED,
> +                       union un1bb u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m128_arguments;
> +}
> +
> +void
> +check_union_passing2b (union un2b u1 ATTRIBUTE_UNUSED,
> +                      union un2b u2 ATTRIBUTE_UNUSED,
> +                      union un2b u3 ATTRIBUTE_UNUSED,
> +                      union un2b u4 ATTRIBUTE_UNUSED,
> +                      union un2b u5 ATTRIBUTE_UNUSED,
> +                      union un2b u6 ATTRIBUTE_UNUSED,
> +                      union un2b u7 ATTRIBUTE_UNUSED,
> +                      union un2b u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m128_arguments;
> +}
> +
> +void
> +check_union_passing3b (union un3b u1 ATTRIBUTE_UNUSED,
> +                      union un3b u2 ATTRIBUTE_UNUSED,
> +                      union un3b u3 ATTRIBUTE_UNUSED,
> +                      union un3b u4 ATTRIBUTE_UNUSED,
> +                      union un3b u5 ATTRIBUTE_UNUSED,
> +                      union un3b u6 ATTRIBUTE_UNUSED,
> +                      union un3b u7 ATTRIBUTE_UNUSED,
> +                      union un3b u8 ATTRIBUTE_UNUSED)
> +{
> +  check_m128_arguments;
> +}
> +
> +void
> +check_union_passing4b (union un4b u ATTRIBUTE_UNUSED)
> +{
> +   /* Check the passing on the stack by comparing the address of the
> +      stack elements to the expected place on the stack.  */
> +  assert ((unsigned long)&u.x == rsp+8);
> +  assert ((unsigned long)&u.ld == rsp+8);
> +}
> +
> +#define check_union_passing1b WRAP_CALL(check_union_passing1b)
> +#define check_union_passing1bb WRAP_CALL(check_union_passing1bb)
> +#define check_union_passing2b WRAP_CALL(check_union_passing2b)
> +#define check_union_passing3b WRAP_CALL(check_union_passing3b)
> +#define check_union_passing4b WRAP_CALL(check_union_passing4b)
> +
> +static void
> +do_test (void)
> +{
> +  union un1b u1b[8];
> +  union un1bb u1bb[8];
> +  union un2b u2b[8];
> +  union un3b u3b[8];
> +  union un4b u4b;
> +  int i;
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      u1b[i].x = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +    }
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    (&fregs.xmm0)[i]._m128bf16[0] = u1b[i].x;
> +  num_fregs = 8;
> +  check_union_passing1b (u1b[0], u1b[1], u1b[2], u1b[3],
> +                        u1b[4], u1b[5], u1b[6], u1b[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u1bb[i].x = u1b[i].x;
> +      (&fregs.xmm0)[i]._m128bf16[0] = u1bb[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing1bb (u1bb[0], u1bb[1], u1bb[2], u1bb[3],
> +                         u1bb[4], u1bb[5], u1bb[6], u1bb[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u2b[i].x = u1b[i].x;
> +      (&fregs.xmm0)[i]._m128bf16[0] = u2b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing2b (u2b[0], u2b[1], u2b[2], u2b[3],
> +                        u2b[4], u2b[5], u2b[6], u2b[7]);
> +
> +  clear_struct_registers;
> +  for (i = 0; i < 8; i++)
> +    {
> +      u3b[i].x = u1b[i].x;
> +      (&fregs.xmm0)[i]._m128bf16[0] = u3b[i].x;
> +    }
> +  num_fregs = 8;
> +  check_union_passing3b (u3b[0], u3b[1], u3b[2], u3b[3],
> +                        u3b[4], u3b[5], u3b[6], u3b[7]);
> +
> +  check_union_passing4b (u4b);
> +}
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c
> new file mode 100644
> index 00000000000..757ccc26b79
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c
> @@ -0,0 +1,176 @@
> +/* This tests returning of structures.  */
> +
> +#include <stdio.h>
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +#include "args.h"
> +
> +struct IntegerRegisters iregs;
> +struct FloatRegisters fregs;
> +unsigned int num_iregs, num_fregs;
> +
> +int current_test;
> +int num_failed = 0;
> +
> +#undef assert
> +#define assert(test) do { if (!(test)) {fprintf (stderr, "failed in test %d\n", current_test); num_failed++; } } while (0)
> +
> +#define xmm0b xmm_regs[0].___bf16
> +#define xmm1b xmm_regs[1].___bf16
> +#define xmm0f xmm_regs[0]._float
> +#define xmm0d xmm_regs[0]._double
> +#define xmm1f xmm_regs[1]._float
> +#define xmm1d xmm_regs[1]._double
> +
> +typedef enum {
> +  SSE_B = 0,
> +  SSE_D,
> +  MEM,
> +  INT_SSE,
> +  SSE_INT,
> +  SSE_F_H,
> +  SSE_F_H8
> +} Type;
> +
> +/* Structures which should be returned in SSE.  */
> +#define D(I,MEMBERS,C,B) struct S_ ## I { MEMBERS ; }; Type class_ ## I = C; \
> +struct S_ ## I f_ ## I (void) { struct S_ ## I s; memset (&s, 0, sizeof(s)); B; return s; }
> +
> +D(120,__bf16 f,SSE_B, s.f=make_f32_bf16(42.0f))
> +D(121,__bf16 f;__bf16 f2,SSE_B, s.f=make_f32_bf16(42.0f))
> +D(122,__bf16 f;float d,SSE_B, s.f=make_f32_bf16(42.0f))
> +D(123,__bf16 f;double d,SSE_B, s.f=make_f32_bf16(42.0f))
> +D(124,double d; __bf16 f,SSE_D, s.d=42)
> +D(125,__bf16 f[2],SSE_B, s.f[0]=make_f32_bf16(42.0f))
> +D(126,__bf16 f[3],SSE_B, s.f[0]=make_f32_bf16(42.0f))
> +D(127,__bf16 f[4],SSE_B, s.f[0]=make_f32_bf16(42.0f))
> +D(128,__bf16 f[2]; double d,SSE_B, s.f[0]=make_f32_bf16(42.0f))
> +D(129,double d;__bf16 f[2],SSE_D, s.d=42)
> +
> +#undef D
> +
> +#define D(I,MEMBERS) struct S_ ## I { MEMBERS ; }; Type class_ ## I = INT_SSE; \
> +struct S_ ## I f_ ## I (void) { struct S_ ## I s = { 42, make_f32_bf16(43.0f) }; return s; }
> +
> +D(310,char m1; __bf16 m2)
> +D(311,short m1; __bf16 m2)
> +D(312,int m1; __bf16 m2)
> +D(313,long long m1; __bf16 m2)
> +
> +#undef D
> +
> +void check_300 (void)
> +{
> +  XMM_T x;
> +  x._ulonglong[0] = rax;
> +  switch (current_test) {
> +    case 310: assert ((rax & 0xff) == 42
> +                     && check_bf16_float (x.___bf16[1], 43.0f) == 1); break;
> +    case 311: assert ((rax & 0xffff) == 42
> +                     && check_bf16_float (x.___bf16[1], 43.0f) == 1); break;
> +    case 312: assert ((rax & 0xffffffff) == 42
> +                     && check_bf16_float (x.___bf16[2], 43.0f) == 1); break;
> +    case 313: assert (rax == 42
> +                     && check_bf16_float (xmm0b[0], 43.0f) == 1); break;
> +
> +    default: assert (0); break;
> +  }
> +}
> +
> +/* Structures which should be returned in SSE (low) and INT (high).  */
> +#define D(I,MEMBERS,B) struct S_ ## I { MEMBERS ; }; Type class_ ## I = SSE_INT; \
> +struct S_ ## I f_ ## I (void) { struct S_ ## I s; memset (&s, 0, sizeof(s));  B; return s; }
> +
> +D(402,__bf16 f[4];char c, s.f[0]=make_f32_bf16(42.0f); s.c=43)
> +
> +#undef D
> +
> +void check_400 (void)
> +{
> +  switch (current_test) {
> +    case 402: assert (check_bf16_float (xmm0b[0], 42.0f) == 1 && (rax & 0xff) == 43); break;
> +
> +    default: assert (0); break;
> +  }
> +}
> +
> +/* Structures which should be returned in MEM.  */
> +void *struct_addr;
> +#define D(I,MEMBERS) struct S_ ## I { MEMBERS ; }; Type class_ ## I = MEM; \
> +struct S_ ## I f_ ## I (void) { union {unsigned char c; struct S_ ## I s;} u; memset (&u.s, 0, sizeof(u.s)); u.c = 42; return u.s; }
> +
> +/* Unnaturally aligned members.  */
> +D(540,__bf16 m1[10])
> +D(541,char m1[1];__bf16 f[8])
> +
> +#undef D
> +
> +
> +/* Special tests.  */
> +#define D(I,MEMBERS,C,B) struct S_ ## I { MEMBERS ; }; Type class_ ## I = C; \
> +struct S_ ## I f_ ## I (void) { struct S_ ## I s; B; return s; }
> +D(601,__bf16 f[4], SSE_F_H, s.f[0] = s.f[1] = s.f[2] = s.f[3] = make_f32_bf16 (42.0f))
> +D(602,__bf16 f[8], SSE_F_H8,
> +  s.f[0] = s.f[1] = s.f[2] = s.f[3] = s.f[4] = s.f[5] = s.f[6] = s.f[7] = make_f32_bf16 (42.0f))
> +#undef D
> +
> +void clear_all (void)
> +{
> +  clear_int_registers;
> +}
> +
> +void check_all (Type class, unsigned long size)
> +{
> +  switch (class) {
> +    case SSE_B: assert (check_bf16_float (xmm0b[0], 42.0f) == 1); break;
> +    case SSE_D: assert (xmm0d[0] == 42); break;
> +    case SSE_F_H: assert (check_bf16_float (xmm0b[0], 42) == 1
> +                         && check_bf16_float (xmm0b[1], 42) == 1
> +                         && check_bf16_float (xmm0b[2], 42) == 1
> +                         && check_bf16_float (xmm0b[3], 42) == 1); break;
> +    case SSE_F_H8: assert (check_bf16_float (xmm0b[0], 42) == 1
> +                          && check_bf16_float (xmm0b[1], 42) == 1
> +                          && check_bf16_float (xmm0b[2], 42) == 1
> +                          && check_bf16_float (xmm0b[3], 42) == 1
> +                          && check_bf16_float (xmm1b[0], 42) == 1
> +                          && check_bf16_float (xmm1b[1], 42) == 1
> +                          && check_bf16_float (xmm1b[2], 42) == 1
> +                          && check_bf16_float (xmm1b[3], 42) == 1); break;
> +    case INT_SSE: check_300(); break;
> +    case SSE_INT: check_400(); break;
> +    /* Ideally we would like to check that rax == struct_addr.
> +       Unfortunately the address of the target struct escapes (for setting
> +       struct_addr), so the return struct is a temporary one whose address
> +       is given to the f_* functions, otherwise a conforming program
> +       could notice the struct changing already before the function returns.
> +       This temporary struct could be anywhere.  For GCC it will be on
> +       stack, but no one is forbidding that it could be a static variable
> +       if there's no threading or proper locking.  Nobody in his right mind
> +       will not use the stack for that.  */
> +    case MEM: assert (*(unsigned char*)struct_addr == 42 && rdi == rax); break;
> +  }
> +}
> +
> +#define D(I) { struct S_ ## I s; current_test = I; struct_addr = (void*)&s; \
> +  clear_all(); \
> +  s = WRAP_RET(f_ ## I) (); \
> +  check_all(class_ ## I, sizeof(s)); \
> +}
> +
> +static void
> +do_test (void)
> +{
> +  D(120) D(121) D(122) D(123) D(124) D(125) D(126) D(127) D(128) D(129)
> +
> +  D(310) D(311) D(312) D(313)
> +
> +  D(402)
> +
> +  D(540) D(541)
> +
> +  D(601) D(602)
> +  if (num_failed)
> +    abort ();
> +}
> +#undef D
> diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c
> new file mode 100644
> index 00000000000..4eea7eb7d3c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c
> @@ -0,0 +1,111 @@
> +/* Test variable number of 128-bit vector arguments passed to functions.  */
> +
> +#include <stdio.h>
> +#include "bf16-check.h"
> +#include "defines.h"
> +#include "macros.h"
> +#include "args.h"
> +
> +struct FloatRegisters fregs;
> +struct IntegerRegisters iregs;
> +
> +/* This struct holds values for argument checking.  */
> +struct
> +{
> +  XMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9;
> +} values;
> +
> +char *pass;
> +int failed = 0;
> +
> +#undef assert
> +#define assert(c) do { \
> +  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
> +} while (0)
> +
> +#define compare(X1,X2,T) do { \
> +  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
> +} while (0)
> +
> +void
> +fun_check_passing_m128bf16_varargs (__m128bf16 i0, __m128bf16 i1, __m128bf16 i2,
> +                                __m128bf16 i3, ...)
> +{
> +  /* Check argument values.  */
> +  void **fp = __builtin_frame_address (0);
> +  void *ra = __builtin_return_address (0);
> +  __m128bf16 *argp;
> +
> +  compare (values.i0, i0, __m128bf16);
> +  compare (values.i1, i1, __m128bf16);
> +  compare (values.i2, i2, __m128bf16);
> +  compare (values.i3, i3, __m128bf16);
> +
> +  /* Get the pointer to the return address on stack.  */
> +  while (*fp != ra)
> +    fp++;
> +
> +  /* Skip the return address stack slot.  */
> +  argp = (__m128bf16 *) (((char *) fp) + 8);
> +
> +  /* Check __m128bf16 arguments passed on stack.  */
> +  compare (values.i8, argp[0], __m128bf16);
> +  compare (values.i9, argp[1], __m128bf16);
> +
> +  /* Check register contents.  */
> +  compare (fregs.xmm0, xmm_regs[0], __m128bf16);
> +  compare (fregs.xmm1, xmm_regs[1], __m128bf16);
> +  compare (fregs.xmm2, xmm_regs[2], __m128bf16);
> +  compare (fregs.xmm3, xmm_regs[3], __m128bf16);
> +  compare (fregs.xmm4, xmm_regs[4], __m128bf16);
> +  compare (fregs.xmm5, xmm_regs[5], __m128bf16);
> +  compare (fregs.xmm6, xmm_regs[6], __m128bf16);
> +  compare (fregs.xmm7, xmm_regs[7], __m128bf16);
> +}
> +
> +#define def_check_int_passing_varargs(_i0, _i1, _i2, _i3, _i4, _i5, \
> +                                     _i6, _i7, _i8, _i9, \
> +                                     _func, TYPE) \
> +  values.i0.TYPE[0] = _i0; \
> +  values.i1.TYPE[0] = _i1; \
> +  values.i2.TYPE[0] = _i2; \
> +  values.i3.TYPE[0] = _i3; \
> +  values.i4.TYPE[0] = _i4; \
> +  values.i5.TYPE[0] = _i5; \
> +  values.i6.TYPE[0] = _i6; \
> +  values.i7.TYPE[0] = _i7; \
> +  values.i8.TYPE[0] = _i8; \
> +  values.i9.TYPE[0] = _i9; \
> +  clear_float_registers; \
> +  fregs.F0.TYPE[0] = _i0; \
> +  fregs.F1.TYPE[0] = _i1; \
> +  fregs.F2.TYPE[0] = _i2; \
> +  fregs.F3.TYPE[0] = _i3; \
> +  fregs.F4.TYPE[0] = _i4; \
> +  fregs.F5.TYPE[0] = _i5; \
> +  fregs.F6.TYPE[0] = _i6; \
> +  fregs.F7.TYPE[0] = _i7; \
> +  WRAP_CALL(_func) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9);
> +
> +void
> +test_m128bf16_varargs (void)
> +{
> +  __m128bf16 x[10];
> +  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
> +  int i;
> +  for (i = 0; i < 10; i++)
> +    x[i] = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
> +  pass = "m128bf16-varargs";
> +  def_check_int_passing_varargs (x[0], x[1], x[2], x[3], x[4], x[5],
> +                                x[6], x[7], x[8], x[9],
> +                                fun_check_passing_m128bf16_varargs,
> +                                _m128bf16);
> +}
> +
> +static void
> +do_test (void)
> +{
> +  test_m128bf16_varargs ();
> +  if (failed)
> +    abort ();
> +}
> --
> 2.18.1
>
  
H.J. Lu Aug. 19, 2022, 5:30 p.m. UTC | #2
On Thu, Aug 18, 2022 at 5:56 PM Hongtao Liu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Aug 18, 2022 at 3:36 PM Haochen Jiang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi all,
> >
> > This patch aims to add bf16 abi test after the whole __bf16 type is added.
> >
> > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> Ok.

All BF16 ABI tests failed due to missing __m128bf16/__m256bf16/__m512bf16.
When will __bf16 types be added?

> >
> > BRs,
> > Haochen
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/x86_64/abi/bf16/abi-bf16.exp: New test.
> >         * gcc.target/x86_64/abi/bf16/args.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/asm-support.S: Ditto.
> >         * gcc.target/x86_64/abi/bf16/bf16-check.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/bf16-helper.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/defines.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/macros.h: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_basic_alignment.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_basic_returning.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_basic_sizes.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_m128_returning.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_passing_floats.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_passing_m128.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_passing_structs.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_passing_unions.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_struct_returning.c: Ditto.
> >         * gcc.target/x86_64/abi/bf16/test_varargs-m128.c: Ditto.
  
Hongtao Liu Aug. 22, 2022, 1:02 a.m. UTC | #3
On Sat, Aug 20, 2022 at 1:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Aug 18, 2022 at 5:56 PM Hongtao Liu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Thu, Aug 18, 2022 at 3:36 PM Haochen Jiang via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Hi all,
> > >
> > > This patch aims to add bf16 abi test after the whole __bf16 type is added.
> > >
> > > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> > Ok.
>
> All BF16 ABI tests failed due to missing __m128bf16/__m256bf16/__m512bf16.
> When will __bf16 types be added?
It should be already in the trunk.
>
> > >
> > > BRs,
> > > Haochen
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * gcc.target/x86_64/abi/bf16/abi-bf16.exp: New test.
> > >         * gcc.target/x86_64/abi/bf16/args.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/asm-support.S: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/bf16-check.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/bf16-helper.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/defines.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/macros.h: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_basic_alignment.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_basic_returning.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_basic_sizes.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_m128_returning.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_passing_floats.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_passing_m128.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_passing_structs.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_passing_unions.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_struct_returning.c: Ditto.
> > >         * gcc.target/x86_64/abi/bf16/test_varargs-m128.c: Ditto.
>
>
>
> --
> H.J.
  
Hongtao Liu Aug. 22, 2022, 1:04 a.m. UTC | #4
On Mon, Aug 22, 2022 at 9:02 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Sat, Aug 20, 2022 at 1:31 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Aug 18, 2022 at 5:56 PM Hongtao Liu via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Thu, Aug 18, 2022 at 3:36 PM Haochen Jiang via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > Hi all,
> > > >
> > > > This patch aims to add bf16 abi test after the whole __bf16 type is added.
> > > >
> > > > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> > > Ok.
> >
> > All BF16 ABI tests failed due to missing __m128bf16/__m256bf16/__m512bf16.
> > When will __bf16 types be added?
> It should be already in the trunk.
Oh, __m128bf16/__m256bf16/__m512bf16 is not added to the trunk.
> >
> > > >
> > > > BRs,
> > > > Haochen
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         * gcc.target/x86_64/abi/bf16/abi-bf16.exp: New test.
> > > >         * gcc.target/x86_64/abi/bf16/args.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/asm-support.S: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/bf16-check.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/bf16-helper.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/defines.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/macros.h: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_basic_alignment.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_basic_returning.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_basic_sizes.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_m128_returning.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_passing_floats.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_passing_m128.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_passing_structs.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_passing_unions.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_struct_returning.c: Ditto.
> > > >         * gcc.target/x86_64/abi/bf16/test_varargs-m128.c: Ditto.
> >
> >
> >
> > --
> > H.J.
>
>
>
> --
> BR,
> Hongtao
  

Patch

diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
new file mode 100644
index 00000000000..bd386f2a560
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp
@@ -0,0 +1,46 @@ 
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# The x86-64 ABI testsuite needs one additional assembler file for most
+# testcases.  For simplicity we will just link it into each test.
+
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+load_lib clearcap.exp
+
+if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
+     || ![is-effective-target lp64]
+     || ![is-effective-target sse2] } then {
+  return
+}
+
+
+torture-init
+clearcap-init
+set-torture-options $C_TORTURE_OPTIONS
+set additional_flags "-W -Wall -msse2"
+
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
+    if {[runtest_file_p $runtests $src]} {
+        c-torture-execute [list $src \
+                                $srcdir/$subdir/asm-support.S] \
+                                $additional_flags
+    }
+}
+
+clearcap-finish
+torture-finish
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
new file mode 100644
index 00000000000..11d7e2b3a1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h
@@ -0,0 +1,164 @@ 
+#ifndef INCLUDED_ARGS_H
+#define INCLUDED_ARGS_H
+
+#include <string.h>
+
+/* This defines the calling sequences for integers and floats.  */
+#define I0 rdi
+#define I1 rsi
+#define I2 rdx
+#define I3 rcx
+#define I4 r8
+#define I5 r9
+#define F0 xmm0
+#define F1 xmm1
+#define F2 xmm2
+#define F3 xmm3
+#define F4 xmm4
+#define F5 xmm5
+#define F6 xmm6
+#define F7 xmm7
+
+typedef union {
+  __bf16 ___bf16[8];
+  float _float[4];
+  double _double[2];
+  long long _longlong[2];
+  int _int[4];
+  ulonglong _ulonglong[2];
+#ifdef CHECK_M64_M128
+  __m64 _m64[2];
+  __m128 _m128[1];
+  __m128bf16 _m128bf16[1];
+#endif
+} XMM_T;
+
+typedef union {
+  __bf16 ___bf16;
+  float _float;
+  double _double;
+  ldouble _ldouble;
+  ulonglong _ulonglong[2];
+} X87_T;
+extern void (*callthis)(void);
+extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
+XMM_T xmm_regs[16];
+X87_T x87_regs[8];
+extern volatile unsigned long long volatile_var;
+extern void snapshot (void);
+extern void snapshot_ret (void);
+#define WRAP_CALL(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
+#define WRAP_RET(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
+
+/* Clear all integer registers.  */
+#define clear_int_hardware_registers \
+  asm __volatile__ ("xor %%rax, %%rax\n\t" \
+		    "xor %%rbx, %%rbx\n\t" \
+		    "xor %%rcx, %%rcx\n\t" \
+		    "xor %%rdx, %%rdx\n\t" \
+		    "xor %%rsi, %%rsi\n\t" \
+		    "xor %%rdi, %%rdi\n\t" \
+		    "xor %%r8, %%r8\n\t" \
+		    "xor %%r9, %%r9\n\t" \
+		    "xor %%r10, %%r10\n\t" \
+		    "xor %%r11, %%r11\n\t" \
+		    "xor %%r12, %%r12\n\t" \
+		    "xor %%r13, %%r13\n\t" \
+		    "xor %%r14, %%r14\n\t" \
+		    "xor %%r15, %%r15\n\t" \
+		    ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
+		    "r9", "r10", "r11", "r12", "r13", "r14", "r15");
+
+/* This is the list of registers available for passing arguments. Not all of
+   these are used or even really available.  */
+struct IntegerRegisters
+{
+  unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
+};
+struct FloatRegisters
+{
+  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
+  ldouble st0, st1, st2, st3, st4, st5, st6, st7;
+  XMM_T xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9,
+        xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
+};
+
+/* Implemented in scalarargs.c  */
+extern struct IntegerRegisters iregs;
+extern struct FloatRegisters fregs;
+extern unsigned int num_iregs, num_fregs;
+
+/* Clear register struct.  */
+#define clear_struct_registers \
+  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
+    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
+  memset (&iregs, 0, sizeof (iregs)); \
+  memset (&fregs, 0, sizeof (fregs)); \
+  memset (xmm_regs, 0, sizeof (xmm_regs)); \
+  memset (x87_regs, 0, sizeof (x87_regs));
+
+/* Clear both hardware and register structs for integers.  */
+#define clear_int_registers \
+  clear_struct_registers \
+  clear_int_hardware_registers
+
+/* Do the checking.  */
+#define check_f_arguments(T) do { \
+  assert (num_fregs <= 0 || check_bf16 (fregs.xmm0._ ## T [0], xmm_regs[0]._ ## T [0]) == 1); \
+  assert (num_fregs <= 1 || check_bf16 (fregs.xmm1._ ## T [0], xmm_regs[1]._ ## T [0]) == 1); \
+  assert (num_fregs <= 2 || check_bf16 (fregs.xmm2._ ## T [0], xmm_regs[2]._ ## T [0]) == 1); \
+  assert (num_fregs <= 3 || check_bf16 (fregs.xmm3._ ## T [0], xmm_regs[3]._ ## T [0]) == 1); \
+  assert (num_fregs <= 4 || check_bf16 (fregs.xmm4._ ## T [0], xmm_regs[4]._ ## T [0]) == 1); \
+  assert (num_fregs <= 5 || check_bf16 (fregs.xmm5._ ## T [0], xmm_regs[5]._ ## T [0]) == 1); \
+  assert (num_fregs <= 6 || check_bf16 (fregs.xmm6._ ## T [0], xmm_regs[6]._ ## T [0]) == 1); \
+  assert (num_fregs <= 7 || check_bf16 (fregs.xmm7._ ## T [0], xmm_regs[7]._ ## T [0]) == 1); \
+  } while (0)
+
+#define check_bf16_arguments check_f_arguments(__bf16)
+
+#define check_vector_arguments(T,O) do { \
+  assert (num_fregs <= 0 \
+	  || memcmp (((char *) &fregs.xmm0) + (O), \
+		     &xmm_regs[0], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 1 \
+	  || memcmp (((char *) &fregs.xmm1) + (O), \
+		     &xmm_regs[1], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 2 \
+	  || memcmp (((char *) &fregs.xmm2) + (O), \
+		     &xmm_regs[2], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 3 \
+	  || memcmp (((char *) &fregs.xmm3) + (O), \
+		     &xmm_regs[3], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 4 \
+	  || memcmp (((char *) &fregs.xmm4) + (O), \
+		     &xmm_regs[4], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 5 \
+	  || memcmp (((char *) &fregs.xmm5) + (O), \
+		     &xmm_regs[5], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 6 \
+	  || memcmp (((char *) &fregs.xmm6) + (O), \
+		     &xmm_regs[6], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 7 \
+	  || memcmp (((char *) &fregs.xmm7) + (O), \
+		     &xmm_regs[7], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  } while (0)
+
+#define check_m128_arguments check_vector_arguments(m128, 0)
+
+#define clear_float_registers \
+  clear_struct_registers
+
+#define clear_x87_registers \
+  clear_struct_registers
+
+#endif /* INCLUDED_ARGS_H  */
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
new file mode 100644
index 00000000000..a8165d86317
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S
@@ -0,0 +1,84 @@ 
+	.text
+	.p2align 4,,15
+.globl snapshot
+	.type	snapshot, @function
+snapshot:
+.LFB3:
+	movq	%rax, rax(%rip)
+	movq	%rbx, rbx(%rip)
+	movq	%rcx, rcx(%rip)
+	movq	%rdx, rdx(%rip)
+	movq	%rdi, rdi(%rip)
+	movq	%rsi, rsi(%rip)
+	movq	%rbp, rbp(%rip)
+	movq	%rsp, rsp(%rip)
+	movq	%r8, r8(%rip)
+	movq	%r9, r9(%rip)
+	movq	%r10, r10(%rip)
+	movq	%r11, r11(%rip)
+	movq	%r12, r12(%rip)
+	movq	%r13, r13(%rip)
+	movq	%r14, r14(%rip)
+	movq	%r15, r15(%rip)
+	vmovdqu	%xmm0, xmm_regs+0(%rip)
+	vmovdqu	%xmm1, xmm_regs+16(%rip)
+	vmovdqu	%xmm2, xmm_regs+32(%rip)
+	vmovdqu	%xmm3, xmm_regs+48(%rip)
+	vmovdqu	%xmm4, xmm_regs+64(%rip)
+	vmovdqu	%xmm5, xmm_regs+80(%rip)
+	vmovdqu	%xmm6, xmm_regs+96(%rip)
+	vmovdqu	%xmm7, xmm_regs+112(%rip)
+	vmovdqu	%xmm8, xmm_regs+128(%rip)
+	vmovdqu	%xmm9, xmm_regs+144(%rip)
+	vmovdqu	%xmm10, xmm_regs+160(%rip)
+	vmovdqu	%xmm11, xmm_regs+176(%rip)
+	vmovdqu	%xmm12, xmm_regs+192(%rip)
+	vmovdqu	%xmm13, xmm_regs+208(%rip)
+	vmovdqu	%xmm14, xmm_regs+224(%rip)
+	vmovdqu	%xmm15, xmm_regs+240(%rip)
+	jmp	*callthis(%rip)
+.LFE3:
+	.size	snapshot, .-snapshot
+
+	.p2align 4,,15
+.globl snapshot_ret
+	.type	snapshot_ret, @function
+snapshot_ret:
+	movq	%rdi, rdi(%rip)
+	subq	$8, %rsp
+	call	*callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, rax(%rip)
+	movq	%rdx, rdx(%rip)
+	vmovdqu	%xmm0, xmm_regs+0(%rip)
+	vmovdqu	%xmm1, xmm_regs+16(%rip)
+	fstpt	x87_regs(%rip)
+	fstpt	x87_regs+16(%rip)
+	fldt	x87_regs+16(%rip)
+	fldt	x87_regs(%rip)
+	ret
+	.size	snapshot_ret, .-snapshot_ret
+
+	.comm	callthis,8,8
+	.comm	rax,8,8
+	.comm	rbx,8,8
+	.comm	rcx,8,8
+	.comm	rdx,8,8
+	.comm	rsi,8,8
+	.comm	rdi,8,8
+	.comm	rsp,8,8
+	.comm	rbp,8,8
+	.comm	r8,8,8
+	.comm	r9,8,8
+	.comm	r10,8,8
+	.comm	r11,8,8
+	.comm	r12,8,8
+	.comm	r13,8,8
+	.comm	r14,8,8
+	.comm	r15,8,8
+	.comm	xmm_regs,256,32
+	.comm	x87_regs,128,32
+	.comm   volatile_var,8,8
+#ifdef __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h
new file mode 100644
index 00000000000..25448fc6863
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-check.h
@@ -0,0 +1,24 @@ 
+#include <stdlib.h>
+#include "bf16-helper.h"
+
+static void do_test (void);
+
+int
+main ()
+{
+
+  if (__builtin_cpu_supports ("sse2"))
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+      return 0;
+    }
+
+#ifdef DEBUG
+  printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
new file mode 100644
index 00000000000..83d89fcf62c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/bf16-helper.h
@@ -0,0 +1,41 @@ 
+typedef union
+{
+  float f;
+  unsigned int u;
+  __bf16 b[2];
+} unionf_b;
+
+static __bf16 make_f32_bf16 (float f)
+{
+  unionf_b tmp;
+  tmp.f = f;
+  return tmp.b[1];
+}
+
+static float make_bf16_f32 (__bf16 bf)
+{
+  unionf_b tmp;
+  tmp.u = 0;
+  tmp.b[1] = bf;
+  return tmp.f;
+}
+
+static int check_bf16 (__bf16 bf1, __bf16 bf2)
+{
+  unionf_b tmp1, tmp2;
+  tmp1.u = 0;
+  tmp2.u = 0;
+  tmp1.b[1] = bf1;
+  tmp2.b[1] = bf2;
+  return (tmp1.u == tmp2.u);
+}
+
+static int check_bf16_float (__bf16 bf, float f)
+{
+  unionf_b tmp1, tmp2;
+  tmp1.u = 0;
+  tmp1.b[0] = bf;
+  tmp2.f = f;
+  tmp2.u >>= 16;
+  return (tmp1.u == tmp2.u);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h
new file mode 100644
index 00000000000..a4df0b0528d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/defines.h
@@ -0,0 +1,163 @@ 
+#ifndef DEFINED_DEFINES_H
+#define DEFINED_DEFINES_H
+
+/* Get __m64 and __m128. */
+#include <immintrin.h>
+
+typedef unsigned long long ulonglong;
+typedef long double ldouble;
+
+/* These defines determines what part of the test should be run.  When
+   GCC implements these parts, the defines should be uncommented to
+   enable testing.  */
+
+/* Scalar type __int128.  */
+/* #define CHECK_INT128 */
+
+/* Scalar type long double.  */
+#define CHECK_LONG_DOUBLE
+
+/* Scalar type __float128.  */
+/* #define CHECK_FLOAT128 */
+
+/* Scalar types __m64 and __m128.  */
+#define CHECK_M64_M128
+
+/* Structs with size >= 16.  */
+#define CHECK_LARGER_STRUCTS
+
+/* Checks for passing floats and doubles.  */
+#define CHECK_FLOAT_DOUBLE_PASSING
+
+/* Union passing with not-extremely-simple unions.  */
+#define CHECK_LARGER_UNION_PASSING
+
+/* Variable args.  */
+#define CHECK_VARARGS
+
+/* Check argument passing and returning for scalar types with sizeof = 16.  */
+/* TODO: Implement these tests. Don't activate them for now.  */
+#define CHECK_LARGE_SCALAR_PASSING
+
+/* Defines for sizing and alignment.  */
+
+#define TYPE_SIZE_CHAR         1
+#define TYPE_SIZE_SHORT        2
+#define TYPE_SIZE_INT          4
+#ifdef __ILP32__
+# define TYPE_SIZE_LONG        4
+#else
+# define TYPE_SIZE_LONG        8
+#endif
+#define TYPE_SIZE_LONG_LONG    8
+#define TYPE_SIZE_INT128       16
+#define TYPE_SIZE_BF16	       2
+#define TYPE_SIZE_FLOAT        4
+#define TYPE_SIZE_DOUBLE       8
+#define TYPE_SIZE_LONG_DOUBLE  16
+#define TYPE_SIZE_FLOAT128     16
+#define TYPE_SIZE_M64          8
+#define TYPE_SIZE_M128         16
+#define TYPE_SIZE_ENUM         4
+#ifdef __ILP32__
+# define TYPE_SIZE_POINTER     4
+#else
+# define TYPE_SIZE_POINTER     8
+#endif
+
+#define TYPE_ALIGN_CHAR        1
+#define TYPE_ALIGN_SHORT       2
+#define TYPE_ALIGN_INT         4
+#ifdef __ILP32__
+# define TYPE_ALIGN_LONG       4
+#else
+# define TYPE_ALIGN_LONG       8
+#endif
+#define TYPE_ALIGN_LONG_LONG   8
+#define TYPE_ALIGN_INT128      16
+#define TYPE_ALIGN_BF16	       2
+#define TYPE_ALIGN_FLOAT       4
+#define TYPE_ALIGN_DOUBLE      8
+#define TYPE_ALIGN_LONG_DOUBLE 16
+#define TYPE_ALIGN_FLOAT128    16
+#define TYPE_ALIGN_M64         8
+#define TYPE_ALIGN_M128        16
+#define TYPE_ALIGN_ENUM        4
+#ifdef __ILP32__
+# define TYPE_ALIGN_POINTER    4
+#else
+# define TYPE_ALIGN_POINTER    8
+#endif
+
+/* These defines control the building of the list of types to check. There
+   is a string identifying the type (with a comma after), a size of the type
+   (also with a comma and an integer for adding to the total amount of types)
+   and an alignment of the type (which is currently not really needed since
+   the abi specifies that alignof == sizeof for all scalar types).  */
+#ifdef CHECK_INT128
+#define CI128_STR "__int128",
+#define CI128_SIZ TYPE_SIZE_INT128,
+#define CI128_ALI TYPE_ALIGN_INT128,
+#define CI128_RET "???",
+#else
+#define CI128_STR
+#define CI128_SIZ
+#define CI128_ALI
+#define CI128_RET
+#endif
+#ifdef CHECK_LONG_DOUBLE
+#define CLD_STR "long double",
+#define CLD_SIZ TYPE_SIZE_LONG_DOUBLE,
+#define CLD_ALI TYPE_ALIGN_LONG_DOUBLE,
+#define CLD_RET "x87_regs[0]._ldouble",
+#else
+#define CLD_STR
+#define CLD_SIZ
+#define CLD_ALI
+#define CLD_RET
+#endif
+#ifdef CHECK_FLOAT128
+#define CF128_STR "__float128",
+#define CF128_SIZ TYPE_SIZE_FLOAT128,
+#define CF128_ALI TYPE_ALIGN_FLOAT128, 
+#define CF128_RET "???",
+#else
+#define CF128_STR
+#define CF128_SIZ
+#define CF128_ALI
+#define CF128_RET
+#endif
+#ifdef CHECK_M64_M128
+#define CMM_STR "__m64", "__m128",
+#define CMM_SIZ TYPE_SIZE_M64, TYPE_SIZE_M128,
+#define CMM_ALI TYPE_ALIGN_M64, TYPE_ALIGN_M128,
+#define CMM_RET "???", "???",
+#else
+#define CMM_STR
+#define CMM_SIZ
+#define CMM_ALI
+#define CMM_RET
+#endif
+
+/* Used in size and alignment tests.  */
+enum dummytype { enumtype };
+
+extern void abort (void);
+
+/* Assertion macro.  */
+#define assert(test) if (!(test)) abort()
+
+#ifdef __GNUC__
+#define ATTRIBUTE_UNUSED __attribute__((__unused__))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+
+#ifdef __GNUC__
+#define PACKED __attribute__((__packed__))
+#else
+#warning Some tests will fail due to missing __packed__ support
+#define PACKED
+#endif
+
+#endif /* DEFINED_DEFINES_H */
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
new file mode 100644
index 00000000000..309db8ff12e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp
@@ -0,0 +1,46 @@ 
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# The x86-64 ABI testsuite needs one additional assembler file for most
+# testcases.  For simplicity we will just link it into each test.
+
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+load_lib clearcap.exp
+
+if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
+     || ![is-effective-target lp64]
+     || ![is-effective-target avx2] } then {
+  return
+}
+
+
+torture-init
+clearcap-init
+set-torture-options $C_TORTURE_OPTIONS
+set additional_flags "-W -Wall -mavx2"
+
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
+    if {[runtest_file_p $runtests $src]} {
+        c-torture-execute [list $src \
+                                $srcdir/$subdir/asm-support.S] \
+                                $additional_flags
+    }
+}
+
+clearcap-finish
+torture-finish
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
new file mode 100644
index 00000000000..94627ffbd44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h
@@ -0,0 +1,152 @@ 
+#ifndef INCLUDED_ARGS_H
+#define INCLUDED_ARGS_H
+
+#include <immintrin.h>
+#include <string.h>
+
+/* Assertion macro.  */
+#define assert(test) if (!(test)) abort()
+
+#ifdef __GNUC__
+#define ATTRIBUTE_UNUSED __attribute__((__unused__))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+
+/* This defines the calling sequences for integers and floats.  */
+#define I0 rdi
+#define I1 rsi
+#define I2 rdx
+#define I3 rcx
+#define I4 r8
+#define I5 r9
+#define F0 ymm0
+#define F1 ymm1
+#define F2 ymm2
+#define F3 ymm3
+#define F4 ymm4
+#define F5 ymm5
+#define F6 ymm6
+#define F7 ymm7
+
+typedef union {
+  __bf16 ___bf16[16];
+  float _float[8];
+  double _double[4];
+  long long _longlong[4];
+  int _int[8];
+  unsigned long long _ulonglong[4];
+  __m64 _m64[4];
+  __m128 _m128[2];
+  __m256 _m256[1];
+  __m256bf16 _m256bf16[1];
+} YMM_T;
+
+typedef union {
+  float _float;
+  double _double;
+  long double _ldouble;
+  unsigned long long _ulonglong[2];
+} X87_T;
+extern void (*callthis)(void);
+extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
+YMM_T ymm_regs[16];
+X87_T x87_regs[8];
+extern volatile unsigned long long volatile_var;
+extern void snapshot (void);
+extern void snapshot_ret (void);
+#define WRAP_CALL(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
+#define WRAP_RET(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
+
+/* Clear all integer registers.  */
+#define clear_int_hardware_registers \
+  asm __volatile__ ("xor %%rax, %%rax\n\t" \
+		    "xor %%rbx, %%rbx\n\t" \
+		    "xor %%rcx, %%rcx\n\t" \
+		    "xor %%rdx, %%rdx\n\t" \
+		    "xor %%rsi, %%rsi\n\t" \
+		    "xor %%rdi, %%rdi\n\t" \
+		    "xor %%r8, %%r8\n\t" \
+		    "xor %%r9, %%r9\n\t" \
+		    "xor %%r10, %%r10\n\t" \
+		    "xor %%r11, %%r11\n\t" \
+		    "xor %%r12, %%r12\n\t" \
+		    "xor %%r13, %%r13\n\t" \
+		    "xor %%r14, %%r14\n\t" \
+		    "xor %%r15, %%r15\n\t" \
+		    ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
+		    "r9", "r10", "r11", "r12", "r13", "r14", "r15");
+
+/* This is the list of registers available for passing arguments. Not all of
+   these are used or even really available.  */
+struct IntegerRegisters
+{
+  unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
+};
+struct FloatRegisters
+{
+  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
+  long double st0, st1, st2, st3, st4, st5, st6, st7;
+  YMM_T ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9,
+        ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
+};
+
+/* Implemented in scalarargs.c  */
+extern struct IntegerRegisters iregs;
+extern struct FloatRegisters fregs;
+extern unsigned int num_iregs, num_fregs;
+
+/* Clear register struct.  */
+#define clear_struct_registers \
+  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
+    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
+  memset (&iregs, 0, sizeof (iregs)); \
+  memset (&fregs, 0, sizeof (fregs)); \
+  memset (ymm_regs, 0, sizeof (ymm_regs)); \
+  memset (x87_regs, 0, sizeof (x87_regs));
+
+/* Clear both hardware and register structs for integers.  */
+#define clear_int_registers \
+  clear_struct_registers \
+  clear_int_hardware_registers
+
+#define check_vector_arguments(T,O) do { \
+  assert (num_fregs <= 0 \
+	  || memcmp (((char *) &fregs.ymm0) + (O), \
+		     &ymm_regs[0], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 1 \
+	  || memcmp (((char *) &fregs.ymm1) + (O), \
+		     &ymm_regs[1], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 2 \
+	  || memcmp (((char *) &fregs.ymm2) + (O), \
+		     &ymm_regs[2], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 3 \
+	  || memcmp (((char *) &fregs.ymm3) + (O), \
+		     &ymm_regs[3], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 4 \
+	  || memcmp (((char *) &fregs.ymm4) + (O), \
+		     &ymm_regs[4], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 5 \
+	  || memcmp (((char *) &fregs.ymm5) + (O), \
+		     &ymm_regs[5], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 6 \
+	  || memcmp (((char *) &fregs.ymm6) + (O), \
+		     &ymm_regs[6], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 7 \
+	  || memcmp (((char *) &fregs.ymm7) + (O), \
+		     &ymm_regs[7], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  } while (0)
+
+#define check_m256_arguments check_vector_arguments(m256, 0)
+
+#endif /* INCLUDED_ARGS_H  */
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
new file mode 100644
index 00000000000..24c8b3c9023
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support.S
@@ -0,0 +1,84 @@ 
+	.text
+	.p2align 4,,15
+.globl snapshot
+	.type	snapshot, @function
+snapshot:
+.LFB3:
+	movq	%rax, rax(%rip)
+	movq	%rbx, rbx(%rip)
+	movq	%rcx, rcx(%rip)
+	movq	%rdx, rdx(%rip)
+	movq	%rdi, rdi(%rip)
+	movq	%rsi, rsi(%rip)
+	movq	%rbp, rbp(%rip)
+	movq	%rsp, rsp(%rip)
+	movq	%r8, r8(%rip)
+	movq	%r9, r9(%rip)
+	movq	%r10, r10(%rip)
+	movq	%r11, r11(%rip)
+	movq	%r12, r12(%rip)
+	movq	%r13, r13(%rip)
+	movq	%r14, r14(%rip)
+	movq	%r15, r15(%rip)
+	vmovdqu	%ymm0, ymm_regs+0(%rip)
+	vmovdqu	%ymm1, ymm_regs+32(%rip)
+	vmovdqu	%ymm2, ymm_regs+64(%rip)
+	vmovdqu	%ymm3, ymm_regs+96(%rip)
+	vmovdqu	%ymm4, ymm_regs+128(%rip)
+	vmovdqu	%ymm5, ymm_regs+160(%rip)
+	vmovdqu	%ymm6, ymm_regs+192(%rip)
+	vmovdqu	%ymm7, ymm_regs+224(%rip)
+	vmovdqu	%ymm8, ymm_regs+256(%rip)
+	vmovdqu	%ymm9, ymm_regs+288(%rip)
+	vmovdqu	%ymm10, ymm_regs+320(%rip)
+	vmovdqu	%ymm11, ymm_regs+352(%rip)
+	vmovdqu	%ymm12, ymm_regs+384(%rip)
+	vmovdqu	%ymm13, ymm_regs+416(%rip)
+	vmovdqu	%ymm14, ymm_regs+448(%rip)
+	vmovdqu	%ymm15, ymm_regs+480(%rip)
+	jmp	*callthis(%rip)
+.LFE3:
+	.size	snapshot, .-snapshot
+
+	.p2align 4,,15
+.globl snapshot_ret
+	.type	snapshot_ret, @function
+snapshot_ret:
+	movq	%rdi, rdi(%rip)
+	subq	$8, %rsp
+	call	*callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, rax(%rip)
+	movq	%rdx, rdx(%rip)
+	vmovdqu	%ymm0, ymm_regs+0(%rip)
+	vmovdqu	%ymm1, ymm_regs+32(%rip)
+	fstpt	x87_regs(%rip)
+	fstpt	x87_regs+16(%rip)
+	fldt	x87_regs+16(%rip)
+	fldt	x87_regs(%rip)
+	ret
+	.size	snapshot_ret, .-snapshot_ret
+
+	.comm	callthis,8,8
+	.comm	rax,8,8
+	.comm	rbx,8,8
+	.comm	rcx,8,8
+	.comm	rdx,8,8
+	.comm	rsi,8,8
+	.comm	rdi,8,8
+	.comm	rsp,8,8
+	.comm	rbp,8,8
+	.comm	r8,8,8
+	.comm	r9,8,8
+	.comm	r10,8,8
+	.comm	r11,8,8
+	.comm	r12,8,8
+	.comm	r13,8,8
+	.comm	r14,8,8
+	.comm	r15,8,8
+	.comm	ymm_regs,512,32
+	.comm	x87_regs,128,32
+	.comm   volatile_var,8,8
+#ifdef __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h
new file mode 100644
index 00000000000..479ebc3ec3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/bf16-ymm-check.h
@@ -0,0 +1,24 @@ 
+#include <stdlib.h>
+#include "../bf16-helper.h"
+
+static void do_test (void);
+
+int
+main ()
+{
+
+  if (__builtin_cpu_supports ("avx2"))
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+      return 0;
+    }
+
+#ifdef DEBUG
+  printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c
new file mode 100644
index 00000000000..ea7512850ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_m256_returning.c
@@ -0,0 +1,38 @@ 
+#include <stdio.h>
+#include "bf16-ymm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+		bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
+
+__m256bf16
+fun_test_returning___m256bf16 (void)
+{
+  volatile_var++;
+  return (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
+}
+
+__m256bf16 test_256bf16;
+
+static void
+do_test (void)
+{
+  unsigned failed = 0;
+  YMM_T ymmt1, ymmt2;
+
+  clear_struct_registers;
+  test_256bf16 = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+				bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
+  ymmt1._m256bf16[0] = test_256bf16;
+  ymmt2._m256bf16[0] = WRAP_RET (fun_test_returning___m256bf16) ();
+  if (memcmp (&ymmt1, &ymmt2, sizeof (ymmt2)) != 0)
+    printf ("fail m256bf16\n"), failed++;
+
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c
new file mode 100644
index 00000000000..3fb2d7d20f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_m256.c
@@ -0,0 +1,235 @@ 
+#include <stdio.h>
+#include "bf16-ymm-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+/* This struct holds values for argument checking.  */
+struct
+{
+  YMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
+    i16, i17, i18, i19, i20, i21, i22, i23;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+fun_check_passing_m256bf16_8_values (__m256bf16 i0 ATTRIBUTE_UNUSED,
+				     __m256bf16 i1 ATTRIBUTE_UNUSED,
+				     __m256bf16 i2 ATTRIBUTE_UNUSED,
+				     __m256bf16 i3 ATTRIBUTE_UNUSED,
+				     __m256bf16 i4 ATTRIBUTE_UNUSED,
+				     __m256bf16 i5 ATTRIBUTE_UNUSED,
+				     __m256bf16 i6 ATTRIBUTE_UNUSED,
+				     __m256bf16 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m256bf16);
+  compare (values.i1, i1, __m256bf16);
+  compare (values.i2, i2, __m256bf16);
+  compare (values.i3, i3, __m256bf16);
+  compare (values.i4, i4, __m256bf16);
+  compare (values.i5, i5, __m256bf16);
+  compare (values.i6, i6, __m256bf16);
+  compare (values.i7, i7, __m256bf16);
+}
+
+void
+fun_check_passing_m256bf16_8_regs (__m256bf16 i0 ATTRIBUTE_UNUSED,
+				   __m256bf16 i1 ATTRIBUTE_UNUSED,
+				   __m256bf16 i2 ATTRIBUTE_UNUSED,
+				   __m256bf16 i3 ATTRIBUTE_UNUSED,
+				   __m256bf16 i4 ATTRIBUTE_UNUSED,
+				   __m256bf16 i5 ATTRIBUTE_UNUSED,
+				   __m256bf16 i6 ATTRIBUTE_UNUSED,
+				   __m256bf16 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m256_arguments;
+}
+
+void
+fun_check_passing_m256bf16_20_values (__m256bf16 i0 ATTRIBUTE_UNUSED,
+				      __m256bf16 i1 ATTRIBUTE_UNUSED,
+				      __m256bf16 i2 ATTRIBUTE_UNUSED,
+				      __m256bf16 i3 ATTRIBUTE_UNUSED,
+				      __m256bf16 i4 ATTRIBUTE_UNUSED,
+				      __m256bf16 i5 ATTRIBUTE_UNUSED,
+				      __m256bf16 i6 ATTRIBUTE_UNUSED,
+				      __m256bf16 i7 ATTRIBUTE_UNUSED,
+				      __m256bf16 i8 ATTRIBUTE_UNUSED,
+				      __m256bf16 i9 ATTRIBUTE_UNUSED,
+				      __m256bf16 i10 ATTRIBUTE_UNUSED,
+				      __m256bf16 i11 ATTRIBUTE_UNUSED,
+				      __m256bf16 i12 ATTRIBUTE_UNUSED,
+				      __m256bf16 i13 ATTRIBUTE_UNUSED,
+				      __m256bf16 i14 ATTRIBUTE_UNUSED,
+				      __m256bf16 i15 ATTRIBUTE_UNUSED,
+				      __m256bf16 i16 ATTRIBUTE_UNUSED,
+				      __m256bf16 i17 ATTRIBUTE_UNUSED,
+				      __m256bf16 i18 ATTRIBUTE_UNUSED,
+				      __m256bf16 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m256bf16);
+  compare (values.i1, i1, __m256bf16);
+  compare (values.i2, i2, __m256bf16);
+  compare (values.i3, i3, __m256bf16);
+  compare (values.i4, i4, __m256bf16);
+  compare (values.i5, i5, __m256bf16);
+  compare (values.i6, i6, __m256bf16);
+  compare (values.i7, i7, __m256bf16);
+  compare (values.i8, i8, __m256bf16);
+  compare (values.i9, i9, __m256bf16);
+  compare (values.i10, i10, __m256bf16);
+  compare (values.i11, i11, __m256bf16);
+  compare (values.i12, i12, __m256bf16);
+  compare (values.i13, i13, __m256bf16);
+  compare (values.i14, i14, __m256bf16);
+  compare (values.i15, i15, __m256bf16);
+  compare (values.i16, i16, __m256bf16);
+  compare (values.i17, i17, __m256bf16);
+  compare (values.i18, i18, __m256bf16);
+  compare (values.i19, i19, __m256bf16);
+}
+
+void
+fun_check_passing_m256bf16_20_regs (__m256bf16 i0 ATTRIBUTE_UNUSED,
+				    __m256bf16 i1 ATTRIBUTE_UNUSED,
+				    __m256bf16 i2 ATTRIBUTE_UNUSED,
+				    __m256bf16 i3 ATTRIBUTE_UNUSED,
+				    __m256bf16 i4 ATTRIBUTE_UNUSED,
+				    __m256bf16 i5 ATTRIBUTE_UNUSED,
+				    __m256bf16 i6 ATTRIBUTE_UNUSED,
+				    __m256bf16 i7 ATTRIBUTE_UNUSED,
+				    __m256bf16 i8 ATTRIBUTE_UNUSED,
+				    __m256bf16 i9 ATTRIBUTE_UNUSED,
+				    __m256bf16 i10 ATTRIBUTE_UNUSED,
+				    __m256bf16 i11 ATTRIBUTE_UNUSED,
+				    __m256bf16 i12 ATTRIBUTE_UNUSED,
+				    __m256bf16 i13 ATTRIBUTE_UNUSED,
+				    __m256bf16 i14 ATTRIBUTE_UNUSED,
+				    __m256bf16 i15 ATTRIBUTE_UNUSED,
+				    __m256bf16 i16 ATTRIBUTE_UNUSED,
+				    __m256bf16 i17 ATTRIBUTE_UNUSED,
+				    __m256bf16 i18 ATTRIBUTE_UNUSED,
+				    __m256bf16 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m256_arguments;
+}
+
+#define def_check_passing8(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
+
+#define def_check_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, \
+			    _i8, _i9, _i10, _i11, _i12, _i13, _i14, \
+			    _i15, _i16, _i17, _i18, _i19, _func1, \
+			    _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  values.i10.TYPE[0] = _i10; \
+  values.i11.TYPE[0] = _i11; \
+  values.i12.TYPE[0] = _i12; \
+  values.i13.TYPE[0] = _i13; \
+  values.i14.TYPE[0] = _i14; \
+  values.i15.TYPE[0] = _i15; \
+  values.i16.TYPE[0] = _i16; \
+  values.i17.TYPE[0] = _i17; \
+  values.i18.TYPE[0] = _i18; \
+  values.i19.TYPE[0] = _i19; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
+		     _i9, _i10, _i11, _i12, _i13, _i14, _i15, \
+		     _i16, _i17, _i18, _i19); \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
+		     _i9, _i10, _i11, _i12, _i13, _i14, _i15, \
+		     _i16, _i17, _i18, _i19);
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+		bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
+
+void
+test_m256bf16_on_stack ()
+{
+  __m256bf16 x[8];
+  int i;
+  for (i = 0; i < 8; i++)
+    x[i] = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			  bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
+  pass = "m256bf16-8";
+  def_check_passing8 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+		      fun_check_passing_m256bf16_8_values,
+		      fun_check_passing_m256bf16_8_regs, _m256bf16);
+}
+
+void
+test_too_many_m256bf16 ()
+{
+  __m256bf16 x[20];
+  int i;
+  for (i = 0; i < 20; i++)
+    x[i] = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			  bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
+  pass = "m256bf16-20";
+  def_check_passing20 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8],
+		       x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16],
+		       x[17], x[18], x[19], fun_check_passing_m256bf16_20_values,
+		       fun_check_passing_m256bf16_20_regs, _m256bf16);
+}
+
+static void
+do_test (void)
+{
+  test_m256bf16_on_stack ();
+  test_too_many_m256bf16 ();
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c
new file mode 100644
index 00000000000..e06350ed493
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_structs.c
@@ -0,0 +1,69 @@ 
+#include "bf16-ymm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+struct m256bf16_struct
+{
+  __m256bf16 x;
+};
+
+struct m256bf16_2_struct
+{
+  __m256bf16 x1, x2;
+};
+
+/* Check that the struct is passed as the individual members in fregs.  */
+void
+check_struct_passing1bf16 (struct m256bf16_struct ms1 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms2 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms3 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms4 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms5 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms6 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms7 ATTRIBUTE_UNUSED,
+			   struct m256bf16_struct ms8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_struct_passing2bf16 (struct m256bf16_2_struct ms ATTRIBUTE_UNUSED)
+{
+  /* Check the passing on the stack by comparing the address of the
+     stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&ms.x1 == rsp+8);
+  assert ((unsigned long)&ms.x2 == rsp+40);
+}
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+		bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
+
+static void
+do_test (void)
+{
+  struct m256bf16_struct m256bf16s [8];
+  struct m256bf16_2_struct m256bf16_2s = { 
+    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16},
+    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16},
+  };
+  int i;
+
+  for (i = 0; i < 8; i++)
+    {
+      m256bf16s[i].x = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+				      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16};
+    }
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    (&fregs.ymm0)[i]._m256bf16[0] = m256bf16s[i].x;
+  num_fregs = 8;
+  WRAP_CALL (check_struct_passing1bf16) (m256bf16s[0], m256bf16s[1], m256bf16s[2], m256bf16s[3],
+					 m256bf16s[4], m256bf16s[5], m256bf16s[6], m256bf16s[7]);
+  WRAP_CALL (check_struct_passing2bf16) (m256bf16_2s);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c
new file mode 100644
index 00000000000..6d663b88b1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_passing_unions.c
@@ -0,0 +1,179 @@ 
+#include "bf16-ymm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+union un1b
+{
+  __m256bf16 x;
+  float f;
+};
+
+union un1bb
+{
+  __m256bf16 x;
+  __bf16 f;
+};
+
+union un2b
+{
+  __m256bf16 x;
+  double d;
+};
+
+union un3b
+{
+  __m256bf16 x;
+  __m128 v;
+};
+
+union un4b
+{
+  __m256bf16 x;
+  long double ld;
+};
+
+union un5b
+{
+  __m256bf16 x;
+  int i;
+};
+
+void
+check_union_passing1b (union un1b u1 ATTRIBUTE_UNUSED,
+		       union un1b u2 ATTRIBUTE_UNUSED,
+		       union un1b u3 ATTRIBUTE_UNUSED,
+		       union un1b u4 ATTRIBUTE_UNUSED,
+		       union un1b u5 ATTRIBUTE_UNUSED,
+		       union un1b u6 ATTRIBUTE_UNUSED,
+		       union un1b u7 ATTRIBUTE_UNUSED,
+		       union un1b u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing1bb (union un1bb u1 ATTRIBUTE_UNUSED,
+		        union un1bb u2 ATTRIBUTE_UNUSED,
+		        union un1bb u3 ATTRIBUTE_UNUSED,
+		        union un1bb u4 ATTRIBUTE_UNUSED,
+		        union un1bb u5 ATTRIBUTE_UNUSED,
+		        union un1bb u6 ATTRIBUTE_UNUSED,
+		        union un1bb u7 ATTRIBUTE_UNUSED,
+		        union un1bb u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing2b (union un2b u1 ATTRIBUTE_UNUSED,
+		       union un2b u2 ATTRIBUTE_UNUSED,
+		       union un2b u3 ATTRIBUTE_UNUSED,
+		       union un2b u4 ATTRIBUTE_UNUSED,
+		       union un2b u5 ATTRIBUTE_UNUSED,
+		       union un2b u6 ATTRIBUTE_UNUSED,
+		       union un2b u7 ATTRIBUTE_UNUSED,
+		       union un2b u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing3b (union un3b u1 ATTRIBUTE_UNUSED,
+		       union un3b u2 ATTRIBUTE_UNUSED,
+		       union un3b u3 ATTRIBUTE_UNUSED,
+		       union un3b u4 ATTRIBUTE_UNUSED,
+		       union un3b u5 ATTRIBUTE_UNUSED,
+		       union un3b u6 ATTRIBUTE_UNUSED,
+		       union un3b u7 ATTRIBUTE_UNUSED,
+		       union un3b u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing4b (union un4b u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.ld == rsp+8);
+}
+
+void
+check_union_passing5b (union un5b u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.i == rsp+8);
+}
+
+#define check_union_passing1b WRAP_CALL(check_union_passing1b)
+#define check_union_passing1bb WRAP_CALL(check_union_passing1bb)
+#define check_union_passing2b WRAP_CALL(check_union_passing2b)
+#define check_union_passing3b WRAP_CALL(check_union_passing3b)
+#define check_union_passing4b WRAP_CALL(check_union_passing4b)
+#define check_union_passing5b WRAP_CALL(check_union_passing5b)
+
+static void
+do_test (void)
+{
+  union un1b u1b[8];
+  union un1bb u1bb[8];
+  union un2b u2b[8];
+  union un3b u3b[8];
+  union un4b u4b;
+  union un5b u5b;
+  int i;
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+	 bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
+
+  for (i = 0; i < 8; i++)
+    {
+      u1b[i].x = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+				bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16 };
+    }
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    (&fregs.ymm0)[i]._m256bf16[0] = u1b[i].x;
+  num_fregs = 8;
+  check_union_passing1b (u1b[0], u1b[1], u1b[2], u1b[3],
+		         u1b[4], u1b[5], u1b[6], u1b[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u1bb[i].x = u1b[i].x;
+      (&fregs.ymm0)[i]._m256bf16[0] = u1bb[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing1bb (u1bb[0], u1bb[1], u1bb[2], u1bb[3],
+		          u1bb[4], u1bb[5], u1bb[6], u1bb[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u2b[i].x = u1b[i].x;
+      (&fregs.ymm0)[i]._m256bf16[0] = u2b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing2b (u2b[0], u2b[1], u2b[2], u2b[3],
+		         u2b[4], u2b[5], u2b[6], u2b[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u3b[i].x = u1b[i].x;
+      (&fregs.ymm0)[i]._m256bf16[0] = u3b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing3b (u3b[0], u3b[1], u3b[2], u3b[3],
+		         u3b[4], u3b[5], u3b[6], u3b[7]);
+
+  check_union_passing4b (u4b);
+  check_union_passing5b (u5b);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c
new file mode 100644
index 00000000000..b69e095d808
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/test_varargs-m256.c
@@ -0,0 +1,107 @@ 
+/* Test variable number of 256-bit vector arguments passed to functions.  */
+
+#include <stdio.h>
+#include "bf16-ymm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+
+/* This struct holds values for argument checking.  */
+struct 
+{
+  YMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+void
+fun_check_passing_m256bf16_varargs (__m256bf16 i0, __m256bf16 i1, __m256bf16 i2,
+				 __m256bf16 i3, ...)
+{
+  /* Check argument values.  */
+  void **fp = __builtin_frame_address (0);
+  void *ra = __builtin_return_address (0);
+  __m256bf16 *argp;
+
+  compare (values.i0, i0, __m256bf16);
+  compare (values.i1, i1, __m256bf16);
+  compare (values.i2, i2, __m256bf16);
+  compare (values.i3, i3, __m256bf16);
+
+  /* Get the pointer to the return address on stack.  */
+  while (*fp != ra)
+    fp++;
+
+  /* Skip the return address stack slot.  */
+  argp = (__m256bf16 *)(((char *) fp) + 8);
+
+  /* Check __m256bf16 arguments passed on stack.  */
+  compare (values.i4, argp[0], __m256bf16);
+  compare (values.i5, argp[1], __m256bf16);
+  compare (values.i6, argp[2], __m256bf16);
+  compare (values.i7, argp[3], __m256bf16);
+  compare (values.i8, argp[4], __m256bf16);
+  compare (values.i9, argp[5], __m256bf16);
+
+  /* Check register contents.  */
+  compare (fregs.ymm0, ymm_regs[0], __m256bf16);
+  compare (fregs.ymm1, ymm_regs[1], __m256bf16);
+  compare (fregs.ymm2, ymm_regs[2], __m256bf16);
+  compare (fregs.ymm3, ymm_regs[3], __m256bf16);
+}
+
+#define def_check_int_passing_varargs(_i0, _i1, _i2, _i3, _i4, _i5, \
+				      _i6, _i7, _i8, _i9, \
+				      _func, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  WRAP_CALL(_func) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9);
+
+void
+test_m256bf16_varargs (void)
+{
+  __m256bf16 x[10];
+  int i;
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+         bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
+  for (i = 0; i < 10; i++)
+    x[i] = (__m256bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			  bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16 };
+  pass = "m256bf16-varargs";
+  def_check_int_passing_varargs (x[0], x[1], x[2], x[3], x[4], x[5],
+				 x[6], x[7], x[8], x[9],
+				 fun_check_passing_m256bf16_varargs,
+				 _m256bf16);
+}
+
+void
+do_test (void)
+{
+  test_m256bf16_varargs ();
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
new file mode 100644
index 00000000000..b6e0fed4cb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp
@@ -0,0 +1,46 @@ 
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# The x86-64 ABI testsuite needs one additional assembler file for most
+# testcases.  For simplicity we will just link it into each test.
+
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+load_lib clearcap.exp
+
+if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
+     || ![is-effective-target lp64]
+     || ![is-effective-target avx512f] } then {
+  return
+}
+
+
+torture-init
+clearcap-init
+set-torture-options $C_TORTURE_OPTIONS
+set additional_flags "-W -Wall -mavx512f"
+
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] {
+    if {[runtest_file_p $runtests $src]} {
+        c-torture-execute [list $src \
+                                $srcdir/$subdir/asm-support.S] \
+                                $additional_flags
+    }
+}
+
+clearcap-finish
+torture-finish
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
new file mode 100644
index 00000000000..64b24783833
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h
@@ -0,0 +1,155 @@ 
+#ifndef INCLUDED_ARGS_H
+#define INCLUDED_ARGS_H
+
+#include <immintrin.h>
+#include <string.h>
+
+/* Assertion macro.  */
+#define assert(test) if (!(test)) abort()
+
+#ifdef __GNUC__
+#define ATTRIBUTE_UNUSED __attribute__((__unused__))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+
+/* This defines the calling sequences for integers and floats.  */
+#define I0 rdi
+#define I1 rsi
+#define I2 rdx
+#define I3 rcx
+#define I4 r8
+#define I5 r9
+#define F0 zmm0
+#define F1 zmm1
+#define F2 zmm2
+#define F3 zmm3
+#define F4 zmm4
+#define F5 zmm5
+#define F6 zmm6
+#define F7 zmm7
+
+typedef union {
+  __bf16 ___bf16[32];
+  float _float[16];
+  double _double[8];
+  long long _longlong[8];
+  int _int[16];
+  unsigned long long _ulonglong[8];
+  __m64 _m64[8];
+  __m128 _m128[4];
+  __m256 _m256[2];
+  __m512 _m512[1];
+  __m512bf16 _m512bf16[1];
+} ZMM_T;
+
+typedef union {
+  float _float;
+  double _double;
+  long double _ldouble;
+  unsigned long long _ulonglong[2];
+} X87_T;
+extern void (*callthis)(void);
+extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
+ZMM_T zmm_regs[32];
+X87_T x87_regs[8];
+extern volatile unsigned long long volatile_var;
+extern void snapshot (void);
+extern void snapshot_ret (void);
+#define WRAP_CALL(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
+#define WRAP_RET(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
+
+/* Clear all integer registers.  */
+#define clear_int_hardware_registers \
+  asm __volatile__ ("xor %%rax, %%rax\n\t" \
+		    "xor %%rbx, %%rbx\n\t" \
+		    "xor %%rcx, %%rcx\n\t" \
+		    "xor %%rdx, %%rdx\n\t" \
+		    "xor %%rsi, %%rsi\n\t" \
+		    "xor %%rdi, %%rdi\n\t" \
+		    "xor %%r8, %%r8\n\t" \
+		    "xor %%r9, %%r9\n\t" \
+		    "xor %%r10, %%r10\n\t" \
+		    "xor %%r11, %%r11\n\t" \
+		    "xor %%r12, %%r12\n\t" \
+		    "xor %%r13, %%r13\n\t" \
+		    "xor %%r14, %%r14\n\t" \
+		    "xor %%r15, %%r15\n\t" \
+		    ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
+		    "r9", "r10", "r11", "r12", "r13", "r14", "r15");
+
+/* This is the list of registers available for passing arguments. Not all of
+   these are used or even really available.  */
+struct IntegerRegisters
+{
+  unsigned long long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
+};
+struct FloatRegisters
+{
+  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
+  long double st0, st1, st2, st3, st4, st5, st6, st7;
+  ZMM_T zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7, zmm8, zmm9,
+        zmm10, zmm11, zmm12, zmm13, zmm14, zmm15, zmm16, zmm17, zmm18,
+	zmm19, zmm20, zmm21, zmm22, zmm23, zmm24, zmm25, zmm26, zmm27,
+	zmm28, zmm29, zmm30, zmm31;
+};
+
+/* Implemented in scalarargs.c  */
+extern struct IntegerRegisters iregs;
+extern struct FloatRegisters fregs;
+extern unsigned int num_iregs, num_fregs;
+
+/* Clear register struct.  */
+#define clear_struct_registers \
+  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
+    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
+  memset (&iregs, 0, sizeof (iregs)); \
+  memset (&fregs, 0, sizeof (fregs)); \
+  memset (zmm_regs, 0, sizeof (zmm_regs)); \
+  memset (x87_regs, 0, sizeof (x87_regs));
+
+/* Clear both hardware and register structs for integers.  */
+#define clear_int_registers \
+  clear_struct_registers \
+  clear_int_hardware_registers
+
+#define check_vector_arguments(T,O) do { \
+  assert (num_fregs <= 0 \
+	  || memcmp (((char *) &fregs.zmm0) + (O), \
+		     &zmm_regs[0], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 1 \
+	  || memcmp (((char *) &fregs.zmm1) + (O), \
+		     &zmm_regs[1], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 2 \
+	  || memcmp (((char *) &fregs.zmm2) + (O), \
+		     &zmm_regs[2], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 3 \
+	  || memcmp (((char *) &fregs.zmm3) + (O), \
+		     &zmm_regs[3], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 4 \
+	  || memcmp (((char *) &fregs.zmm4) + (O), \
+		     &zmm_regs[4], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 5 \
+	  || memcmp (((char *) &fregs.zmm5) + (O), \
+		     &zmm_regs[5], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 6 \
+	  || memcmp (((char *) &fregs.zmm6) + (O), \
+		     &zmm_regs[6], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 7 \
+	  || memcmp (((char *) &fregs.zmm7) + (O), \
+		     &zmm_regs[7], \
+		     sizeof (__ ## T) - (O)) == 0); \
+  } while (0)
+
+#define check_m512_arguments check_vector_arguments(m512, 0)
+
+#endif /* INCLUDED_ARGS_H  */
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
new file mode 100644
index 00000000000..86d54d11c58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support.S
@@ -0,0 +1,100 @@ 
+	.text
+	.p2align 4,,15
+.globl snapshot
+	.type	snapshot, @function
+snapshot:
+.LFB3:
+	movq	%rax, rax(%rip)
+	movq	%rbx, rbx(%rip)
+	movq	%rcx, rcx(%rip)
+	movq	%rdx, rdx(%rip)
+	movq	%rdi, rdi(%rip)
+	movq	%rsi, rsi(%rip)
+	movq	%rbp, rbp(%rip)
+	movq	%rsp, rsp(%rip)
+	movq	%r8, r8(%rip)
+	movq	%r9, r9(%rip)
+	movq	%r10, r10(%rip)
+	movq	%r11, r11(%rip)
+	movq	%r12, r12(%rip)
+	movq	%r13, r13(%rip)
+	movq	%r14, r14(%rip)
+	movq	%r15, r15(%rip)
+	vmovdqu32 %zmm0, zmm_regs+0(%rip)
+	vmovdqu32 %zmm1, zmm_regs+64(%rip)
+	vmovdqu32 %zmm2, zmm_regs+128(%rip)
+	vmovdqu32 %zmm3, zmm_regs+192(%rip)
+	vmovdqu32 %zmm4, zmm_regs+256(%rip)
+	vmovdqu32 %zmm5, zmm_regs+320(%rip)
+	vmovdqu32 %zmm6, zmm_regs+384(%rip)
+	vmovdqu32 %zmm7, zmm_regs+448(%rip)
+	vmovdqu32 %zmm8, zmm_regs+512(%rip)
+	vmovdqu32 %zmm9, zmm_regs+576(%rip)
+	vmovdqu32 %zmm10, zmm_regs+640(%rip)
+	vmovdqu32 %zmm11, zmm_regs+704(%rip)
+	vmovdqu32 %zmm12, zmm_regs+768(%rip)
+	vmovdqu32 %zmm13, zmm_regs+832(%rip)
+	vmovdqu32 %zmm14, zmm_regs+896(%rip)
+	vmovdqu32 %zmm15, zmm_regs+960(%rip)
+	vmovdqu32 %zmm16, zmm_regs+1024(%rip)
+	vmovdqu32 %zmm17, zmm_regs+1088(%rip)
+	vmovdqu32 %zmm18, zmm_regs+1152(%rip)
+	vmovdqu32 %zmm19, zmm_regs+1216(%rip)
+	vmovdqu32 %zmm20, zmm_regs+1280(%rip)
+	vmovdqu32 %zmm21, zmm_regs+1344(%rip)
+	vmovdqu32 %zmm22, zmm_regs+1408(%rip)
+	vmovdqu32 %zmm23, zmm_regs+1472(%rip)
+	vmovdqu32 %zmm24, zmm_regs+1536(%rip)
+	vmovdqu32 %zmm25, zmm_regs+1600(%rip)
+	vmovdqu32 %zmm26, zmm_regs+1664(%rip)
+	vmovdqu32 %zmm27, zmm_regs+1728(%rip)
+	vmovdqu32 %zmm28, zmm_regs+1792(%rip)
+	vmovdqu32 %zmm29, zmm_regs+1856(%rip)
+	vmovdqu32 %zmm30, zmm_regs+1920(%rip)
+	vmovdqu32 %zmm31, zmm_regs+1984(%rip)
+	jmp	*callthis(%rip)
+.LFE3:
+	.size	snapshot, .-snapshot
+
+	.p2align 4,,15
+.globl snapshot_ret
+	.type	snapshot_ret, @function
+snapshot_ret:
+	movq	%rdi, rdi(%rip)
+	subq	$8, %rsp
+	call	*callthis(%rip)
+	addq	$8, %rsp
+	movq	%rax, rax(%rip)
+	movq	%rdx, rdx(%rip)
+	vmovdqu32	%zmm0, zmm_regs+0(%rip)
+	vmovdqu32	%zmm1, zmm_regs+64(%rip)
+	fstpt	x87_regs(%rip)
+	fstpt	x87_regs+16(%rip)
+	fldt	x87_regs+16(%rip)
+	fldt	x87_regs(%rip)
+	ret
+	.size	snapshot_ret, .-snapshot_ret
+
+	.comm	callthis,8,8
+	.comm	rax,8,8
+	.comm	rbx,8,8
+	.comm	rcx,8,8
+	.comm	rdx,8,8
+	.comm	rsi,8,8
+	.comm	rdi,8,8
+	.comm	rsp,8,8
+	.comm	rbp,8,8
+	.comm	r8,8,8
+	.comm	r9,8,8
+	.comm	r10,8,8
+	.comm	r11,8,8
+	.comm	r12,8,8
+	.comm	r13,8,8
+	.comm	r14,8,8
+	.comm	r15,8,8
+	.comm	zmm_regs,2048,64
+	.comm	x87_regs,128,32
+	.comm   volatile_var,8,8
+#ifdef __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
new file mode 100644
index 00000000000..8379fcfaf8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/bf16-zmm-check.h
@@ -0,0 +1,23 @@ 
+#include <stdlib.h>
+
+static void do_test (void);
+
+int
+main ()
+{
+
+  if (__builtin_cpu_supports ("avx512f"))
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+      return 0;
+    }
+
+#ifdef DEBUG
+  printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c
new file mode 100644
index 00000000000..1a2500bd883
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_m512_returning.c
@@ -0,0 +1,44 @@ 
+#include <stdio.h>
+#include "bf16-zmm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+		bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+		bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+		bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
+
+__m512bf16
+fun_test_returning___m512bf16 (void)
+{
+  volatile_var++;
+  return (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+			bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+			bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+}
+
+__m512bf16 test_512bf16;
+
+static void
+do_test (void)
+{
+  unsigned failed = 0;
+  ZMM_T zmmt1, zmmt2;
+
+  clear_struct_registers;
+  test_512bf16 = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+				bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+				bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+				bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+  zmmt1._m512bf16[0] = test_512bf16;
+  zmmt2._m512bf16[0] = WRAP_RET (fun_test_returning___m512bf16)();
+  if (memcmp (&zmmt1, &zmmt2, sizeof (zmmt2)) != 0)
+    printf ("fail m512bf16\n"), failed++;
+
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c
new file mode 100644
index 00000000000..1c5c407efee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_m512.c
@@ -0,0 +1,243 @@ 
+#include <stdio.h>
+#include "bf16-zmm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+/* This struct holds values for argument checking.  */
+struct
+{
+  ZMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
+    i16, i17, i18, i19, i20, i21, i22, i23;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+fun_check_passing_m512bf16_8_values (__m512bf16 i0 ATTRIBUTE_UNUSED,
+				     __m512bf16 i1 ATTRIBUTE_UNUSED,
+				     __m512bf16 i2 ATTRIBUTE_UNUSED,
+				     __m512bf16 i3 ATTRIBUTE_UNUSED,
+				     __m512bf16 i4 ATTRIBUTE_UNUSED,
+				     __m512bf16 i5 ATTRIBUTE_UNUSED,
+				     __m512bf16 i6 ATTRIBUTE_UNUSED,
+				     __m512bf16 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m512bf16);
+  compare (values.i1, i1, __m512bf16);
+  compare (values.i2, i2, __m512bf16);
+  compare (values.i3, i3, __m512bf16);
+  compare (values.i4, i4, __m512bf16);
+  compare (values.i5, i5, __m512bf16);
+  compare (values.i6, i6, __m512bf16);
+  compare (values.i7, i7, __m512bf16);
+}
+
+void
+fun_check_passing_m512bf16_8_regs (__m512bf16 i0 ATTRIBUTE_UNUSED,
+				   __m512bf16 i1 ATTRIBUTE_UNUSED,
+				   __m512bf16 i2 ATTRIBUTE_UNUSED,
+				   __m512bf16 i3 ATTRIBUTE_UNUSED,
+				   __m512bf16 i4 ATTRIBUTE_UNUSED,
+				   __m512bf16 i5 ATTRIBUTE_UNUSED,
+				   __m512bf16 i6 ATTRIBUTE_UNUSED,
+				   __m512bf16 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+void
+fun_check_passing_m512bf16_20_values (__m512bf16 i0 ATTRIBUTE_UNUSED,
+				      __m512bf16 i1 ATTRIBUTE_UNUSED,
+				      __m512bf16 i2 ATTRIBUTE_UNUSED,
+				      __m512bf16 i3 ATTRIBUTE_UNUSED,
+				      __m512bf16 i4 ATTRIBUTE_UNUSED,
+				      __m512bf16 i5 ATTRIBUTE_UNUSED,
+				      __m512bf16 i6 ATTRIBUTE_UNUSED,
+				      __m512bf16 i7 ATTRIBUTE_UNUSED,
+				      __m512bf16 i8 ATTRIBUTE_UNUSED,
+				      __m512bf16 i9 ATTRIBUTE_UNUSED,
+				      __m512bf16 i10 ATTRIBUTE_UNUSED,
+				      __m512bf16 i11 ATTRIBUTE_UNUSED,
+				      __m512bf16 i12 ATTRIBUTE_UNUSED,
+				      __m512bf16 i13 ATTRIBUTE_UNUSED,
+				      __m512bf16 i14 ATTRIBUTE_UNUSED,
+				      __m512bf16 i15 ATTRIBUTE_UNUSED,
+				      __m512bf16 i16 ATTRIBUTE_UNUSED,
+				      __m512bf16 i17 ATTRIBUTE_UNUSED,
+				      __m512bf16 i18 ATTRIBUTE_UNUSED,
+				      __m512bf16 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m512bf16);
+  compare (values.i1, i1, __m512bf16);
+  compare (values.i2, i2, __m512bf16);
+  compare (values.i3, i3, __m512bf16);
+  compare (values.i4, i4, __m512bf16);
+  compare (values.i5, i5, __m512bf16);
+  compare (values.i6, i6, __m512bf16);
+  compare (values.i7, i7, __m512bf16);
+  compare (values.i8, i8, __m512bf16);
+  compare (values.i9, i9, __m512bf16);
+  compare (values.i10, i10, __m512bf16);
+  compare (values.i11, i11, __m512bf16);
+  compare (values.i12, i12, __m512bf16);
+  compare (values.i13, i13, __m512bf16);
+  compare (values.i14, i14, __m512bf16);
+  compare (values.i15, i15, __m512bf16);
+  compare (values.i16, i16, __m512bf16);
+  compare (values.i17, i17, __m512bf16);
+  compare (values.i18, i18, __m512bf16);
+  compare (values.i19, i19, __m512bf16);
+}
+
+void
+fun_check_passing_m512bf16_20_regs (__m512bf16 i0 ATTRIBUTE_UNUSED,
+				    __m512bf16 i1 ATTRIBUTE_UNUSED,
+				    __m512bf16 i2 ATTRIBUTE_UNUSED,
+				    __m512bf16 i3 ATTRIBUTE_UNUSED,
+				    __m512bf16 i4 ATTRIBUTE_UNUSED,
+				    __m512bf16 i5 ATTRIBUTE_UNUSED,
+				    __m512bf16 i6 ATTRIBUTE_UNUSED,
+				    __m512bf16 i7 ATTRIBUTE_UNUSED,
+				    __m512bf16 i8 ATTRIBUTE_UNUSED,
+				    __m512bf16 i9 ATTRIBUTE_UNUSED,
+				    __m512bf16 i10 ATTRIBUTE_UNUSED,
+				    __m512bf16 i11 ATTRIBUTE_UNUSED,
+				    __m512bf16 i12 ATTRIBUTE_UNUSED,
+				    __m512bf16 i13 ATTRIBUTE_UNUSED,
+				    __m512bf16 i14 ATTRIBUTE_UNUSED,
+				    __m512bf16 i15 ATTRIBUTE_UNUSED,
+				    __m512bf16 i16 ATTRIBUTE_UNUSED,
+				    __m512bf16 i17 ATTRIBUTE_UNUSED,
+				    __m512bf16 i18 ATTRIBUTE_UNUSED,
+				    __m512bf16 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+#define def_check_passing8(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
+  \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
+
+#define def_check_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, \
+			    _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, \
+			    _i18, _i19, _func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  values.i10.TYPE[0] = _i10; \
+  values.i11.TYPE[0] = _i11; \
+  values.i12.TYPE[0] = _i12; \
+  values.i13.TYPE[0] = _i13; \
+  values.i14.TYPE[0] = _i14; \
+  values.i15.TYPE[0] = _i15; \
+  values.i16.TYPE[0] = _i16; \
+  values.i17.TYPE[0] = _i17; \
+  values.i18.TYPE[0] = _i18; \
+  values.i19.TYPE[0] = _i19; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, \
+		     _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, \
+		     _i18, _i19); \
+  \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, \
+		     _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, \
+		     _i18, _i19);
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+		bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+		bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+		bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
+
+void
+test_m512bf16_on_stack ()
+{
+  __m512bf16 x[8];
+  int i;
+  for (i = 0; i < 8; i++)
+    x[i] = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			  bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+			  bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+			  bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+
+  pass = "m512bf16-8";
+  def_check_passing8 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+		      fun_check_passing_m512bf16_8_values,
+		      fun_check_passing_m512bf16_8_regs, _m512bf16);
+}
+
+void
+test_too_many_m512bf16 ()
+{
+  __m512bf16 x[20];
+  int i;
+  for (i = 0; i < 20; i++)
+    x[i] = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			  bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+			  bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+			  bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+  pass = "m512bf16-20";
+  def_check_passing20 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8],
+		       x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16],
+		       x[17], x[18], x[19], fun_check_passing_m512bf16_20_values,
+		       fun_check_passing_m512bf16_20_regs, _m512bf16);
+}
+
+static void
+do_test (void)
+{
+  test_m512bf16_on_stack ();
+  test_too_many_m512bf16 ();
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c
new file mode 100644
index 00000000000..f93a2b81086
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_structs.c
@@ -0,0 +1,77 @@ 
+#include "bf16-zmm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+struct m512bf16_struct
+{
+  __m512bf16 x;
+};
+
+struct m512bf16_2_struct
+{
+  __m512bf16 x1, x2;
+};
+
+/* Check that the struct is passed as the individual members in fregs.  */
+void
+check_struct_passing1bf16 (struct m512bf16_struct ms1 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms2 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms3 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms4 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms5 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms6 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms7 ATTRIBUTE_UNUSED,
+			   struct m512bf16_struct ms8 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+void
+check_struct_passing2bf16 (struct m512bf16_2_struct ms ATTRIBUTE_UNUSED)
+{
+  /* Check the passing on the stack by comparing the address of the
+     stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&ms.x1 == rsp+8);
+  assert ((unsigned long)&ms.x2 == rsp+72);
+}
+
+static void
+do_test (void)
+{
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+	 bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+	 bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+	 bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
+  struct m512bf16_struct m512bf16s [8];
+  struct m512bf16_2_struct m512bf16_2s = {
+    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+      bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+      bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 },
+    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+      bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+      bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 }
+  };
+  int i;
+
+  for (i = 0; i < 8; i++)
+    {
+      m512bf16s[i].x = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+				      bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+				      bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+				      bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+    }
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    (&fregs.zmm0)[i]._m512bf16[0] = m512bf16s[i].x;
+  num_fregs = 8;
+  WRAP_CALL (check_struct_passing1bf16) (m512bf16s[0], m512bf16s[1], m512bf16s[2], m512bf16s[3],
+					 m512bf16s[4], m512bf16s[5], m512bf16s[6], m512bf16s[7]);
+  WRAP_CALL (check_struct_passing2bf16) (m512bf16_2s);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c
new file mode 100644
index 00000000000..3769b38aeb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_passing_unions.c
@@ -0,0 +1,222 @@ 
+#include "bf16-zmm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+union un1b
+{
+  __m512bf16 x;
+  float f;
+};
+
+union un1bb
+{
+  __m512bf16 x;
+  __bf16 f;
+};
+
+union un2b
+{
+  __m512bf16 x;
+  double d;
+};
+
+union un3b
+{
+  __m512bf16 x;
+  __m128 v;
+};
+
+union un4b
+{
+  __m512bf16 x;
+  long double ld;
+};
+
+union un5b
+{
+  __m512bf16 x;
+  int i;
+};
+
+union un6b
+{
+  __m512bf16 x;
+  __m256 v;
+};
+
+void
+check_union_passing1b (union un1b u1 ATTRIBUTE_UNUSED,
+		       union un1b u2 ATTRIBUTE_UNUSED,
+		       union un1b u3 ATTRIBUTE_UNUSED,
+		       union un1b u4 ATTRIBUTE_UNUSED,
+		       union un1b u5 ATTRIBUTE_UNUSED,
+		       union un1b u6 ATTRIBUTE_UNUSED,
+		       union un1b u7 ATTRIBUTE_UNUSED,
+		       union un1b u8 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+void
+check_union_passing1bb (union un1bb u1 ATTRIBUTE_UNUSED,
+		        union un1bb u2 ATTRIBUTE_UNUSED,
+		        union un1bb u3 ATTRIBUTE_UNUSED,
+		        union un1bb u4 ATTRIBUTE_UNUSED,
+		        union un1bb u5 ATTRIBUTE_UNUSED,
+		        union un1bb u6 ATTRIBUTE_UNUSED,
+		        union un1bb u7 ATTRIBUTE_UNUSED,
+		        union un1bb u8 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+
+void
+check_union_passing2b (union un2b u1 ATTRIBUTE_UNUSED,
+		       union un2b u2 ATTRIBUTE_UNUSED,
+		       union un2b u3 ATTRIBUTE_UNUSED,
+		       union un2b u4 ATTRIBUTE_UNUSED,
+		       union un2b u5 ATTRIBUTE_UNUSED,
+		       union un2b u6 ATTRIBUTE_UNUSED,
+		       union un2b u7 ATTRIBUTE_UNUSED,
+		       union un2b u8 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+void
+check_union_passing3b (union un3b u1 ATTRIBUTE_UNUSED,
+		       union un3b u2 ATTRIBUTE_UNUSED,
+		       union un3b u3 ATTRIBUTE_UNUSED,
+		       union un3b u4 ATTRIBUTE_UNUSED,
+		       union un3b u5 ATTRIBUTE_UNUSED,
+		       union un3b u6 ATTRIBUTE_UNUSED,
+		       union un3b u7 ATTRIBUTE_UNUSED,
+		       union un3b u8 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+void
+check_union_passing4b (union un4b u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.ld == rsp+8);
+}
+
+void
+check_union_passing5b (union un5b u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.i == rsp+8);
+}
+
+void
+check_union_passing6b (union un6b u1 ATTRIBUTE_UNUSED,
+		       union un6b u2 ATTRIBUTE_UNUSED,
+		       union un6b u3 ATTRIBUTE_UNUSED,
+		       union un6b u4 ATTRIBUTE_UNUSED,
+		       union un6b u5 ATTRIBUTE_UNUSED,
+		       union un6b u6 ATTRIBUTE_UNUSED,
+		       union un6b u7 ATTRIBUTE_UNUSED,
+		       union un6b u8 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m512_arguments;
+}
+
+#define check_union_passing1b WRAP_CALL(check_union_passing1b)
+#define check_union_passing1bf WRAP_CALL(check_union_passing1bf)
+#define check_union_passing1bb WRAP_CALL(check_union_passing1bb)
+#define check_union_passing2b WRAP_CALL(check_union_passing2b)
+#define check_union_passing3b WRAP_CALL(check_union_passing3b)
+#define check_union_passing4b WRAP_CALL(check_union_passing4b)
+#define check_union_passing5b WRAP_CALL(check_union_passing5b)
+#define check_union_passing6b WRAP_CALL(check_union_passing6b)
+
+
+static void
+do_test (void)
+{
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+	 bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+	 bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+	 bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
+  union un1b u1b[8];
+  union un1bb u1bb[8];
+  union un2b u2b[8];
+  union un3b u3b[8];
+  union un4b u4b;
+  union un5b u5b;
+  union un6b u6b[8];
+  int i;
+
+  for (i = 0; i < 8; i++)
+    {
+      u1b[i].x =  (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+				 bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+				 bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+				 bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+    }
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    (&fregs.zmm0)[i]._m512bf16[0] = u1b[i].x;
+  num_fregs = 8;
+  check_union_passing1b (u1b[0], u1b[1], u1b[2], u1b[3],
+		         u1b[4], u1b[5], u1b[6], u1b[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u1bb[i].x = u1b[i].x;
+      (&fregs.zmm0)[i]._m512bf16[0] = u1bb[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing1bb (u1bb[0], u1bb[1], u1bb[2], u1bb[3],
+		          u1bb[4], u1bb[5], u1bb[6], u1bb[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u2b[i].x = u1bb[i].x;
+      (&fregs.zmm0)[i]._m512bf16[0] = u2b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing2b (u2b[0], u2b[1], u2b[2], u2b[3],
+		         u2b[4], u2b[5], u2b[6], u2b[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u3b[i].x = u1b[i].x;
+      (&fregs.zmm0)[i]._m512bf16[0] = u3b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing3b (u3b[0], u3b[1], u3b[2], u3b[3],
+			 u3b[4], u3b[5], u3b[6], u3b[7]);
+
+  check_union_passing4b (u4b);
+  check_union_passing5b (u5b);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u6b[i].x = u1b[i].x;
+      (&fregs.zmm0)[i]._m512bf16[0] = u6b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing6b (u6b[0], u6b[1], u6b[2], u6b[3],
+			 u6b[4], u6b[5], u6b[6], u6b[7]);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c
new file mode 100644
index 00000000000..2be57b8b5fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/test_varargs-m512.c
@@ -0,0 +1,111 @@ 
+/* Test variable number of 512-bit vector arguments passed to functions.  */
+
+#include <stdio.h>
+#include "bf16-zmm-check.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+
+/* This struct holds values for argument checking.  */
+struct 
+{
+  ZMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+void
+fun_check_passing_m512bf16_varargs (__m512bf16 i0, __m512bf16 i1, __m512bf16 i2,
+				 __m512bf16 i3, ...)
+{
+  /* Check argument values.  */
+  void **fp = __builtin_frame_address (0);
+  void *ra = __builtin_return_address (0);
+  __m512bf16 *argp;
+
+  compare (values.i0, i0, __m512bf16);
+  compare (values.i1, i1, __m512bf16);
+  compare (values.i2, i2, __m512bf16);
+  compare (values.i3, i3, __m512bf16);
+
+  /* Get the pointer to the return address on stack.  */
+  while (*fp != ra)
+    fp++;
+
+  /* Skip the return address stack slot.  */
+  argp = (__m512bf16 *)(((char *) fp) + 8);
+
+  /* Check __m512bf16 arguments passed on stack.  */
+  compare (values.i4, argp[0], __m512bf16);
+  compare (values.i5, argp[1], __m512bf16);
+  compare (values.i6, argp[2], __m512bf16);
+  compare (values.i7, argp[3], __m512bf16);
+  compare (values.i8, argp[4], __m512bf16);
+  compare (values.i9, argp[5], __m512bf16);
+
+  /* Check register contents.  */
+  compare (fregs.zmm0, zmm_regs[0], __m512bf16);
+  compare (fregs.zmm1, zmm_regs[1], __m512bf16);
+  compare (fregs.zmm2, zmm_regs[2], __m512bf16);
+  compare (fregs.zmm3, zmm_regs[3], __m512bf16);
+}
+
+#define def_check_int_passing_varargs(_i0, _i1, _i2, _i3, _i4, _i5, \
+				      _i6, _i7, _i8, _i9, \
+				      _func, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  WRAP_CALL(_func) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9);
+
+void
+test_m512bf16_varargs (void)
+{
+  __m512bf16 x[10];
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+	 bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+	 bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+	 bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32;
+  int i;
+  for (i = 0; i < 10; i++)
+    x[i] = (__m512bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			  bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+			  bf17,bf18,bf19,bf20,bf21,bf22,bf23,bf24,
+			  bf25,bf26,bf27,bf28,bf29,bf30,bf31,bf32 };
+  pass = "m512bf16-varargs";
+  def_check_int_passing_varargs (x[0], x[1], x[2], x[3], x[4], x[5],
+				 x[6], x[7], x[8], x[9],
+				 fun_check_passing_m512bf16_varargs,
+				 _m512bf16);
+}
+
+void
+do_test (void)
+{
+  test_m512bf16_varargs ();
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h
new file mode 100644
index 00000000000..98fbc660f27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/macros.h
@@ -0,0 +1,53 @@ 
+#ifndef MACROS_H
+
+#define check_size(_t, _size) assert(sizeof(_t) == (_size))
+
+#define check_align(_t, _align) assert(__alignof__(_t) == (_align))
+
+#define check_align_lv(_t, _align) assert(__alignof__(_t) == (_align) \
+					  && (((unsigned long)&(_t)) & ((_align) - 1) ) == 0)
+
+#define check_basic_struct_size_and_align(_type, _size, _align) { \
+  struct _str { _type dummy; } _t; \
+  check_size(_t, _size); \
+  check_align_lv(_t, _align); \
+}
+
+#define check_array_size_and_align(_type, _size, _align) { \
+  _type _a[1]; _type _b[2]; _type _c[16]; \
+  struct _str { _type _a[1]; } _s; \
+  check_align_lv(_a[0], _align); \
+  check_size(_a, _size); \
+  check_size(_b, (_size*2)); \
+  check_size(_c, (_size*16)); \
+  check_size(_s, _size); \
+  check_align_lv(_s._a[0], _align); \
+}
+
+#define check_basic_union_size_and_align(_type, _size, _align) { \
+  union _union { _type dummy; } _u; \
+  check_size(_u, _size); \
+  check_align_lv(_u, _align); \
+}
+
+#define run_signed_tests2(_function, _arg1, _arg2) \
+  _function(_arg1, _arg2); \
+  _function(signed _arg1, _arg2); \
+  _function(unsigned _arg1, _arg2);
+
+#define run_signed_tests3(_function, _arg1, _arg2, _arg3) \
+  _function(_arg1, _arg2, _arg3); \
+  _function(signed _arg1, _arg2, _arg3); \
+  _function(unsigned _arg1, _arg2, _arg3);
+
+/* Check size of a struct and a union of three types.  */
+
+#define check_struct_and_union3(type1, type2, type3, struct_size, align_size) \
+{ \
+  struct _str { type1 t1; type2 t2; type3 t3; } _t; \
+  union _uni { type1 t1; type2 t2; type3 t3; } _u; \
+  check_size(_t, struct_size); \
+  check_size(_u, align_size); \
+}
+
+#endif // MACROS_H
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c
new file mode 100644
index 00000000000..0c58db101e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_3_element_struct_and_unions.c
@@ -0,0 +1,214 @@ 
+/* This is an autogenerated file. Do not edit.  */
+
+#include "defines.h"
+#include "macros.h"
+
+/* Check structs and unions of all permutations of 3 basic types.  */
+int
+main (void)
+{
+  check_struct_and_union3(char, char, __bf16, 4, 2);
+  check_struct_and_union3(char, __bf16, char, 6, 2);
+  check_struct_and_union3(char, __bf16, __bf16, 6, 2);
+  check_struct_and_union3(char, __bf16, int, 8, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(char, __bf16, long, 16, 8);
+#endif
+  check_struct_and_union3(char, __bf16, long long, 16, 8);
+  check_struct_and_union3(char, __bf16, float, 8, 4);
+  check_struct_and_union3(char, __bf16, double, 16, 8);
+  check_struct_and_union3(char, __bf16, long double, 32, 16);
+  check_struct_and_union3(char, int, __bf16, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(char, long, __bf16, 24, 8);
+#endif
+  check_struct_and_union3(char, long long, __bf16, 24, 8);
+  check_struct_and_union3(char, float, __bf16, 12, 4);
+  check_struct_and_union3(char, double, __bf16, 24, 8);
+  check_struct_and_union3(char, long double, __bf16, 48, 16);
+  check_struct_and_union3(__bf16, char, char, 4, 2);
+  check_struct_and_union3(__bf16, char, __bf16, 6, 2);
+  check_struct_and_union3(__bf16, char, int, 8, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(__bf16, char, long, 16, 8);
+#endif
+  check_struct_and_union3(__bf16, char, long long, 16, 8);
+  check_struct_and_union3(__bf16, char, float, 8, 4);
+  check_struct_and_union3(__bf16, char, double, 16, 8);
+  check_struct_and_union3(__bf16, char, long double, 32, 16);
+  check_struct_and_union3(__bf16, __bf16, char, 6, 2);
+  check_struct_and_union3(__bf16, __bf16, __bf16, 6, 2);
+  check_struct_and_union3(__bf16, __bf16, int, 8, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(__bf16, __bf16, long, 16, 8);
+#endif
+  check_struct_and_union3(__bf16, __bf16, long long, 16, 8);
+  check_struct_and_union3(__bf16, __bf16, float, 8, 4);
+  check_struct_and_union3(__bf16, __bf16, double, 16, 8);
+  check_struct_and_union3(__bf16, __bf16, long double, 32, 16);
+  check_struct_and_union3(__bf16, int, char, 12, 4);
+  check_struct_and_union3(__bf16, int, __bf16, 12, 4);
+  check_struct_and_union3(__bf16, int, int, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(__bf16, int, long, 16, 8);
+#endif
+  check_struct_and_union3(__bf16, int, long long, 16, 8);
+  check_struct_and_union3(__bf16, int, float, 12, 4);
+  check_struct_and_union3(__bf16, int, double, 16, 8);
+  check_struct_and_union3(__bf16, int, long double, 32, 16);
+#ifndef __ILP32__
+  check_struct_and_union3(__bf16, long, char, 24, 8);
+  check_struct_and_union3(__bf16, long, __bf16, 24, 8);
+  check_struct_and_union3(__bf16, long, int, 24, 8);
+  check_struct_and_union3(__bf16, long, long, 24, 8);
+  check_struct_and_union3(__bf16, long, long long, 24, 8);
+  check_struct_and_union3(__bf16, long, float, 24, 8);
+  check_struct_and_union3(__bf16, long, double, 24, 8);
+#endif
+  check_struct_and_union3(__bf16, long, long double, 32, 16);
+  check_struct_and_union3(__bf16, long long, char, 24, 8);
+  check_struct_and_union3(__bf16, long long, __bf16, 24, 8);
+  check_struct_and_union3(__bf16, long long, int, 24, 8);
+  check_struct_and_union3(__bf16, long long, long, 24, 8);
+  check_struct_and_union3(__bf16, long long, long long, 24, 8);
+  check_struct_and_union3(__bf16, long long, float, 24, 8);
+  check_struct_and_union3(__bf16, long long, double, 24, 8);
+  check_struct_and_union3(__bf16, long long, long double, 32, 16);
+  check_struct_and_union3(__bf16, float, char, 12, 4);
+  check_struct_and_union3(__bf16, float, __bf16, 12, 4);
+  check_struct_and_union3(__bf16, float, int, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(__bf16, float, long, 16, 8);
+#endif
+  check_struct_and_union3(__bf16, float, long long, 16, 8);
+  check_struct_and_union3(__bf16, float, float, 12, 4);
+  check_struct_and_union3(__bf16, float, double, 16, 8);
+  check_struct_and_union3(__bf16, float, long double, 32, 16);
+  check_struct_and_union3(__bf16, double, char, 24, 8);
+  check_struct_and_union3(__bf16, double, __bf16, 24, 8);
+  check_struct_and_union3(__bf16, double, int, 24, 8);
+  check_struct_and_union3(__bf16, double, long, 24, 8);
+  check_struct_and_union3(__bf16, double, long long, 24, 8);
+  check_struct_and_union3(__bf16, double, float, 24, 8);
+  check_struct_and_union3(__bf16, double, double, 24, 8);
+  check_struct_and_union3(__bf16, double, long double, 32, 16);
+  check_struct_and_union3(__bf16, long double, char, 48, 16);
+  check_struct_and_union3(__bf16, long double, __bf16, 48, 16);
+  check_struct_and_union3(__bf16, long double, int, 48, 16);
+  check_struct_and_union3(__bf16, long double, long, 48, 16);
+  check_struct_and_union3(__bf16, long double, long long, 48, 16);
+  check_struct_and_union3(__bf16, long double, float, 48, 16);
+  check_struct_and_union3(__bf16, long double, double, 48, 16);
+  check_struct_and_union3(__bf16, long double, long double, 48, 16);
+  check_struct_and_union3(int, char, __bf16, 8, 4);
+  check_struct_and_union3(int, __bf16, char, 8, 4);
+  check_struct_and_union3(int, __bf16, __bf16, 8, 4);
+  check_struct_and_union3(int, __bf16, int, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(int, __bf16, long, 16, 8);
+#endif
+  check_struct_and_union3(int, __bf16, long long, 16, 8);
+  check_struct_and_union3(int, __bf16, float, 12, 4);
+  check_struct_and_union3(int, __bf16, double, 16, 8);
+  check_struct_and_union3(int, __bf16, long double, 32, 16);
+  check_struct_and_union3(int, int, __bf16, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(int, long, __bf16, 24, 8);
+#endif
+  check_struct_and_union3(int, long long, __bf16, 24, 8);
+  check_struct_and_union3(int, float, __bf16, 12, 4);
+  check_struct_and_union3(int, double, __bf16, 24, 8);
+  check_struct_and_union3(int, long double, __bf16, 48, 16);
+#ifndef __ILP32__
+  check_struct_and_union3(long, char, __bf16, 16, 8);
+  check_struct_and_union3(long, __bf16, char, 16, 8);
+  check_struct_and_union3(long, __bf16, __bf16, 16, 8);
+  check_struct_and_union3(long, __bf16, int, 16, 8);
+  check_struct_and_union3(long, __bf16, long, 24, 8);
+  check_struct_and_union3(long, __bf16, long long, 24, 8);
+  check_struct_and_union3(long, __bf16, float, 16, 8);
+  check_struct_and_union3(long, __bf16, double, 24, 8);
+#endif
+  check_struct_and_union3(long, __bf16, long double, 32, 16);
+#ifndef __ILP32__
+  check_struct_and_union3(long, int, __bf16, 16, 8);
+  check_struct_and_union3(long, long, __bf16, 24, 8);
+  check_struct_and_union3(long, long long, __bf16, 24, 8);
+  check_struct_and_union3(long, float, __bf16, 16, 8);
+  check_struct_and_union3(long, double, __bf16, 24, 8);
+#endif
+  check_struct_and_union3(long, long double, __bf16, 48, 16);
+  check_struct_and_union3(long long, char, __bf16, 16, 8);
+  check_struct_and_union3(long long, __bf16, char, 16, 8);
+  check_struct_and_union3(long long, __bf16, __bf16, 16, 8);
+  check_struct_and_union3(long long, __bf16, int, 16, 8);
+#ifndef __ILP32__
+  check_struct_and_union3(long long, __bf16, long, 24, 8);
+#endif
+  check_struct_and_union3(long long, __bf16, long long, 24, 8);
+  check_struct_and_union3(long long, __bf16, float, 16, 8);
+  check_struct_and_union3(long long, __bf16, double, 24, 8);
+  check_struct_and_union3(long long, __bf16, long double, 32, 16);
+  check_struct_and_union3(long long, int, __bf16, 16, 8);
+#ifndef __ILP32__
+  check_struct_and_union3(long long, long, __bf16, 24, 8);
+#endif
+  check_struct_and_union3(long long, long long, __bf16, 24, 8);
+  check_struct_and_union3(long long, float, __bf16, 16, 8);
+  check_struct_and_union3(long long, double, __bf16, 24, 8);
+  check_struct_and_union3(long long, long double, __bf16, 48, 16);
+  check_struct_and_union3(float, char, __bf16, 8, 4);
+  check_struct_and_union3(float, __bf16, char, 8, 4);
+  check_struct_and_union3(float, __bf16, __bf16, 8, 4);
+  check_struct_and_union3(float, __bf16, int, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(float, __bf16, long, 16, 8);
+#endif
+  check_struct_and_union3(float, __bf16, long long, 16, 8);
+  check_struct_and_union3(float, __bf16, float, 12, 4);
+  check_struct_and_union3(float, __bf16, double, 16, 8);
+  check_struct_and_union3(float, __bf16, long double, 32, 16);
+  check_struct_and_union3(float, int, __bf16, 12, 4);
+#ifndef __ILP32__
+  check_struct_and_union3(float, long, __bf16, 24, 8);
+#endif
+  check_struct_and_union3(float, long long, __bf16, 24, 8);
+  check_struct_and_union3(float, float, __bf16, 12, 4);
+  check_struct_and_union3(float, double, __bf16, 24, 8);
+  check_struct_and_union3(float, long double, __bf16, 48, 16);
+  check_struct_and_union3(double, char, __bf16, 16, 8);
+  check_struct_and_union3(double, __bf16, char, 16, 8);
+  check_struct_and_union3(double, __bf16, __bf16, 16, 8);
+  check_struct_and_union3(double, __bf16, int, 16, 8);
+#ifndef __ILP32__
+  check_struct_and_union3(double, __bf16, long, 24, 8);
+#endif
+  check_struct_and_union3(double, __bf16, long long, 24, 8);
+  check_struct_and_union3(double, __bf16, float, 16, 8);
+  check_struct_and_union3(double, __bf16, double, 24, 8);
+  check_struct_and_union3(double, __bf16, long double, 32, 16);
+  check_struct_and_union3(double, int, __bf16, 16, 8);
+#ifndef __ILP32__
+  check_struct_and_union3(double, long, __bf16, 24, 8);
+#endif
+  check_struct_and_union3(double, long long, __bf16, 24, 8);
+  check_struct_and_union3(double, float, __bf16, 16, 8);
+  check_struct_and_union3(double, double, __bf16, 24, 8);
+  check_struct_and_union3(double, long double, __bf16, 48, 16);
+  check_struct_and_union3(long double, char, __bf16, 32, 16);
+  check_struct_and_union3(long double, __bf16, char, 32, 16);
+  check_struct_and_union3(long double, __bf16, __bf16, 32, 16);
+  check_struct_and_union3(long double, __bf16, int, 32, 16);
+  check_struct_and_union3(long double, __bf16, long, 32, 16);
+  check_struct_and_union3(long double, __bf16, long long, 32, 16);
+  check_struct_and_union3(long double, __bf16, float, 32, 16);
+  check_struct_and_union3(long double, __bf16, double, 32, 16);
+  check_struct_and_union3(long double, __bf16, long double, 48, 16);
+  check_struct_and_union3(long double, int, __bf16, 32, 16);
+  check_struct_and_union3(long double, long, __bf16, 32, 16);
+  check_struct_and_union3(long double, long long, __bf16, 32, 16);
+  check_struct_and_union3(long double, float, __bf16, 32, 16);
+  check_struct_and_union3(long double, double, __bf16, 32, 16);
+  check_struct_and_union3(long double, long double, __bf16, 48, 16);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c
new file mode 100644
index 00000000000..6490a5228ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_alignment.c
@@ -0,0 +1,14 @@ 
+/* This checks alignment of basic types.  */
+
+#include "defines.h"
+#include "macros.h"
+
+
+int
+main (void)
+{
+  /* __bf16 point types.  */
+  check_align(__bf16, TYPE_ALIGN_BF16);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c
new file mode 100644
index 00000000000..c004c35bb83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_array_size_and_align.c
@@ -0,0 +1,13 @@ 
+/* This checks .  */
+
+#include "defines.h"
+#include "macros.h"
+
+
+int
+main (void)
+{
+  check_array_size_and_align(__bf16, TYPE_SIZE_BF16, TYPE_ALIGN_BF16);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c
new file mode 100644
index 00000000000..cfea2224733
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_returning.c
@@ -0,0 +1,20 @@ 
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+#include "args.h"
+
+__bf16
+fun_test_returning_bf16 (void)
+{
+  __bf16 b = make_f32_bf16 (72.0f);
+  volatile_var++;
+  return b;
+}
+
+static void
+do_test (void)
+{
+  __bf16 var = WRAP_RET (fun_test_returning_bf16) ();
+  assert (check_bf16_float (xmm_regs[0].___bf16[0], 72.0f) == 1);
+  assert (check_bf16_float (var, 72.0f) == 1);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c
new file mode 100644
index 00000000000..b81a8d971b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_sizes.c
@@ -0,0 +1,14 @@ 
+/* This checks sizes of basic types.  */
+
+#include "defines.h"
+#include "macros.h"
+
+
+int
+main (void)
+{
+  /* Floating point types.  */
+  check_size(__bf16, TYPE_SIZE_BF16);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c
new file mode 100644
index 00000000000..f282506703c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_struct_size_and_align.c
@@ -0,0 +1,14 @@ 
+/* This checks size and alignment of structs with a single basic type
+   element. All basic types are checked.  */
+
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+
+
+static void
+do_test (void)
+{
+  /* Floating point types.  */
+  check_basic_struct_size_and_align(__bf16, TYPE_SIZE_BF16, TYPE_ALIGN_BF16);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c
new file mode 100644
index 00000000000..03afa68c0e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_basic_union_size_and_align.c
@@ -0,0 +1,12 @@ 
+/* Test of simple unions, size and alignment.  */
+
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+
+static void
+do_test (void)
+{
+  /* Floating point types.  */
+  check_basic_union_size_and_align(__bf16, TYPE_SIZE_BF16, TYPE_ALIGN_BF16);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c
new file mode 100644
index 00000000000..64857ce7b71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_m128_returning.c
@@ -0,0 +1,38 @@ 
+#include <stdio.h>
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
+
+__m128bf16
+fun_test_returning___m128bf16 (void)
+{
+  volatile_var++;
+  return (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+}
+
+__m128bf16 test_128bf16;
+
+static void
+do_test (void)
+{
+  unsigned failed = 0;
+  XMM_T xmmt1, xmmt2;
+
+  clear_struct_registers;
+  test_128bf16 = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+  xmmt1._m128bf16[0] = test_128bf16;
+  xmmt2._m128bf16[0] = WRAP_RET (fun_test_returning___m128bf16)();
+  if (xmmt1._longlong[0] != xmmt2._longlong[0]
+      || xmmt1._longlong[0] != xmm_regs[0]._longlong[0])
+    printf ("fail m128bf16\n"), failed++;
+
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c
new file mode 100644
index 00000000000..fe08042286b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_floats.c
@@ -0,0 +1,312 @@ 
+/* This is an autogenerated file. Do not edit.  */
+
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+/* This struct holds values for argument checking.  */
+struct
+{
+  __bf16 f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14,
+    f15, f16, f17, f18, f19, f20, f21, f22, f23;
+} values___bf16;
+
+void
+fun_check_bf16_passing_8_values (__bf16 f0 ATTRIBUTE_UNUSED,
+				 __bf16 f1 ATTRIBUTE_UNUSED,
+				 __bf16 f2 ATTRIBUTE_UNUSED,
+				 __bf16 f3 ATTRIBUTE_UNUSED,
+				 __bf16 f4 ATTRIBUTE_UNUSED,
+				 __bf16 f5 ATTRIBUTE_UNUSED,
+				 __bf16 f6 ATTRIBUTE_UNUSED,
+				 __bf16 f7 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  check_bf16 (values___bf16.f0, f0);
+  check_bf16 (values___bf16.f1, f1);
+  check_bf16 (values___bf16.f2, f2);
+  check_bf16 (values___bf16.f3, f3);
+  check_bf16 (values___bf16.f4, f4);
+  check_bf16 (values___bf16.f5, f5);
+  check_bf16 (values___bf16.f6, f6);
+  check_bf16 (values___bf16.f7, f7);
+}
+
+void
+fun_check_bf16_passing_8_regs (__bf16 f0 ATTRIBUTE_UNUSED,
+			       __bf16 f1 ATTRIBUTE_UNUSED,
+			       __bf16 f2 ATTRIBUTE_UNUSED,
+			       __bf16 f3 ATTRIBUTE_UNUSED,
+			       __bf16 f4 ATTRIBUTE_UNUSED,
+			       __bf16 f5 ATTRIBUTE_UNUSED,
+			       __bf16 f6 ATTRIBUTE_UNUSED,
+			       __bf16 f7 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_bf16_arguments;
+}
+
+void
+fun_check_bf16_passing_16_values (__bf16 f0 ATTRIBUTE_UNUSED,
+				  __bf16 f1 ATTRIBUTE_UNUSED,
+				  __bf16 f2 ATTRIBUTE_UNUSED,
+				  __bf16 f3 ATTRIBUTE_UNUSED,
+				  __bf16 f4 ATTRIBUTE_UNUSED,
+				  __bf16 f5 ATTRIBUTE_UNUSED,
+				  __bf16 f6 ATTRIBUTE_UNUSED,
+				  __bf16 f7 ATTRIBUTE_UNUSED,
+				  __bf16 f8 ATTRIBUTE_UNUSED,
+				  __bf16 f9 ATTRIBUTE_UNUSED,
+				  __bf16 f10 ATTRIBUTE_UNUSED,
+				  __bf16 f11 ATTRIBUTE_UNUSED,
+				  __bf16 f12 ATTRIBUTE_UNUSED,
+				  __bf16 f13 ATTRIBUTE_UNUSED,
+				  __bf16 f14 ATTRIBUTE_UNUSED,
+				  __bf16 f15 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  check_bf16 (values___bf16.f0, f0);
+  check_bf16 (values___bf16.f1, f1);
+  check_bf16 (values___bf16.f2, f2);
+  check_bf16 (values___bf16.f3, f3);
+  check_bf16 (values___bf16.f4, f4);
+  check_bf16 (values___bf16.f5, f5);
+  check_bf16 (values___bf16.f6, f6);
+  check_bf16 (values___bf16.f7, f7);
+  check_bf16 (values___bf16.f8, f8);
+  check_bf16 (values___bf16.f9, f9);
+  check_bf16 (values___bf16.f10, f10);
+  check_bf16 (values___bf16.f11, f11);
+  check_bf16 (values___bf16.f12, f12);
+  check_bf16 (values___bf16.f13, f13);
+  check_bf16 (values___bf16.f14, f14);
+  check_bf16 (values___bf16.f15, f15);
+}
+
+void
+fun_check_bf16_passing_16_regs (__bf16 f0 ATTRIBUTE_UNUSED,
+				__bf16 f1 ATTRIBUTE_UNUSED,
+				__bf16 f2 ATTRIBUTE_UNUSED,
+				__bf16 f3 ATTRIBUTE_UNUSED,
+				__bf16 f4 ATTRIBUTE_UNUSED,
+				__bf16 f5 ATTRIBUTE_UNUSED,
+				__bf16 f6 ATTRIBUTE_UNUSED,
+				__bf16 f7 ATTRIBUTE_UNUSED,
+				__bf16 f8 ATTRIBUTE_UNUSED,
+				__bf16 f9 ATTRIBUTE_UNUSED,
+				__bf16 f10 ATTRIBUTE_UNUSED,
+				__bf16 f11 ATTRIBUTE_UNUSED,
+				__bf16 f12 ATTRIBUTE_UNUSED,
+				__bf16 f13 ATTRIBUTE_UNUSED,
+				__bf16 f14 ATTRIBUTE_UNUSED,
+				__bf16 f15 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_bf16_arguments;
+}
+
+void
+fun_check_bf16_passing_20_values (__bf16 f0 ATTRIBUTE_UNUSED,
+				  __bf16 f1 ATTRIBUTE_UNUSED,
+				  __bf16 f2 ATTRIBUTE_UNUSED,
+				  __bf16 f3 ATTRIBUTE_UNUSED,
+				  __bf16 f4 ATTRIBUTE_UNUSED,
+				  __bf16 f5 ATTRIBUTE_UNUSED,
+				  __bf16 f6 ATTRIBUTE_UNUSED,
+				  __bf16 f7 ATTRIBUTE_UNUSED,
+				  __bf16 f8 ATTRIBUTE_UNUSED,
+				  __bf16 f9 ATTRIBUTE_UNUSED,
+				  __bf16 f10 ATTRIBUTE_UNUSED,
+				  __bf16 f11 ATTRIBUTE_UNUSED,
+				  __bf16 f12 ATTRIBUTE_UNUSED,
+				  __bf16 f13 ATTRIBUTE_UNUSED,
+				  __bf16 f14 ATTRIBUTE_UNUSED,
+				  __bf16 f15 ATTRIBUTE_UNUSED,
+				  __bf16 f16 ATTRIBUTE_UNUSED,
+				  __bf16 f17 ATTRIBUTE_UNUSED,
+				  __bf16 f18 ATTRIBUTE_UNUSED,
+				  __bf16 f19 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  check_bf16 (values___bf16.f0, f0);
+  check_bf16 (values___bf16.f1, f1);
+  check_bf16 (values___bf16.f2, f2);
+  check_bf16 (values___bf16.f3, f3);
+  check_bf16 (values___bf16.f4, f4);
+  check_bf16 (values___bf16.f5, f5);
+  check_bf16 (values___bf16.f6, f6);
+  check_bf16 (values___bf16.f7, f7);
+  check_bf16 (values___bf16.f8, f8);
+  check_bf16 (values___bf16.f9, f9);
+  check_bf16 (values___bf16.f10, f10);
+  check_bf16 (values___bf16.f11, f11);
+  check_bf16 (values___bf16.f12, f12);
+  check_bf16 (values___bf16.f13, f13);
+  check_bf16 (values___bf16.f14, f14);
+  check_bf16 (values___bf16.f15, f15);
+  check_bf16 (values___bf16.f16, f16);
+  check_bf16 (values___bf16.f17, f17);
+  check_bf16 (values___bf16.f18, f18);
+  check_bf16 (values___bf16.f19, f19);
+}
+
+void
+fun_check_bf16_passing_20_regs (__bf16 f0 ATTRIBUTE_UNUSED,
+				__bf16 f1 ATTRIBUTE_UNUSED,
+				__bf16 f2 ATTRIBUTE_UNUSED,
+				__bf16 f3 ATTRIBUTE_UNUSED,
+				__bf16 f4 ATTRIBUTE_UNUSED,
+				__bf16 f5 ATTRIBUTE_UNUSED,
+				__bf16 f6 ATTRIBUTE_UNUSED,
+				__bf16 f7 ATTRIBUTE_UNUSED,
+				__bf16 f8 ATTRIBUTE_UNUSED,
+				__bf16 f9 ATTRIBUTE_UNUSED,
+				__bf16 f10 ATTRIBUTE_UNUSED,
+				__bf16 f11 ATTRIBUTE_UNUSED,
+				__bf16 f12 ATTRIBUTE_UNUSED,
+				__bf16 f13 ATTRIBUTE_UNUSED,
+				__bf16 f14 ATTRIBUTE_UNUSED,
+				__bf16 f15 ATTRIBUTE_UNUSED,
+				__bf16 f16 ATTRIBUTE_UNUSED,
+				__bf16 f17 ATTRIBUTE_UNUSED,
+				__bf16 f18 ATTRIBUTE_UNUSED,
+				__bf16 f19 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_bf16_arguments;
+}
+
+#define def_check_bf16_passing8(_f0, _f1, _f2, _f3, _f4, _f5, _f6,\
+				   _f7, _func1, _func2, TYPE) \
+  values_ ## TYPE .f0 = _f0; \
+  values_ ## TYPE .f1 = _f1; \
+  values_ ## TYPE .f2 = _f2; \
+  values_ ## TYPE .f3 = _f3; \
+  values_ ## TYPE .f4 = _f4; \
+  values_ ## TYPE .f5 = _f5; \
+  values_ ## TYPE .f6 = _f6; \
+  values_ ## TYPE .f7 = _f7; \
+  WRAP_CALL(_func1) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7); \
+  clear_float_registers; \
+  fregs.F0._ ## TYPE [0] = _f0; \
+  fregs.F1._ ## TYPE [0] = _f1; \
+  fregs.F2._ ## TYPE [0] = _f2; \
+  fregs.F3._ ## TYPE [0] = _f3; \
+  fregs.F4._ ## TYPE [0] = _f4; \
+  fregs.F5._ ## TYPE [0] = _f5; \
+  fregs.F6._ ## TYPE [0] = _f6; \
+  fregs.F7._ ## TYPE [0] = _f7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7);
+
+#define def_check_bf16_passing16(_f0, _f1, _f2, _f3, _f4, _f5, _f6, \
+				    _f7, _f8, _f9, _f10, _f11, _f12, _f13, \
+				    _f14, _f15, _func1, _func2, TYPE) \
+  values_ ## TYPE .f0 = _f0; \
+  values_ ## TYPE .f1 = _f1; \
+  values_ ## TYPE .f2 = _f2; \
+  values_ ## TYPE .f3 = _f3; \
+  values_ ## TYPE .f4 = _f4; \
+  values_ ## TYPE .f5 = _f5; \
+  values_ ## TYPE .f6 = _f6; \
+  values_ ## TYPE .f7 = _f7; \
+  values_ ## TYPE .f8 = _f8; \
+  values_ ## TYPE .f9 = _f9; \
+  values_ ## TYPE .f10 = _f10; \
+  values_ ## TYPE .f11 = _f11; \
+  values_ ## TYPE .f12 = _f12; \
+  values_ ## TYPE .f13 = _f13; \
+  values_ ## TYPE .f14 = _f14; \
+  values_ ## TYPE .f15 = _f15; \
+  WRAP_CALL(_func1) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, _f9, \
+		     _f10, _f11, _f12, _f13, _f14, _f15); \
+  clear_float_registers; \
+  fregs.F0._ ## TYPE [0] = _f0; \
+  fregs.F1._ ## TYPE [0] = _f1; \
+  fregs.F2._ ## TYPE [0] = _f2; \
+  fregs.F3._ ## TYPE [0] = _f3; \
+  fregs.F4._ ## TYPE [0] = _f4; \
+  fregs.F5._ ## TYPE [0] = _f5; \
+  fregs.F6._ ## TYPE [0] = _f6; \
+  fregs.F7._ ## TYPE [0] = _f7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, _f9, \
+		     _f10, _f11, _f12, _f13, _f14, _f15);
+
+#define def_check_bf16_passing20(_f0, _f1, _f2, _f3, _f4, _f5, _f6, \
+				    _f7, _f8, _f9, _f10, _f11, _f12, \
+				    _f13, _f14, _f15, _f16, _f17, \
+				    _f18, _f19, _func1, _func2, TYPE) \
+  values_ ## TYPE .f0 = _f0; \
+  values_ ## TYPE .f1 = _f1; \
+  values_ ## TYPE .f2 = _f2; \
+  values_ ## TYPE .f3 = _f3; \
+  values_ ## TYPE .f4 = _f4; \
+  values_ ## TYPE .f5 = _f5; \
+  values_ ## TYPE .f6 = _f6; \
+  values_ ## TYPE .f7 = _f7; \
+  values_ ## TYPE .f8 = _f8; \
+  values_ ## TYPE .f9 = _f9; \
+  values_ ## TYPE .f10 = _f10; \
+  values_ ## TYPE .f11 = _f11; \
+  values_ ## TYPE .f12 = _f12; \
+  values_ ## TYPE .f13 = _f13; \
+  values_ ## TYPE .f14 = _f14; \
+  values_ ## TYPE .f15 = _f15; \
+  values_ ## TYPE .f16 = _f16; \
+  values_ ## TYPE .f17 = _f17; \
+  values_ ## TYPE .f18 = _f18; \
+  values_ ## TYPE .f19 = _f19; \
+  WRAP_CALL(_func1) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, \
+		     _f9, _f10, _f11, _f12, _f13, _f14, _f15, _f16, \
+		     _f17, _f18, _f19); \
+  clear_float_registers; \
+  fregs.F0._ ## TYPE [0] = _f0; \
+  fregs.F1._ ## TYPE [0] = _f1; \
+  fregs.F2._ ## TYPE [0] = _f2; \
+  fregs.F3._ ## TYPE [0] = _f3; \
+  fregs.F4._ ## TYPE [0] = _f4; \
+  fregs.F5._ ## TYPE [0] = _f5; \
+  fregs.F6._ ## TYPE [0] = _f6; \
+  fregs.F7._ ## TYPE [0] = _f7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_f0, _f1, _f2, _f3, _f4, _f5, _f6, _f7, _f8, _f9, \
+		     _f10, _f11, _f12, _f13, _f14, _f15, _f16, _f17, \
+		     _f18, _f19);
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10,
+		bf11,bf12,bf13,bf14,bf15,bf16,bf17,bf18,bf19,bf20;
+
+void
+test_bf16_on_stack ()
+{
+  def_check_bf16_passing8 (bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			   fun_check_bf16_passing_8_values,
+			   fun_check_bf16_passing_8_regs, __bf16);
+
+  def_check_bf16_passing16 (bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+			    bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16,
+			    fun_check_bf16_passing_16_values,
+			    fun_check_bf16_passing_16_regs, __bf16);
+}
+
+void
+test_too_many_bf16 ()
+{
+  def_check_bf16_passing20 (bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10,
+			    bf11,bf12,bf13,bf14,bf15,bf16,bf17,bf18,bf19,bf20,
+			    fun_check_bf16_passing_20_values,
+			    fun_check_bf16_passing_20_regs, __bf16);
+}
+
+static void
+do_test (void)
+{
+  test_bf16_on_stack ();
+  test_too_many_bf16 ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c
new file mode 100644
index 00000000000..298b644e93d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_m128.c
@@ -0,0 +1,238 @@ 
+#include <stdio.h>
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+/* This struct holds values for argument checking.  */
+struct
+{
+  XMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
+    i16, i17, i18, i19, i20, i21, i22, i23;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+void
+fun_check_passing_m128bf16_8_values (__m128bf16 i0 ATTRIBUTE_UNUSED,
+				     __m128bf16 i1 ATTRIBUTE_UNUSED,
+				     __m128bf16 i2 ATTRIBUTE_UNUSED,
+				     __m128bf16 i3 ATTRIBUTE_UNUSED,
+				     __m128bf16 i4 ATTRIBUTE_UNUSED,
+				     __m128bf16 i5 ATTRIBUTE_UNUSED,
+				     __m128bf16 i6 ATTRIBUTE_UNUSED,
+				     __m128bf16 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m128bf16);
+  compare (values.i1, i1, __m128bf16);
+  compare (values.i2, i2, __m128bf16);
+  compare (values.i3, i3, __m128bf16);
+  compare (values.i4, i4, __m128bf16);
+  compare (values.i5, i5, __m128bf16);
+  compare (values.i6, i6, __m128bf16);
+  compare (values.i7, i7, __m128bf16);
+}
+
+void
+fun_check_passing_m128bf16_8_regs (__m128bf16 i0 ATTRIBUTE_UNUSED,
+				   __m128bf16 i1 ATTRIBUTE_UNUSED,
+				   __m128bf16 i2 ATTRIBUTE_UNUSED,
+				   __m128bf16 i3 ATTRIBUTE_UNUSED,
+				   __m128bf16 i4 ATTRIBUTE_UNUSED,
+				   __m128bf16 i5 ATTRIBUTE_UNUSED,
+				   __m128bf16 i6 ATTRIBUTE_UNUSED,
+				   __m128bf16 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m128_arguments;
+}
+
+void
+fun_check_passing_m128bf16_20_values (__m128bf16 i0 ATTRIBUTE_UNUSED,
+				      __m128bf16 i1 ATTRIBUTE_UNUSED,
+				      __m128bf16 i2 ATTRIBUTE_UNUSED,
+				      __m128bf16 i3 ATTRIBUTE_UNUSED,
+				      __m128bf16 i4 ATTRIBUTE_UNUSED,
+				      __m128bf16 i5 ATTRIBUTE_UNUSED,
+				      __m128bf16 i6 ATTRIBUTE_UNUSED,
+				      __m128bf16 i7 ATTRIBUTE_UNUSED,
+				      __m128bf16 i8 ATTRIBUTE_UNUSED,
+				      __m128bf16 i9 ATTRIBUTE_UNUSED,
+				      __m128bf16 i10 ATTRIBUTE_UNUSED,
+				      __m128bf16 i11 ATTRIBUTE_UNUSED,
+				      __m128bf16 i12 ATTRIBUTE_UNUSED,
+				      __m128bf16 i13 ATTRIBUTE_UNUSED,
+				      __m128bf16 i14 ATTRIBUTE_UNUSED,
+				      __m128bf16 i15 ATTRIBUTE_UNUSED,
+				      __m128bf16 i16 ATTRIBUTE_UNUSED,
+				      __m128bf16 i17 ATTRIBUTE_UNUSED,
+				      __m128bf16 i18 ATTRIBUTE_UNUSED,
+				      __m128bf16 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m128bf16);
+  compare (values.i1, i1, __m128bf16);
+  compare (values.i2, i2, __m128bf16);
+  compare (values.i3, i3, __m128bf16);
+  compare (values.i4, i4, __m128bf16);
+  compare (values.i5, i5, __m128bf16);
+  compare (values.i6, i6, __m128bf16);
+  compare (values.i7, i7, __m128bf16);
+  compare (values.i8, i8, __m128bf16);
+  compare (values.i9, i9, __m128bf16);
+  compare (values.i10, i10, __m128bf16);
+  compare (values.i11, i11, __m128bf16);
+  compare (values.i12, i12, __m128bf16);
+  compare (values.i13, i13, __m128bf16);
+  compare (values.i14, i14, __m128bf16);
+  compare (values.i15, i15, __m128bf16);
+  compare (values.i16, i16, __m128bf16);
+  compare (values.i17, i17, __m128bf16);
+  compare (values.i18, i18, __m128bf16);
+  compare (values.i19, i19, __m128bf16);
+}
+
+void
+fun_check_passing_m128bf16_20_regs (__m128bf16 i0 ATTRIBUTE_UNUSED,
+				    __m128bf16 i1 ATTRIBUTE_UNUSED,
+				    __m128bf16 i2 ATTRIBUTE_UNUSED,
+				    __m128bf16 i3 ATTRIBUTE_UNUSED,
+				    __m128bf16 i4 ATTRIBUTE_UNUSED,
+				    __m128bf16 i5 ATTRIBUTE_UNUSED,
+				    __m128bf16 i6 ATTRIBUTE_UNUSED,
+				    __m128bf16 i7 ATTRIBUTE_UNUSED,
+				    __m128bf16 i8 ATTRIBUTE_UNUSED,
+				    __m128bf16 i9 ATTRIBUTE_UNUSED,
+				    __m128bf16 i10 ATTRIBUTE_UNUSED,
+				    __m128bf16 i11 ATTRIBUTE_UNUSED,
+				    __m128bf16 i12 ATTRIBUTE_UNUSED,
+				    __m128bf16 i13 ATTRIBUTE_UNUSED,
+				    __m128bf16 i14 ATTRIBUTE_UNUSED,
+				    __m128bf16 i15 ATTRIBUTE_UNUSED,
+				    __m128bf16 i16 ATTRIBUTE_UNUSED,
+				    __m128bf16 i17 ATTRIBUTE_UNUSED,
+				    __m128bf16 i18 ATTRIBUTE_UNUSED,
+				    __m128bf16 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m128_arguments;
+}
+
+#define def_check_int_passing8(_i0, _i1, _i2, _i3, \
+			       _i4, _i5, _i6, _i7, \
+			       _func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
+  clear_float_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
+
+#define def_check_int_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, \
+				_i7, _i8, _i9, _i10, _i11, _i12, _i13, \
+				_i14, _i15, _i16, _i17, _i18, _i19, \
+				_func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  values.i10.TYPE[0] = _i10; \
+  values.i11.TYPE[0] = _i11; \
+  values.i12.TYPE[0] = _i12; \
+  values.i13.TYPE[0] = _i13; \
+  values.i14.TYPE[0] = _i14; \
+  values.i15.TYPE[0] = _i15; \
+  values.i16.TYPE[0] = _i16; \
+  values.i17.TYPE[0] = _i17; \
+  values.i18.TYPE[0] = _i18; \
+  values.i19.TYPE[0] = _i19; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
+		     _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, \
+		     _i17, _i18, _i19); \
+  clear_float_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, \
+		     _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, \
+		     _i17, _i18, _i19);
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
+
+void
+test_m128bf16_on_stack ()
+{
+  __m128bf16 x[8];
+  int i;
+  for (i = 0; i < 8; i++)
+    x[i] = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+  pass = "m128bf16-8";
+  def_check_int_passing8 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+			  fun_check_passing_m128bf16_8_values,
+			  fun_check_passing_m128bf16_8_regs, _m128bf16);
+}
+
+void
+test_too_many_m128bf16 ()
+{
+  __m128bf16 x[20];
+  int i;
+  for (i = 0; i < 20; i++)
+    x[i] = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+  pass = "m128bf16-20";
+  def_check_int_passing20 (x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+			   x[8], x[9], x[10], x[11], x[12], x[13], x[14],
+			   x[15], x[16], x[17], x[18], x[19],
+			   fun_check_passing_m128bf16_20_values,
+			   fun_check_passing_m128bf16_20_regs, _m128bf16);
+}
+
+static void
+do_test (void)
+{
+  test_m128bf16_on_stack ();
+  test_too_many_m128bf16 ();
+  if (failed)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c
new file mode 100644
index 00000000000..8d966005741
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_structs.c
@@ -0,0 +1,67 @@ 
+#include "bf16-check.h"
+#include "defines.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+struct m128bf16_struct
+{
+  __m128bf16 x;
+};
+
+struct m128bf16_2_struct
+{
+  __m128bf16 x1, x2;
+};
+
+/* Check that the struct is passed as the individual members in fregs.  */
+void
+check_struct_passing1bf16 (struct m128bf16_struct ms1 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms2 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms3 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms4 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms5 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms6 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms7 ATTRIBUTE_UNUSED,
+			   struct m128bf16_struct ms8 ATTRIBUTE_UNUSED)
+{
+  check_m128_arguments;
+}
+
+void
+check_struct_passing2bf16 (struct m128bf16_2_struct ms ATTRIBUTE_UNUSED)
+{
+  /* Check the passing on the stack by comparing the address of the
+     stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&ms.x1 == rsp+8);
+  assert ((unsigned long)&ms.x2 == rsp+24);
+}
+
+volatile __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+		bf9, bf10,bf11,bf12,bf13,bf14,bf15,bf16;
+
+static void
+do_test (void)
+{
+  struct m128bf16_struct m128bf16s [8];
+  struct m128bf16_2_struct m128bf16_2s = { 
+    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 },
+    { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 },
+  };
+  int i;
+
+  for (i = 0; i < 8; i++)
+    {
+      m128bf16s[i].x = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+    }
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    (&fregs.xmm0)[i]._m128bf16[0] = m128bf16s[i].x;
+  num_fregs = 8;
+  WRAP_CALL (check_struct_passing1bf16) (m128bf16s[0], m128bf16s[1], m128bf16s[2], m128bf16s[3],
+					 m128bf16s[4], m128bf16s[5], m128bf16s[6], m128bf16s[7]);
+  WRAP_CALL (check_struct_passing2bf16) (m128bf16_2s);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c
new file mode 100644
index 00000000000..83e4380512b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_passing_unions.c
@@ -0,0 +1,160 @@ 
+#include "bf16-check.h"
+#include "defines.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+unsigned int num_fregs, num_iregs;
+
+union un1b
+{
+  __m128bf16 x;
+  float f;
+};
+
+union un1bb
+{
+  __m128bf16 x;
+  __bf16 f;
+};
+
+union un2b
+{
+  __m128bf16 x;
+  double d;
+};
+
+union un3b
+{
+  __m128bf16 x;
+  __m128 v;
+};
+
+union un4b
+{
+  __m128bf16 x;
+  long double ld;
+};
+
+void
+check_union_passing1b (union un1b u1 ATTRIBUTE_UNUSED,
+		       union un1b u2 ATTRIBUTE_UNUSED,
+		       union un1b u3 ATTRIBUTE_UNUSED,
+		       union un1b u4 ATTRIBUTE_UNUSED,
+		       union un1b u5 ATTRIBUTE_UNUSED,
+		       union un1b u6 ATTRIBUTE_UNUSED,
+		       union un1b u7 ATTRIBUTE_UNUSED,
+		       union un1b u8 ATTRIBUTE_UNUSED)
+{
+  check_m128_arguments;
+}
+
+void
+check_union_passing1bb (union un1bb u1 ATTRIBUTE_UNUSED,
+		        union un1bb u2 ATTRIBUTE_UNUSED,
+		        union un1bb u3 ATTRIBUTE_UNUSED,
+		        union un1bb u4 ATTRIBUTE_UNUSED,
+		        union un1bb u5 ATTRIBUTE_UNUSED,
+		        union un1bb u6 ATTRIBUTE_UNUSED,
+		        union un1bb u7 ATTRIBUTE_UNUSED,
+		        union un1bb u8 ATTRIBUTE_UNUSED)
+{
+  check_m128_arguments;
+}
+
+void
+check_union_passing2b (union un2b u1 ATTRIBUTE_UNUSED,
+		       union un2b u2 ATTRIBUTE_UNUSED,
+		       union un2b u3 ATTRIBUTE_UNUSED,
+		       union un2b u4 ATTRIBUTE_UNUSED,
+		       union un2b u5 ATTRIBUTE_UNUSED,
+		       union un2b u6 ATTRIBUTE_UNUSED,
+		       union un2b u7 ATTRIBUTE_UNUSED,
+		       union un2b u8 ATTRIBUTE_UNUSED)
+{
+  check_m128_arguments;
+}
+
+void
+check_union_passing3b (union un3b u1 ATTRIBUTE_UNUSED,
+		       union un3b u2 ATTRIBUTE_UNUSED,
+		       union un3b u3 ATTRIBUTE_UNUSED,
+		       union un3b u4 ATTRIBUTE_UNUSED,
+		       union un3b u5 ATTRIBUTE_UNUSED,
+		       union un3b u6 ATTRIBUTE_UNUSED,
+		       union un3b u7 ATTRIBUTE_UNUSED,
+		       union un3b u8 ATTRIBUTE_UNUSED)
+{
+  check_m128_arguments;
+}
+
+void
+check_union_passing4b (union un4b u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.ld == rsp+8);
+}
+
+#define check_union_passing1b WRAP_CALL(check_union_passing1b)
+#define check_union_passing1bb WRAP_CALL(check_union_passing1bb)
+#define check_union_passing2b WRAP_CALL(check_union_passing2b)
+#define check_union_passing3b WRAP_CALL(check_union_passing3b)
+#define check_union_passing4b WRAP_CALL(check_union_passing4b)
+
+static void
+do_test (void)
+{
+  union un1b u1b[8];
+  union un1bb u1bb[8];
+  union un2b u2b[8];
+  union un3b u3b[8];
+  union un4b u4b;
+  int i;
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
+
+  for (i = 0; i < 8; i++)
+    {
+      u1b[i].x = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+    }
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    (&fregs.xmm0)[i]._m128bf16[0] = u1b[i].x;
+  num_fregs = 8;
+  check_union_passing1b (u1b[0], u1b[1], u1b[2], u1b[3],
+		         u1b[4], u1b[5], u1b[6], u1b[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u1bb[i].x = u1b[i].x;
+      (&fregs.xmm0)[i]._m128bf16[0] = u1bb[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing1bb (u1bb[0], u1bb[1], u1bb[2], u1bb[3],
+		          u1bb[4], u1bb[5], u1bb[6], u1bb[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u2b[i].x = u1b[i].x;
+      (&fregs.xmm0)[i]._m128bf16[0] = u2b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing2b (u2b[0], u2b[1], u2b[2], u2b[3],
+		         u2b[4], u2b[5], u2b[6], u2b[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u3b[i].x = u1b[i].x;
+      (&fregs.xmm0)[i]._m128bf16[0] = u3b[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing3b (u3b[0], u3b[1], u3b[2], u3b[3],
+		         u3b[4], u3b[5], u3b[6], u3b[7]);
+
+  check_union_passing4b (u4b);
+}
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c
new file mode 100644
index 00000000000..757ccc26b79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_struct_returning.c
@@ -0,0 +1,176 @@ 
+/* This tests returning of structures.  */
+
+#include <stdio.h>
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+int current_test;
+int num_failed = 0;
+
+#undef assert
+#define assert(test) do { if (!(test)) {fprintf (stderr, "failed in test %d\n", current_test); num_failed++; } } while (0)
+
+#define xmm0b xmm_regs[0].___bf16
+#define xmm1b xmm_regs[1].___bf16
+#define xmm0f xmm_regs[0]._float
+#define xmm0d xmm_regs[0]._double
+#define xmm1f xmm_regs[1]._float
+#define xmm1d xmm_regs[1]._double
+
+typedef enum {
+  SSE_B = 0,
+  SSE_D,
+  MEM,
+  INT_SSE,
+  SSE_INT,
+  SSE_F_H,
+  SSE_F_H8
+} Type;
+
+/* Structures which should be returned in SSE.  */
+#define D(I,MEMBERS,C,B) struct S_ ## I { MEMBERS ; }; Type class_ ## I = C; \
+struct S_ ## I f_ ## I (void) { struct S_ ## I s; memset (&s, 0, sizeof(s)); B; return s; }
+
+D(120,__bf16 f,SSE_B, s.f=make_f32_bf16(42.0f))
+D(121,__bf16 f;__bf16 f2,SSE_B, s.f=make_f32_bf16(42.0f))
+D(122,__bf16 f;float d,SSE_B, s.f=make_f32_bf16(42.0f))
+D(123,__bf16 f;double d,SSE_B, s.f=make_f32_bf16(42.0f))
+D(124,double d; __bf16 f,SSE_D, s.d=42)
+D(125,__bf16 f[2],SSE_B, s.f[0]=make_f32_bf16(42.0f))
+D(126,__bf16 f[3],SSE_B, s.f[0]=make_f32_bf16(42.0f))
+D(127,__bf16 f[4],SSE_B, s.f[0]=make_f32_bf16(42.0f))
+D(128,__bf16 f[2]; double d,SSE_B, s.f[0]=make_f32_bf16(42.0f))
+D(129,double d;__bf16 f[2],SSE_D, s.d=42)
+
+#undef D
+
+#define D(I,MEMBERS) struct S_ ## I { MEMBERS ; }; Type class_ ## I = INT_SSE; \
+struct S_ ## I f_ ## I (void) { struct S_ ## I s = { 42, make_f32_bf16(43.0f) }; return s; }
+
+D(310,char m1; __bf16 m2)
+D(311,short m1; __bf16 m2)
+D(312,int m1; __bf16 m2)
+D(313,long long m1; __bf16 m2)
+
+#undef D
+
+void check_300 (void)
+{
+  XMM_T x;
+  x._ulonglong[0] = rax;
+  switch (current_test) {
+    case 310: assert ((rax & 0xff) == 42
+		      && check_bf16_float (x.___bf16[1], 43.0f) == 1); break;
+    case 311: assert ((rax & 0xffff) == 42
+		      && check_bf16_float (x.___bf16[1], 43.0f) == 1); break;
+    case 312: assert ((rax & 0xffffffff) == 42
+		      && check_bf16_float (x.___bf16[2], 43.0f) == 1); break;
+    case 313: assert (rax == 42
+		      && check_bf16_float (xmm0b[0], 43.0f) == 1); break;
+
+    default: assert (0); break;
+  }
+}
+
+/* Structures which should be returned in SSE (low) and INT (high).  */
+#define D(I,MEMBERS,B) struct S_ ## I { MEMBERS ; }; Type class_ ## I = SSE_INT; \
+struct S_ ## I f_ ## I (void) { struct S_ ## I s; memset (&s, 0, sizeof(s));  B; return s; }
+
+D(402,__bf16 f[4];char c, s.f[0]=make_f32_bf16(42.0f); s.c=43)
+
+#undef D
+
+void check_400 (void)
+{
+  switch (current_test) {
+    case 402: assert (check_bf16_float (xmm0b[0], 42.0f) == 1 && (rax & 0xff) == 43); break;
+
+    default: assert (0); break;
+  }
+}
+
+/* Structures which should be returned in MEM.  */
+void *struct_addr;
+#define D(I,MEMBERS) struct S_ ## I { MEMBERS ; }; Type class_ ## I = MEM; \
+struct S_ ## I f_ ## I (void) { union {unsigned char c; struct S_ ## I s;} u; memset (&u.s, 0, sizeof(u.s)); u.c = 42; return u.s; }
+
+/* Unnaturally aligned members.  */
+D(540,__bf16 m1[10])
+D(541,char m1[1];__bf16 f[8])
+
+#undef D
+
+
+/* Special tests.  */
+#define D(I,MEMBERS,C,B) struct S_ ## I { MEMBERS ; }; Type class_ ## I = C; \
+struct S_ ## I f_ ## I (void) { struct S_ ## I s; B; return s; }
+D(601,__bf16 f[4], SSE_F_H, s.f[0] = s.f[1] = s.f[2] = s.f[3] = make_f32_bf16 (42.0f))
+D(602,__bf16 f[8], SSE_F_H8,
+  s.f[0] = s.f[1] = s.f[2] = s.f[3] = s.f[4] = s.f[5] = s.f[6] = s.f[7] = make_f32_bf16 (42.0f))
+#undef D
+
+void clear_all (void)
+{
+  clear_int_registers;
+}
+
+void check_all (Type class, unsigned long size)
+{
+  switch (class) {
+    case SSE_B: assert (check_bf16_float (xmm0b[0], 42.0f) == 1); break;
+    case SSE_D: assert (xmm0d[0] == 42); break;
+    case SSE_F_H: assert (check_bf16_float (xmm0b[0], 42) == 1
+			  && check_bf16_float (xmm0b[1], 42) == 1
+			  && check_bf16_float (xmm0b[2], 42) == 1
+			  && check_bf16_float (xmm0b[3], 42) == 1); break;
+    case SSE_F_H8: assert (check_bf16_float (xmm0b[0], 42) == 1
+			   && check_bf16_float (xmm0b[1], 42) == 1
+			   && check_bf16_float (xmm0b[2], 42) == 1
+			   && check_bf16_float (xmm0b[3], 42) == 1
+			   && check_bf16_float (xmm1b[0], 42) == 1
+			   && check_bf16_float (xmm1b[1], 42) == 1
+			   && check_bf16_float (xmm1b[2], 42) == 1
+			   && check_bf16_float (xmm1b[3], 42) == 1); break;
+    case INT_SSE: check_300(); break;
+    case SSE_INT: check_400(); break;
+    /* Ideally we would like to check that rax == struct_addr.
+       Unfortunately the address of the target struct escapes (for setting
+       struct_addr), so the return struct is a temporary one whose address
+       is given to the f_* functions, otherwise a conforming program
+       could notice the struct changing already before the function returns.
+       This temporary struct could be anywhere.  For GCC it will be on
+       stack, but no one is forbidding that it could be a static variable
+       if there's no threading or proper locking.  Nobody in his right mind
+       will not use the stack for that.  */
+    case MEM: assert (*(unsigned char*)struct_addr == 42 && rdi == rax); break;
+  }
+}
+
+#define D(I) { struct S_ ## I s; current_test = I; struct_addr = (void*)&s; \
+  clear_all(); \
+  s = WRAP_RET(f_ ## I) (); \
+  check_all(class_ ## I, sizeof(s)); \
+}
+
+static void
+do_test (void)
+{
+  D(120) D(121) D(122) D(123) D(124) D(125) D(126) D(127) D(128) D(129)
+
+  D(310) D(311) D(312) D(313)
+
+  D(402)
+
+  D(540) D(541)
+
+  D(601) D(602)
+  if (num_failed)
+    abort ();
+}
+#undef D
diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c
new file mode 100644
index 00000000000..4eea7eb7d3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/test_varargs-m128.c
@@ -0,0 +1,111 @@ 
+/* Test variable number of 128-bit vector arguments passed to functions.  */
+
+#include <stdio.h>
+#include "bf16-check.h"
+#include "defines.h"
+#include "macros.h"
+#include "args.h"
+
+struct FloatRegisters fregs;
+struct IntegerRegisters iregs;
+
+/* This struct holds values for argument checking.  */
+struct 
+{
+  XMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+void
+fun_check_passing_m128bf16_varargs (__m128bf16 i0, __m128bf16 i1, __m128bf16 i2,
+				 __m128bf16 i3, ...)
+{
+  /* Check argument values.  */
+  void **fp = __builtin_frame_address (0);
+  void *ra = __builtin_return_address (0);
+  __m128bf16 *argp;
+
+  compare (values.i0, i0, __m128bf16);
+  compare (values.i1, i1, __m128bf16);
+  compare (values.i2, i2, __m128bf16);
+  compare (values.i3, i3, __m128bf16);
+
+  /* Get the pointer to the return address on stack.  */
+  while (*fp != ra)
+    fp++;
+
+  /* Skip the return address stack slot.  */
+  argp = (__m128bf16 *) (((char *) fp) + 8);
+
+  /* Check __m128bf16 arguments passed on stack.  */
+  compare (values.i8, argp[0], __m128bf16);
+  compare (values.i9, argp[1], __m128bf16);
+
+  /* Check register contents.  */
+  compare (fregs.xmm0, xmm_regs[0], __m128bf16);
+  compare (fregs.xmm1, xmm_regs[1], __m128bf16);
+  compare (fregs.xmm2, xmm_regs[2], __m128bf16);
+  compare (fregs.xmm3, xmm_regs[3], __m128bf16);
+  compare (fregs.xmm4, xmm_regs[4], __m128bf16);
+  compare (fregs.xmm5, xmm_regs[5], __m128bf16);
+  compare (fregs.xmm6, xmm_regs[6], __m128bf16);
+  compare (fregs.xmm7, xmm_regs[7], __m128bf16);
+}
+
+#define def_check_int_passing_varargs(_i0, _i1, _i2, _i3, _i4, _i5, \
+				      _i6, _i7, _i8, _i9, \
+				      _func, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  clear_float_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  WRAP_CALL(_func) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9);
+
+void
+test_m128bf16_varargs (void)
+{
+  __m128bf16 x[10];
+  __bf16 bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8;
+  int i;
+  for (i = 0; i < 10; i++)
+    x[i] = (__m128bf16) { bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8 };
+  pass = "m128bf16-varargs";
+  def_check_int_passing_varargs (x[0], x[1], x[2], x[3], x[4], x[5],
+				 x[6], x[7], x[8], x[9],
+				 fun_check_passing_m128bf16_varargs,
+				 _m128bf16);
+}
+
+static void
+do_test (void)
+{
+  test_m128bf16_varargs ();
+  if (failed)
+    abort ();
+}