x86: Enable __bf16 type for TARGET_SSE2 and above

Message ID DM4PR11MB54879A5BFCAFCABE60F73806EC949@DM4PR11MB5487.namprd11.prod.outlook.com
State New
Headers
Series x86: Enable __bf16 type for TARGET_SSE2 and above |

Commit Message

Li, Pan2 via Gcc-patches July 26, 2022, 1:31 a.m. UTC
  Hi,

This patch enables the __bf16 scalar type for TARGET_SSE2 and above, according to the psABI (https://gitlab.com/x86-psABIs/x86-64-ABI/-/merge_requests/35/diffs).
As on Arm, the __bf16 type is a storage-only type.

OK for master?

gcc/ChangeLog:

	* config/i386/i386-builtin-types.def (BFLOAT16): New primitive type.
	* config/i386/i386-builtins.cc : Support __bf16 type for i386 backend.
	(ix86_register_bf16_builtin_type): New function.
	(ix86_bf16_type_node): New.
	(ix86_bf16_ptr_type_node): Ditto.
	(ix86_init_builtin_types): Add ix86_register_bf16_builtin_type function call.
	* config/i386/i386-modes.def (FLOAT_MODE): Add BFmode.
	(ADJUST_FLOAT_FORMAT): Ditto.
	* config/i386/i386.cc (merge_classes): Handle BFmode.
	(classify_argument): Ditto.
	(examine_argument): Ditto.
	(construct_container): Ditto.
	(function_value_32): Return __bf16 by %xmm0.
	(function_value_64): Return __bf16 by SSE register.
	(ix86_print_operand): Handle CONST_DOUBLE BFmode.
	(ix86_secondary_reload): Require gpr as intermediate register
	to store __bf16 from sse register when sse4 is not available.
	(ix86_scalar_mode_supported_p): Enable __bf16 under sse2.
	(ix86_mangle_type): Add mangling for __bf16 type.
	(ix86_invalid_conversion): New function for target hook.
	(ix86_invalid_unary_op): Ditto.
	(ix86_invalid_binary_op): Ditto.
	(TARGET_INVALID_CONVERSION): New define for target hook.
	(TARGET_INVALID_UNARY_OP): Ditto.
	(TARGET_INVALID_BINARY_OP): Ditto.
	* config/i386/i386.h (host_detect_local_cpu): Add BFmode.
	* config/i386/i386.md (*pushhf_rex64): Change for BFmode.
	(*push<mode>_rex64): Ditto.
	(*pushhf): Ditto.
	(*push<mode>): Ditto.
	(*movhf_internal): Ditto.
	(*mov<mode>_internal): Ditto.

gcc/testsuite/ChangeLog:

	* g++.target/i386/bfloat_cpp_typecheck.C: New test.
	* gcc.target/i386/bfloat16-1.c: Ditto.
	* gcc.target/i386/sse2-bfloat16-1.c: Ditto.
	* gcc.target/i386/sse2-bfloat16-2.c: Ditto.
	* gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Ditto.
---
 gcc/config/i386/i386-builtin-types.def        |   1 +
 gcc/config/i386/i386-builtins.cc              |  21 ++
 gcc/config/i386/i386-modes.def                |   2 +
 gcc/config/i386/i386.cc                       |  75 +++++-
 gcc/config/i386/i386.h                        |   4 +-
 gcc/config/i386/i386.md                       |  32 +--
 .../g++.target/i386/bfloat_cpp_typecheck.C    |  10 +
 gcc/testsuite/gcc.target/i386/bfloat16-1.c    |  12 +
 .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +
 .../gcc.target/i386/sse2-bfloat16-2.c         |  17 ++
 .../i386/sse2-bfloat16-scalar-typecheck.c     | 215 ++++++++++++++++++
 11 files changed, 375 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
 create mode 100644 gcc/testsuite/gcc.target/i386/bfloat16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c

+  bfloat_ptr != &scalar0;
+  bfloat_ptr < &scalar0;
+  bfloat_ptr <= &scalar0;
+  bfloat_ptr > &scalar0;
+  bfloat_ptr >= &scalar0;
+  bfloat_ptr == bfloat_ptr2;
+  bfloat_ptr != bfloat_ptr2;
+  bfloat_ptr < bfloat_ptr2;
+  bfloat_ptr <= bfloat_ptr2;
+  bfloat_ptr > bfloat_ptr2;
+  bfloat_ptr >= bfloat_ptr2;
+
+  /* Conditional expressions.  */
+
+  0 ? scalar0 : scalar0;
+  0 ? scalar0 : is_a_float; /* { dg-error {invalid conversion from type 
+ '__bf16'} } */
+  0 ? is_a_float : scalar0; /* { dg-error {invalid conversion from type 
+ '__bf16'} } */
+  0 ? scalar0 : 0; /* { dg-error {invalid conversion to type '__bf16'} 
+ } */
+  0 ? 0 : scalar0; /* { dg-error {invalid conversion to type '__bf16'} 
+ } */
+  0 ? 0.1 : scalar0; /* { dg-error {invalid conversion from type 
+ '__bf16'} } */
+  0 ? scalar0 : 0.1; /* { dg-error {invalid conversion from type 
+ '__bf16'} } */
+  0 ? bfloat_ptr : bfloat_ptr2;
+  0 ? bfloat_ptr : float_ptr; /* { dg-warning {pointer type mismatch in 
+ conditional expression} } */
+  0 ? float_ptr : bfloat_ptr; /* { dg-warning {pointer type mismatch in 
+ conditional expression} } */
+
+  scalar0 ? scalar0 : scalar0; /* { dg-error {operation not permitted 
+ on type '__bf16'} } */
+  scalar0 ? is_a_float : scalar0; /* { dg-error {operation not 
+ permitted on type '__bf16'} } */
+  scalar0 ? scalar0 : is_a_float; /* { dg-error {operation not 
+ permitted on type '__bf16'} } */
+  scalar0 ? is_a_float : is_a_float; /* { dg-error {operation not 
+ permitted on type '__bf16'} } */
+
+  /* Unary operators.  */
+
+  +scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  -scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  ~scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  !scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  *scalar0; /* { dg-error {invalid type argument of unary '\*'} } */
+  __real scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  __imag scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  ++scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  --scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  scalar0++; /* { dg-error {operation not permitted on type '__bf16'} } */
+  scalar0--; /* { dg-error {operation not permitted on type '__bf16'} } */
+
+  /* Binary arithmetic operations.  */
+
+  scalar0 = glob_bfloat + *bfloat_ptr; /* { dg-error {operation not 
+ permitted on type '__bf16'} } */
+  scalar0 = glob_bfloat + 0.1; /* { dg-error {operation not permitted 
+ on type '__bf16'} } */
+  scalar0 = glob_bfloat + 0; /* { dg-error {operation not permitted on 
+ type '__bf16'} } */
+  scalar0 = glob_bfloat + is_a_float; /* { dg-error {operation not 
+ permitted on type '__bf16'} } */
+
+  return scalar0;
+}
+
--
2.18.2
  

Comments

Li, Pan2 via Gcc-patches Aug. 3, 2022, 8:40 a.m. UTC | #1
Hi,

The old patch had a mistake in `*movbf_internal`; this version disables BFmode constant double moves in `*movbf_internal`.

Thanks,
Lingling

> -----Original Message-----
> From: Kong, Lingling <lingling.kong@intel.com>
> Sent: Tuesday, July 26, 2022 9:31 AM
> To: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
> Cc: Kong, Lingling <lingling.kong@intel.com>
> Subject: [PATCH] x86: Enable __bf16 type for TARGET_SSE2 and above
> 
> Hi,
> 
> This patch enables the __bf16 scalar type for TARGET_SSE2 and above, according to
> the psABI (https://gitlab.com/x86-psABIs/x86-64-ABI/-/merge_requests/35/diffs).
> As on Arm, the __bf16 type is a storage-only type.
> 
> OK for master?
> 
> gcc/ChangeLog:
> 
> 	* config/i386/i386-builtin-types.def (BFLOAT16): New primitive type.
> 	* config/i386/i386-builtins.cc : Support __bf16 type for i386 backend.
> 	(ix86_register_bf16_builtin_type): New function.
> 	(ix86_bf16_type_node): New.
> 	(ix86_bf16_ptr_type_node): Ditto.
> 	(ix86_init_builtin_types): Add ix86_register_bf16_builtin_type function
> call.
> 	* config/i386/i386-modes.def (FLOAT_MODE): Add BFmode.
> 	(ADJUST_FLOAT_FORMAT): Ditto.
> 	* config/i386/i386.cc (merge_classes): Handle BFmode.
> 	(classify_argument): Ditto.
> 	(examine_argument): Ditto.
> 	(construct_container): Ditto.
> 	(function_value_32): Return __bf16 by %xmm0.
> 	(function_value_64): Return __bf16 by SSE register.
> 	(ix86_print_operand): Handle CONST_DOUBLE BFmode.
> 	(ix86_secondary_reload): Require gpr as intermediate register
> 	to store __bf16 from sse register when sse4 is not available.
> 	(ix86_scalar_mode_supported_p): Enable __bf16 under sse2.
> 	(ix86_mangle_type): Add mangling for __bf16 type.
> 	(ix86_invalid_conversion): New function for target hook.
> 	(ix86_invalid_unary_op): Ditto.
> 	(ix86_invalid_binary_op): Ditto.
> 	(TARGET_INVALID_CONVERSION): New define for target hook.
> 	(TARGET_INVALID_UNARY_OP): Ditto.
> 	(TARGET_INVALID_BINARY_OP): Ditto.
> 	* config/i386/i386.h (host_detect_local_cpu): Add BFmode.
> 	* config/i386/i386.md (*pushhf_rex64): Change for BFmode.
> 	(*push<mode>_rex64): Ditto.
> 	(*pushhf): Ditto.
> 	(*push<mode>): Ditto.
> 	(*movhf_internal): Ditto.
> 	(*mov<mode>_internal): Ditto.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.target/i386/bfloat_cpp_typecheck.C: New test.
> 	* gcc.target/i386/bfloat16-1.c: Ditto.
> 	* gcc.target/i386/sse2-bfloat16-1.c: Ditto.
> 	* gcc.target/i386/sse2-bfloat16-2.c: Ditto.
> 	* gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Ditto.
> ---
>  gcc/config/i386/i386-builtin-types.def        |   1 +
>  gcc/config/i386/i386-builtins.cc              |  21 ++
>  gcc/config/i386/i386-modes.def                |   2 +
>  gcc/config/i386/i386.cc                       |  75 +++++-
>  gcc/config/i386/i386.h                        |   4 +-
>  gcc/config/i386/i386.md                       |  32 +--
>  .../g++.target/i386/bfloat_cpp_typecheck.C    |  10 +
>  gcc/testsuite/gcc.target/i386/bfloat16-1.c    |  12 +
>  .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +
>  .../gcc.target/i386/sse2-bfloat16-2.c         |  17 ++
>  .../i386/sse2-bfloat16-scalar-typecheck.c     | 215 ++++++++++++++++++
>  11 files changed, 375 insertions(+), 22 deletions(-)  create mode 100644
> gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/bfloat16-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-
> typecheck.c
> 
> diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-
> builtin-types.def
> index 7a2da1db0b0..63a360b0f8b 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -69,6 +69,7 @@ DEF_PRIMITIVE_TYPE (UINT16,
> short_unsigned_type_node)  DEF_PRIMITIVE_TYPE (INT64,
> long_long_integer_type_node)  DEF_PRIMITIVE_TYPE (UINT64,
> long_long_unsigned_type_node)  DEF_PRIMITIVE_TYPE (FLOAT16,
> ix86_float16_type_node)
> +DEF_PRIMITIVE_TYPE (BFLOAT16, ix86_bf16_type_node)
>  DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)  DEF_PRIMITIVE_TYPE
> (DOUBLE, double_type_node)  DEF_PRIMITIVE_TYPE (FLOAT80,
> float80_type_node) diff --git a/gcc/config/i386/i386-builtins.cc
> b/gcc/config/i386/i386-builtins.cc
> index fe7243c3837..6a04fb57e65 100644
> --- a/gcc/config/i386/i386-builtins.cc
> +++ b/gcc/config/i386/i386-builtins.cc
> @@ -126,6 +126,9 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX,  static GTY(()) tree
> ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
> 
>  tree ix86_float16_type_node = NULL_TREE;
> +tree ix86_bf16_type_node = NULL_TREE;
> +tree ix86_bf16_ptr_type_node = NULL_TREE;
> +
>  /* Retrieve an element from the above table, building some of
>     the types lazily.  */
> 
> @@ -1366,6 +1369,22 @@ ix86_register_float16_builtin_type (void)
>  					    "_Float16");
>  }
> 
> +static void
> +ix86_register_bf16_builtin_type (void)
> +{
> +  ix86_bf16_type_node = make_node (REAL_TYPE);
> +  TYPE_PRECISION (ix86_bf16_type_node) = 16;
> +  SET_TYPE_MODE (ix86_bf16_type_node, BFmode);
> +  layout_type (ix86_bf16_type_node);
> +
> +  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
> +    {
> +      lang_hooks.types.register_builtin_type (ix86_bf16_type_node,
> +					    "__bf16");
> +      ix86_bf16_ptr_type_node = build_pointer_type (ix86_bf16_type_node);
> +    }
> +}
> +
>  static void
>  ix86_init_builtin_types (void)
>  {
> @@ -1396,6 +1415,8 @@ ix86_init_builtin_types (void)
> 
>    ix86_register_float16_builtin_type ();
> 
> +  ix86_register_bf16_builtin_type ();
> +
>    const_string_type_node
>      = build_pointer_type (build_qualified_type
>  			  (char_type_node, TYPE_QUAL_CONST)); diff --git
> a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index
> e2e1e18d24d..b49daaef253 100644
> --- a/gcc/config/i386/i386-modes.def
> +++ b/gcc/config/i386/i386-modes.def
> @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
> FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
> FLOAT_MODE (TF, 16, ieee_quad_format);  FLOAT_MODE (HF, 2,
> ieee_half_format);
> +FLOAT_MODE (BF, 2, 0);
> +ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
> 
>  /* In ILP32 mode, XFmode has size 12 and alignment 4.
>     In LP64 mode, XFmode has size and alignment 16.  */ diff --git
> a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index
> e03f86d4a23..5d589f6a05c 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -2399,6 +2399,7 @@ classify_argument (machine_mode mode, const_tree
> type,
>      case E_CTImode:
>        return 0;
>      case E_HFmode:
> +    case E_BFmode:
>        if (!(bit_offset % 64))
>  	classes[0] = X86_64_SSEHF_CLASS;
>        else
> @@ -2792,9 +2793,10 @@ construct_container (machine_mode mode,
> machine_mode orig_mode,
>  	    intreg++;
>  	    break;
>  	  case X86_64_SSEHF_CLASS:
> +	    tmpmode = (mode == BFmode ? BFmode : HFmode);
>  	    exp [nexps++]
>  	      = gen_rtx_EXPR_LIST (VOIDmode,
> -				   gen_rtx_REG (HFmode,
> +				   gen_rtx_REG (tmpmode,
>  						GET_SSE_REGNO (sse_regno)),
>  				   GEN_INT (i*8));
>  	    sse_regno++;
> @@ -4001,8 +4003,8 @@ function_value_32 (machine_mode orig_mode,
> machine_mode mode,
>      /* Most things go in %eax.  */
>      regno = AX_REG;
> 
> -  /* Return _Float16/_Complex _Foat16 by sse register.  */
> -  if (mode == HFmode)
> +  /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */  if
> + (mode == HFmode || mode == BFmode)
>      regno = FIRST_SSE_REG;
>    if (mode == HCmode)
>      {
> @@ -4050,6 +4052,7 @@ function_value_64 (machine_mode orig_mode,
> machine_mode mode,
> 
>        switch (mode)
>  	{
> +	case E_BFmode:
>  	case E_HFmode:
>  	case E_HCmode:
>  	case E_SFmode:
> @@ -5631,6 +5634,7 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
>  	return "%vmovss\t{%1, %0|%0, %1}";
> 
>      case MODE_HF:
> +    case MODE_BF:
>        if (REG_P (operands[0]) && REG_P (operands[1]))
>  	return "vmovsh\t{%d1, %0|%0, %d1}";
>        else
> @@ -19402,7 +19406,8 @@ ix86_secondary_reload (bool in_p, rtx x,
> reg_class_t rclass,
>      }
> 
>    /* Require movement to gpr, and then store to memory.  */
> -  if ((mode == HFmode || mode == HImode || mode == V2QImode)
> +  if ((mode == HFmode || mode == HImode || mode == V2QImode
> +       || mode == BFmode)
>        && !TARGET_SSE4_1
>        && SSE_CLASS_P (rclass)
>        && !in_p && MEM_P (x))
> @@ -22331,7 +22336,7 @@ ix86_scalar_mode_supported_p (scalar_mode
> mode)
>      return default_decimal_float_supported_p ();
>    else if (mode == TFmode)
>      return true;
> -  else if (mode == HFmode && TARGET_SSE2)
> +  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
>      return true;
>    else
>      return default_scalar_mode_supported_p (mode); @@ -22646,6 +22651,8
> @@ ix86_mangle_type (const_tree type)
> 
>    switch (TYPE_MODE (type))
>      {
> +    case E_BFmode:
> +      return "u6__bf16";
>      case E_HFmode:
>        /* _Float16 is "DF16_".
>  	 Align with clang's decision in https://reviews.llvm.org/D33719. */ @@ -
> 22661,6 +22668,55 @@ ix86_mangle_type (const_tree type)
>      }
>  }
> 
> +/* Return the diagnostic message string if conversion from FROMTYPE to
> +   TOTYPE is not allowed, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_conversion (const_tree fromtype, const_tree totype) {
> +  if (element_mode (fromtype) != element_mode (totype))
> +    {
> +      /* Do not allow conversions to/from BFmode scalar types.  */
> +      if (TYPE_MODE (fromtype) == BFmode)
> +	return N_("invalid conversion from type %<__bf16%>");
> +      if (TYPE_MODE (totype) == BFmode)
> +	return N_("invalid conversion to type %<__bf16%>");
> +    }
> +
> +  /* Conversion allowed.  */
> +  return NULL;
> +}
> +
> +/* Return the diagnostic message string if the unary operation OP is
> +   not permitted on TYPE, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_unary_op (int op, const_tree type) {
> +  /* Reject all single-operand operations on BFmode except for &.  */
> +  if (element_mode (type) == BFmode && op != ADDR_EXPR)
> +    return N_("operation not permitted on type %<__bf16%>");
> +
> +  /* Operation allowed.  */
> +  return NULL;
> +}
> +
> +/* Return the diagnostic message string if the binary operation OP is
> +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> +			   const_tree type2)
> +{
> +  /* Reject all 2-operand operations on BFmode.  */
> +  if (element_mode (type1) == BFmode
> +      || element_mode (type2) == BFmode)
> +    return N_("operation not permitted on type %<__bf16%>");
> +
> +  /* Operation allowed.  */
> +  return NULL;
> +}
> +
>  static GTY(()) tree ix86_tls_stack_chk_guard_decl;
> 
>  static tree
> @@ -24718,6 +24774,15 @@ ix86_libgcc_floating_mode_supported_p
>  #undef TARGET_MANGLE_TYPE
>  #define TARGET_MANGLE_TYPE ix86_mangle_type
> 
> +#undef TARGET_INVALID_CONVERSION
> +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
> +
> +#undef TARGET_INVALID_UNARY_OP
> +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
> +
> +#undef TARGET_INVALID_BINARY_OP
> +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
> +
>  #undef TARGET_STACK_PROTECT_GUARD
>  #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
> 
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index
> f16df633e84..0da3dce1d31 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1046,7 +1046,7 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
>     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode
> 	\
>     || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode
> 	\
>     || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode
> 	\
> -   || (MODE) == HFmode)
> +   || (MODE) == HFmode || (MODE) == BFmode)
> 
>  #define VALID_SSE_REG_MODE(MODE)					\
>    ((MODE) == V1TImode || (MODE) == TImode				\
> @@ -1077,7 +1077,7 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
>     || (MODE) == CQImode || (MODE) == CHImode
> 	\
>     || (MODE) == CSImode || (MODE) == CDImode
> 	\
>     || (MODE) == SDmode || (MODE) == DDmode				\
> -   || (MODE) == HFmode || (MODE) == HCmode				\
> +   || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode
> 	\
>     || (MODE) == V2HImode || (MODE) == V2HFmode
> 	\
>     || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode
> 	\
>     || (TARGET_64BIT							\
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index
> 9aaeb695f0f..1f7b018913a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -499,7 +499,7 @@
> 
>  ;; Main data type used by the insn
>  (define_attr "mode"
> -  "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
> +
> + "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
>     V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
>    (const_string "unknown"))
> 
> @@ -1104,7 +1104,7 @@
>  ;; GET_MODE_SIZE (<MODE>mode).  For XFmode which depends on  ;;
> command line options just use GET_MODE_SIZE macro.
>  (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
> -			     (TI "16") (HF "2") (SF "4") (DF "8")
> +			     (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
>  			     (XF "GET_MODE_SIZE (XFmode)")
>  			     (V16QI "16") (V32QI "32") (V64QI "64")
>  			     (V8HI "16") (V16HI "32") (V32HI "64") @@ -1248,7
> +1248,7 @@  (define_mode_iterator X87MODEF [SF DF XF])
> 
>  ;; All x87 floating point modes plus HFmode -(define_mode_iterator
> X87MODEFH [HF SF DF XF])
> +(define_mode_iterator X87MODEFH [HF SF DF XF BF])
> 
>  ;; All SSE floating point modes
>  (define_mode_iterator SSEMODEF [HF SF DF TF]) @@ -3408,9 +3408,11 @@
>    operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
>  })
> 
> -(define_insn "*pushhf_rex64"
> -  [(set (match_operand:HF 0 "push_operand" "=X,X")
> -	(match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))]
> +(define_mode_iterator HFBF [HF BF])
> +
> +(define_insn "*push<mode>_rex64"
> +  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
> +	(match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
>    "TARGET_64BIT"
>  {
>    /* Anything else should be already split before reg-stack.  */ @@ -3421,9
> +3423,9 @@
>     (set_attr "type" "push,multi")
>     (set_attr "mode" "DI,TI")])
> 
> -(define_insn "*pushhf"
> -  [(set (match_operand:HF 0 "push_operand" "=X,X")
> -	(match_operand:HF 1 "general_no_elim_operand" "rmF,x"))]
> +(define_insn "*push<mode>"
> +  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
> +	(match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
>    "!TARGET_64BIT"
>  {
>    /* Anything else should be already split before reg-stack.  */ @@ -3462,7
> +3464,7 @@
>     (set_attr "unit" "i387,*,*")
>     (set_attr "mode" "SF,SI,SF")])
> 
> -(define_mode_iterator MODESH [SF HF])
> +(define_mode_iterator MODESH [SF HF BF])
>  ;; %%% Kill this when call knows how to work this out.
>  (define_split
>    [(set (match_operand:MODESH 0 "push_operand") @@ -3950,18 +3952,18
> @@
>  	   ]
>  	   (const_string "*")))])
> 
> -(define_insn "*movhf_internal"
> - [(set (match_operand:HF 0 "nonimmediate_operand"
> +(define_insn "*mov<mode>_internal"
> + [(set (match_operand:HFBF 0 "nonimmediate_operand"
>  	 "=?r,?r,?r,?m,v,v,?r,m,?v,v")
> -       (match_operand:HF 1 "general_operand"
> +       (match_operand:HFBF 1 "general_operand"
>  	 "r  ,F ,m ,rF,C,v, v,v,r ,m"))]
>   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
>    && (lra_in_progress
>        || reload_completed
>        || !CONST_DOUBLE_P (operands[1])
>        || (TARGET_SSE2
> -	  && standard_sse_constant_p (operands[1], HFmode) == 1)
> -      || memory_operand (operands[0], HFmode))"
> +	  && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
> +      || memory_operand (operands[0], <MODE>mode))"
>  {
>    switch (get_attr_type (insn))
>      {
> diff --git a/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> b/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> new file mode 100644
> index 00000000000..962c8504775
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> @@ -0,0 +1,10 @@
> +/* { dg-do assemble { target sse2} } */
> +/* { dg-options "-msse2 -O3 --save-temps" } */
> +
> +void foo (void)
> +{
> +  __bf16 (); /* { dg-bogus {invalid conversion to type '__bf16'}  } */
> +  __bf16 a = __bf16(); /* { dg-bogus {invalid conversion to type
> +'__bf16'} } */
> +  __bf16 (0x1234); /* { dg-error {invalid conversion to type '__bf16'}
> +} */
> +  __bf16 (0.1); /* { dg-error {invalid conversion to type '__bf16'} }
> +*/ }
> diff --git a/gcc/testsuite/gcc.target/i386/bfloat16-1.c
> b/gcc/testsuite/gcc.target/i386/bfloat16-1.c
> new file mode 100644
> index 00000000000..6aaec28efd2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/bfloat16-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse2 -O2" } */
> +__bf16
> +foo (int a)
> +{
> +  union {
> +    int a;
> +    __bf16 b;
> +  }c;
> +  c.a = a;
> +  return c.b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> new file mode 100644
> index 00000000000..612d55be826
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-sse2" } */
> +
> +__bf16/* { dg-error "unknown type name '__bf16'" } */ foo (__bf16 x) /*
> +{ dg-error "unknown type name '__bf16'" } */ {
> +  return x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> new file mode 100644
> index 00000000000..a3286e26c48
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -mno-avx512f" } */
> +
> +union flt
> +{
> +  __bf16 flt;
> +  short s;
> +};
> +
> +__bf16
> +foo (union flt x)
> +{
> +  return x.flt;
> +}
> +
> +/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 }
> +} } */
> +/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32
> +} } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> new file mode 100644
> index 00000000000..f76d5547758
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> @@ -0,0 +1,215 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse2 -O2" } */
> +
> +
> +__bf16 glob_bfloat;
> +
> +int is_an_int;
> +short is_a_short_int;
> +float is_a_float;
> +float is_a_float16;
> +double is_a_double;
> +
> +float *float_ptr;
> +
> +__bf16 foo1 (void) { return (__bf16) 0x1234; } /* { dg-error {invalid
> +conversion to type '__bf16'} } */
> +__bf16 foo2 (void) { return (__bf16) (short) 0x1234; } /* { dg-error
> +{invalid conversion to type '__bf16'} } */
> +
> +__bf16 footest (__bf16 scalar0)
> +{
> +
> +  /* Initialisation  */
> +
> +  __bf16 scalar1_1;
> +  __bf16 scalar1_2 = glob_bfloat;
> +  __bf16 scalar1_3 = 0;   /* { dg-error {invalid conversion to type '__bf16'} } */
> +  __bf16 scalar1_4 = 0.1; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  __bf16 scalar1_5 = is_a_float; /* { dg-error {invalid conversion to
> + type '__bf16'} } */
> +  __bf16 scalar1_6 = is_an_int;  /* { dg-error {invalid conversion to
> + type '__bf16'} } */
> +  __bf16 scalar1_7 = is_a_float16; /* { dg-error {invalid conversion to
> + type '__bf16'} } */
> +  __bf16 scalar1_8 = is_a_double; /* { dg-error {invalid conversion to
> + type '__bf16'} } */
> +  __bf16 scalar1_9 = is_a_short_int; /* { dg-error {invalid conversion
> + to type '__bf16'} } */
> +
> +  int initi_1_1 = glob_bfloat; /* { dg-error {invalid conversion from
> + type '__bf16'} } */  float initi_1_2 = glob_bfloat; /* { dg-error
> + {invalid conversion from type '__bf16'} } */
> +  _Float16 initi_1_3 = glob_bfloat; /* { dg-error {invalid conversion
> + from type '__bf16'} } */  short initi_1_4 = glob_bfloat; /* { dg-error
> + {invalid conversion from type '__bf16'} } */  double initi_1_5 =
> + glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} }
> + */
> +
> +  __bf16 scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */
> +  __bf16 scalar2_2 = { glob_bfloat };
> +  __bf16 scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  __bf16 scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  __bf16 scalar2_5 = { is_a_float }; /* { dg-error {invalid conversion
> + to type '__bf16'} } */
> +  __bf16 scalar2_6 = { is_an_int }; /* { dg-error {invalid conversion
> + to type '__bf16'} } */
> +  __bf16 scalar2_7 = { is_a_float16 }; /* { dg-error {invalid
> + conversion to type '__bf16'} } */
> +  __bf16 scalar2_8 = { is_a_double }; /* { dg-error {invalid conversion
> + to type '__bf16'} } */
> +  __bf16 scalar2_9 = { is_a_short_int }; /* { dg-error {invalid
> + conversion to type '__bf16'} } */
> +
> +  int initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion
> + from type '__bf16'} } */  float initi_2_2 = { glob_bfloat }; /* {
> + dg-error {invalid conversion from type '__bf16'} } */
> +  _Float16 initi_2_3 = { glob_bfloat }; /* { dg-error {invalid
> + conversion from type '__bf16'} } */  short initi_2_4 = { glob_bfloat
> + }; /* { dg-error {invalid conversion from type '__bf16'} } */  double
> + initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from
> + type '__bf16'} } */
> +
> +  /* Assignments.  */
> +
> +  glob_bfloat = glob_bfloat;
> +  glob_bfloat = 0;   /* { dg-error {invalid conversion to type '__bf16'} } */
> +  glob_bfloat = 0.1; /* { dg-error {invalid conversion to type
> + '__bf16'} } */  glob_bfloat = is_a_float; /* { dg-error {invalid
> + conversion to type '__bf16'} } */  glob_bfloat = is_an_int; /* {
> + dg-error {invalid conversion to type '__bf16'} } */  glob_bfloat =
> + is_a_float16; /* { dg-error {invalid conversion to type '__bf16'} } */
> + glob_bfloat = is_a_double; /* { dg-error {invalid conversion to type
> + '__bf16'} } */  glob_bfloat = is_a_short_int; /* { dg-error {invalid
> + conversion to type '__bf16'} } */
> +
> +  is_an_int = glob_bfloat; /* { dg-error {invalid conversion from type
> + '__bf16'} } */  is_a_float = glob_bfloat; /* { dg-error {invalid
> + conversion from type '__bf16'} } */
> +  is_a_float16 = glob_bfloat; /* { dg-error {invalid conversion from
> + type '__bf16'} } */  is_a_double = glob_bfloat; /* { dg-error {invalid
> + conversion from type '__bf16'} } */  is_a_short_int = glob_bfloat; /*
> + { dg-error {invalid conversion from type '__bf16'} } */
> +
> +  /* Casting.  */
> +
> +  (void) glob_bfloat;
> +  (__bf16) glob_bfloat;
> +
> +  (int) glob_bfloat; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (float) glob_bfloat; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (_Float16) glob_bfloat; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (double) glob_bfloat; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (short) glob_bfloat; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +
> +  (__bf16) is_an_int; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) is_a_float; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) is_a_float16; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) is_a_double; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) is_a_short_int; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +
> +  /* Compound literals.  */
> +
> +  (__bf16) {}; /* { dg-error {empty scalar initializer} } */
> +  (__bf16) { glob_bfloat };
> +  (__bf16) { 0 }; /* { dg-error {invalid conversion to type '__bf16'} }
> + */
> +  (__bf16) { 0.1 }; /* { dg-error {invalid conversion to type '__bf16'}
> + } */
> +  (__bf16) { is_a_float }; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) { is_an_int }; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) { is_a_float16 }; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) { is_a_double }; /* { dg-error {invalid conversion to type
> + '__bf16'} } */
> +  (__bf16) { is_a_short_int }; /* { dg-error {invalid conversion to
> + type '__bf16'} } */
> +
> +  (int) { glob_bfloat }; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (float) { glob_bfloat }; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (_Float16) { glob_bfloat }; /* { dg-error {invalid conversion from
> + type '__bf16'} } */
> +  (double) { glob_bfloat }; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  (short) { glob_bfloat }; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +
> +  /* Arrays and Structs.  */
> +
> +  typedef __bf16 array_type[2];
> +  extern __bf16 extern_array[];
> +
> +  __bf16 array[2];
> +  __bf16 zero_length_array[0];
> +  __bf16 empty_init_array[] = {};
> +  typedef __bf16 some_other_type[is_an_int];
> +
> +  struct struct1 {
> +    __bf16 a;
> +  };
> +
> +  union union1 {
> +    __bf16 a;
> +  };
> +
> +  /* Addressing and dereferencing.  */
> +
> +  __bf16 *bfloat_ptr = &scalar0;
> +  scalar0 = *bfloat_ptr;
> +
> +  /* Pointer assignment.  */
> +
> +  __bf16 *bfloat_ptr2 = bfloat_ptr;
> +  __bf16 *bfloat_ptr3 = array;
> +
> +  /* Pointer arithmetic.  */
> +
> +  ++bfloat_ptr;
> +  --bfloat_ptr;
> +  bfloat_ptr++;
> +  bfloat_ptr--;
> +  bfloat_ptr += 1;
> +  bfloat_ptr -= 1;
> +  bfloat_ptr - bfloat_ptr2;
> +  bfloat_ptr = &bfloat_ptr3[0];
> +  bfloat_ptr = &bfloat_ptr3[1];
> +
> +  /* Simple comparison.  */
> +  scalar0 > glob_bfloat; /* { dg-error {operation not permitted on type
> + '__bf16'} } */  glob_bfloat == scalar0; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +  scalar0 > is_a_float; /* { dg-error {operation not permitted on type
> + '__bf16'} } */  is_a_float == scalar0; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +  scalar0 > 0; /* { dg-error {operation not permitted on type '__bf16'}
> + } */
> +  0 == scalar0; /* { dg-error {operation not permitted on type
> + '__bf16'} } */
> +  scalar0 > 0.1; /* { dg-error {operation not permitted on type
> + '__bf16'} } */
> +  0.1 == scalar0; /* { dg-error {operation not permitted on type
> + '__bf16'} } */
> +  scalar0 > is_an_int; /* { dg-error {operation not permitted on type
> + '__bf16'} } */  is_an_int == scalar0; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +
> +  /* Pointer comparison.  */
> +
> +  bfloat_ptr == &scalar0;
> +  bfloat_ptr != &scalar0;
> +  bfloat_ptr < &scalar0;
> +  bfloat_ptr <= &scalar0;
> +  bfloat_ptr > &scalar0;
> +  bfloat_ptr >= &scalar0;
> +  bfloat_ptr == bfloat_ptr2;
> +  bfloat_ptr != bfloat_ptr2;
> +  bfloat_ptr < bfloat_ptr2;
> +  bfloat_ptr <= bfloat_ptr2;
> +  bfloat_ptr > bfloat_ptr2;
> +  bfloat_ptr >= bfloat_ptr2;
> +
> +  /* Conditional expressions.  */
> +
> +  0 ? scalar0 : scalar0;
> +  0 ? scalar0 : is_a_float; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  0 ? is_a_float : scalar0; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  0 ? scalar0 : 0; /* { dg-error {invalid conversion to type '__bf16'}
> + } */
> +  0 ? 0 : scalar0; /* { dg-error {invalid conversion to type '__bf16'}
> + } */
> +  0 ? 0.1 : scalar0; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  0 ? scalar0 : 0.1; /* { dg-error {invalid conversion from type
> + '__bf16'} } */
> +  0 ? bfloat_ptr : bfloat_ptr2;
> +  0 ? bfloat_ptr : float_ptr; /* { dg-warning {pointer type mismatch in
> + conditional expression} } */
> +  0 ? float_ptr : bfloat_ptr; /* { dg-warning {pointer type mismatch in
> + conditional expression} } */
> +
> +  scalar0 ? scalar0 : scalar0; /* { dg-error {operation not permitted
> + on type '__bf16'} } */
> +  scalar0 ? is_a_float : scalar0; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +  scalar0 ? scalar0 : is_a_float; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +  scalar0 ? is_a_float : is_a_float; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +
> +  /* Unary operators.  */
> +
> +  +scalar0; /* { dg-error {operation not permitted on type '__bf16'} }
> + */  -scalar0; /* { dg-error {operation not permitted on type '__bf16'}
> + } */  ~scalar0; /* { dg-error {operation not permitted on type
> + '__bf16'} } */  !scalar0; /* { dg-error {operation not permitted on
> + type '__bf16'} } */  *scalar0; /* { dg-error {invalid type argument of
> + unary '\*'} } */  __real scalar0; /* { dg-error {operation not
> + permitted on type '__bf16'} } */  __imag scalar0; /* { dg-error
> + {operation not permitted on type '__bf16'} } */  ++scalar0; /* {
> + dg-error {operation not permitted on type '__bf16'} } */  --scalar0;
> + /* { dg-error {operation not permitted on type '__bf16'} } */
> + scalar0++; /* { dg-error {operation not permitted on type '__bf16'} }
> + */  scalar0--; /* { dg-error {operation not permitted on type
> + '__bf16'} } */
> +
> +  /* Binary arithmetic operations.  */
> +
> +  scalar0 = glob_bfloat + *bfloat_ptr; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +  scalar0 = glob_bfloat + 0.1; /* { dg-error {operation not permitted
> + on type '__bf16'} } */
> +  scalar0 = glob_bfloat + 0; /* { dg-error {operation not permitted on
> + type '__bf16'} } */
> +  scalar0 = glob_bfloat + is_a_float; /* { dg-error {operation not
> + permitted on type '__bf16'} } */
> +
> +  return scalar0;
> +}
> +
> --
> 2.18.2
  
Hongtao Liu Aug. 4, 2022, 4:35 a.m. UTC | #2
On Wed, Aug 3, 2022 at 4:41 PM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> The old patch had a mistake in `*movbf_internal`; the new version disables BFmode constant double moves in `*movbf_internal`.
LGTM.
>
> Thanks,
> Lingling
>
> > -----Original Message-----
> > From: Kong, Lingling <lingling.kong@intel.com>
> > Sent: Tuesday, July 26, 2022 9:31 AM
> > To: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
> > Cc: Kong, Lingling <lingling.kong@intel.com>
> > Subject: [PATCH] x86: Enable __bf16 type for TARGET_SSE2 and above
> >
> > Hi,
> >
> > This patch enables the __bf16 scalar type for targets with SSE2 and above, according to the
> > psABI (https://gitlab.com/x86-psABIs/x86-64-ABI/-/merge_requests/35/diffs).
> > As on Arm, the __bf16 type is a storage-only type.
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >
> >       * config/i386/i386-builtin-types.def (BFLOAT16): New primitive type.
> >       * config/i386/i386-builtins.cc : Support __bf16 type for i386 backend.
> >       (ix86_register_bf16_builtin_type): New function.
> >       (ix86_bf16_type_node): New.
> >       (ix86_bf16_ptr_type_node): Ditto.
> >       (ix86_init_builtin_types): Add ix86_register_bf16_builtin_type function
> > call.
> >       * config/i386/i386-modes.def (FLOAT_MODE): Add BFmode.
> >       (ADJUST_FLOAT_FORMAT): Ditto.
> >       * config/i386/i386.cc (merge_classes): Handle BFmode.
> >       (classify_argument): Ditto.
> >       (examine_argument): Ditto.
> >       (construct_container): Ditto.
> >       (function_value_32): Return __bf16 by %xmm0.
> >       (function_value_64): Return __bf16 by SSE register.
> >       (ix86_print_operand): Handle CONST_DOUBLE BFmode.
> >       (ix86_secondary_reload): Require gpr as intermediate register
> >       to store __bf16 from sse register when sse4 is not available.
> >       (ix86_scalar_mode_supported_p): Enable __bf16 under sse2.
> >       (ix86_mangle_type): Add mangling for __bf16 type.
> >       (ix86_invalid_conversion): New function for target hook.
> >       (ix86_invalid_unary_op): Ditto.
> >       (ix86_invalid_binary_op): Ditto.
> >       (TARGET_INVALID_CONVERSION): New define for target hook.
> >       (TARGET_INVALID_UNARY_OP): Ditto.
> >       (TARGET_INVALID_BINARY_OP): Ditto.
> >       * config/i386/i386.h (host_detect_local_cpu): Add BFmode.
> >       * config/i386/i386.md (*pushhf_rex64): Change for BFmode.
> >       (*push<mode>_rex64): Ditto.
> >       (*pushhf): Ditto.
> >       (*push<mode>): Ditto.
> >       (*movhf_internal): Ditto.
> >       (*mov<mode>_internal): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * g++.target/i386/bfloat_cpp_typecheck.C: New test.
> >       * gcc.target/i386/bfloat16-1.c: Ditto.
> >       * gcc.target/i386/sse2-bfloat16-1.c: Ditto.
> >       * gcc.target/i386/sse2-bfloat16-2.c: Ditto.
> >       * gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Ditto.
> > ---
> >  gcc/config/i386/i386-builtin-types.def        |   1 +
> >  gcc/config/i386/i386-builtins.cc              |  21 ++
> >  gcc/config/i386/i386-modes.def                |   2 +
> >  gcc/config/i386/i386.cc                       |  75 +++++-
> >  gcc/config/i386/i386.h                        |   4 +-
> >  gcc/config/i386/i386.md                       |  32 +--
> >  .../g++.target/i386/bfloat_cpp_typecheck.C    |  10 +
> >  gcc/testsuite/gcc.target/i386/bfloat16-1.c    |  12 +
> >  .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +
> >  .../gcc.target/i386/sse2-bfloat16-2.c         |  17 ++
> >  .../i386/sse2-bfloat16-scalar-typecheck.c     | 215 ++++++++++++++++++
> >  11 files changed, 375 insertions(+), 22 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/bfloat16-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> >
> > diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-
> > builtin-types.def
> > index 7a2da1db0b0..63a360b0f8b 100644
> > --- a/gcc/config/i386/i386-builtin-types.def
> > +++ b/gcc/config/i386/i386-builtin-types.def
> > @@ -69,6 +69,7 @@ DEF_PRIMITIVE_TYPE (UINT16,
> > short_unsigned_type_node)  DEF_PRIMITIVE_TYPE (INT64,
> > long_long_integer_type_node)  DEF_PRIMITIVE_TYPE (UINT64,
> > long_long_unsigned_type_node)  DEF_PRIMITIVE_TYPE (FLOAT16,
> > ix86_float16_type_node)
> > +DEF_PRIMITIVE_TYPE (BFLOAT16, ix86_bf16_type_node)
> >  DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)  DEF_PRIMITIVE_TYPE
> > (DOUBLE, double_type_node)  DEF_PRIMITIVE_TYPE (FLOAT80,
> > float80_type_node) diff --git a/gcc/config/i386/i386-builtins.cc
> > b/gcc/config/i386/i386-builtins.cc
> > index fe7243c3837..6a04fb57e65 100644
> > --- a/gcc/config/i386/i386-builtins.cc
> > +++ b/gcc/config/i386/i386-builtins.cc
> > @@ -126,6 +126,9 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX,  static GTY(()) tree
> > ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
> >
> >  tree ix86_float16_type_node = NULL_TREE;
> > +tree ix86_bf16_type_node = NULL_TREE;
> > +tree ix86_bf16_ptr_type_node = NULL_TREE;
> > +
> >  /* Retrieve an element from the above table, building some of
> >     the types lazily.  */
> >
> > @@ -1366,6 +1369,22 @@ ix86_register_float16_builtin_type (void)
> >                                           "_Float16");
> >  }
> >
> > +static void
> > +ix86_register_bf16_builtin_type (void)
> > +{
> > +  ix86_bf16_type_node = make_node (REAL_TYPE);
> > +  TYPE_PRECISION (ix86_bf16_type_node) = 16;
> > +  SET_TYPE_MODE (ix86_bf16_type_node, BFmode);
> > +  layout_type (ix86_bf16_type_node);
> > +
> > +  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
> > +    {
> > +      lang_hooks.types.register_builtin_type (ix86_bf16_type_node,
> > +                                         "__bf16");
> > +      ix86_bf16_ptr_type_node = build_pointer_type (ix86_bf16_type_node);
> > +    }
> > +}
> > +
> >  static void
> >  ix86_init_builtin_types (void)
> >  {
> > @@ -1396,6 +1415,8 @@ ix86_init_builtin_types (void)
> >
> >    ix86_register_float16_builtin_type ();
> >
> > +  ix86_register_bf16_builtin_type ();
> > +
> >    const_string_type_node
> >      = build_pointer_type (build_qualified_type
> >                         (char_type_node, TYPE_QUAL_CONST)); diff --git
> > a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index
> > e2e1e18d24d..b49daaef253 100644
> > --- a/gcc/config/i386/i386-modes.def
> > +++ b/gcc/config/i386/i386-modes.def
> > @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
> > FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
> > FLOAT_MODE (TF, 16, ieee_quad_format);  FLOAT_MODE (HF, 2,
> > ieee_half_format);
> > +FLOAT_MODE (BF, 2, 0);
> > +ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
> >
> >  /* In ILP32 mode, XFmode has size 12 and alignment 4.
> >     In LP64 mode, XFmode has size and alignment 16.  */ diff --git
> > a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index
> > e03f86d4a23..5d589f6a05c 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -2399,6 +2399,7 @@ classify_argument (machine_mode mode, const_tree
> > type,
> >      case E_CTImode:
> >        return 0;
> >      case E_HFmode:
> > +    case E_BFmode:
> >        if (!(bit_offset % 64))
> >       classes[0] = X86_64_SSEHF_CLASS;
> >        else
> > @@ -2792,9 +2793,10 @@ construct_container (machine_mode mode,
> > machine_mode orig_mode,
> >           intreg++;
> >           break;
> >         case X86_64_SSEHF_CLASS:
> > +         tmpmode = (mode == BFmode ? BFmode : HFmode);
> >           exp [nexps++]
> >             = gen_rtx_EXPR_LIST (VOIDmode,
> > -                                gen_rtx_REG (HFmode,
> > +                                gen_rtx_REG (tmpmode,
> >                                               GET_SSE_REGNO (sse_regno)),
> >                                  GEN_INT (i*8));
> >           sse_regno++;
> > @@ -4001,8 +4003,8 @@ function_value_32 (machine_mode orig_mode,
> > machine_mode mode,
> >      /* Most things go in %eax.  */
> >      regno = AX_REG;
> >
> > -  /* Return _Float16/_Complex _Foat16 by sse register.  */
> > -  if (mode == HFmode)
> > +  /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */  if
> > + (mode == HFmode || mode == BFmode)
> >      regno = FIRST_SSE_REG;
> >    if (mode == HCmode)
> >      {
> > @@ -4050,6 +4052,7 @@ function_value_64 (machine_mode orig_mode,
> > machine_mode mode,
> >
> >        switch (mode)
> >       {
> > +     case E_BFmode:
> >       case E_HFmode:
> >       case E_HCmode:
> >       case E_SFmode:
> > @@ -5631,6 +5634,7 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
> >       return "%vmovss\t{%1, %0|%0, %1}";
> >
> >      case MODE_HF:
> > +    case MODE_BF:
> >        if (REG_P (operands[0]) && REG_P (operands[1]))
> >       return "vmovsh\t{%d1, %0|%0, %d1}";
> >        else
> > @@ -19402,7 +19406,8 @@ ix86_secondary_reload (bool in_p, rtx x,
> > reg_class_t rclass,
> >      }
> >
> >    /* Require movement to gpr, and then store to memory.  */
> > -  if ((mode == HFmode || mode == HImode || mode == V2QImode)
> > +  if ((mode == HFmode || mode == HImode || mode == V2QImode
> > +       || mode == BFmode)
> >        && !TARGET_SSE4_1
> >        && SSE_CLASS_P (rclass)
> >        && !in_p && MEM_P (x))
> > @@ -22331,7 +22336,7 @@ ix86_scalar_mode_supported_p (scalar_mode
> > mode)
> >      return default_decimal_float_supported_p ();
> >    else if (mode == TFmode)
> >      return true;
> > -  else if (mode == HFmode && TARGET_SSE2)
> > +  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> >      return true;
> >    else
> >      return default_scalar_mode_supported_p (mode); @@ -22646,6 +22651,8
> > @@ ix86_mangle_type (const_tree type)
> >
> >    switch (TYPE_MODE (type))
> >      {
> > +    case E_BFmode:
> > +      return "u6__bf16";
> >      case E_HFmode:
> >        /* _Float16 is "DF16_".
> >        Align with clang's decision in https://reviews.llvm.org/D33719. */ @@ -
> > 22661,6 +22668,55 @@ ix86_mangle_type (const_tree type)
> >      }
> >  }
> >
> > +/* Return the diagnostic message string if conversion from FROMTYPE to
> > +   TOTYPE is not allowed, NULL otherwise.  */
> > +
> > +static const char *
> > +ix86_invalid_conversion (const_tree fromtype, const_tree totype) {
> > +  if (element_mode (fromtype) != element_mode (totype))
> > +    {
> > +      /* Do not allow conversions to/from BFmode scalar types.  */
> > +      if (TYPE_MODE (fromtype) == BFmode)
> > +     return N_("invalid conversion from type %<__bf16%>");
> > +      if (TYPE_MODE (totype) == BFmode)
> > +     return N_("invalid conversion to type %<__bf16%>");
> > +    }
> > +
> > +  /* Conversion allowed.  */
> > +  return NULL;
> > +}
> > +
> > +/* Return the diagnostic message string if the unary operation OP is
> > +   not permitted on TYPE, NULL otherwise.  */
> > +
> > +static const char *
> > +ix86_invalid_unary_op (int op, const_tree type) {
> > +  /* Reject all single-operand operations on BFmode except for &.  */
> > +  if (element_mode (type) == BFmode && op != ADDR_EXPR)
> > +    return N_("operation not permitted on type %<__bf16%>");
> > +
> > +  /* Operation allowed.  */
> > +  return NULL;
> > +}
> > +
> > +/* Return the diagnostic message string if the binary operation OP is
> > +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> > +
> > +static const char *
> > +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> > +                        const_tree type2)
> > +{
> > +  /* Reject all 2-operand operations on BFmode.  */
> > +  if (element_mode (type1) == BFmode
> > +      || element_mode (type2) == BFmode)
> > +    return N_("operation not permitted on type %<__bf16%>");
> > +
> > +  /* Operation allowed.  */
> > +  return NULL;
> > +}
> > +
> >  static GTY(()) tree ix86_tls_stack_chk_guard_decl;
> >
> >  static tree
> > @@ -24718,6 +24774,15 @@ ix86_libgcc_floating_mode_supported_p
> >  #undef TARGET_MANGLE_TYPE
> >  #define TARGET_MANGLE_TYPE ix86_mangle_type
> >
> > +#undef TARGET_INVALID_CONVERSION
> > +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
> > +
> > +#undef TARGET_INVALID_UNARY_OP
> > +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
> > +
> > +#undef TARGET_INVALID_BINARY_OP
> > +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
> > +
> >  #undef TARGET_STACK_PROTECT_GUARD
> >  #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
> >
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index
> > f16df633e84..0da3dce1d31 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -1046,7 +1046,7 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> >     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode
> >       \
> >     || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode
> >       \
> >     || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode
> >       \
> > -   || (MODE) == HFmode)
> > +   || (MODE) == HFmode || (MODE) == BFmode)
> >
> >  #define VALID_SSE_REG_MODE(MODE)                                     \
> >    ((MODE) == V1TImode || (MODE) == TImode                            \
> > @@ -1077,7 +1077,7 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> >     || (MODE) == CQImode || (MODE) == CHImode
> >       \
> >     || (MODE) == CSImode || (MODE) == CDImode
> >       \
> >     || (MODE) == SDmode || (MODE) == DDmode                           \
> > -   || (MODE) == HFmode || (MODE) == HCmode                           \
> > +   || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode
> >       \
> >     || (MODE) == V2HImode || (MODE) == V2HFmode
> >       \
> >     || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode
> >       \
> >     || (TARGET_64BIT                                                  \
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index
> > 9aaeb695f0f..1f7b018913a 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -499,7 +499,7 @@
> >
> >  ;; Main data type used by the insn
> >  (define_attr "mode"
> > -  "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
> > +
> > + "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
> >     V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
> >    (const_string "unknown"))
> >
> > @@ -1104,7 +1104,7 @@
> >  ;; GET_MODE_SIZE (<MODE>mode).  For XFmode which depends on  ;;
> > command line options just use GET_MODE_SIZE macro.
> >  (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
> > -                          (TI "16") (HF "2") (SF "4") (DF "8")
> > +                          (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
> >                            (XF "GET_MODE_SIZE (XFmode)")
> >                            (V16QI "16") (V32QI "32") (V64QI "64")
> >                            (V8HI "16") (V16HI "32") (V32HI "64") @@ -1248,7
> > +1248,7 @@  (define_mode_iterator X87MODEF [SF DF XF])
> >
> >  ;; All x87 floating point modes plus HFmode -(define_mode_iterator
> > X87MODEFH [HF SF DF XF])
> > +(define_mode_iterator X87MODEFH [HF SF DF XF BF])
> >
> >  ;; All SSE floating point modes
> >  (define_mode_iterator SSEMODEF [HF SF DF TF]) @@ -3408,9 +3408,11 @@
> >    operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
> >  })
> >
> > -(define_insn "*pushhf_rex64"
> > -  [(set (match_operand:HF 0 "push_operand" "=X,X")
> > -     (match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))]
> > +(define_mode_iterator HFBF [HF BF])
> > +
> > +(define_insn "*push<mode>_rex64"
> > +  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
> > +     (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
> >    "TARGET_64BIT"
> >  {
> >    /* Anything else should be already split before reg-stack.  */ @@ -3421,9
> > +3423,9 @@
> >     (set_attr "type" "push,multi")
> >     (set_attr "mode" "DI,TI")])
> >
> > -(define_insn "*pushhf"
> > -  [(set (match_operand:HF 0 "push_operand" "=X,X")
> > -     (match_operand:HF 1 "general_no_elim_operand" "rmF,x"))]
> > +(define_insn "*push<mode>"
> > +  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
> > +     (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
> >    "!TARGET_64BIT"
> >  {
> >    /* Anything else should be already split before reg-stack.  */ @@ -3462,7
> > +3464,7 @@
> >     (set_attr "unit" "i387,*,*")
> >     (set_attr "mode" "SF,SI,SF")])
> >
> > -(define_mode_iterator MODESH [SF HF])
> > +(define_mode_iterator MODESH [SF HF BF])
> >  ;; %%% Kill this when call knows how to work this out.
> >  (define_split
> >    [(set (match_operand:MODESH 0 "push_operand") @@ -3950,18 +3952,18
> > @@
> >          ]
> >          (const_string "*")))])
> >
> > -(define_insn "*movhf_internal"
> > - [(set (match_operand:HF 0 "nonimmediate_operand"
> > +(define_insn "*mov<mode>_internal"
> > + [(set (match_operand:HFBF 0 "nonimmediate_operand"
> >        "=?r,?r,?r,?m,v,v,?r,m,?v,v")
> > -       (match_operand:HF 1 "general_operand"
> > +       (match_operand:HFBF 1 "general_operand"
> >        "r  ,F ,m ,rF,C,v, v,v,r ,m"))]
> >   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
> >    && (lra_in_progress
> >        || reload_completed
> >        || !CONST_DOUBLE_P (operands[1])
> >        || (TARGET_SSE2
> > -       && standard_sse_constant_p (operands[1], HFmode) == 1)
> > -      || memory_operand (operands[0], HFmode))"
> > +       && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
> > +      || memory_operand (operands[0], <MODE>mode))"
> >  {
> >    switch (get_attr_type (insn))
> >      {
> > diff --git a/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> > b/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> > new file mode 100644
> > index 00000000000..962c8504775
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
> > @@ -0,0 +1,10 @@
> > +/* { dg-do assemble { target sse2} } */
> > +/* { dg-options "-msse2 -O3 --save-temps" } */
> > +
> > +void foo (void)
> > +{
> > +  __bf16 (); /* { dg-bogus {invalid conversion to type '__bf16'}  } */
> > +  __bf16 a = __bf16(); /* { dg-bogus {invalid conversion to type
> > +'__bf16'} } */
> > +  __bf16 (0x1234); /* { dg-error {invalid conversion to type '__bf16'}
> > +} */
> > +  __bf16 (0.1); /* { dg-error {invalid conversion to type '__bf16'} }
> > +*/ }
> > diff --git a/gcc/testsuite/gcc.target/i386/bfloat16-1.c
> > b/gcc/testsuite/gcc.target/i386/bfloat16-1.c
> > new file mode 100644
> > index 00000000000..6aaec28efd2
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/bfloat16-1.c
> > @@ -0,0 +1,12 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-msse2 -O2" } */
> > +__bf16
> > +foo (int a)
> > +{
> > +  union {
> > +    int a;
> > +    __bf16 b;
> > +  }c;
> > +  c.a = a;
> > +  return c.b;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > new file mode 100644
> > index 00000000000..612d55be826
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > @@ -0,0 +1,8 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mno-sse2" } */
> > +
> > +__bf16/* { dg-error "unknown type name '__bf16'" } */ foo (__bf16 x) /*
> > +{ dg-error "unknown type name '__bf16'" } */ {
> > +  return x;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> > b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> > new file mode 100644
> > index 00000000000..a3286e26c48
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -msse2 -mno-avx512f" } */
> > +
> > +union flt
> > +{
> > +  __bf16 flt;
> > +  short s;
> > +};
> > +
> > +__bf16
> > +foo (union flt x)
> > +{
> > +  return x.flt;
> > +}
> > +
> > +/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 }
> > +} } */
> > +/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32
> > +} } } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> > b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> > new file mode 100644
> > index 00000000000..f76d5547758
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
> > @@ -0,0 +1,215 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-msse2 -O2" } */
> > +
> > +
> > +__bf16 glob_bfloat;
> > +
> > +int is_an_int;
> > +short is_a_short_int;
> > +float is_a_float;
> > +float is_a_float16;
> > +double is_a_double;
> > +
> > +float *float_ptr;
> > +
> > +__bf16 foo1 (void) { return (__bf16) 0x1234; } /* { dg-error {invalid
> > +conversion to type '__bf16'} } */
> > +__bf16 foo2 (void) { return (__bf16) (short) 0x1234; } /* { dg-error
> > +{invalid conversion to type '__bf16'} } */
> > +
> > +__bf16 footest (__bf16 scalar0)
> > +{
> > +
> > +  /* Initialisation  */
> > +
> > +  __bf16 scalar1_1;
> > +  __bf16 scalar1_2 = glob_bfloat;
> > +  __bf16 scalar1_3 = 0;   /* { dg-error {invalid conversion to type '__bf16'} } */
> > +  __bf16 scalar1_4 = 0.1; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  __bf16 scalar1_5 = is_a_float; /* { dg-error {invalid conversion to
> > + type '__bf16'} } */
> > +  __bf16 scalar1_6 = is_an_int;  /* { dg-error {invalid conversion to
> > + type '__bf16'} } */
> > +  __bf16 scalar1_7 = is_a_float16; /* { dg-error {invalid conversion to
> > + type '__bf16'} } */
> > +  __bf16 scalar1_8 = is_a_double; /* { dg-error {invalid conversion to
> > + type '__bf16'} } */
> > +  __bf16 scalar1_9 = is_a_short_int; /* { dg-error {invalid conversion
> > + to type '__bf16'} } */
> > +
> > +  int initi_1_1 = glob_bfloat; /* { dg-error {invalid conversion from
> > + type '__bf16'} } */  float initi_1_2 = glob_bfloat; /* { dg-error
> > + {invalid conversion from type '__bf16'} } */
> > +  _Float16 initi_1_3 = glob_bfloat; /* { dg-error {invalid conversion
> > + from type '__bf16'} } */  short initi_1_4 = glob_bfloat; /* { dg-error
> > + {invalid conversion from type '__bf16'} } */  double initi_1_5 =
> > + glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} }
> > + */
> > +
> > +  __bf16 scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */
> > +  __bf16 scalar2_2 = { glob_bfloat };
> > +  __bf16 scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  __bf16 scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  __bf16 scalar2_5 = { is_a_float }; /* { dg-error {invalid conversion
> > + to type '__bf16'} } */
> > +  __bf16 scalar2_6 = { is_an_int }; /* { dg-error {invalid conversion
> > + to type '__bf16'} } */
> > +  __bf16 scalar2_7 = { is_a_float16 }; /* { dg-error {invalid
> > + conversion to type '__bf16'} } */
> > +  __bf16 scalar2_8 = { is_a_double }; /* { dg-error {invalid conversion
> > + to type '__bf16'} } */
> > +  __bf16 scalar2_9 = { is_a_short_int }; /* { dg-error {invalid
> > + conversion to type '__bf16'} } */
> > +
> > +  int initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion
> > + from type '__bf16'} } */  float initi_2_2 = { glob_bfloat }; /* {
> > + dg-error {invalid conversion from type '__bf16'} } */
> > +  _Float16 initi_2_3 = { glob_bfloat }; /* { dg-error {invalid
> > + conversion from type '__bf16'} } */  short initi_2_4 = { glob_bfloat
> > + }; /* { dg-error {invalid conversion from type '__bf16'} } */  double
> > + initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from
> > + type '__bf16'} } */
> > +
> > +  /* Assignments.  */
> > +
> > +  glob_bfloat = glob_bfloat;
> > +  glob_bfloat = 0;   /* { dg-error {invalid conversion to type '__bf16'} } */
> > +  glob_bfloat = 0.1; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */  glob_bfloat = is_a_float; /* { dg-error {invalid
> > + conversion to type '__bf16'} } */  glob_bfloat = is_an_int; /* {
> > + dg-error {invalid conversion to type '__bf16'} } */  glob_bfloat =
> > + is_a_float16; /* { dg-error {invalid conversion to type '__bf16'} } */
> > + glob_bfloat = is_a_double; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */  glob_bfloat = is_a_short_int; /* { dg-error {invalid
> > + conversion to type '__bf16'} } */
> > +
> > +  is_an_int = glob_bfloat; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */  is_a_float = glob_bfloat; /* { dg-error {invalid
> > + conversion from type '__bf16'} } */
> > +  is_a_float16 = glob_bfloat; /* { dg-error {invalid conversion from
> > + type '__bf16'} } */  is_a_double = glob_bfloat; /* { dg-error {invalid
> > + conversion from type '__bf16'} } */  is_a_short_int = glob_bfloat; /*
> > + { dg-error {invalid conversion from type '__bf16'} } */
> > +
> > +  /* Casting.  */
> > +
> > +  (void) glob_bfloat;
> > +  (__bf16) glob_bfloat;
> > +
> > +  (int) glob_bfloat; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (float) glob_bfloat; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (_Float16) glob_bfloat; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (double) glob_bfloat; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (short) glob_bfloat; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +
> > +  (__bf16) is_an_int; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) is_a_float; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) is_a_float16; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) is_a_double; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) is_a_short_int; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +
> > +  /* Compound literals.  */
> > +
> > +  (__bf16) {}; /* { dg-error {empty scalar initializer} } */
> > +  (__bf16) { glob_bfloat };
> > +  (__bf16) { 0 }; /* { dg-error {invalid conversion to type '__bf16'} }
> > + */
> > +  (__bf16) { 0.1 }; /* { dg-error {invalid conversion to type '__bf16'}
> > + } */
> > +  (__bf16) { is_a_float }; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) { is_an_int }; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) { is_a_float16 }; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) { is_a_double }; /* { dg-error {invalid conversion to type
> > + '__bf16'} } */
> > +  (__bf16) { is_a_short_int }; /* { dg-error {invalid conversion to
> > + type '__bf16'} } */
> > +
> > +  (int) { glob_bfloat }; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (float) { glob_bfloat }; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (_Float16) { glob_bfloat }; /* { dg-error {invalid conversion from
> > + type '__bf16'} } */
> > +  (double) { glob_bfloat }; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  (short) { glob_bfloat }; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +
> > +  /* Arrays and Structs.  */
> > +
> > +  typedef __bf16 array_type[2];
> > +  extern __bf16 extern_array[];
> > +
> > +  __bf16 array[2];
> > +  __bf16 zero_length_array[0];
> > +  __bf16 empty_init_array[] = {};
> > +  typedef __bf16 some_other_type[is_an_int];
> > +
> > +  struct struct1 {
> > +    __bf16 a;
> > +  };
> > +
> > +  union union1 {
> > +    __bf16 a;
> > +  };
> > +
> > +  /* Addressing and dereferencing.  */
> > +
> > +  __bf16 *bfloat_ptr = &scalar0;
> > +  scalar0 = *bfloat_ptr;
> > +
> > +  /* Pointer assignment.  */
> > +
> > +  __bf16 *bfloat_ptr2 = bfloat_ptr;
> > +  __bf16 *bfloat_ptr3 = array;
> > +
> > +  /* Pointer arithmetic.  */
> > +
> > +  ++bfloat_ptr;
> > +  --bfloat_ptr;
> > +  bfloat_ptr++;
> > +  bfloat_ptr--;
> > +  bfloat_ptr += 1;
> > +  bfloat_ptr -= 1;
> > +  bfloat_ptr - bfloat_ptr2;
> > +  bfloat_ptr = &bfloat_ptr3[0];
> > +  bfloat_ptr = &bfloat_ptr3[1];
> > +
> > +  /* Simple comparison.  */
> > +  scalar0 > glob_bfloat; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */  glob_bfloat == scalar0; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +  scalar0 > is_a_float; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */  is_a_float == scalar0; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +  scalar0 > 0; /* { dg-error {operation not permitted on type '__bf16'}
> > + } */
> > +  0 == scalar0; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */
> > +  scalar0 > 0.1; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */
> > +  0.1 == scalar0; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */
> > +  scalar0 > is_an_int; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */  is_an_int == scalar0; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +
> > +  /* Pointer comparison.  */
> > +
> > +  bfloat_ptr == &scalar0;
> > +  bfloat_ptr != &scalar0;
> > +  bfloat_ptr < &scalar0;
> > +  bfloat_ptr <= &scalar0;
> > +  bfloat_ptr > &scalar0;
> > +  bfloat_ptr >= &scalar0;
> > +  bfloat_ptr == bfloat_ptr2;
> > +  bfloat_ptr != bfloat_ptr2;
> > +  bfloat_ptr < bfloat_ptr2;
> > +  bfloat_ptr <= bfloat_ptr2;
> > +  bfloat_ptr > bfloat_ptr2;
> > +  bfloat_ptr >= bfloat_ptr2;
> > +
> > +  /* Conditional expressions.  */
> > +
> > +  0 ? scalar0 : scalar0;
> > +  0 ? scalar0 : is_a_float; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  0 ? is_a_float : scalar0; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  0 ? scalar0 : 0; /* { dg-error {invalid conversion to type '__bf16'}
> > + } */
> > +  0 ? 0 : scalar0; /* { dg-error {invalid conversion to type '__bf16'}
> > + } */
> > +  0 ? 0.1 : scalar0; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  0 ? scalar0 : 0.1; /* { dg-error {invalid conversion from type
> > + '__bf16'} } */
> > +  0 ? bfloat_ptr : bfloat_ptr2;
> > +  0 ? bfloat_ptr : float_ptr; /* { dg-warning {pointer type mismatch in
> > + conditional expression} } */
> > +  0 ? float_ptr : bfloat_ptr; /* { dg-warning {pointer type mismatch in
> > + conditional expression} } */
> > +
> > +  scalar0 ? scalar0 : scalar0; /* { dg-error {operation not permitted
> > + on type '__bf16'} } */
> > +  scalar0 ? is_a_float : scalar0; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +  scalar0 ? scalar0 : is_a_float; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +  scalar0 ? is_a_float : is_a_float; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +
> > +  /* Unary operators.  */
> > +
> > +  +scalar0; /* { dg-error {operation not permitted on type '__bf16'} }
> > + */  -scalar0; /* { dg-error {operation not permitted on type '__bf16'}
> > + } */  ~scalar0; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */  !scalar0; /* { dg-error {operation not permitted on
> > + type '__bf16'} } */  *scalar0; /* { dg-error {invalid type argument of
> > + unary '\*'} } */  __real scalar0; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */  __imag scalar0; /* { dg-error
> > + {operation not permitted on type '__bf16'} } */  ++scalar0; /* {
> > + dg-error {operation not permitted on type '__bf16'} } */  --scalar0;
> > + /* { dg-error {operation not permitted on type '__bf16'} } */
> > + scalar0++; /* { dg-error {operation not permitted on type '__bf16'} }
> > + */  scalar0--; /* { dg-error {operation not permitted on type
> > + '__bf16'} } */
> > +
> > +  /* Binary arithmetic operations.  */
> > +
> > +  scalar0 = glob_bfloat + *bfloat_ptr; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +  scalar0 = glob_bfloat + 0.1; /* { dg-error {operation not permitted
> > + on type '__bf16'} } */
> > +  scalar0 = glob_bfloat + 0; /* { dg-error {operation not permitted on
> > + type '__bf16'} } */
> > +  scalar0 = glob_bfloat + is_a_float; /* { dg-error {operation not
> > + permitted on type '__bf16'} } */
> > +
> > +  return scalar0;
> > +}
> > +
> > --
> > 2.18.2
>
  

Patch

diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 7a2da1db0b0..63a360b0f8b 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -69,6 +69,7 @@  DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node)
 DEF_PRIMITIVE_TYPE (UINT64, long_long_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (FLOAT16, ix86_float16_type_node)
+DEF_PRIMITIVE_TYPE (BFLOAT16, ix86_bf16_type_node)
 DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)
 DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node)
 DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fe7243c3837..6a04fb57e65 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -126,6 +126,9 @@  BDESC_VERIFYS (IX86_BUILTIN_MAX,
 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
 
 tree ix86_float16_type_node = NULL_TREE;
+tree ix86_bf16_type_node = NULL_TREE;
+tree ix86_bf16_ptr_type_node = NULL_TREE;
+
 /* Retrieve an element from the above table, building some of
    the types lazily.  */
 
@@ -1366,6 +1369,22 @@  ix86_register_float16_builtin_type (void)
 					    "_Float16");
 }
 
+static void
+ix86_register_bf16_builtin_type (void)
+{
+  ix86_bf16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (ix86_bf16_type_node) = 16;
+  SET_TYPE_MODE (ix86_bf16_type_node, BFmode);
+  layout_type (ix86_bf16_type_node);
+
+  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
+    {
+      lang_hooks.types.register_builtin_type (ix86_bf16_type_node,
+					    "__bf16");
+      ix86_bf16_ptr_type_node = build_pointer_type (ix86_bf16_type_node);
+    }
+}
+
 static void
 ix86_init_builtin_types (void)
 {
@@ -1396,6 +1415,8 @@  ix86_init_builtin_types (void)
 
   ix86_register_float16_builtin_type ();
 
+  ix86_register_bf16_builtin_type ();
+
   const_string_type_node
     = build_pointer_type (build_qualified_type
 			  (char_type_node, TYPE_QUAL_CONST));
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index e2e1e18d24d..b49daaef253 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -24,6 +24,8 @@  along with GCC; see the file COPYING3.  If not see
 FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
 FLOAT_MODE (TF, 16, ieee_quad_format);
 FLOAT_MODE (HF, 2, ieee_half_format);
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
 
 /* In ILP32 mode, XFmode has size 12 and alignment 4.
    In LP64 mode, XFmode has size and alignment 16.  */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e03f86d4a23..5d589f6a05c 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2399,6 +2399,7 @@  classify_argument (machine_mode mode, const_tree type,
     case E_CTImode:
       return 0;
     case E_HFmode:
+    case E_BFmode:
       if (!(bit_offset % 64))
 	classes[0] = X86_64_SSEHF_CLASS;
       else
@@ -2792,9 +2793,10 @@  construct_container (machine_mode mode, machine_mode orig_mode,
 	    intreg++;
 	    break;
 	  case X86_64_SSEHF_CLASS:
+	    tmpmode = (mode == BFmode ? BFmode : HFmode);
 	    exp [nexps++]
 	      = gen_rtx_EXPR_LIST (VOIDmode,
-				   gen_rtx_REG (HFmode,
+				   gen_rtx_REG (tmpmode,
 						GET_SSE_REGNO (sse_regno)),
 				   GEN_INT (i*8));
 	    sse_regno++;
@@ -4001,8 +4003,8 @@  function_value_32 (machine_mode orig_mode, machine_mode mode,
     /* Most things go in %eax.  */
     regno = AX_REG;
 
-  /* Return _Float16/_Complex _Foat16 by sse register.  */
-  if (mode == HFmode)
+  /* Return __bf16/_Float16/_Complex _Float16 by sse register.  */
+  if (mode == HFmode || mode == BFmode)
     regno = FIRST_SSE_REG;
   if (mode == HCmode)
     {
@@ -4050,6 +4052,7 @@  function_value_64 (machine_mode orig_mode, machine_mode mode,
 
       switch (mode)
 	{
+	case E_BFmode:
 	case E_HFmode:
 	case E_HCmode:
 	case E_SFmode:
@@ -5631,6 +5634,7 @@  ix86_output_ssemov (rtx_insn *insn, rtx *operands)
 	return "%vmovss\t{%1, %0|%0, %1}";
 
     case MODE_HF:
+    case MODE_BF:
       if (REG_P (operands[0]) && REG_P (operands[1]))
 	return "vmovsh\t{%d1, %0|%0, %d1}";
       else
@@ -19402,7 +19406,8 @@  ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
     }
 
   /* Require movement to gpr, and then store to memory.  */
-  if ((mode == HFmode || mode == HImode || mode == V2QImode)
+  if ((mode == HFmode || mode == HImode || mode == V2QImode
+       || mode == BFmode)
       && !TARGET_SSE4_1
       && SSE_CLASS_P (rclass)
       && !in_p && MEM_P (x))
@@ -22331,7 +22336,7 @@  ix86_scalar_mode_supported_p (scalar_mode mode)
     return default_decimal_float_supported_p ();
   else if (mode == TFmode)
     return true;
-  else if (mode == HFmode && TARGET_SSE2)
+  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
     return true;
   else
     return default_scalar_mode_supported_p (mode);
@@ -22646,6 +22651,8 @@ ix86_mangle_type (const_tree type)
 
   switch (TYPE_MODE (type))
     {
+    case E_BFmode:
+      return "u6__bf16";
     case E_HFmode:
       /* _Float16 is "DF16_".
 	 Align with clang's decision in https://reviews.llvm.org/D33719. */
@@ -22661,6 +22668,55 @@ ix86_mangle_type (const_tree type)
     }
 }
 
+/* Return the diagnostic message string if conversion from FROMTYPE to
+   TOTYPE is not allowed, NULL otherwise.  */
+
+static const char *
+ix86_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+  if (element_mode (fromtype) != element_mode (totype))
+    {
+      /* Do not allow conversions to/from BFmode scalar types.  */
+      if (TYPE_MODE (fromtype) == BFmode)
+	return N_("invalid conversion from type %<__bf16%>");
+      if (TYPE_MODE (totype) == BFmode)
+	return N_("invalid conversion to type %<__bf16%>");
+    }
+
+  /* Conversion allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the unary operation OP is
+   not permitted on TYPE, NULL otherwise.  */
+
+static const char *
+ix86_invalid_unary_op (int op, const_tree type)
+{
+  /* Reject all single-operand operations on BFmode except for &.  */
+  if (element_mode (type) == BFmode && op != ADDR_EXPR)
+    return N_("operation not permitted on type %<__bf16%>");
+
+  /* Operation allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
+
+static const char *
+ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
+			   const_tree type2)
+{
+  /* Reject all 2-operand operations on BFmode.  */
+  if (element_mode (type1) == BFmode
+      || element_mode (type2) == BFmode)
+    return N_("operation not permitted on type %<__bf16%>");
+
+  /* Operation allowed.  */
+  return NULL;
+}
+
 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
 
 static tree
@@ -24718,6 +24774,15 @@  ix86_libgcc_floating_mode_supported_p
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE ix86_mangle_type
 
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
+
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
+
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
+
 #undef TARGET_STACK_PROTECT_GUARD
 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f16df633e84..0da3dce1d31 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1046,7 +1046,7 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode	\
    || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode	\
    || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode	\
-   || (MODE) == HFmode)
+   || (MODE) == HFmode || (MODE) == BFmode)
 
 #define VALID_SSE_REG_MODE(MODE)					\
   ((MODE) == V1TImode || (MODE) == TImode				\
@@ -1077,7 +1077,7 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == CQImode || (MODE) == CHImode				\
    || (MODE) == CSImode || (MODE) == CDImode				\
    || (MODE) == SDmode || (MODE) == DDmode				\
-   || (MODE) == HFmode || (MODE) == HCmode				\
+   || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode		\
    || (MODE) == V2HImode || (MODE) == V2HFmode				\
    || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode	\
    || (TARGET_64BIT							\
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9aaeb695f0f..1f7b018913a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -499,7 +499,7 @@ 
 
 ;; Main data type used by the insn
 (define_attr "mode"
-  "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
+  "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
    V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
   (const_string "unknown"))
 
@@ -1104,7 +1104,7 @@ 
 ;; GET_MODE_SIZE (<MODE>mode).  For XFmode which depends on
 ;; command line options just use GET_MODE_SIZE macro.
 (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
-			     (TI "16") (HF "2") (SF "4") (DF "8")
+			     (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
 			     (XF "GET_MODE_SIZE (XFmode)")
 			     (V16QI "16") (V32QI "32") (V64QI "64")
 			     (V8HI "16") (V16HI "32") (V32HI "64")
@@ -1248,7 +1248,7 @@
 (define_mode_iterator X87MODEF [SF DF XF])
 
 ;; All x87 floating point modes plus HFmode
-(define_mode_iterator X87MODEFH [HF SF DF XF])
+(define_mode_iterator X87MODEFH [HF SF DF XF BF])
 
 ;; All SSE floating point modes
 (define_mode_iterator SSEMODEF [HF SF DF TF])
@@ -3408,9 +3408,11 @@
   operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
 })
 
-(define_insn "*pushhf_rex64"
-  [(set (match_operand:HF 0 "push_operand" "=X,X")
-	(match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))]
+(define_mode_iterator HFBF [HF BF])
+
+(define_insn "*push<mode>_rex64"
+  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
+	(match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
   "TARGET_64BIT"
 {
   /* Anything else should be already split before reg-stack.  */
@@ -3421,9 +3423,9 @@
    (set_attr "type" "push,multi")
    (set_attr "mode" "DI,TI")])
 
-(define_insn "*pushhf"
-  [(set (match_operand:HF 0 "push_operand" "=X,X")
-	(match_operand:HF 1 "general_no_elim_operand" "rmF,x"))]
+(define_insn "*push<mode>"
+  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
+	(match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
   "!TARGET_64BIT"
 {
   /* Anything else should be already split before reg-stack.  */
@@ -3462,7 +3464,7 @@
    (set_attr "unit" "i387,*,*")
    (set_attr "mode" "SF,SI,SF")])
 
-(define_mode_iterator MODESH [SF HF])
+(define_mode_iterator MODESH [SF HF BF])
 ;; %%% Kill this when call knows how to work this out.
 (define_split
   [(set (match_operand:MODESH 0 "push_operand")
@@ -3950,18 +3952,18 @@
 	   ]
 	   (const_string "*")))])
 
-(define_insn "*movhf_internal"
- [(set (match_operand:HF 0 "nonimmediate_operand"
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:HFBF 0 "nonimmediate_operand"
 	 "=?r,?r,?r,?m,v,v,?r,m,?v,v")
-       (match_operand:HF 1 "general_operand"
+       (match_operand:HFBF 1 "general_operand"
 	 "r  ,F ,m ,rF,C,v, v,v,r ,m"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress
       || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || (TARGET_SSE2
-	  && standard_sse_constant_p (operands[1], HFmode) == 1)
-      || memory_operand (operands[0], HFmode))"
+	  && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
+      || memory_operand (operands[0], <MODE>mode))"
 {
   switch (get_attr_type (insn))
     {
diff --git a/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C b/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
new file mode 100644
index 00000000000..962c8504775
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/bfloat_cpp_typecheck.C
@@ -0,0 +1,10 @@ 
+/* { dg-do assemble { target sse2} } */
+/* { dg-options "-msse2 -O3 --save-temps" } */
+
+void foo (void)
+{
+  __bf16 (); /* { dg-bogus {invalid conversion to type '__bf16'}  } */
+  __bf16 a = __bf16(); /* { dg-bogus {invalid conversion to type '__bf16'} } */
+  __bf16 (0x1234); /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 (0.1); /* { dg-error {invalid conversion to type '__bf16'} } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/bfloat16-1.c b/gcc/testsuite/gcc.target/i386/bfloat16-1.c
new file mode 100644
index 00000000000..6aaec28efd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bfloat16-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+__bf16
+foo (int a)
+{
+  union {
+    int a;
+    __bf16 b;
+  }c;
+  c.a = a;
+  return c.b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
new file mode 100644
index 00000000000..612d55be826
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
@@ -0,0 +1,8 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse2" } */
+
+__bf16/* { dg-error "unknown type name '__bf16'" } */
+foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
+{
+  return x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
new file mode 100644
index 00000000000..a3286e26c48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-2.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-avx512f" } */
+
+union flt
+{
+  __bf16 flt;
+  short s;
+};
+
+__bf16
+foo (union flt x)
+{
+  return x.flt;
+}
+
+/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 } } } */
+/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
new file mode 100644
index 00000000000..f76d5547758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-scalar-typecheck.c
@@ -0,0 +1,215 @@ 
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+
+
+__bf16 glob_bfloat;
+
+int is_an_int;
+short is_a_short_int;
+float is_a_float;
+float is_a_float16;
+double is_a_double;
+
+float *float_ptr;
+
+__bf16 foo1 (void) { return (__bf16) 0x1234; } /* { dg-error {invalid conversion to type '__bf16'} } */
+__bf16 foo2 (void) { return (__bf16) (short) 0x1234; } /* { dg-error {invalid conversion to type '__bf16'} } */
+
+__bf16 footest (__bf16 scalar0)
+{
+
+  /* Initialisation  */
+
+  __bf16 scalar1_1;
+  __bf16 scalar1_2 = glob_bfloat;
+  __bf16 scalar1_3 = 0;   /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar1_4 = 0.1; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar1_5 = is_a_float; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar1_6 = is_an_int;  /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar1_7 = is_a_float16; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar1_8 = is_a_double; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar1_9 = is_a_short_int; /* { dg-error {invalid conversion to type '__bf16'} } */
+
+  int initi_1_1 = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  float initi_1_2 = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  _Float16 initi_1_3 = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+
+  __bf16 scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */
+  __bf16 scalar2_2 = { glob_bfloat };
+  __bf16 scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar2_5 = { is_a_float }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar2_6 = { is_an_int }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar2_7 = { is_a_float16 }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar2_8 = { is_a_double }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  __bf16 scalar2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type '__bf16'} } */
+
+  int initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  float initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  _Float16 initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  short initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  double initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+
+  /* Assignments.  */
+
+  glob_bfloat = glob_bfloat;
+  glob_bfloat = 0;   /* { dg-error {invalid conversion to type '__bf16'} } */
+  glob_bfloat = 0.1; /* { dg-error {invalid conversion to type '__bf16'} } */
+  glob_bfloat = is_a_float; /* { dg-error {invalid conversion to type '__bf16'} } */
+  glob_bfloat = is_an_int; /* { dg-error {invalid conversion to type '__bf16'} } */
+  glob_bfloat = is_a_float16; /* { dg-error {invalid conversion to type '__bf16'} } */
+  glob_bfloat = is_a_double; /* { dg-error {invalid conversion to type '__bf16'} } */
+  glob_bfloat = is_a_short_int; /* { dg-error {invalid conversion to type '__bf16'} } */
+
+  is_an_int = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  is_a_float = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  is_a_float16 = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  is_a_double = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  is_a_short_int = glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+
+  /* Casting.  */
+
+  (void) glob_bfloat;
+  (__bf16) glob_bfloat;
+
+  (int) glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (float) glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (_Float16) glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (double) glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (short) glob_bfloat; /* { dg-error {invalid conversion from type '__bf16'} } */
+
+  (__bf16) is_an_int; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) is_a_float; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) is_a_float16; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) is_a_double; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) is_a_short_int; /* { dg-error {invalid conversion to type '__bf16'} } */
+
+  /* Compound literals.  */
+
+  (__bf16) {}; /* { dg-error {empty scalar initializer} } */
+  (__bf16) { glob_bfloat };
+  (__bf16) { 0 }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) { 0.1 }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) { is_a_float }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) { is_an_int }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) { is_a_float16 }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) { is_a_double }; /* { dg-error {invalid conversion to type '__bf16'} } */
+  (__bf16) { is_a_short_int }; /* { dg-error {invalid conversion to type '__bf16'} } */
+
+  (int) { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (float) { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (_Float16) { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (double) { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+  (short) { glob_bfloat }; /* { dg-error {invalid conversion from type '__bf16'} } */
+
+  /* Arrays and Structs.  */
+
+  typedef __bf16 array_type[2];
+  extern __bf16 extern_array[];
+
+  __bf16 array[2];
+  __bf16 zero_length_array[0];
+  __bf16 empty_init_array[] = {};
+  typedef __bf16 some_other_type[is_an_int];
+
+  struct struct1 {
+    __bf16 a;
+  };
+
+  union union1 {
+    __bf16 a;
+  };
+
+  /* Addressing and dereferencing.  */
+
+  __bf16 *bfloat_ptr = &scalar0;
+  scalar0 = *bfloat_ptr;
+
+  /* Pointer assignment.  */
+
+  __bf16 *bfloat_ptr2 = bfloat_ptr;
+  __bf16 *bfloat_ptr3 = array;
+
+  /* Pointer arithmetic.  */
+
+  ++bfloat_ptr;
+  --bfloat_ptr;
+  bfloat_ptr++;
+  bfloat_ptr--;
+  bfloat_ptr += 1;
+  bfloat_ptr -= 1;
+  bfloat_ptr - bfloat_ptr2;
+  bfloat_ptr = &bfloat_ptr3[0];
+  bfloat_ptr = &bfloat_ptr3[1];
+
+  /* Simple comparison.  */
+  scalar0 > glob_bfloat; /* { dg-error {operation not permitted on type '__bf16'} } */
+  glob_bfloat == scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  scalar0 > is_a_float; /* { dg-error {operation not permitted on type '__bf16'} } */
+  is_a_float == scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  scalar0 > 0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  0 == scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  scalar0 > 0.1; /* { dg-error {operation not permitted on type '__bf16'} } */
+  0.1 == scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+  scalar0 > is_an_int; /* { dg-error {operation not permitted on type '__bf16'} } */
+  is_an_int == scalar0; /* { dg-error {operation not permitted on type '__bf16'} } */
+
+  /* Pointer comparison.  */
+
+  bfloat_ptr == &scalar0;