fold-const: Ignore padding bits in native_interpret_expr REAL_CST reverse verification [PR108934]

Message ID ZABdUSaG8Dw/avH7@tucnak
State New
Headers
Series fold-const: Ignore padding bits in native_interpret_expr REAL_CST reverse verification [PR108934] |

Commit Message

Jakub Jelinek March 2, 2023, 8:24 a.m. UTC
  Hi!

In the following testcase we try to std::bit_cast a (pair of) integral
value(s) which has some non-zero bits in the place of the x86 long double
padding bits (for 64-bit it is a 16 byte type with only 10 bytes actually
loaded/stored by hw, for 32-bit a 12 byte type) and starting with my
PR104522 change we reject that as native_interpret_expr fails on it.
The PR104522 change extends what
has been done before for MODE_COMPOSITE_P (but those don't have any padding
bits) to all floating point types, because e.g. the exact x86 long double
has various bit combinations we don't support, like
pseudo-(denormals,infinities,NaNs) or unnormals.  The HW handles some of
those as exceptional cases and others similarly to the non-pseudo ones.
But for the padding bits it actually doesn't load/store those bits at all,
it loads/stores 10 bytes.  So, I think we should exempt the padding bits
from the reverse comparison (the native_encode_expr bits for the padding
will be all zeros), which the following patch does.  For bit_cast it is
similar to e.g. ignoring padding bits if the destination is a structure
which has padding bits in there.

With this change, auto-init-4.c again behaves the way it did before the
PR105259 change, because some more VCEs can now be done.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-03-02  Jakub Jelinek  <jakub@redhat.com>

	PR c++/108934
	* fold-const.cc (native_interpret_expr) <case REAL_CST>: Before memcmp
	comparison copy the bytes from ptr to a temporary buffer and clear
	padding bits in there.

	* gcc.target/i386/auto-init-4.c: Revert PR105259 change.
	* g++.target/i386/pr108934.C: New test.


	Jakub
  

Comments

Richard Biener March 2, 2023, 8:26 a.m. UTC | #1
On Thu, 2 Mar 2023, Jakub Jelinek wrote:

> Hi!
> 
> In the following testcase we try to std::bit_cast a (pair of) integral
> value(s) which has some non-zero bits in the place of x86 long double
> (for 64-bit 16 byte type with 10 bytes actually loaded/stored by hw,
> for 32-bit 12 byte) and starting with my PR104522 change we reject that
> as native_interpret_expr fails on it.  The PR104522 change extends what
> has been done before for MODE_COMPOSITE_P (but those don't have any padding
> bits) to all floating point types, because e.g. the exact x86 long double
> has various bit combinations we don't support, like
> pseudo-(denormals,infinities,NaNs) or unnormals.  The HW handles some of
> those as exceptional cases and others similarly to the non-pseudo ones.
> But for the padding bits it actually doesn't load/store those bits at all,
> it loads/stores 10 bytes.  So, I think we should exempt the padding bits
> from the reverse comparison (the native_encode_expr bits for the padding
> will be all zeros), which the following patch does.  For bit_cast it is
> similar to e.g. ignoring padding bits if the destination is a structure
> which has padding bits in there.
> 
> The change changed auto-init-4.c to how it has been behaving before the
> PR105259 change, where some more VCEs can be now done.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2023-03-02  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR c++/108934
> 	* fold-const.cc (native_interpret_expr) <case REAL_CST>: Before memcmp
> 	comparison copy the bytes from ptr to a temporary buffer and clearing
> 	padding bits in there.
> 
> 	* gcc.target/i386/auto-init-4.c: Revert PR105259 change.
> 	* g++.target/i386/pr108934.C: New test.
> 
> --- gcc/fold-const.cc.jj	2023-01-04 10:52:43.124897826 +0100
> +++ gcc/fold-const.cc	2023-03-01 16:49:14.531490482 +0100
> @@ -8873,11 +8873,13 @@ native_interpret_expr (tree type, const
>  	     valid values that GCC can't really represent accurately.
>  	     See PR95450.  Even for other modes, e.g. x86 XFmode can have some
>  	     bit combinationations which GCC doesn't preserve.  */
> -	  unsigned char buf[24];
> +	  unsigned char buf[24 * 2];
>  	  scalar_float_mode mode = SCALAR_FLOAT_TYPE_MODE (type);
>  	  int total_bytes = GET_MODE_SIZE (mode);
> +	  memcpy (buf + 24, ptr, total_bytes);
> +	  clear_type_padding_in_mask (type, buf + 24);
>  	  if (native_encode_expr (ret, buf, total_bytes, 0) != total_bytes
> -	      || memcmp (ptr, buf, total_bytes) != 0)
> +	      || memcmp (buf + 24, buf, total_bytes) != 0)
>  	    return NULL_TREE;
>  	  return ret;
>  	}
> --- gcc/testsuite/gcc.target/i386/auto-init-4.c.jj	2022-04-13 15:42:39.105365390 +0200
> +++ gcc/testsuite/gcc.target/i386/auto-init-4.c	2023-03-02 08:56:53.788029181 +0100
> @@ -15,6 +15,5 @@ long double foo()
>  }
>  
>  
> -/* The long double init isn't expanded optimally, see PR105259.  For ia32
> -   it uses zero-initialization.  */
> -/* { dg-final { scan-assembler-times "long\t-16843010" 3 } } */
> +/* { dg-final { scan-assembler-times "long\t-16843010" 5  { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "long\t-16843010" 3  { target { ia32 } } } } */
> --- gcc/testsuite/g++.target/i386/pr108934.C.jj	2023-03-01 17:04:19.931299866 +0100
> +++ gcc/testsuite/g++.target/i386/pr108934.C	2023-03-01 17:03:27.567062785 +0100
> @@ -0,0 +1,28 @@
> +// PR c++/108934
> +// { dg-do compile { target c++11 } }
> +
> +struct S { unsigned long long a[2]; };
> +struct T { unsigned long long b[6]; };
> +struct U { unsigned long long c[2]; long double d; unsigned long long e[2]; };
> +
> +#if __SIZEOF_LONG_DOUBLE__ == 16 && __LDBL_MANT_DIG__ == 64 && __SIZEOF_LONG_LONG__ == 8
> +constexpr long double
> +foo (S x)
> +{
> +  return __builtin_bit_cast (long double, x);
> +}
> +
> +constexpr S a = { 0ULL, 0xffffffffffff0000ULL };
> +constexpr long double b = foo (a);
> +static_assert (b == 0.0L, "");
> +
> +constexpr U
> +bar (T x)
> +{
> +  return __builtin_bit_cast (U, x);
> +}
> +
> +constexpr T c = { 0ULL, 0ULL, 0ULL, 0xffffffffffff0000ULL, 0ULL, 0ULL };
> +constexpr U d = bar (c);
> +static_assert (d.d == 0.0L, "");
> +#endif
> 
> 	Jakub
> 
>
  

Patch

--- gcc/fold-const.cc.jj	2023-01-04 10:52:43.124897826 +0100
+++ gcc/fold-const.cc	2023-03-01 16:49:14.531490482 +0100
@@ -8873,11 +8873,13 @@  native_interpret_expr (tree type, const
 	     valid values that GCC can't really represent accurately.
 	     See PR95450.  Even for other modes, e.g. x86 XFmode can have some
 	     bit combinationations which GCC doesn't preserve.  */
-	  unsigned char buf[24];
+	  unsigned char buf[24 * 2];
 	  scalar_float_mode mode = SCALAR_FLOAT_TYPE_MODE (type);
 	  int total_bytes = GET_MODE_SIZE (mode);
+	  memcpy (buf + 24, ptr, total_bytes);
+	  clear_type_padding_in_mask (type, buf + 24);
 	  if (native_encode_expr (ret, buf, total_bytes, 0) != total_bytes
-	      || memcmp (ptr, buf, total_bytes) != 0)
+	      || memcmp (buf + 24, buf, total_bytes) != 0)
 	    return NULL_TREE;
 	  return ret;
 	}
--- gcc/testsuite/gcc.target/i386/auto-init-4.c.jj	2022-04-13 15:42:39.105365390 +0200
+++ gcc/testsuite/gcc.target/i386/auto-init-4.c	2023-03-02 08:56:53.788029181 +0100
@@ -15,6 +15,5 @@  long double foo()
 }
 
 
-/* The long double init isn't expanded optimally, see PR105259.  For ia32
-   it uses zero-initialization.  */
-/* { dg-final { scan-assembler-times "long\t-16843010" 3 } } */
+/* { dg-final { scan-assembler-times "long\t-16843010" 5  { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "long\t-16843010" 3  { target { ia32 } } } } */
--- gcc/testsuite/g++.target/i386/pr108934.C.jj	2023-03-01 17:04:19.931299866 +0100
+++ gcc/testsuite/g++.target/i386/pr108934.C	2023-03-01 17:03:27.567062785 +0100
@@ -0,0 +1,28 @@ 
+// PR c++/108934
+// { dg-do compile { target c++11 } }
+
+struct S { unsigned long long a[2]; };
+struct T { unsigned long long b[6]; };
+struct U { unsigned long long c[2]; long double d; unsigned long long e[2]; };
+
+#if __SIZEOF_LONG_DOUBLE__ == 16 && __LDBL_MANT_DIG__ == 64 && __SIZEOF_LONG_LONG__ == 8
+constexpr long double
+foo (S x)
+{
+  return __builtin_bit_cast (long double, x);
+}
+
+constexpr S a = { 0ULL, 0xffffffffffff0000ULL };
+constexpr long double b = foo (a);
+static_assert (b == 0.0L, "");
+
+constexpr U
+bar (T x)
+{
+  return __builtin_bit_cast (U, x);
+}
+
+constexpr T c = { 0ULL, 0ULL, 0ULL, 0xffffffffffff0000ULL, 0ULL, 0ULL };
+constexpr U d = bar (c);
+static_assert (d.d == 0.0L, "");
+#endif