i386: Fix some intrinsics without alignment requirements.
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Testing passed
|
Commit Message
Hi all,
This patch aims to fix some intrinsics without alignment requirement, but
raised runtime error's problem.
Bootstrapped and tested on x86_64-linux-gnu, OK for trunk?
BRs,
Lin
gcc/ChangeLog:
PR target/84508
* config/i386/emmintrin.h
(_mm_load_sd): Remove alignment requirement.
(_mm_store_sd): Ditto.
(_mm_loadh_pd): Ditto.
(_mm_loadl_pd): Ditto.
(_mm_storel_pd): Add alignment requirement.
* config/i386/xmmintrin.h
(_mm_loadh_pi): Remove alignment requirement.
(_mm_loadl_pi): Ditto.
(_mm_load_ss): Ditto.
(_mm_store_ss): Ditto.
gcc/testsuite/ChangeLog:
PR target/84508
* gcc.target/i386/pr84508-1.c: New test.
* gcc.target/i386/pr84508-2.c: Ditto.
---
gcc/config/i386/emmintrin.h | 11 ++++++-----
gcc/config/i386/xmmintrin.h | 9 +++++----
gcc/testsuite/gcc.target/i386/pr84508-1.c | 11 +++++++++++
gcc/testsuite/gcc.target/i386/pr84508-2.c | 11 +++++++++++
4 files changed, 33 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr84508-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr84508-2.c
Comments
On Wed, May 8, 2024 at 10:13 AM Hu, Lin1 <lin1.hu@intel.com> wrote:
>
> Hi all,
>
> This patch aims to fix some intrinsics without alignment requirement, but
> raised runtime error's problem.
>
> Bootstrapped and tested on x86_64-linux-gnu, OK for trunk?
Ok.
>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
> PR target/84508
> * config/i386/emmintrin.h
> (_mm_load_sd): Remove alignment requirement.
> (_mm_store_sd): Ditto.
> (_mm_loadh_pd): Ditto.
> (_mm_loadl_pd): Ditto.
> (_mm_storel_pd): Add alignment requirement.
> * config/i386/xmmintrin.h
> (_mm_loadh_pi): Remove alignment requirement.
> (_mm_loadl_pi): Ditto.
> (_mm_load_ss): Ditto.
> (_mm_store_ss): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> PR target/84508
> * gcc.target/i386/pr84508-1.c: New test.
> * gcc.target/i386/pr84508-2.c: Ditto.
> ---
> gcc/config/i386/emmintrin.h | 11 ++++++-----
> gcc/config/i386/xmmintrin.h | 9 +++++----
> gcc/testsuite/gcc.target/i386/pr84508-1.c | 11 +++++++++++
> gcc/testsuite/gcc.target/i386/pr84508-2.c | 11 +++++++++++
> 4 files changed, 33 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr84508-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr84508-2.c
>
> diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
> index 915a5234c38..d7fc1af9687 100644
> --- a/gcc/config/i386/emmintrin.h
> +++ b/gcc/config/i386/emmintrin.h
> @@ -56,6 +56,7 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> /* Unaligned version of the same types. */
> typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
> typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
> +typedef double double_u __attribute__ ((__may_alias__, __aligned__ (1)));
>
> /* Create a selector for use with the SHUFPD instruction. */
> #define _MM_SHUFFLE2(fp1,fp0) \
> @@ -145,7 +146,7 @@ _mm_load1_pd (double const *__P)
> extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_load_sd (double const *__P)
> {
> - return _mm_set_sd (*__P);
> + return __extension__ (__m128d){ *(double_u *)__P, 0.0 };
> }
>
> extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> @@ -180,7 +181,7 @@ _mm_storeu_pd (double *__P, __m128d __A)
> extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_store_sd (double *__P, __m128d __A)
> {
> - *__P = ((__v2df)__A)[0];
> + *(double_u *)__P = ((__v2df)__A)[0] ;
> }
>
> extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> @@ -192,7 +193,7 @@ _mm_cvtsd_f64 (__m128d __A)
> extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_storel_pd (double *__P, __m128d __A)
> {
> - _mm_store_sd (__P, __A);
> + *__P = ((__v2df)__A)[0];
> }
>
> /* Stores the upper DPFP value. */
> @@ -973,13 +974,13 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B)
> extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_loadh_pd (__m128d __A, double const *__B)
> {
> - return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
> + return __extension__ (__m128d) { ((__v2df)__A)[0], *(double_u*)__B };
> }
>
> extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_loadl_pd (__m128d __A, double const *__B)
> {
> - return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
> + return __extension__ (__m128d) { *(double_u*)__B, ((__v2df)__A)[1] };
> }
>
> extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
> index 71b9955b843..9e20f262839 100644
> --- a/gcc/config/i386/xmmintrin.h
> +++ b/gcc/config/i386/xmmintrin.h
> @@ -73,6 +73,7 @@ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
>
> /* Unaligned version of the same type. */
> typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
> +typedef float float_u __attribute__ ((__may_alias__, __aligned__ (1)));
>
> /* Internal data types for implementing the intrinsics. */
> typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> @@ -774,7 +775,7 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B)
> /* Sets the upper two SPFP values with 64-bits of data loaded from P;
> the lower two values are passed through from A. */
> extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_loadh_pi (__m128 __A, __m64 const *__P)
> +_mm_loadh_pi (__m128 __A, __m64_u const *__P)
> {
> return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
> }
> @@ -803,7 +804,7 @@ _mm_movelh_ps (__m128 __A, __m128 __B)
> /* Sets the lower two SPFP values with 64-bits of data loaded from P;
> the upper two values are passed through from A. */
> extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_loadl_pi (__m128 __A, __m64 const *__P)
> +_mm_loadl_pi (__m128 __A, __m64_u const *__P)
> {
> return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
> }
> @@ -910,7 +911,7 @@ _mm_set_ps1 (float __F)
> extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_load_ss (float const *__P)
> {
> - return _mm_set_ss (*__P);
> + return __extension__ (__m128) (__v4sf){ *(float_u *)__P, 0.0f, 0.0f, 0.0f };
> }
>
> /* Create a vector with all four elements equal to *P. */
> @@ -966,7 +967,7 @@ _mm_setr_ps (float __Z, float __Y, float __X, float __W)
> extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> _mm_store_ss (float *__P, __m128 __A)
> {
> - *__P = ((__v4sf)__A)[0];
> + *(float_u *)__P = ((__v4sf)__A)[0];
> }
>
> extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> diff --git a/gcc/testsuite/gcc.target/i386/pr84508-1.c b/gcc/testsuite/gcc.target/i386/pr84508-1.c
> new file mode 100644
> index 00000000000..bb3e28d017e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr84508-1.c
> @@ -0,0 +1,11 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-fsanitize=undefined" } */
> +#include <emmintrin.h>
> +
> +int main()
> +{
> + unsigned char t[16+1];
> + __m128d x = _mm_load_sd((const double *)(t+1));
> + _mm_store_sd((double*)(t+1), x);
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr84508-2.c b/gcc/testsuite/gcc.target/i386/pr84508-2.c
> new file mode 100644
> index 00000000000..32a8f20a536
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr84508-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-fsanitize=undefined" } */
> +#include <emmintrin.h>
> +
> +int main()
> +{
> + unsigned char t[8+1];
> + __m128 x = _mm_load_ss((const float *)(t));
> + _mm_store_ss((float*)(t), x);
> + return 0;
> +}
> --
> 2.31.1
>
@@ -56,6 +56,7 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
/* Unaligned version of the same types. */
typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+typedef double double_u __attribute__ ((__may_alias__, __aligned__ (1)));
/* Create a selector for use with the SHUFPD instruction. */
#define _MM_SHUFFLE2(fp1,fp0) \
@@ -145,7 +146,7 @@ _mm_load1_pd (double const *__P)
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sd (double const *__P)
{
- return _mm_set_sd (*__P);
+ return __extension__ (__m128d){ *(double_u *)__P, 0.0 };
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -180,7 +181,7 @@ _mm_storeu_pd (double *__P, __m128d __A)
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sd (double *__P, __m128d __A)
{
- *__P = ((__v2df)__A)[0];
+ *(double_u *)__P = ((__v2df)__A)[0] ;
}
extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -192,7 +193,7 @@ _mm_cvtsd_f64 (__m128d __A)
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pd (double *__P, __m128d __A)
{
- _mm_store_sd (__P, __A);
+ *__P = ((__v2df)__A)[0];
}
/* Stores the upper DPFP value. */
@@ -973,13 +974,13 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B)
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
- return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+ return __extension__ (__m128d) { ((__v2df)__A)[0], *(double_u*)__B };
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
- return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+ return __extension__ (__m128d) { *(double_u*)__B, ((__v2df)__A)[1] };
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -73,6 +73,7 @@ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
/* Unaligned version of the same type. */
typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+typedef float float_u __attribute__ ((__may_alias__, __aligned__ (1)));
/* Internal data types for implementing the intrinsics. */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
@@ -774,7 +775,7 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B)
/* Sets the upper two SPFP values with 64-bits of data loaded from P;
the lower two values are passed through from A. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadh_pi (__m128 __A, __m64 const *__P)
+_mm_loadh_pi (__m128 __A, __m64_u const *__P)
{
return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
}
@@ -803,7 +804,7 @@ _mm_movelh_ps (__m128 __A, __m128 __B)
/* Sets the lower two SPFP values with 64-bits of data loaded from P;
the upper two values are passed through from A. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadl_pi (__m128 __A, __m64 const *__P)
+_mm_loadl_pi (__m128 __A, __m64_u const *__P)
{
return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
}
@@ -910,7 +911,7 @@ _mm_set_ps1 (float __F)
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ss (float const *__P)
{
- return _mm_set_ss (*__P);
+ return __extension__ (__m128) (__v4sf){ *(float_u *)__P, 0.0f, 0.0f, 0.0f };
}
/* Create a vector with all four elements equal to *P. */
@@ -966,7 +967,7 @@ _mm_setr_ps (float __Z, float __Y, float __X, float __W)
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ss (float *__P, __m128 __A)
{
- *__P = ((__v4sf)__A)[0];
+ *(float_u *)__P = ((__v4sf)__A)[0];
}
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-fsanitize=undefined" } */
+#include <emmintrin.h>
+
+int main()
+{
+ unsigned char t[16+1];
+ __m128d x = _mm_load_sd((const double *)(t+1));
+ _mm_store_sd((double*)(t+1), x);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-fsanitize=undefined" } */
+#include <emmintrin.h>
+
+int main()
+{
+ unsigned char t[8+1];
+ __m128 x = _mm_load_ss((const float *)(t));
+ _mm_store_ss((float*)(t), x);
+ return 0;
+}