rs6000: Make _mm_slli_si128 and _mm_bslli_si128 consistent [PR109167]

Message ID 9cac9802-cb71-ad06-fc2d-a79b486091fa@linux.ibm.com
State New
Headers
Series rs6000: Make _mm_slli_si128 and _mm_bslli_si128 consistent [PR109167] |

Commit Message

Kewen.Lin March 20, 2023, 6:32 a.m. UTC
  Hi,

As PR109167 shows, it's unexpected to have two different
implementations for _mm_slli_si128 and _mm_bslli_si128; as
in gcc/config/i386/emmintrin.h, they should be the same.  So
this patch fixes it accordingly.

Bootstrapped and regtested on powerpc64-linux-gnu P8 and
powerpc64le-linux-gnu P9 and P10.

I'm going to push this soon if there are no objections.

BR,
Kewen
-----
	PR target/109167

gcc/ChangeLog:

	* config/rs6000/emmintrin.h (_mm_bslli_si128): Move the implementation
	from ...
	(_mm_slli_si128): ... here.  Change to call _mm_bslli_si128 directly.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr109167.c: New test.
---
 gcc/config/rs6000/emmintrin.h               | 26 ++++--------
 gcc/testsuite/gcc.target/powerpc/pr109167.c | 47 +++++++++++++++++++++
 2 files changed, 56 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109167.c

--
2.31.1
  

Patch

diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
index bfff7ff6fea..44d01a83d8d 100644
--- a/gcc/config/rs6000/emmintrin.h
+++ b/gcc/config/rs6000/emmintrin.h
@@ -1601,8 +1601,14 @@  _mm_bslli_si128 (__m128i __A, const int __N)
   __v16qu __result;
   const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

-  if (__N >= 0 && __N < 16)
+  if (__N == 0)
+    return __A;
+  else if (__N > 0 && __N < 16)
+#ifdef __LITTLE_ENDIAN__
     __result = vec_sld ((__v16qu) __A, __zeros, __N);
+#else
+    __result = vec_sld (__zeros, (__v16qu) __A, (16 - __N));
+#endif
   else
     __result = __zeros;

@@ -1647,23 +1653,9 @@  _mm_srli_si128 (__m128i __A, const int __N)
 }

 extern __inline  __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_si128 (__m128i __A, const int _imm5)
+_mm_slli_si128 (__m128i __A, const int __N)
 {
-  __v16qu __result;
-  const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-  if (_imm5 == 0)
-    return __A;
-  else if (_imm5 > 0 && _imm5 < 16)
-#ifdef __LITTLE_ENDIAN__
-    __result = vec_sld ((__v16qu) __A, __zeros, _imm5);
-#else
-    __result = vec_sld (__zeros, (__v16qu) __A, (16 - _imm5));
-#endif
-  else
-    __result = __zeros;
-
-  return (__m128i) __result;
+  return _mm_bslli_si128 (__A, __N);
 }

 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109167.c b/gcc/testsuite/gcc.target/powerpc/pr109167.c
new file mode 100644
index 00000000000..d490c995b14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109167.c
@@ -0,0 +1,47 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Verify there is no warning message.  */
+
+#define NO_WARN_X86_INTRINSICS 1
+
+#include <emmintrin.h>
+
+#define N 5
+
+__attribute__ ((noipa)) __m128i
+test1 (__m128i v)
+{
+  return _mm_bslli_si128 (v, N);
+}
+
+__attribute__ ((noipa)) __m128i
+test2 (__m128i v)
+{
+  return _mm_slli_si128 (v, N);
+}
+
+typedef union
+{
+  __m128i x;
+  unsigned char a[16];
+} union128i_ub;
+
+int main()
+{
+  union128i_ub v;
+  v.x
+    = _mm_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90, 80, 40, 100, 15, 98, 25, 98, 7);
+
+  union128i_ub r1, r2;
+  r1.x = test1 (v.x);
+  r2.x = test2 (v.x);
+
+  for (int i = 0; i < 16; i++)
+    if (r1.a[i] != r2.a[i])
+      __builtin_abort();
+
+  return 0;
+}
+