[BACKPORT,COMMITTED] rs6000: __Uglify non-uglified local variables in headers

Message ID 20220523172822.1689378-1-pc@us.ibm.com
State Committed
Series [BACKPORT,COMMITTED] rs6000: __Uglify non-uglified local variables in headers

Commit Message

Paul A. Clarke May 23, 2022, 5:28 p.m. UTC
Properly prefix (with "__") all local variables in the shipped headers for the
x86 compatibility intrinsics implementations.  This avoids possible problems
with usages like:
```
 #define result foo()
 #include <emmintrin.h>
```
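
For illustration, here is a minimal, hypothetical reproducer of that failure
mode (it is not part of the patch or of the testsuite).  With the old headers
the macro expands inside un-uglified locals such as the one in _mm_move_sd and
the build breaks; with the "__"-prefixed locals the header is immune to it:
```
/* Hypothetical user translation unit.  "result" is not a reserved
   identifier, so defining it as a macro is legal user code.
   Build on powerpc64le with something like: gcc -O2 -mvsx -c repro.c  */
#define NO_WARN_X86_INTRINSICS 1  /* silence the usual porting warning */
#define result foo ()             /* collides with pre-patch header locals */
#include <emmintrin.h>

__m128d
use_move_sd (__m128d a, __m128d b)
{
  return _mm_move_sd (a, b);  /* compiles cleanly once locals are __result etc. */
}
```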

2022-05-23  Paul A. Clarke  <pc@us.ibm.com>

gcc
	PR target/104257
	* config/rs6000/bmi2intrin.h: Uglify local variables.
	* config/rs6000/emmintrin.h: Likewise.
	* config/rs6000/mm_malloc.h: Likewise.
	* config/rs6000/mmintrin.h: Likewise.
	* config/rs6000/pmmintrin.h: Likewise.
	* config/rs6000/smmintrin.h: Likewise.
	* config/rs6000/tmmintrin.h: Likewise.
	* config/rs6000/xmmintrin.h: Likewise.
---
Backport: needed to protect a couple of routines (_mm_cmpord_pd and
_mm_cmpord_sd, now guarded with #if _ARCH_PWR8) that produce warnings when
compiled for Power7.  This likely needs to be forward-ported as well.
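
As a purely hypothetical smoke test for a backport like this (not part of the
GCC testsuite changes here), one can predefine a handful of the formerly
unprefixed names as macros and check that the shipped compatibility headers
still compile; nothing should expand once all locals are uglified:
```
/* smoke-test.c: hypothetical check only.  Any expansion of these
   macros inside the headers would be a syntax error, so a clean
   compile means the locals are properly uglified.
   Build on a Power8-or-later target, e.g.: gcc -O2 -mcpu=power8 -c  */
#define NO_WARN_X86_INTRINSICS 1
#define result   +
#define temp     +
#define rounded  +
#define lshift   +
#define rshift   +
#define zeros    +
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
```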

The broken change from the original patch that corrupted
"posix_memalign" is not included. :-)

 gcc/config/rs6000/bmi2intrin.h |  68 +--
 gcc/config/rs6000/emmintrin.h  | 780 +++++++++++++++--------------
 gcc/config/rs6000/mm_malloc.h  |  26 +-
 gcc/config/rs6000/mmintrin.h   | 768 ++++++++++++++---------------
 gcc/config/rs6000/pmmintrin.h  |  28 +-
 gcc/config/rs6000/smmintrin.h  |  18 +-
 gcc/config/rs6000/tmmintrin.h  |   4 +-
 gcc/config/rs6000/xmmintrin.h  | 861 ++++++++++++++++-----------------
 8 files changed, 1273 insertions(+), 1280 deletions(-)
  

Patch

diff --git a/gcc/config/rs6000/bmi2intrin.h b/gcc/config/rs6000/bmi2intrin.h
index 5b7b761b9d5e..03299e6cccd3 100644
--- a/gcc/config/rs6000/bmi2intrin.h
+++ b/gcc/config/rs6000/bmi2intrin.h
@@ -77,39 +77,39 @@  extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _pdep_u64 (unsigned long long __X, unsigned long long __M)
 {
-  unsigned long result = 0x0UL;
-  const unsigned long mask = 0x8000000000000000UL;
-  unsigned long m = __M;
-  unsigned long c, t;
-  unsigned long p;
+  unsigned long __result = 0x0UL;
+  const unsigned long __mask = 0x8000000000000000UL;
+  unsigned long __m = __M;
+  unsigned long __c, __t;
+  unsigned long __p;
 
   /* The pop-count of the mask gives the number of the bits from
    source to process.  This is also needed to shift bits from the
    source into the correct position for the result.  */
-  p = 64 - __builtin_popcountl (__M);
+  __p = 64 - __builtin_popcountl (__M);
 
   /* The loop is for the number of '1' bits in the mask and clearing
    each mask bit as it is processed.  */
-  while (m != 0)
+  while (__m != 0)
     {
-      c = __builtin_clzl (m);
-      t = __X << (p - c);
-      m ^= (mask >> c);
-      result |= (t & (mask >> c));
-      p++;
+      __c = __builtin_clzl (__m);
+      __t = __X << (__p - __c);
+      __m ^= (__mask >> __c);
+      __result |= (__t & (__mask >> __c));
+      __p++;
     }
-  return (result);
+  return __result;
 }
 
 extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _pext_u64 (unsigned long long __X, unsigned long long __M)
 {
-  unsigned long p = 0x4040404040404040UL; // initial bit permute control
-  const unsigned long mask = 0x8000000000000000UL;
-  unsigned long m = __M;
-  unsigned long c;
-  unsigned long result;
+  unsigned long __p = 0x4040404040404040UL; // initial bit permute control
+  const unsigned long __mask = 0x8000000000000000UL;
+  unsigned long __m = __M;
+  unsigned long __c;
+  unsigned long __result;
 
   /* if the mask is constant and selects 8 bits or less we can use
    the Power8 Bit permute instruction.  */
@@ -118,34 +118,34 @@  _pext_u64 (unsigned long long __X, unsigned long long __M)
       /* Also if the pext mask is constant, then the popcount is
        constant, we can evaluate the following loop at compile
        time and use a constant bit permute vector.  */
-      for (long i = 0; i < __builtin_popcountl (__M); i++)
+      for (long __i = 0; __i < __builtin_popcountl (__M); __i++)
 	{
-	  c = __builtin_clzl (m);
-	  p = (p << 8) | c;
-	  m ^= (mask >> c);
+	  __c = __builtin_clzl (__m);
+	  __p = (__p << 8) | __c;
+	  __m ^= (__mask >> __c);
 	}
-      result = __builtin_bpermd (p, __X);
+      __result = __builtin_bpermd (__p, __X);
     }
   else
     {
-      p = 64 - __builtin_popcountl (__M);
-      result = 0;
+      __p = 64 - __builtin_popcountl (__M);
+      __result = 0;
       /* We could a use a for loop here, but that combined with
        -funroll-loops can expand to a lot of code.  The while
        loop avoids unrolling and the compiler commons the xor
-       from clearing the mask bit with the (m != 0) test.  The
+       from clearing the mask bit with the (__m != 0) test.  The
        result is a more compact loop setup and body.  */
-      while (m != 0)
+      while (__m != 0)
 	{
-	  unsigned long t;
-	  c = __builtin_clzl (m);
-	  t = (__X & (mask >> c)) >> (p - c);
-	  m ^= (mask >> c);
-	  result |= (t);
-	  p++;
+	  unsigned long __t;
+	  __c = __builtin_clzl (__m);
+	  __t = (__X & (__mask >> __c)) >> (__p - __c);
+	  __m ^= (__mask >> __c);
+	  __result |= (__t);
+	  __p++;
 	}
     }
-  return (result);
+  return __result;
 }
 
 /* these 32-bit implementations depend on 64-bit pdep/pext
diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
index ce1287edf782..991368095ddd 100644
--- a/gcc/config/rs6000/emmintrin.h
+++ b/gcc/config/rs6000/emmintrin.h
@@ -141,9 +141,9 @@  _mm_setzero_pd (void)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_move_sd (__m128d __A, __m128d __B)
 {
-  __v2df result = (__v2df) __A;
-  result [0] = ((__v2df) __B)[0];
-  return (__m128d) result;
+  __v2df __result = (__v2df) __A;
+  __result [0] = ((__v2df) __B)[0];
+  return (__m128d) __result;
 }
 
 /* Load two DPFP values from P.  The address must be 16-byte aligned.  */
@@ -329,9 +329,9 @@  _mm_sqrt_pd (__m128d __A)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sqrt_sd (__m128d __A, __m128d __B)
 {
-  __v2df c;
-  c = vec_sqrt ((__v2df) _mm_set1_pd (__B[0]));
-  return (__m128d) _mm_setr_pd (c[0], __A[1]);
+  __v2df __c;
+  __c = vec_sqrt ((__v2df) _mm_set1_pd (__B[0]));
+  return (__m128d) _mm_setr_pd (__c[0], __A[1]);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -343,11 +343,11 @@  _mm_min_pd (__m128d __A, __m128d __B)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_sd (__m128d __A, __m128d __B)
 {
-  __v2df a, b, c;
-  a = vec_splats (__A[0]);
-  b = vec_splats (__B[0]);
-  c = vec_min (a, b);
-  return (__m128d) _mm_setr_pd (c[0], __A[1]);
+  __v2df __a, __b, __c;
+  __a = vec_splats (__A[0]);
+  __b = vec_splats (__B[0]);
+  __c = vec_min (__a, __b);
+  return (__m128d) _mm_setr_pd (__c[0], __A[1]);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -359,11 +359,11 @@  _mm_max_pd (__m128d __A, __m128d __B)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_sd (__m128d __A, __m128d __B)
 {
-  __v2df a, b, c;
-  a = vec_splats (__A[0]);
-  b = vec_splats (__B[0]);
-  c = vec_max (a, b);
-  return (__m128d) _mm_setr_pd (c[0], __A[1]);
+  __v2df __a, __b, __c;
+  __a = vec_splats (__A[0]);
+  __b = vec_splats (__B[0]);
+  __c = vec_max (__a, __b);
+  return (__m128d) _mm_setr_pd (__c[0], __A[1]);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -399,8 +399,8 @@  _mm_cmpge_pd (__m128d __A, __m128d __B)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpneq_pd (__m128d __A, __m128d __B)
 {
-  __v2df temp = (__v2df) vec_cmpeq ((__v2df) __A, (__v2df)__B);
-  return ((__m128d)vec_nor (temp, temp));
+  __v2df __temp = (__v2df) vec_cmpeq ((__v2df) __A, (__v2df)__B);
+  return ((__m128d)vec_nor (__temp, __temp));
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -427,26 +427,18 @@  _mm_cmpnge_pd (__m128d __A, __m128d __B)
   return ((__m128d)vec_cmplt ((__v2df) __A, (__v2df) __B));
 }
 
+#if _ARCH_PWR8
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpord_pd (__m128d __A, __m128d __B)
 {
-#if _ARCH_PWR8
   __v2du c, d;
   /* Compare against self will return false (0's) if NAN.  */
   c = (__v2du)vec_cmpeq (__A, __A);
   d = (__v2du)vec_cmpeq (__B, __B);
-#else
-  __v2du a, b;
-  __v2du c, d;
-  const __v2du double_exp_mask  = {0x7ff0000000000000, 0x7ff0000000000000};
-  a = (__v2du)vec_abs ((__v2df)__A);
-  b = (__v2du)vec_abs ((__v2df)__B);
-  c = (__v2du)vec_cmpgt (double_exp_mask, a);
-  d = (__v2du)vec_cmpgt (double_exp_mask, b);
-#endif
   /* A != NAN and B != NAN.  */
   return ((__m128d)vec_and(c, d));
 }
+#endif
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpunord_pd (__m128d __A, __m128d __B)
@@ -583,6 +575,7 @@  _mm_cmpnge_sd (__m128d __A, __m128d __B)
   return (__m128d) _mm_setr_pd (c[0], __A[1]);
 }
 
+#if _ARCH_PWR8
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpord_sd (__m128d __A, __m128d __B)
 {
@@ -590,6 +583,7 @@  _mm_cmpord_sd (__m128d __A, __m128d __B)
   r = (__v2df)_mm_cmpord_pd (vec_splats (__A[0]), vec_splats (__B[0]));
   return (__m128d) _mm_setr_pd (r[0], ((__v2df)__A)[1]);
 }
+#endif
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpunord_sd (__m128d __A, __m128d __B)
@@ -855,12 +849,12 @@  _mm_setzero_si128 (void)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtepi32_pd (__m128i __A)
 {
-  __v2di val;
+  __v2di __val;
   /* For LE need to generate Vector Unpack Low Signed Word.
      Which is generated from unpackh.  */
-  val = (__v2di)vec_unpackh ((__v4si)__A);
+  __val = (__v2di)vec_unpackh ((__v4si)__A);
 
-  return (__m128d)vec_ctf (val, 0);
+  return (__m128d)vec_ctf (__val, 0);
 }
 #endif
 
@@ -873,116 +867,116 @@  _mm_cvtepi32_ps (__m128i __A)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpd_epi32 (__m128d __A)
 {
-  __v2df rounded = vec_rint (__A);
-  __v4si result, temp;
-  const __v4si vzero =
+  __v2df __rounded = vec_rint (__A);
+  __v4si __result, __temp;
+  const __v4si __vzero =
     { 0, 0, 0, 0 };
 
   /* VSX Vector truncate Double-Precision to integer and Convert to
    Signed Integer Word format with Saturate.  */
   __asm__(
       "xvcvdpsxws %x0,%x1"
-      : "=wa" (temp)
-      : "wa" (rounded)
+      : "=wa" (__temp)
+      : "wa" (__rounded)
       : );
 
 #ifdef _ARCH_PWR8
 #ifdef __LITTLE_ENDIAN__
-  temp = vec_mergeo (temp, temp);
+  __temp = vec_mergeo (__temp, __temp);
 #else
-  temp = vec_mergee (temp, temp);
+  __temp = vec_mergee (__temp, __temp);
 #endif
-  result = (__v4si) vec_vpkudum ((__vector long long) temp,
-				 (__vector long long) vzero);
+  __result = (__v4si) vec_vpkudum ((__vector long long) __temp,
+				 (__vector long long) __vzero);
 #else
   {
-    const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
+    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 	0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f };
-    result = (__v4si) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm);
+    __result = (__v4si) vec_perm ((__v16qu) __temp, (__v16qu) __vzero, __pkperm);
   }
 #endif
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpd_pi32 (__m128d __A)
 {
-  __m128i result = _mm_cvtpd_epi32(__A);
+  __m128i __result = _mm_cvtpd_epi32(__A);
 
-  return (__m64) result[0];
+  return (__m64) __result[0];
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpd_ps (__m128d __A)
 {
-  __v4sf result;
-  __v4si temp;
-  const __v4si vzero = { 0, 0, 0, 0 };
+  __v4sf __result;
+  __v4si __temp;
+  const __v4si __vzero = { 0, 0, 0, 0 };
 
   __asm__(
       "xvcvdpsp %x0,%x1"
-      : "=wa" (temp)
+      : "=wa" (__temp)
       : "wa" (__A)
       : );
 
 #ifdef _ARCH_PWR8
 #ifdef __LITTLE_ENDIAN__
-  temp = vec_mergeo (temp, temp);
+  __temp = vec_mergeo (__temp, __temp);
 #else
-  temp = vec_mergee (temp, temp);
+  __temp = vec_mergee (__temp, __temp);
 #endif
-  result = (__v4sf) vec_vpkudum ((__vector long long) temp,
-				 (__vector long long) vzero);
+  __result = (__v4sf) vec_vpkudum ((__vector long long) __temp,
+				 (__vector long long) __vzero);
 #else
   {
-    const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
+    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 	0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f };
-    result = (__v4sf) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm);
+    __result = (__v4sf) vec_perm ((__v16qu) __temp, (__v16qu) __vzero, __pkperm);
   }
 #endif
-  return ((__m128)result);
+  return ((__m128)__result);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttpd_epi32 (__m128d __A)
 {
-  __v4si result;
-  __v4si temp;
-  const __v4si vzero = { 0, 0, 0, 0 };
+  __v4si __result;
+  __v4si __temp;
+  const __v4si __vzero = { 0, 0, 0, 0 };
 
   /* VSX Vector truncate Double-Precision to integer and Convert to
    Signed Integer Word format with Saturate.  */
   __asm__(
       "xvcvdpsxws %x0,%x1"
-      : "=wa" (temp)
+      : "=wa" (__temp)
       : "wa" (__A)
       : );
 
 #ifdef _ARCH_PWR8
 #ifdef __LITTLE_ENDIAN__
-  temp = vec_mergeo (temp, temp);
+  __temp = vec_mergeo (__temp, __temp);
 #else
-  temp = vec_mergee (temp, temp);
+  __temp = vec_mergee (__temp, __temp);
 #endif
-  result = (__v4si) vec_vpkudum ((__vector long long) temp,
-				 (__vector long long) vzero);
+  __result = (__v4si) vec_vpkudum ((__vector long long) __temp,
+				 (__vector long long) __vzero);
 #else
   {
-    const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
+    const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
 	0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f };
-    result = (__v4si) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm);
+    __result = (__v4si) vec_perm ((__v16qu) __temp, (__v16qu) __vzero, __pkperm);
   }
 #endif
 
-  return ((__m128i) result);
+  return ((__m128i) __result);
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttpd_pi32 (__m128d __A)
 {
-  __m128i result = _mm_cvttpd_epi32 (__A);
+  __m128i __result = _mm_cvttpd_epi32 (__A);
 
-  return (__m64) result[0];
+  return (__m64) __result[0];
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -995,35 +989,35 @@  _mm_cvtsi128_si32 (__m128i __A)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpi32_pd (__m64 __A)
 {
-  __v4si temp;
-  __v2di tmp2;
-  __v2df result;
+  __v4si __temp;
+  __v2di __tmp2;
+  __v2df __result;
 
-  temp = (__v4si)vec_splats (__A);
-  tmp2 = (__v2di)vec_unpackl (temp);
-  result = vec_ctf ((__vector signed long long) tmp2, 0);
-  return (__m128d)result;
+  __temp = (__v4si)vec_splats (__A);
+  __tmp2 = (__v2di)vec_unpackl (__temp);
+  __result = vec_ctf ((__vector signed long long) __tmp2, 0);
+  return (__m128d)__result;
 }
 #endif
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtps_epi32 (__m128 __A)
 {
-  __v4sf rounded;
-  __v4si result;
+  __v4sf __rounded;
+  __v4si __result;
 
-  rounded = vec_rint((__v4sf) __A);
-  result = vec_cts (rounded, 0);
-  return (__m128i) result;
+  __rounded = vec_rint((__v4sf) __A);
+  __result = vec_cts (__rounded, 0);
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttps_epi32 (__m128 __A)
 {
-  __v4si result;
+  __v4si __result;
 
-  result = vec_cts ((__v4sf) __A, 0);
-  return (__m128i) result;
+  __result = vec_cts ((__v4sf) __A, 0);
+  return (__m128i) __result;
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1035,48 +1029,48 @@  _mm_cvtps_pd (__m128 __A)
 #else
   /* Otherwise the compiler is not current and so need to generate the
      equivalent code.  */
-  __v4sf a = (__v4sf)__A;
-  __v4sf temp;
-  __v2df result;
+  __v4sf __a = (__v4sf)__A;
+  __v4sf __temp;
+  __v2df __result;
 #ifdef __LITTLE_ENDIAN__
   /* The input float values are in elements {[0], [1]} but the convert
      instruction needs them in elements {[1], [3]}, So we use two
      shift left double vector word immediates to get the elements
      lined up.  */
-  temp = __builtin_vsx_xxsldwi (a, a, 3);
-  temp = __builtin_vsx_xxsldwi (a, temp, 2);
+  __temp = __builtin_vsx_xxsldwi (__a, __a, 3);
+  __temp = __builtin_vsx_xxsldwi (__a, __temp, 2);
 #else
   /* The input float values are in elements {[0], [1]} but the convert
      instruction needs them in elements {[0], [2]}, So we use two
      shift left double vector word immediates to get the elements
      lined up.  */
-  temp = vec_vmrghw (a, a);
+  __temp = vec_vmrghw (__a, __a);
 #endif
   __asm__(
       " xvcvspdp %x0,%x1"
-      : "=wa" (result)
-      : "wa" (temp)
+      : "=wa" (__result)
+      : "wa" (__temp)
       : );
-  return (__m128d) result;
+  return (__m128d) __result;
 #endif
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsd_si32 (__m128d __A)
 {
-  __v2df rounded = vec_rint((__v2df) __A);
-  int result = ((__v2df)rounded)[0];
+  __v2df __rounded = vec_rint((__v2df) __A);
+  int __result = ((__v2df)__rounded)[0];
 
-  return result;
+  return __result;
 }
 /* Intel intrinsic.  */
 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsd_si64 (__m128d __A)
 {
-  __v2df rounded = vec_rint ((__v2df) __A );
-  long long result = ((__v2df) rounded)[0];
+  __v2df __rounded = vec_rint ((__v2df) __A );
+  long long __result = ((__v2df) __rounded)[0];
 
-  return result;
+  return __result;
 }
 
 /* Microsoft intrinsic.  */
@@ -1089,18 +1083,18 @@  _mm_cvtsd_si64x (__m128d __A)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttsd_si32 (__m128d __A)
 {
-  int result = ((__v2df)__A)[0];
+  int __result = ((__v2df)__A)[0];
 
-  return result;
+  return __result;
 }
 
 /* Intel intrinsic.  */
 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttsd_si64 (__m128d __A)
 {
-  long long result = ((__v2df)__A)[0];
+  long long __result = ((__v2df)__A)[0];
 
-  return result;
+  return __result;
 }
 
 /* Microsoft intrinsic.  */
@@ -1113,46 +1107,46 @@  _mm_cvttsd_si64x (__m128d __A)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsd_ss (__m128 __A, __m128d __B)
 {
-  __v4sf result = (__v4sf)__A;
+  __v4sf __result = (__v4sf)__A;
 
 #ifdef __LITTLE_ENDIAN__
-  __v4sf temp_s;
+  __v4sf __temp_s;
   /* Copy double element[0] to element [1] for conversion.  */
-  __v2df temp_b = vec_splat((__v2df)__B, 0);
+  __v2df __temp_b = vec_splat((__v2df)__B, 0);
 
   /* Pre-rotate __A left 3 (logically right 1) elements.  */
-  result = __builtin_vsx_xxsldwi (result, result, 3);
+  __result = __builtin_vsx_xxsldwi (__result, __result, 3);
   /* Convert double to single float scalar in a vector.  */
   __asm__(
       "xscvdpsp %x0,%x1"
-      : "=wa" (temp_s)
-      : "wa" (temp_b)
+      : "=wa" (__temp_s)
+      : "wa" (__temp_b)
       : );
   /* Shift the resulting scalar into vector element [0].  */
-  result = __builtin_vsx_xxsldwi (result, temp_s, 1);
+  __result = __builtin_vsx_xxsldwi (__result, __temp_s, 1);
 #else
-  result [0] = ((__v2df)__B)[0];
+  __result [0] = ((__v2df)__B)[0];
 #endif
-  return (__m128) result;
+  return (__m128) __result;
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsi32_sd (__m128d __A, int __B)
 {
-  __v2df result = (__v2df)__A;
-  double db = __B;
-  result [0] = db;
-  return (__m128d)result;
+  __v2df __result = (__v2df)__A;
+  double __db = __B;
+  __result [0] = __db;
+  return (__m128d)__result;
 }
 
 /* Intel intrinsic.  */
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsi64_sd (__m128d __A, long long __B)
 {
-  __v2df result = (__v2df)__A;
-  double db = __B;
-  result [0] = db;
-  return (__m128d)result;
+  __v2df __result = (__v2df)__A;
+  double __db = __B;
+  __result [0] = __db;
+  return (__m128d)__result;
 }
 
 /* Microsoft intrinsic.  */
@@ -1167,45 +1161,45 @@  _mm_cvtss_sd (__m128d __A, __m128 __B)
 {
 #ifdef __LITTLE_ENDIAN__
   /* Use splat to move element [0] into position for the convert. */
-  __v4sf temp = vec_splat ((__v4sf)__B, 0);
-  __v2df res;
+  __v4sf __temp = vec_splat ((__v4sf)__B, 0);
+  __v2df __res;
   /* Convert single float scalar to double in a vector.  */
   __asm__(
       "xscvspdp %x0,%x1"
-      : "=wa" (res)
-      : "wa" (temp)
+      : "=wa" (__res)
+      : "wa" (__temp)
       : );
-  return (__m128d) vec_mergel (res, (__v2df)__A);
+  return (__m128d) vec_mergel (__res, (__v2df)__A);
 #else
-  __v2df res = (__v2df)__A;
-  res [0] = ((__v4sf)__B) [0];
-  return (__m128d) res;
+  __v2df __res = (__v2df)__A;
+  __res [0] = ((__v4sf)__B) [0];
+  return (__m128d) __res;
 #endif
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
 {
-  __vector double result;
-  const int litmsk = __mask & 0x3;
+  __vector double __result;
+  const int __litmsk = __mask & 0x3;
 
-  if (litmsk == 0)
-    result = vec_mergeh (__A, __B);
+  if (__litmsk == 0)
+    __result = vec_mergeh (__A, __B);
 #if __GNUC__ < 6
-  else if (litmsk == 1)
-    result = vec_xxpermdi (__B, __A, 2);
-  else if (litmsk == 2)
-    result = vec_xxpermdi (__B, __A, 1);
+  else if (__litmsk == 1)
+    __result = vec_xxpermdi (__B, __A, 2);
+  else if (__litmsk == 2)
+    __result = vec_xxpermdi (__B, __A, 1);
 #else
-  else if (litmsk == 1)
-    result = vec_xxpermdi (__A, __B, 2);
-  else if (litmsk == 2)
-    result = vec_xxpermdi (__A, __B, 1);
+  else if (__litmsk == 1)
+    __result = vec_xxpermdi (__A, __B, 2);
+  else if (__litmsk == 2)
+    __result = vec_xxpermdi (__A, __B, 1);
 #endif
   else
-    result = vec_mergel (__A, __B);
+    __result = vec_mergel (__A, __B);
 
-  return result;
+  return __result;
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1223,17 +1217,17 @@  _mm_unpacklo_pd (__m128d __A, __m128d __B)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadh_pd (__m128d __A, double const *__B)
 {
-  __v2df result = (__v2df)__A;
-  result [1] = *__B;
-  return (__m128d)result;
+  __v2df __result = (__v2df)__A;
+  __result [1] = *__B;
+  return (__m128d)__result;
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_loadl_pd (__m128d __A, double const *__B)
 {
-  __v2df result = (__v2df)__A;
-  result [0] = *__B;
-  return (__m128d)result;
+  __v2df __result = (__v2df)__A;
+  __result [0] = *__B;
+  return (__m128d)__result;
 }
 
 #ifdef _ARCH_PWR8
@@ -1243,8 +1237,8 @@  _mm_loadl_pd (__m128d __A, double const *__B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_pd (__m128d  __A)
 {
-  __vector unsigned long long result;
-  static const __vector unsigned int perm_mask =
+  __vector unsigned long long __result;
+  static const __vector unsigned int __perm_mask =
     {
 #ifdef __LITTLE_ENDIAN__
 	0x80800040, 0x80808080, 0x80808080, 0x80808080
@@ -1253,14 +1247,14 @@  _mm_movemask_pd (__m128d  __A)
 #endif
     };
 
-  result = ((__vector unsigned long long)
+  __result = ((__vector unsigned long long)
 	    vec_vbpermq ((__vector unsigned char) __A,
-			 (__vector unsigned char) perm_mask));
+			 (__vector unsigned char) __perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
-  return result[1];
+  return __result[1];
 #else
-  return result[0];
+  return __result[0];
 #endif
 }
 #endif /* _ARCH_PWR8 */
@@ -1432,17 +1426,17 @@  _mm_subs_epu16 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_madd_epi16 (__m128i __A, __m128i __B)
 {
-  __vector signed int zero = {0, 0, 0, 0};
+  __vector signed int __zero = {0, 0, 0, 0};
 
-  return (__m128i) vec_vmsumshm ((__v8hi)__A, (__v8hi)__B, zero);
+  return (__m128i) vec_vmsumshm ((__v8hi)__A, (__v8hi)__B, __zero);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mulhi_epi16 (__m128i __A, __m128i __B)
 {
-  __vector signed int w0, w1;
+  __vector signed int __w0, __w1;
 
-  __vector unsigned char xform1 = {
+  __vector unsigned char __xform1 = {
 #ifdef __LITTLE_ENDIAN__
       0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
       0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
@@ -1452,9 +1446,9 @@  _mm_mulhi_epi16 (__m128i __A, __m128i __B)
 #endif
     };
 
-  w0 = vec_vmulesh ((__v8hi)__A, (__v8hi)__B);
-  w1 = vec_vmulosh ((__v8hi)__A, (__v8hi)__B);
-  return (__m128i) vec_perm (w0, w1, xform1);
+  __w0 = vec_vmulesh ((__v8hi)__A, (__v8hi)__B);
+  __w1 = vec_vmulosh ((__v8hi)__A, (__v8hi)__B);
+  return (__m128i) vec_perm (__w0, __w1, __xform1);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1466,34 +1460,34 @@  _mm_mullo_epi16 (__m128i __A, __m128i __B)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_su32 (__m64 __A, __m64 __B)
 {
-  unsigned int a = __A;
-  unsigned int b = __B;
+  unsigned int __a = __A;
+  unsigned int __b = __B;
 
-  return ((__m64)a * (__m64)b);
+  return ((__m64)__a * (__m64)__b);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_epu32 (__m128i __A, __m128i __B)
 {
-#if __GNUC__ < 8
-  __v2du result;
+#if __GNUC__ < 8 || !defined (_ARCH_PWR8)
+  __v2du __result;
 
 #ifdef __LITTLE_ENDIAN__
   /* VMX Vector Multiply Odd Unsigned Word.  */
   __asm__(
       "vmulouw %0,%1,%2"
-      : "=v" (result)
+      : "=v" (__result)
       : "v" (__A), "v" (__B)
       : );
 #else
   /* VMX Vector Multiply Even Unsigned Word.  */
   __asm__(
       "vmuleuw %0,%1,%2"
-      : "=v" (result)
+      : "=v" (__result)
       : "v" (__A), "v" (__B)
       : );
 #endif
-  return (__m128i) result;
+  return (__m128i) __result;
 #else
   return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B);
 #endif
@@ -1502,122 +1496,122 @@  _mm_mul_epu32 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_epi16 (__m128i __A, int __B)
 {
-  __v8hu lshift;
-  __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v8hu __lshift;
+  __v8hi __result = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
   if (__B >= 0 && __B < 16)
     {
       if (__builtin_constant_p(__B))
-	lshift = (__v8hu) vec_splat_s16(__B);
+	__lshift = (__v8hu) vec_splat_s16(__B);
       else
-	lshift = vec_splats ((unsigned short) __B);
+	__lshift = vec_splats ((unsigned short) __B);
 
-      result = vec_sl ((__v8hi) __A, lshift);
+      __result = vec_sl ((__v8hi) __A, __lshift);
     }
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_epi32 (__m128i __A, int __B)
 {
-  __v4su lshift;
-  __v4si result = { 0, 0, 0, 0 };
+  __v4su __lshift;
+  __v4si __result = { 0, 0, 0, 0 };
 
   if (__B >= 0 && __B < 32)
     {
       if (__builtin_constant_p(__B) && __B < 16)
-	lshift = (__v4su) vec_splat_s32(__B);
+	__lshift = (__v4su) vec_splat_s32(__B);
       else
-	lshift = vec_splats ((unsigned int) __B);
+	__lshift = vec_splats ((unsigned int) __B);
 
-      result = vec_sl ((__v4si) __A, lshift);
+      __result = vec_sl ((__v4si) __A, __lshift);
     }
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 #ifdef _ARCH_PWR8
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_epi64 (__m128i __A, int __B)
 {
-  __v2du lshift;
-  __v2di result = { 0, 0 };
+  __v2du __lshift;
+  __v2di __result = { 0, 0 };
 
   if (__B >= 0 && __B < 64)
     {
       if (__builtin_constant_p(__B) && __B < 16)
-	lshift = (__v2du) vec_splat_s32(__B);
+	__lshift = (__v2du) vec_splat_s32(__B);
       else
-	lshift = (__v2du) vec_splats ((unsigned int) __B);
+	__lshift = (__v2du) vec_splats ((unsigned int) __B);
 
-      result = vec_sl ((__v2di) __A, lshift);
+      __result = vec_sl ((__v2di) __A, __lshift);
     }
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 #endif
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srai_epi16 (__m128i __A, int __B)
 {
-  __v8hu rshift = { 15, 15, 15, 15, 15, 15, 15, 15 };
-  __v8hi result;
+  __v8hu __rshift = { 15, 15, 15, 15, 15, 15, 15, 15 };
+  __v8hi __result;
 
   if (__B < 16)
     {
       if (__builtin_constant_p(__B))
-	rshift = (__v8hu) vec_splat_s16(__B);
+	__rshift = (__v8hu) vec_splat_s16(__B);
       else
-	rshift = vec_splats ((unsigned short) __B);
+	__rshift = vec_splats ((unsigned short) __B);
     }
-  result = vec_sra ((__v8hi) __A, rshift);
+  __result = vec_sra ((__v8hi) __A, __rshift);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srai_epi32 (__m128i __A, int __B)
 {
-  __v4su rshift = { 31, 31, 31, 31 };
-  __v4si result;
+  __v4su __rshift = { 31, 31, 31, 31 };
+  __v4si __result;
 
   if (__B < 32)
     {
       if (__builtin_constant_p(__B))
 	{
 	  if (__B < 16)
-	      rshift = (__v4su) vec_splat_s32(__B);
+	      __rshift = (__v4su) vec_splat_s32(__B);
 	    else
-	      rshift = (__v4su) vec_splats((unsigned int)__B);
+	      __rshift = (__v4su) vec_splats((unsigned int)__B);
 	}
       else
-	rshift = vec_splats ((unsigned int) __B);
+	__rshift = vec_splats ((unsigned int) __B);
     }
-  result = vec_sra ((__v4si) __A, rshift);
+  __result = vec_sra ((__v4si) __A, __rshift);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_bslli_si128 (__m128i __A, const int __N)
 {
-  __v16qu result;
-  const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v16qu __result;
+  const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
   if (__N < 16)
-    result = vec_sld ((__v16qu) __A, zeros, __N);
+    __result = vec_sld ((__v16qu) __A, __zeros, __N);
   else
-    result = zeros;
+    __result = __zeros;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_bsrli_si128 (__m128i __A, const int __N)
 {
-  __v16qu result;
-  const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v16qu __result;
+  const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
   if (__N < 16)
 #ifdef __LITTLE_ENDIAN__
@@ -1625,21 +1619,21 @@  _mm_bsrli_si128 (__m128i __A, const int __N)
       /* Would like to use Vector Shift Left Double by Octet
 	 Immediate here to use the immediate form and avoid
 	 load of __N * 8 value into a separate VR.  */
-      result = vec_sld (zeros, (__v16qu) __A, (16 - __N));
+      __result = vec_sld (__zeros, (__v16qu) __A, (16 - __N));
     else
 #endif
       {
-	__v16qu shift = vec_splats((unsigned char)(__N*8));
+	__v16qu __shift = vec_splats((unsigned char)(__N*8));
 #ifdef __LITTLE_ENDIAN__
-	result = vec_sro ((__v16qu)__A, shift);
+	__result = vec_sro ((__v16qu)__A, __shift);
 #else
-	result = vec_slo ((__v16qu)__A, shift);
+	__result = vec_slo ((__v16qu)__A, __shift);
 #endif
       }
   else
-    result = zeros;
+    __result = __zeros;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1651,239 +1645,239 @@  _mm_srli_si128 (__m128i __A, const int __N)
 extern __inline  __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_si128 (__m128i __A, const int _imm5)
 {
-  __v16qu result;
-  const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v16qu __result;
+  const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
   if (_imm5 < 16)
 #ifdef __LITTLE_ENDIAN__
-    result = vec_sld ((__v16qu) __A, zeros, _imm5);
+    __result = vec_sld ((__v16qu) __A, __zeros, _imm5);
 #else
-    result = vec_sld (zeros, (__v16qu) __A, (16 - _imm5));
+    __result = vec_sld (__zeros, (__v16qu) __A, (16 - _imm5));
 #endif
   else
-    result = zeros;
+    __result = __zeros;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 
 _mm_srli_epi16 (__m128i  __A, int __B)
 {
-  __v8hu rshift;
-  __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v8hu __rshift;
+  __v8hi __result = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
   if (__B < 16)
     {
       if (__builtin_constant_p(__B))
-	rshift = (__v8hu) vec_splat_s16(__B);
+	__rshift = (__v8hu) vec_splat_s16(__B);
       else
-	rshift = vec_splats ((unsigned short) __B);
+	__rshift = vec_splats ((unsigned short) __B);
 
-      result = vec_sr ((__v8hi) __A, rshift);
+      __result = vec_sr ((__v8hi) __A, __rshift);
     }
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srli_epi32 (__m128i __A, int __B)
 {
-  __v4su rshift;
-  __v4si result = { 0, 0, 0, 0 };
+  __v4su __rshift;
+  __v4si __result = { 0, 0, 0, 0 };
 
   if (__B < 32)
     {
       if (__builtin_constant_p(__B))
 	{
 	  if (__B < 16)
-	      rshift = (__v4su) vec_splat_s32(__B);
+	      __rshift = (__v4su) vec_splat_s32(__B);
 	    else
-	      rshift = (__v4su) vec_splats((unsigned int)__B);
+	      __rshift = (__v4su) vec_splats((unsigned int)__B);
 	}
       else
-	rshift = vec_splats ((unsigned int) __B);
+	__rshift = vec_splats ((unsigned int) __B);
 
-      result = vec_sr ((__v4si) __A, rshift);
+      __result = vec_sr ((__v4si) __A, __rshift);
     }
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 #ifdef _ARCH_PWR8
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srli_epi64 (__m128i __A, int __B)
 {
-  __v2du rshift;
-  __v2di result = { 0, 0 };
+  __v2du __rshift;
+  __v2di __result = { 0, 0 };
 
   if (__B < 64)
     {
       if (__builtin_constant_p(__B))
 	{
 	  if (__B < 16)
-	      rshift = (__v2du) vec_splat_s32(__B);
+	      __rshift = (__v2du) vec_splat_s32(__B);
 	    else
-	      rshift = (__v2du) vec_splats((unsigned long long)__B);
+	      __rshift = (__v2du) vec_splats((unsigned long long)__B);
 	}
       else
-	rshift = (__v2du) vec_splats ((unsigned int) __B);
+	__rshift = (__v2du) vec_splats ((unsigned int) __B);
 
-      result = vec_sr ((__v2di) __A, rshift);
+      __result = vec_sr ((__v2di) __A, __rshift);
     }
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 #endif
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sll_epi16 (__m128i __A, __m128i __B)
 {
-  __v8hu lshift;
-  __vector __bool short shmask;
-  const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
-  __v8hu result;
+  __v8hu __lshift;
+  __vector __bool short __shmask;
+  const __v8hu __shmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
+  __v8hu __result;
 
 #ifdef __LITTLE_ENDIAN__
-  lshift = vec_splat ((__v8hu) __B, 0);
+  __lshift = vec_splat ((__v8hu) __B, 0);
 #else
-  lshift = vec_splat ((__v8hu) __B, 3);
+  __lshift = vec_splat ((__v8hu) __B, 3);
 #endif
-  shmask = vec_cmple (lshift, shmax);
-  result = vec_sl ((__v8hu) __A, lshift);
-  result = vec_sel ((__v8hu) shmask, result, shmask);
+  __shmask = vec_cmple (__lshift, __shmax);
+  __result = vec_sl ((__v8hu) __A, __lshift);
+  __result = vec_sel ((__v8hu) __shmask, __result, __shmask);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sll_epi32 (__m128i __A, __m128i __B)
 {
-  __v4su lshift;
-  __vector __bool int shmask;
-  const __v4su shmax = { 32, 32, 32, 32 };
-  __v4su result;
+  __v4su __lshift;
+  __vector __bool int __shmask;
+  const __v4su __shmax = { 32, 32, 32, 32 };
+  __v4su __result;
 #ifdef __LITTLE_ENDIAN__
-  lshift = vec_splat ((__v4su) __B, 0);
+  __lshift = vec_splat ((__v4su) __B, 0);
 #else
-  lshift = vec_splat ((__v4su) __B, 1);
+  __lshift = vec_splat ((__v4su) __B, 1);
 #endif
-  shmask = vec_cmplt (lshift, shmax);
-  result = vec_sl ((__v4su) __A, lshift);
-  result = vec_sel ((__v4su) shmask, result, shmask);
+  __shmask = vec_cmplt (__lshift, __shmax);
+  __result = vec_sl ((__v4su) __A, __lshift);
+  __result = vec_sel ((__v4su) __shmask, __result, __shmask);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 #ifdef _ARCH_PWR8
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sll_epi64 (__m128i __A, __m128i __B)
 {
-  __v2du lshift;
-  __vector __bool long long shmask;
-  const __v2du shmax = { 64, 64 };
-  __v2du result;
+  __v2du __lshift;
+  __vector __bool long long __shmask;
+  const __v2du __shmax = { 64, 64 };
+  __v2du __result;
 
-  lshift = vec_splat ((__v2du) __B, 0);
-  shmask = vec_cmplt (lshift, shmax);
-  result = vec_sl ((__v2du) __A, lshift);
-  result = vec_sel ((__v2du) shmask, result, shmask);
+  __lshift = vec_splat ((__v2du) __B, 0);
+  __shmask = vec_cmplt (__lshift, __shmax);
+  __result = vec_sl ((__v2du) __A, __lshift);
+  __result = vec_sel ((__v2du) __shmask, __result, __shmask);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 #endif
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sra_epi16 (__m128i __A, __m128i __B)
 {
-  const __v8hu rshmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
-  __v8hu rshift;
-  __v8hi result;
+  const __v8hu __rshmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
+  __v8hu __rshift;
+  __v8hi __result;
 
 #ifdef __LITTLE_ENDIAN__
-  rshift = vec_splat ((__v8hu)__B, 0);
+  __rshift = vec_splat ((__v8hu)__B, 0);
 #else
-  rshift = vec_splat ((__v8hu)__B, 3);
+  __rshift = vec_splat ((__v8hu)__B, 3);
 #endif
-  rshift = vec_min (rshift, rshmax);
-  result = vec_sra ((__v8hi) __A, rshift);
+  __rshift = vec_min (__rshift, __rshmax);
+  __result = vec_sra ((__v8hi) __A, __rshift);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sra_epi32 (__m128i __A, __m128i __B)
 {
-  const __v4su rshmax = { 31, 31, 31, 31 };
-  __v4su rshift;
-  __v4si result;
+  const __v4su __rshmax = { 31, 31, 31, 31 };
+  __v4su __rshift;
+  __v4si __result;
 
 #ifdef __LITTLE_ENDIAN__
-  rshift = vec_splat ((__v4su)__B, 0);
+  __rshift = vec_splat ((__v4su)__B, 0);
 #else
-  rshift = vec_splat ((__v4su)__B, 1);
+  __rshift = vec_splat ((__v4su)__B, 1);
 #endif
-  rshift = vec_min (rshift, rshmax);
-  result = vec_sra ((__v4si) __A, rshift);
+  __rshift = vec_min (__rshift, __rshmax);
+  __result = vec_sra ((__v4si) __A, __rshift);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srl_epi16 (__m128i __A, __m128i __B)
 {
-  __v8hu rshift;
-  __vector __bool short shmask;
-  const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
-  __v8hu result;
+  __v8hu __rshift;
+  __vector __bool short __shmask;
+  const __v8hu __shmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
+  __v8hu __result;
 
 #ifdef __LITTLE_ENDIAN__
-  rshift = vec_splat ((__v8hu) __B, 0);
+  __rshift = vec_splat ((__v8hu) __B, 0);
 #else
-  rshift = vec_splat ((__v8hu) __B, 3);
+  __rshift = vec_splat ((__v8hu) __B, 3);
 #endif
-  shmask = vec_cmple (rshift, shmax);
-  result = vec_sr ((__v8hu) __A, rshift);
-  result = vec_sel ((__v8hu) shmask, result, shmask);
+  __shmask = vec_cmple (__rshift, __shmax);
+  __result = vec_sr ((__v8hu) __A, __rshift);
+  __result = vec_sel ((__v8hu) __shmask, __result, __shmask);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srl_epi32 (__m128i __A, __m128i __B)
 {
-  __v4su rshift;
-  __vector __bool int shmask;
-  const __v4su shmax = { 32, 32, 32, 32 };
-  __v4su result;
+  __v4su __rshift;
+  __vector __bool int __shmask;
+  const __v4su __shmax = { 32, 32, 32, 32 };
+  __v4su __result;
 
 #ifdef __LITTLE_ENDIAN__
-  rshift = vec_splat ((__v4su) __B, 0);
+  __rshift = vec_splat ((__v4su) __B, 0);
 #else
-  rshift = vec_splat ((__v4su) __B, 1);
+  __rshift = vec_splat ((__v4su) __B, 1);
 #endif
-  shmask = vec_cmplt (rshift, shmax);
-  result = vec_sr ((__v4su) __A, rshift);
-  result = vec_sel ((__v4su) shmask, result, shmask);
+  __shmask = vec_cmplt (__rshift, __shmax);
+  __result = vec_sr ((__v4su) __A, __rshift);
+  __result = vec_sel ((__v4su) __shmask, __result, __shmask);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 #ifdef _ARCH_PWR8
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srl_epi64 (__m128i __A, __m128i __B)
 {
-  __v2du rshift;
-  __vector __bool long long shmask;
-  const __v2du shmax = { 64, 64 };
-  __v2du result;
+  __v2du __rshift;
+  __vector __bool long long __shmask;
+  const __v2du __shmax = { 64, 64 };
+  __v2du __result;
 
-  rshift = vec_splat ((__v2du) __B, 0);
-  shmask = vec_cmplt (rshift, shmax);
-  result = vec_sr ((__v2du) __A, rshift);
-  result = vec_sel ((__v2du) shmask, result, shmask);
+  __rshift = vec_splat ((__v2du) __B, 0);
+  __shmask = vec_cmplt (__rshift, __shmax);
+  __result = vec_sr ((__v2du) __A, __rshift);
+  __result = vec_sel ((__v2du) __shmask, __result, __shmask);
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 #endif
 
@@ -1998,11 +1992,11 @@  _mm_extract_epi16 (__m128i const __A, int const __N)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
 {
-  __v8hi result = (__v8hi)__A;
+  __v8hi __result = (__v8hi)__A;
 
-  result [(__N & 7)] = __D;
+  __result [(__N & 7)] = __D;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -2038,21 +2032,21 @@  _mm_min_epu8 (__m128i __A, __m128i __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_epi8 (__m128i __A)
 {
-  __vector unsigned long long result;
-  static const __vector unsigned char perm_mask =
+  __vector unsigned long long __result;
+  static const __vector unsigned char __perm_mask =
     {
 	0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
 	0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00
     };
 
-  result = ((__vector unsigned long long)
+  __result = ((__vector unsigned long long)
 	    vec_vbpermq ((__vector unsigned char) __A,
-			 (__vector unsigned char) perm_mask));
+			 (__vector unsigned char) __perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
-  return result[1];
+  return __result[1];
 #else
-  return result[0];
+  return __result[0];
 #endif
 }
 #endif /* _ARCH_PWR8 */
@@ -2060,8 +2054,8 @@  _mm_movemask_epi8 (__m128i __A)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mulhi_epu16 (__m128i __A, __m128i __B)
 {
-  __v4su w0, w1;
-  __v16qu xform1 = {
+  __v4su __w0, __w1;
+  __v16qu __xform1 = {
 #ifdef __LITTLE_ENDIAN__
       0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
       0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
@@ -2071,19 +2065,19 @@  _mm_mulhi_epu16 (__m128i __A, __m128i __B)
 #endif
     };
 
-  w0 = vec_vmuleuh ((__v8hu)__A, (__v8hu)__B);
-  w1 = vec_vmulouh ((__v8hu)__A, (__v8hu)__B);
-  return (__m128i) vec_perm (w0, w1, xform1);
+  __w0 = vec_vmuleuh ((__v8hu)__A, (__v8hu)__B);
+  __w1 = vec_vmulouh ((__v8hu)__A, (__v8hu)__B);
+  return (__m128i) vec_perm (__w0, __w1, __xform1);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shufflehi_epi16 (__m128i __A, const int __mask)
 {
-  unsigned long element_selector_98 = __mask & 0x03;
-  unsigned long element_selector_BA = (__mask >> 2) & 0x03;
-  unsigned long element_selector_DC = (__mask >> 4) & 0x03;
-  unsigned long element_selector_FE = (__mask >> 6) & 0x03;
-  static const unsigned short permute_selectors[4] =
+  unsigned long __element_selector_98 = __mask & 0x03;
+  unsigned long __element_selector_BA = (__mask >> 2) & 0x03;
+  unsigned long __element_selector_DC = (__mask >> 4) & 0x03;
+  unsigned long __element_selector_FE = (__mask >> 6) & 0x03;
+  static const unsigned short __permute_selectors[4] =
     {
 #ifdef __LITTLE_ENDIAN__
 	      0x0908, 0x0B0A, 0x0D0C, 0x0F0E
@@ -2091,33 +2085,33 @@  _mm_shufflehi_epi16 (__m128i __A, const int __mask)
 	      0x0809, 0x0A0B, 0x0C0D, 0x0E0F
 #endif
     };
-  __v2du pmask =
+  __v2du __pmask =
 #ifdef __LITTLE_ENDIAN__
       { 0x1716151413121110UL,  0UL};
 #else
       { 0x1011121314151617UL,  0UL};
 #endif
-  __m64_union t;
-  __v2du a, r;
+  __m64_union __t;
+  __v2du __a, __r;
 
-  t.as_short[0] = permute_selectors[element_selector_98];
-  t.as_short[1] = permute_selectors[element_selector_BA];
-  t.as_short[2] = permute_selectors[element_selector_DC];
-  t.as_short[3] = permute_selectors[element_selector_FE];
-  pmask[1] = t.as_m64;
-  a = (__v2du)__A;
-  r = vec_perm (a, a, (__vector unsigned char)pmask);
-  return (__m128i) r;
+  __t.as_short[0] = __permute_selectors[__element_selector_98];
+  __t.as_short[1] = __permute_selectors[__element_selector_BA];
+  __t.as_short[2] = __permute_selectors[__element_selector_DC];
+  __t.as_short[3] = __permute_selectors[__element_selector_FE];
+  __pmask[1] = __t.as_m64;
+  __a = (__v2du)__A;
+  __r = vec_perm (__a, __a, (__vector unsigned char)__pmask);
+  return (__m128i) __r;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shufflelo_epi16 (__m128i __A, const int __mask)
 {
-  unsigned long element_selector_10 = __mask & 0x03;
-  unsigned long element_selector_32 = (__mask >> 2) & 0x03;
-  unsigned long element_selector_54 = (__mask >> 4) & 0x03;
-  unsigned long element_selector_76 = (__mask >> 6) & 0x03;
-  static const unsigned short permute_selectors[4] =
+  unsigned long __element_selector_10 = __mask & 0x03;
+  unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
+  unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
+  unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
+  static const unsigned short __permute_selectors[4] =
     {
 #ifdef __LITTLE_ENDIAN__
 	      0x0100, 0x0302, 0x0504, 0x0706
@@ -2125,32 +2119,32 @@  _mm_shufflelo_epi16 (__m128i __A, const int __mask)
 	      0x0001, 0x0203, 0x0405, 0x0607
 #endif
     };
-  __v2du pmask =
+  __v2du __pmask =
 #ifdef __LITTLE_ENDIAN__
                  { 0UL,  0x1f1e1d1c1b1a1918UL};
 #else
                  { 0UL,  0x18191a1b1c1d1e1fUL};
 #endif
-  __m64_union t;
-  __v2du a, r;
-  t.as_short[0] = permute_selectors[element_selector_10];
-  t.as_short[1] = permute_selectors[element_selector_32];
-  t.as_short[2] = permute_selectors[element_selector_54];
-  t.as_short[3] = permute_selectors[element_selector_76];
-  pmask[0] = t.as_m64;
-  a = (__v2du)__A;
-  r = vec_perm (a, a, (__vector unsigned char)pmask);
-  return (__m128i) r;
+  __m64_union __t;
+  __v2du __a, __r;
+  __t.as_short[0] = __permute_selectors[__element_selector_10];
+  __t.as_short[1] = __permute_selectors[__element_selector_32];
+  __t.as_short[2] = __permute_selectors[__element_selector_54];
+  __t.as_short[3] = __permute_selectors[__element_selector_76];
+  __pmask[0] = __t.as_m64;
+  __a = (__v2du)__A;
+  __r = vec_perm (__a, __a, (__vector unsigned char)__pmask);
+  return (__m128i) __r;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shuffle_epi32 (__m128i __A, const int __mask)
 {
-  unsigned long element_selector_10 = __mask & 0x03;
-  unsigned long element_selector_32 = (__mask >> 2) & 0x03;
-  unsigned long element_selector_54 = (__mask >> 4) & 0x03;
-  unsigned long element_selector_76 = (__mask >> 6) & 0x03;
-  static const unsigned int permute_selectors[4] =
+  unsigned long __element_selector_10 = __mask & 0x03;
+  unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
+  unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
+  unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
+  static const unsigned int __permute_selectors[4] =
     {
 #ifdef __LITTLE_ENDIAN__
 	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
@@ -2158,26 +2152,26 @@  _mm_shuffle_epi32 (__m128i __A, const int __mask)
       0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
 #endif
     };
-  __v4su t;
+  __v4su __t;
 
-  t[0] = permute_selectors[element_selector_10];
-  t[1] = permute_selectors[element_selector_32];
-  t[2] = permute_selectors[element_selector_54] + 0x10101010;
-  t[3] = permute_selectors[element_selector_76] + 0x10101010;
-  return (__m128i)vec_perm ((__v4si) __A, (__v4si)__A, (__vector unsigned char)t);
+  __t[0] = __permute_selectors[__element_selector_10];
+  __t[1] = __permute_selectors[__element_selector_32];
+  __t[2] = __permute_selectors[__element_selector_54] + 0x10101010;
+  __t[3] = __permute_selectors[__element_selector_76] + 0x10101010;
+  return (__m128i)vec_perm ((__v4si) __A, (__v4si)__A, (__vector unsigned char)__t);
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
 {
-  __v2du hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
-  __v16qu mask, tmp;
-  __m128i_u *p = (__m128i_u*)__C;
+  __v2du __hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
+  __v16qu __mask, __tmp;
+  __m128i_u *__p = (__m128i_u*)__C;
 
-  tmp = (__v16qu)_mm_loadu_si128(p);
-  mask = (__v16qu)vec_cmpgt ((__v16qu)__B, (__v16qu)hibit);
-  tmp = vec_sel (tmp, (__v16qu)__A, mask);
-  _mm_storeu_si128 (p, (__m128i)tmp);
+  __tmp = (__v16qu)_mm_loadu_si128(__p);
+  __mask = (__v16qu)vec_cmpgt ((__v16qu)__B, (__v16qu)__hibit);
+  __tmp = vec_sel (__tmp, (__v16qu)__A, __mask);
+  _mm_storeu_si128 (__p, (__m128i)__tmp);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -2196,29 +2190,29 @@  _mm_avg_epu16 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sad_epu8 (__m128i __A, __m128i __B)
 {
-  __v16qu a, b;
-  __v16qu vmin, vmax, vabsdiff;
-  __v4si vsum;
-  const __v4su zero = { 0, 0, 0, 0 };
-  __v4si result;
-
-  a = (__v16qu) __A;
-  b = (__v16qu) __B;
-  vmin = vec_min (a, b);
-  vmax = vec_max (a, b);
-  vabsdiff = vec_sub (vmax, vmin);
+  __v16qu __a, __b;
+  __v16qu __vmin, __vmax, __vabsdiff;
+  __v4si __vsum;
+  const __v4su __zero = { 0, 0, 0, 0 };
+  __v4si __result;
+
+  __a = (__v16qu) __A;
+  __b = (__v16qu) __B;
+  __vmin = vec_min (__a, __b);
+  __vmax = vec_max (__a, __b);
+  __vabsdiff = vec_sub (__vmax, __vmin);
   /* Sum four groups of bytes into integers.  */
-  vsum = (__vector signed int) vec_sum4s (vabsdiff, zero);
+  __vsum = (__vector signed int) vec_sum4s (__vabsdiff, __zero);
   /* Sum across four integers with two integer results.  */
-  result = vec_sum2s (vsum, (__vector signed int) zero);
+  __result = vec_sum2s (__vsum, (__vector signed int) __zero);
   /* Rotate the sums into the correct position.  */
 #ifdef __LITTLE_ENDIAN__
-  result = vec_sld (result, result, 4);
+  __result = vec_sld (__result, __result, 4);
 #else
-  result = vec_sld (result, result, 6);
+  __result = vec_sld (__result, __result, 6);
 #endif
   /* Rotate the sums into the correct position.  */
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/rs6000/mm_malloc.h b/gcc/config/rs6000/mm_malloc.h
index c04348068e0b..4503e75ce189 100644
--- a/gcc/config/rs6000/mm_malloc.h
+++ b/gcc/config/rs6000/mm_malloc.h
@@ -35,28 +35,28 @@  extern "C" int posix_memalign (void **, size_t, size_t) throw ();
 #endif
 
 static __inline void *
-_mm_malloc (size_t size, size_t alignment)
+_mm_malloc (size_t __size, size_t __alignment)
 {
   /* PowerPC64 ELF V2 ABI requires quadword alignment.  */
-  size_t vec_align = sizeof (__vector float);
+  size_t __vec_align = sizeof (__vector float);
   /* Linux GLIBC malloc alignment is at least 2 X ptr size.  */
-  size_t malloc_align = (sizeof (void *) + sizeof (void *));
-  void *ptr;
-
-  if (alignment == malloc_align && alignment == vec_align)
-    return malloc (size);
-  if (alignment < vec_align)
-    alignment = vec_align;
-  if (posix_memalign (&ptr, alignment, size) == 0)
-    return ptr;
+  size_t __malloc_align = (sizeof (void *) + sizeof (void *));
+  void *__ptr;
+
+  if (__alignment == __malloc_align && __alignment == __vec_align)
+    return malloc (__size);
+  if (__alignment < __vec_align)
+    __alignment = __vec_align;
+  if (posix_memalign (&__ptr, __alignment, __size) == 0)
+    return __ptr;
   else
     return NULL;
 }
 
 static __inline void
-_mm_free (void * ptr)
+_mm_free (void * __ptr)
 {
-  free (ptr);
+  free (__ptr);
 }
 
 #endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/gcc/config/rs6000/mmintrin.h b/gcc/config/rs6000/mmintrin.h
index 0bd929c5afe9..a826fdd0c7f2 100644
--- a/gcc/config/rs6000/mmintrin.h
+++ b/gcc/config/rs6000/mmintrin.h
@@ -170,17 +170,17 @@  _mm_cvtsi64_si64x (__m64 __i)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
 {
-  __vector signed short vm1;
-  __vector signed char vresult;
+  __vector signed short __vm1;
+  __vector signed char __vresult;
 
-  vm1 = (__vector signed short) (__vector unsigned long long)
+  __vm1 = (__vector signed short) (__vector unsigned long long)
 #ifdef __LITTLE_ENDIAN__
         { __m1, __m2 };
 #else
         { __m2, __m1 };
 #endif
-  vresult = vec_packs (vm1, vm1);
-  return (__m64) ((__vector long long) vresult)[0];
+  __vresult = vec_packs (__vm1, __vm1);
+  return (__m64) ((__vector long long) __vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -195,17 +195,17 @@  _m_packsswb (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
 {
-  __vector signed int vm1;
-  __vector signed short vresult;
+  __vector signed int __vm1;
+  __vector signed short __vresult;
 
-  vm1 = (__vector signed int) (__vector unsigned long long)
+  __vm1 = (__vector signed int) (__vector unsigned long long)
 #ifdef __LITTLE_ENDIAN__
         { __m1, __m2 };
 #else
         { __m2, __m1 };
 #endif
-  vresult = vec_packs (vm1, vm1);
-  return (__m64) ((__vector long long) vresult)[0];
+  __vresult = vec_packs (__vm1, __vm1);
+  return (__m64) ((__vector long long) __vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -220,19 +220,19 @@  _m_packssdw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
 {
-  __vector unsigned char r;
-  __vector signed short vm1 = (__vector signed short) (__vector long long)
+  __vector unsigned char __r;
+  __vector signed short __vm1 = (__vector signed short) (__vector long long)
 #ifdef __LITTLE_ENDIAN__
         { __m1, __m2 };
 #else
         { __m2, __m1 };
 #endif
   const __vector signed short __zero = { 0 };
-  __vector __bool short __select = vec_cmplt (vm1, __zero);
-  r = vec_packs ((__vector unsigned short) vm1, (__vector unsigned short) vm1);
-  __vector __bool char packsel = vec_pack (__select, __select);
-  r = vec_sel (r, (const __vector unsigned char) __zero, packsel);
-  return (__m64) ((__vector long long) r)[0];
+  __vector __bool short __select = vec_cmplt (__vm1, __zero);
+  __r = vec_packs ((__vector unsigned short) __vm1, (__vector unsigned short) __vm1);
+  __vector __bool char __packsel = vec_pack (__select, __select);
+  __r = vec_sel (__r, (const __vector unsigned char) __zero, __packsel);
+  return (__m64) ((__vector long long) __r)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -248,28 +248,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector unsigned char a, b, c;
+  __vector unsigned char __a, __b, __c;
 
-  a = (__vector unsigned char)vec_splats (__m1);
-  b = (__vector unsigned char)vec_splats (__m2);
-  c = vec_mergel (a, b);
-  return (__m64) ((__vector long long) c)[1];
+  __a = (__vector unsigned char)vec_splats (__m1);
+  __b = (__vector unsigned char)vec_splats (__m2);
+  __c = vec_mergel (__a, __b);
+  return (__m64) ((__vector long long) __c)[1];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_char[0] = m1.as_char[4];
-  res.as_char[1] = m2.as_char[4];
-  res.as_char[2] = m1.as_char[5];
-  res.as_char[3] = m2.as_char[5];
-  res.as_char[4] = m1.as_char[6];
-  res.as_char[5] = m2.as_char[6];
-  res.as_char[6] = m1.as_char[7];
-  res.as_char[7] = m2.as_char[7];
+  __res.as_char[0] = __mu1.as_char[4];
+  __res.as_char[1] = __mu2.as_char[4];
+  __res.as_char[2] = __mu1.as_char[5];
+  __res.as_char[3] = __mu2.as_char[5];
+  __res.as_char[4] = __mu1.as_char[6];
+  __res.as_char[5] = __mu2.as_char[6];
+  __res.as_char[6] = __mu1.as_char[7];
+  __res.as_char[7] = __mu2.as_char[7];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -284,17 +284,17 @@  _m_punpckhbw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
 {
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_short[0] = m1.as_short[2];
-  res.as_short[1] = m2.as_short[2];
-  res.as_short[2] = m1.as_short[3];
-  res.as_short[3] = m2.as_short[3];
+  __res.as_short[0] = __mu1.as_short[2];
+  __res.as_short[1] = __mu2.as_short[2];
+  __res.as_short[2] = __mu1.as_short[3];
+  __res.as_short[3] = __mu2.as_short[3];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -307,15 +307,15 @@  _m_punpckhwd (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
 {
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_int[0] = m1.as_int[1];
-  res.as_int[1] = m2.as_int[1];
+  __res.as_int[0] = __mu1.as_int[1];
+  __res.as_int[1] = __mu2.as_int[1];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -329,28 +329,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector unsigned char a, b, c;
+  __vector unsigned char __a, __b, __c;
 
-  a = (__vector unsigned char)vec_splats (__m1);
-  b = (__vector unsigned char)vec_splats (__m2);
-  c = vec_mergel (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned char)vec_splats (__m1);
+  __b = (__vector unsigned char)vec_splats (__m2);
+  __c = vec_mergel (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_char[0] = m1.as_char[0];
-  res.as_char[1] = m2.as_char[0];
-  res.as_char[2] = m1.as_char[1];
-  res.as_char[3] = m2.as_char[1];
-  res.as_char[4] = m1.as_char[2];
-  res.as_char[5] = m2.as_char[2];
-  res.as_char[6] = m1.as_char[3];
-  res.as_char[7] = m2.as_char[3];
+  __res.as_char[0] = __mu1.as_char[0];
+  __res.as_char[1] = __mu2.as_char[0];
+  __res.as_char[2] = __mu1.as_char[1];
+  __res.as_char[3] = __mu2.as_char[1];
+  __res.as_char[4] = __mu1.as_char[2];
+  __res.as_char[5] = __mu2.as_char[2];
+  __res.as_char[6] = __mu1.as_char[3];
+  __res.as_char[7] = __mu2.as_char[3];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -364,17 +364,17 @@  _m_punpcklbw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
 {
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_short[0] = m1.as_short[0];
-  res.as_short[1] = m2.as_short[0];
-  res.as_short[2] = m1.as_short[1];
-  res.as_short[3] = m2.as_short[1];
+  __res.as_short[0] = __mu1.as_short[0];
+  __res.as_short[1] = __mu2.as_short[0];
+  __res.as_short[2] = __mu1.as_short[1];
+  __res.as_short[3] = __mu2.as_short[1];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -388,15 +388,15 @@  _m_punpcklwd (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
 {
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_int[0] = m1.as_int[0];
-  res.as_int[1] = m2.as_int[0];
+  __res.as_int[0] = __mu1.as_int[0];
+  __res.as_int[1] = __mu2.as_int[0];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -410,28 +410,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_add_pi8 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed char a, b, c;
+  __vector signed char __a, __b, __c;
 
-  a = (__vector signed char)vec_splats (__m1);
-  b = (__vector signed char)vec_splats (__m2);
-  c = vec_add (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed char)vec_splats (__m1);
+  __b = (__vector signed char)vec_splats (__m2);
+  __c = vec_add (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_char[0] = m1.as_char[0] + m2.as_char[0];
-  res.as_char[1] = m1.as_char[1] + m2.as_char[1];
-  res.as_char[2] = m1.as_char[2] + m2.as_char[2];
-  res.as_char[3] = m1.as_char[3] + m2.as_char[3];
-  res.as_char[4] = m1.as_char[4] + m2.as_char[4];
-  res.as_char[5] = m1.as_char[5] + m2.as_char[5];
-  res.as_char[6] = m1.as_char[6] + m2.as_char[6];
-  res.as_char[7] = m1.as_char[7] + m2.as_char[7];
+  __res.as_char[0] = __mu1.as_char[0] + __mu2.as_char[0];
+  __res.as_char[1] = __mu1.as_char[1] + __mu2.as_char[1];
+  __res.as_char[2] = __mu1.as_char[2] + __mu2.as_char[2];
+  __res.as_char[3] = __mu1.as_char[3] + __mu2.as_char[3];
+  __res.as_char[4] = __mu1.as_char[4] + __mu2.as_char[4];
+  __res.as_char[5] = __mu1.as_char[5] + __mu2.as_char[5];
+  __res.as_char[6] = __mu1.as_char[6] + __mu2.as_char[6];
+  __res.as_char[7] = __mu1.as_char[7] + __mu2.as_char[7];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -446,24 +446,24 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_add_pi16 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = vec_add (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = vec_add (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_short[0] = m1.as_short[0] + m2.as_short[0];
-  res.as_short[1] = m1.as_short[1] + m2.as_short[1];
-  res.as_short[2] = m1.as_short[2] + m2.as_short[2];
-  res.as_short[3] = m1.as_short[3] + m2.as_short[3];
+  __res.as_short[0] = __mu1.as_short[0] + __mu2.as_short[0];
+  __res.as_short[1] = __mu1.as_short[1] + __mu2.as_short[1];
+  __res.as_short[2] = __mu1.as_short[2] + __mu2.as_short[2];
+  __res.as_short[3] = __mu1.as_short[3] + __mu2.as_short[3];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -478,22 +478,22 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_add_pi32 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR9
-  __vector signed int a, b, c;
+  __vector signed int __a, __b, __c;
 
-  a = (__vector signed int)vec_splats (__m1);
-  b = (__vector signed int)vec_splats (__m2);
-  c = vec_add (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed int)vec_splats (__m1);
+  __b = (__vector signed int)vec_splats (__m2);
+  __c = vec_add (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_int[0] = m1.as_int[0] + m2.as_int[0];
-  res.as_int[1] = m1.as_int[1] + m2.as_int[1];
+  __res.as_int[0] = __mu1.as_int[0] + __mu2.as_int[0];
+  __res.as_int[1] = __mu1.as_int[1] + __mu2.as_int[1];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -508,28 +508,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed char a, b, c;
+  __vector signed char __a, __b, __c;
 
-  a = (__vector signed char)vec_splats (__m1);
-  b = (__vector signed char)vec_splats (__m2);
-  c = vec_sub (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed char)vec_splats (__m1);
+  __b = (__vector signed char)vec_splats (__m2);
+  __c = vec_sub (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_char[0] = m1.as_char[0] - m2.as_char[0];
-  res.as_char[1] = m1.as_char[1] - m2.as_char[1];
-  res.as_char[2] = m1.as_char[2] - m2.as_char[2];
-  res.as_char[3] = m1.as_char[3] - m2.as_char[3];
-  res.as_char[4] = m1.as_char[4] - m2.as_char[4];
-  res.as_char[5] = m1.as_char[5] - m2.as_char[5];
-  res.as_char[6] = m1.as_char[6] - m2.as_char[6];
-  res.as_char[7] = m1.as_char[7] - m2.as_char[7];
+  __res.as_char[0] = __mu1.as_char[0] - __mu2.as_char[0];
+  __res.as_char[1] = __mu1.as_char[1] - __mu2.as_char[1];
+  __res.as_char[2] = __mu1.as_char[2] - __mu2.as_char[2];
+  __res.as_char[3] = __mu1.as_char[3] - __mu2.as_char[3];
+  __res.as_char[4] = __mu1.as_char[4] - __mu2.as_char[4];
+  __res.as_char[5] = __mu1.as_char[5] - __mu2.as_char[5];
+  __res.as_char[6] = __mu1.as_char[6] - __mu2.as_char[6];
+  __res.as_char[7] = __mu1.as_char[7] - __mu2.as_char[7];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -544,24 +544,24 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = vec_sub (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = vec_sub (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_short[0] = m1.as_short[0] - m2.as_short[0];
-  res.as_short[1] = m1.as_short[1] - m2.as_short[1];
-  res.as_short[2] = m1.as_short[2] - m2.as_short[2];
-  res.as_short[3] = m1.as_short[3] - m2.as_short[3];
+  __res.as_short[0] = __mu1.as_short[0] - __mu2.as_short[0];
+  __res.as_short[1] = __mu1.as_short[1] - __mu2.as_short[1];
+  __res.as_short[2] = __mu1.as_short[2] - __mu2.as_short[2];
+  __res.as_short[3] = __mu1.as_short[3] - __mu2.as_short[3];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -576,22 +576,22 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR9
-  __vector signed int a, b, c;
+  __vector signed int __a, __b, __c;
 
-  a = (__vector signed int)vec_splats (__m1);
-  b = (__vector signed int)vec_splats (__m2);
-  c = vec_sub (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed int)vec_splats (__m1);
+  __b = (__vector signed int)vec_splats (__m2);
+  __c = vec_sub (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_int[0] = m1.as_int[0] - m2.as_int[0];
-  res.as_int[1] = m1.as_int[1] - m2.as_int[1];
+  __res.as_int[0] = __mu1.as_int[0] - __mu2.as_int[0];
+  __res.as_int[1] = __mu1.as_int[1] - __mu2.as_int[1];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -729,30 +729,30 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
 {
 #if defined(_ARCH_PWR6) && defined(__powerpc64__)
-  __m64 res;
+  __m64 __res;
   __asm__(
       "cmpb %0,%1,%2;\n"
-      : "=r" (res)
+      : "=r" (__res)
       : "r" (__m1),
 	"r" (__m2)
       : );
-  return (res);
+  return (__res);
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_char[0] = (m1.as_char[0] == m2.as_char[0])? -1: 0;
-  res.as_char[1] = (m1.as_char[1] == m2.as_char[1])? -1: 0;
-  res.as_char[2] = (m1.as_char[2] == m2.as_char[2])? -1: 0;
-  res.as_char[3] = (m1.as_char[3] == m2.as_char[3])? -1: 0;
-  res.as_char[4] = (m1.as_char[4] == m2.as_char[4])? -1: 0;
-  res.as_char[5] = (m1.as_char[5] == m2.as_char[5])? -1: 0;
-  res.as_char[6] = (m1.as_char[6] == m2.as_char[6])? -1: 0;
-  res.as_char[7] = (m1.as_char[7] == m2.as_char[7])? -1: 0;
+  __res.as_char[0] = (__mu1.as_char[0] == __mu2.as_char[0])? -1: 0;
+  __res.as_char[1] = (__mu1.as_char[1] == __mu2.as_char[1])? -1: 0;
+  __res.as_char[2] = (__mu1.as_char[2] == __mu2.as_char[2])? -1: 0;
+  __res.as_char[3] = (__mu1.as_char[3] == __mu2.as_char[3])? -1: 0;
+  __res.as_char[4] = (__mu1.as_char[4] == __mu2.as_char[4])? -1: 0;
+  __res.as_char[5] = (__mu1.as_char[5] == __mu2.as_char[5])? -1: 0;
+  __res.as_char[6] = (__mu1.as_char[6] == __mu2.as_char[6])? -1: 0;
+  __res.as_char[7] = (__mu1.as_char[7] == __mu2.as_char[7])? -1: 0;
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -766,28 +766,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed char a, b, c;
+  __vector signed char __a, __b, __c;
 
-  a = (__vector signed char)vec_splats (__m1);
-  b = (__vector signed char)vec_splats (__m2);
-  c = (__vector signed char)vec_cmpgt (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed char)vec_splats (__m1);
+  __b = (__vector signed char)vec_splats (__m2);
+  __c = (__vector signed char)vec_cmpgt (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_char[0] = (m1.as_char[0] > m2.as_char[0])? -1: 0;
-  res.as_char[1] = (m1.as_char[1] > m2.as_char[1])? -1: 0;
-  res.as_char[2] = (m1.as_char[2] > m2.as_char[2])? -1: 0;
-  res.as_char[3] = (m1.as_char[3] > m2.as_char[3])? -1: 0;
-  res.as_char[4] = (m1.as_char[4] > m2.as_char[4])? -1: 0;
-  res.as_char[5] = (m1.as_char[5] > m2.as_char[5])? -1: 0;
-  res.as_char[6] = (m1.as_char[6] > m2.as_char[6])? -1: 0;
-  res.as_char[7] = (m1.as_char[7] > m2.as_char[7])? -1: 0;
+  __res.as_char[0] = (__mu1.as_char[0] > __mu2.as_char[0])? -1: 0;
+  __res.as_char[1] = (__mu1.as_char[1] > __mu2.as_char[1])? -1: 0;
+  __res.as_char[2] = (__mu1.as_char[2] > __mu2.as_char[2])? -1: 0;
+  __res.as_char[3] = (__mu1.as_char[3] > __mu2.as_char[3])? -1: 0;
+  __res.as_char[4] = (__mu1.as_char[4] > __mu2.as_char[4])? -1: 0;
+  __res.as_char[5] = (__mu1.as_char[5] > __mu2.as_char[5])? -1: 0;
+  __res.as_char[6] = (__mu1.as_char[6] > __mu2.as_char[6])? -1: 0;
+  __res.as_char[7] = (__mu1.as_char[7] > __mu2.as_char[7])? -1: 0;
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -803,24 +803,24 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = (__vector signed short)vec_cmpeq (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = (__vector signed short)vec_cmpeq (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_short[0] = (m1.as_short[0] == m2.as_short[0])? -1: 0;
-  res.as_short[1] = (m1.as_short[1] == m2.as_short[1])? -1: 0;
-  res.as_short[2] = (m1.as_short[2] == m2.as_short[2])? -1: 0;
-  res.as_short[3] = (m1.as_short[3] == m2.as_short[3])? -1: 0;
+  __res.as_short[0] = (__mu1.as_short[0] == __mu2.as_short[0])? -1: 0;
+  __res.as_short[1] = (__mu1.as_short[1] == __mu2.as_short[1])? -1: 0;
+  __res.as_short[2] = (__mu1.as_short[2] == __mu2.as_short[2])? -1: 0;
+  __res.as_short[3] = (__mu1.as_short[3] == __mu2.as_short[3])? -1: 0;
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -834,24 +834,24 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR8
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = (__vector signed short)vec_cmpgt (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = (__vector signed short)vec_cmpgt (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_short[0] = (m1.as_short[0] > m2.as_short[0])? -1: 0;
-  res.as_short[1] = (m1.as_short[1] > m2.as_short[1])? -1: 0;
-  res.as_short[2] = (m1.as_short[2] > m2.as_short[2])? -1: 0;
-  res.as_short[3] = (m1.as_short[3] > m2.as_short[3])? -1: 0;
+  __res.as_short[0] = (__mu1.as_short[0] > __mu2.as_short[0])? -1: 0;
+  __res.as_short[1] = (__mu1.as_short[1] > __mu2.as_short[1])? -1: 0;
+  __res.as_short[2] = (__mu1.as_short[2] > __mu2.as_short[2])? -1: 0;
+  __res.as_short[3] = (__mu1.as_short[3] > __mu2.as_short[3])? -1: 0;
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -867,22 +867,22 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR9
-  __vector signed int a, b, c;
+  __vector signed int __a, __b, __c;
 
-  a = (__vector signed int)vec_splats (__m1);
-  b = (__vector signed int)vec_splats (__m2);
-  c = (__vector signed int)vec_cmpeq (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed int)vec_splats (__m1);
+  __b = (__vector signed int)vec_splats (__m2);
+  __c = (__vector signed int)vec_cmpeq (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_int[0] = (m1.as_int[0] == m2.as_int[0])? -1: 0;
-  res.as_int[1] = (m1.as_int[1] == m2.as_int[1])? -1: 0;
+  __res.as_int[0] = (__mu1.as_int[0] == __mu2.as_int[0])? -1: 0;
+  __res.as_int[1] = (__mu1.as_int[1] == __mu2.as_int[1])? -1: 0;
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -896,22 +896,22 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
 {
 #if _ARCH_PWR9
-  __vector signed int a, b, c;
+  __vector signed int __a, __b, __c;
 
-  a = (__vector signed int)vec_splats (__m1);
-  b = (__vector signed int)vec_splats (__m2);
-  c = (__vector signed int)vec_cmpgt (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed int)vec_splats (__m1);
+  __b = (__vector signed int)vec_splats (__m2);
+  __c = (__vector signed int)vec_cmpgt (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __mu1, __mu2, __res;
 
-  m1.as_m64 = __m1;
-  m2.as_m64 = __m2;
+  __mu1.as_m64 = __m1;
+  __mu2.as_m64 = __m2;
 
-  res.as_int[0] = (m1.as_int[0] > m2.as_int[0])? -1: 0;
-  res.as_int[1] = (m1.as_int[1] > m2.as_int[1])? -1: 0;
+  __res.as_int[0] = (__mu1.as_int[0] > __mu2.as_int[0])? -1: 0;
+  __res.as_int[1] = (__mu1.as_int[1] > __mu2.as_int[1])? -1: 0;
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -927,12 +927,12 @@  _m_pcmpgtd (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
 {
-  __vector signed char a, b, c;
+  __vector signed char __a, __b, __c;
 
-  a = (__vector signed char)vec_splats (__m1);
-  b = (__vector signed char)vec_splats (__m2);
-  c = vec_adds (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed char)vec_splats (__m1);
+  __b = (__vector signed char)vec_splats (__m2);
+  __c = vec_adds (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -945,12 +945,12 @@  _m_paddsb (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
 {
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = vec_adds (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = vec_adds (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -963,12 +963,12 @@  _m_paddsw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
 {
-  __vector unsigned char a, b, c;
+  __vector unsigned char __a, __b, __c;
 
-  a = (__vector unsigned char)vec_splats (__m1);
-  b = (__vector unsigned char)vec_splats (__m2);
-  c = vec_adds (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned char)vec_splats (__m1);
+  __b = (__vector unsigned char)vec_splats (__m2);
+  __c = vec_adds (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -982,12 +982,12 @@  _m_paddusb (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
 {
-  __vector unsigned short a, b, c;
+  __vector unsigned short __a, __b, __c;
 
-  a = (__vector unsigned short)vec_splats (__m1);
-  b = (__vector unsigned short)vec_splats (__m2);
-  c = vec_adds (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned short)vec_splats (__m1);
+  __b = (__vector unsigned short)vec_splats (__m2);
+  __c = vec_adds (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1001,12 +1001,12 @@  _m_paddusw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
 {
-  __vector signed char a, b, c;
+  __vector signed char __a, __b, __c;
 
-  a = (__vector signed char)vec_splats (__m1);
-  b = (__vector signed char)vec_splats (__m2);
-  c = vec_subs (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed char)vec_splats (__m1);
+  __b = (__vector signed char)vec_splats (__m2);
+  __c = vec_subs (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1020,12 +1020,12 @@  _m_psubsb (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
 {
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = vec_subs (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = vec_subs (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1039,12 +1039,12 @@  _m_psubsw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
 {
-  __vector unsigned char a, b, c;
+  __vector unsigned char __a, __b, __c;
 
-  a = (__vector unsigned char)vec_splats (__m1);
-  b = (__vector unsigned char)vec_splats (__m2);
-  c = vec_subs (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned char)vec_splats (__m1);
+  __b = (__vector unsigned char)vec_splats (__m2);
+  __c = vec_subs (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1058,12 +1058,12 @@  _m_psubusb (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
 {
-  __vector unsigned short a, b, c;
+  __vector unsigned short __a, __b, __c;
 
-  a = (__vector unsigned short)vec_splats (__m1);
-  b = (__vector unsigned short)vec_splats (__m2);
-  c = vec_subs (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned short)vec_splats (__m1);
+  __b = (__vector unsigned short)vec_splats (__m2);
+  __c = vec_subs (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1078,14 +1078,14 @@  _m_psubusw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
 {
-  __vector signed short a, b;
-  __vector signed int c;
-  __vector signed int zero = {0, 0, 0, 0};
+  __vector signed short __a, __b;
+  __vector signed int __c;
+  __vector signed int __zero = {0, 0, 0, 0};
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = vec_vmsumshm (a, b, zero);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = vec_vmsumshm (__a, __b, __zero);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1098,10 +1098,10 @@  _m_pmaddwd (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
 {
-  __vector signed short a, b;
-  __vector signed short c;
-  __vector signed int w0, w1;
-  __vector unsigned char xform1 = {
+  __vector signed short __a, __b;
+  __vector signed short __c;
+  __vector signed int __w0, __w1;
+  __vector unsigned char __xform1 = {
 #ifdef __LITTLE_ENDIAN__
       0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
       0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
@@ -1111,14 +1111,14 @@  _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
 #endif
     };
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
 
-  w0 = vec_vmulesh (a, b);
-  w1 = vec_vmulosh (a, b);
-  c = (__vector signed short)vec_perm (w0, w1, xform1);
+  __w0 = vec_vmulesh (__a, __b);
+  __w1 = vec_vmulosh (__a, __b);
+  __c = (__vector signed short)vec_perm (__w0, __w1, __xform1);
 
-  return (__m64) ((__vector long long) c)[0];
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1132,12 +1132,12 @@  _m_pmulhw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
 {
-  __vector signed short a, b, c;
+  __vector signed short __a, __b, __c;
 
-  a = (__vector signed short)vec_splats (__m1);
-  b = (__vector signed short)vec_splats (__m2);
-  c = a * b;
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector signed short)vec_splats (__m1);
+  __b = (__vector signed short)vec_splats (__m2);
+  __c = __a * __b;
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1150,15 +1150,15 @@  _m_pmullw (__m64 __m1, __m64 __m2)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sll_pi16 (__m64 __m, __m64 __count)
 {
-  __vector signed short m, r;
-  __vector unsigned short c;
+  __vector signed short __r;
+  __vector unsigned short __c;
 
   if (__count <= 15)
     {
-      m = (__vector signed short)vec_splats (__m);
-      c = (__vector unsigned short)vec_splats ((unsigned short)__count);
-      r = vec_sl (m, (__vector unsigned short)c);
-      return (__m64) ((__vector long long) r)[0];
+      __r = (__vector signed short)vec_splats (__m);
+      __c = (__vector unsigned short)vec_splats ((unsigned short)__count);
+      __r = vec_sl (__r, (__vector unsigned short)__c);
+      return (__m64) ((__vector long long) __r)[0];
     }
   else
   return (0);
@@ -1187,13 +1187,13 @@  _m_psllwi (__m64 __m, int __count)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sll_pi32 (__m64 __m, __m64 __count)
 {
-  __m64_union m, res;
+  __m64_union __res;
 
-  m.as_m64 = __m;
+  __res.as_m64 = __m;
 
-  res.as_int[0] = m.as_int[0] << __count;
-  res.as_int[1] = m.as_int[1] << __count;
-  return (res.as_m64);
+  __res.as_int[0] = __res.as_int[0] << __count;
+  __res.as_int[1] = __res.as_int[1] << __count;
+  return (__res.as_m64);
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1219,15 +1219,15 @@  _m_pslldi (__m64 __m, int __count)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sra_pi16 (__m64 __m, __m64 __count)
 {
-  __vector signed short m, r;
-  __vector unsigned short c;
+  __vector signed short __r;
+  __vector unsigned short __c;
 
   if (__count <= 15)
     {
-	m = (__vector signed short)vec_splats (__m);
-	c = (__vector unsigned short)vec_splats ((unsigned short)__count);
-	r = vec_sra (m, (__vector unsigned short)c);
-        return (__m64) ((__vector long long) r)[0];
+	__r = (__vector signed short)vec_splats (__m);
+	__c = (__vector unsigned short)vec_splats ((unsigned short)__count);
+	__r = vec_sra (__r, (__vector unsigned short)__c);
+        return (__m64) ((__vector long long) __r)[0];
     }
   else
   return (0);
@@ -1256,13 +1256,13 @@  _m_psrawi (__m64 __m, int __count)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sra_pi32 (__m64 __m, __m64 __count)
 {
-  __m64_union m, res;
+  __m64_union __res;
 
-  m.as_m64 = __m;
+  __res.as_m64 = __m;
 
-  res.as_int[0] = m.as_int[0] >> __count;
-  res.as_int[1] = m.as_int[1] >> __count;
-  return (res.as_m64);
+  __res.as_int[0] = __res.as_int[0] >> __count;
+  __res.as_int[1] = __res.as_int[1] >> __count;
+  return (__res.as_m64);
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1288,15 +1288,15 @@  _m_psradi (__m64 __m, int __count)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srl_pi16 (__m64 __m, __m64 __count)
 {
-  __vector unsigned short m, r;
-  __vector unsigned short c;
+  __vector unsigned short __r;
+  __vector unsigned short __c;
 
   if (__count <= 15)
     {
-	m = (__vector unsigned short)vec_splats (__m);
-	c = (__vector unsigned short)vec_splats ((unsigned short)__count);
-	r = vec_sr (m, (__vector unsigned short)c);
-        return (__m64) ((__vector long long) r)[0];
+	__r = (__vector unsigned short)vec_splats (__m);
+	__c = (__vector unsigned short)vec_splats ((unsigned short)__count);
+	__r = vec_sr (__r, (__vector unsigned short)__c);
+        return (__m64) ((__vector long long) __r)[0];
     }
   else
     return (0);
@@ -1325,13 +1325,13 @@  _m_psrlwi (__m64 __m, int __count)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srl_pi32 (__m64 __m, __m64 __count)
 {
-  __m64_union m, res;
+  __m64_union __res;
 
-  m.as_m64 = __m;
+  __res.as_m64 = __m;
 
-  res.as_int[0] = (unsigned int)m.as_int[0] >> __count;
-  res.as_int[1] = (unsigned int)m.as_int[1] >> __count;
-  return (res.as_m64);
+  __res.as_int[0] = (unsigned int)__res.as_int[0] >> __count;
+  __res.as_int[1] = (unsigned int)__res.as_int[1] >> __count;
+  return (__res.as_m64);
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1358,24 +1358,24 @@  _m_psrldi (__m64 __m, int __count)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_pi32 (int __i1, int __i0)
 {
-  __m64_union res;
+  __m64_union __res;
 
-  res.as_int[0] = __i0;
-  res.as_int[1] = __i1;
-  return (res.as_m64);
+  __res.as_int[0] = __i0;
+  __res.as_int[1] = __i1;
+  return (__res.as_m64);
 }
 
 /* Creates a vector of four 16-bit values; W0 is least significant.  */
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
 {
-  __m64_union res;
+  __m64_union __res;
 
-  res.as_short[0] = __w0;
-  res.as_short[1] = __w1;
-  res.as_short[2] = __w2;
-  res.as_short[3] = __w3;
-  return (res.as_m64);
+  __res.as_short[0] = __w0;
+  __res.as_short[1] = __w1;
+  __res.as_short[2] = __w2;
+  __res.as_short[3] = __w3;
+  return (__res.as_m64);
 }
 
 /* Creates a vector of eight 8-bit values; B0 is least significant.  */
@@ -1383,28 +1383,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
 	     char __b3, char __b2, char __b1, char __b0)
 {
-  __m64_union res;
+  __m64_union __res;
 
-  res.as_char[0] = __b0;
-  res.as_char[1] = __b1;
-  res.as_char[2] = __b2;
-  res.as_char[3] = __b3;
-  res.as_char[4] = __b4;
-  res.as_char[5] = __b5;
-  res.as_char[6] = __b6;
-  res.as_char[7] = __b7;
-  return (res.as_m64);
+  __res.as_char[0] = __b0;
+  __res.as_char[1] = __b1;
+  __res.as_char[2] = __b2;
+  __res.as_char[3] = __b3;
+  __res.as_char[4] = __b4;
+  __res.as_char[5] = __b5;
+  __res.as_char[6] = __b6;
+  __res.as_char[7] = __b7;
+  return (__res.as_m64);
 }
 
 /* Similar, but with the arguments in reverse order.  */
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_setr_pi32 (int __i0, int __i1)
 {
-  __m64_union res;
+  __m64_union __res;
 
-  res.as_int[0] = __i0;
-  res.as_int[1] = __i1;
-  return (res.as_m64);
+  __res.as_int[0] = __i0;
+  __res.as_int[1] = __i1;
+  return (__res.as_m64);
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1424,11 +1424,11 @@  _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set1_pi32 (int __i)
 {
-  __m64_union res;
+  __m64_union __res;
 
-  res.as_int[0] = __i;
-  res.as_int[1] = __i;
-  return (res.as_m64);
+  __res.as_int[0] = __i;
+  __res.as_int[1] = __i;
+  return (__res.as_m64);
 }
 
 /* Creates a vector of four 16-bit values, all elements containing W.  */
@@ -1441,13 +1441,13 @@  _mm_set1_pi16 (short __w)
   w = (__vector signed short)vec_splats (__w);
   return (__m64) ((__vector long long) w)[0];
 #else
-  __m64_union res;
+  __m64_union __res;
 
-  res.as_short[0] = __w;
-  res.as_short[1] = __w;
-  res.as_short[2] = __w;
-  res.as_short[3] = __w;
-  return (res.as_m64);
+  __res.as_short[0] = __w;
+  __res.as_short[1] = __w;
+  __res.as_short[2] = __w;
+  __res.as_short[3] = __w;
+  return (__res.as_m64);
 #endif
 }
 
@@ -1456,22 +1456,22 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_set1_pi8 (signed char __b)
 {
 #if _ARCH_PWR8
-  __vector signed char b;
+  __vector signed char __res;
 
-  b = (__vector signed char)vec_splats (__b);
-  return (__m64) ((__vector long long) b)[0];
+  __res = (__vector signed char)vec_splats (__b);
+  return (__m64) ((__vector long long) __res)[0];
 #else
-  __m64_union res;
-
-  res.as_char[0] = __b;
-  res.as_char[1] = __b;
-  res.as_char[2] = __b;
-  res.as_char[3] = __b;
-  res.as_char[4] = __b;
-  res.as_char[5] = __b;
-  res.as_char[6] = __b;
-  res.as_char[7] = __b;
-  return (res.as_m64);
+  __m64_union __res;
+
+  __res.as_char[0] = __b;
+  __res.as_char[1] = __b;
+  __res.as_char[2] = __b;
+  __res.as_char[3] = __b;
+  __res.as_char[4] = __b;
+  __res.as_char[5] = __b;
+  __res.as_char[6] = __b;
+  __res.as_char[7] = __b;
+  return (__res.as_m64);
 #endif
 }
 #endif /* _MMINTRIN_H_INCLUDED */
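
Likewise for mmintrin.h, where "res", "m1" and "m2" were the most common
local names.  A hypothetical smoke test; the build line is an assumption
about how such a test would be compiled, not something this patch adds.
```
/* Assumed build: gcc -O2 -mcpu=power8 -DNO_WARN_X86_INTRINSICS test.c  */
#define res 1		/* clashed with the old locals in _mm_set_pi16 etc. */
#include <mmintrin.h>
#include <stdio.h>

int
main (void)
{
  __m64 a = _mm_set_pi16 (4, 3, 2, 1);	/* elements 0..3: 1, 2, 3, 4 */
  __m64 b = _mm_set1_pi16 (10);
  __m64 sum = _mm_add_pi16 (a, b);	/* elements 0..3: 11, 12, 13, 14 */
  printf ("%016llx\n", (unsigned long long) sum);
  return 0;
}
```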
diff --git a/gcc/config/rs6000/pmmintrin.h b/gcc/config/rs6000/pmmintrin.h
index eab712fdfa66..4d7e14f312ad 100644
--- a/gcc/config/rs6000/pmmintrin.h
+++ b/gcc/config/rs6000/pmmintrin.h
@@ -58,55 +58,55 @@ 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_addsub_ps (__m128 __X, __m128 __Y)
 {
-  const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0};
-  __v4sf even_neg_Y = vec_xor(__Y, even_n0);
-  return (__m128) vec_add (__X, even_neg_Y);
+  const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0};
+  __v4sf __even_neg_Y = vec_xor(__Y, __even_n0);
+  return (__m128) vec_add (__X, __even_neg_Y);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_addsub_pd (__m128d __X, __m128d __Y)
 {
-  const __v2df even_n0 = {-0.0, 0.0};
-  __v2df even_neg_Y = vec_xor(__Y, even_n0);
-  return (__m128d) vec_add (__X, even_neg_Y);
+  const __v2df __even_n0 = {-0.0, 0.0};
+  __v2df __even_neg_Y = vec_xor(__Y, __even_n0);
+  return (__m128d) vec_add (__X, __even_neg_Y);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_hadd_ps (__m128 __X, __m128 __Y)
 {
-  __vector unsigned char xform2 = {
+  __vector unsigned char __xform2 = {
       0x00, 0x01, 0x02, 0x03,
       0x08, 0x09, 0x0A, 0x0B,
       0x10, 0x11, 0x12, 0x13,
       0x18, 0x19, 0x1A, 0x1B
     };
-  __vector unsigned char xform1 = {
+  __vector unsigned char __xform1 = {
       0x04, 0x05, 0x06, 0x07,
       0x0C, 0x0D, 0x0E, 0x0F,
       0x14, 0x15, 0x16, 0x17,
       0x1C, 0x1D, 0x1E, 0x1F
     };
-  return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
-			   vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
+  return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2),
+			   vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_hsub_ps (__m128 __X, __m128 __Y)
 {
-  __vector unsigned char xform2 = {
+  __vector unsigned char __xform2 = {
       0x00, 0x01, 0x02, 0x03,
       0x08, 0x09, 0x0A, 0x0B,
       0x10, 0x11, 0x12, 0x13,
       0x18, 0x19, 0x1A, 0x1B
     };
-  __vector unsigned char xform1 = {
+  __vector unsigned char __xform1 = {
       0x04, 0x05, 0x06, 0x07,
       0x0C, 0x0D, 0x0E, 0x0F,
       0x14, 0x15, 0x16, 0x17,
       0x1C, 0x1D, 0x1E, 0x1F
     };
-  return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2),
-			   vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1));
+  return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2),
+			   vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1));
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
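
The pmmintrin.h hunks only rename the constants fed to vec_xor and
vec_perm; _mm_addsub_ps still subtracts in the even-numbered elements and
adds in the odd ones by flipping the sign of the even elements of __Y.
A hypothetical check, with the same assumed build flags as above:
```
#include <xmmintrin.h>
#include <pmmintrin.h>

int
main (void)
{
  __m128 x = _mm_set_ps (8.0f, 6.0f, 4.0f, 2.0f); /* elements 0..3: 2, 4, 6, 8 */
  __m128 y = _mm_set1_ps (1.0f);
  __m128 r = _mm_addsub_ps (x, y);		  /* 2-1, 4+1, 6-1, 8+1 */
  float out[4];

  _mm_storeu_ps (out, r);
  return (out[0] == 1.0f && out[1] == 5.0f
	  && out[2] == 5.0f && out[3] == 9.0f) ? 0 : 1;
}
```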
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..9d9df7842ba9 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -45,31 +45,31 @@ 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
 {
-  __v16qi result = (__v16qi)__A;
+  __v16qi __result = (__v16qi)__A;
 
-  result [__N & 0xf] = __D;
+  __result [__N & 0xf] = __D;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
 {
-  __v4si result = (__v4si)__A;
+  __v4si __result = (__v4si)__A;
 
-  result [__N & 3] = __D;
+  __result [__N & 3] = __D;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
 {
-  __v2di result = (__v2di)__A;
+  __v2di __result = (__v2di)__A;
 
-  result [__N & 1] = __D;
+  __result [__N & 1] = __D;
 
-  return (__m128i) result;
+  return (__m128i) __result;
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
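
In smmintrin.h only the local "result" changes; _mm_insert_epi{8,32,64}
still replace the element selected by the masked __N.  A hypothetical
example (same assumed build flags):
```
#include <emmintrin.h>
#include <smmintrin.h>

int
main (void)
{
  __m128i v = _mm_set_epi32 (3, 2, 1, 0);	/* elements 0..3: 0, 1, 2, 3 */
  __m128i w = _mm_insert_epi32 (v, 42, 2);	/* __N & 3 selects element 2 */
  return _mm_extract_epi32 (w, 2) == 42 ? 0 : 1;
}
```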
diff --git a/gcc/config/rs6000/tmmintrin.h b/gcc/config/rs6000/tmmintrin.h
index 971511260b78..63e3f8a82626 100644
--- a/gcc/config/rs6000/tmmintrin.h
+++ b/gcc/config/rs6000/tmmintrin.h
@@ -112,8 +112,8 @@  _mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
     {
       if (__count >= 32)
 	{
-	  const __v16qu zero = { 0 };
-	  return (__m128i) zero;
+	  const __v16qu __zero = { 0 };
+	  return (__m128i) __zero;
 	}
       else
 	{
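
The tmmintrin.h change is confined to the __count >= 32 early-out of
_mm_alignr_epi8, which returns an all-zero vector because shifting the
32-byte concatenation of __A and __B right by 32 or more bytes leaves
nothing.  A hypothetical check:
```
#include <emmintrin.h>
#include <tmmintrin.h>

int
main (void)
{
  __m128i a = _mm_set1_epi8 (0x11);
  __m128i b = _mm_set1_epi8 (0x22);
  __m128i z = _mm_alignr_epi8 (a, b, 32);	/* shift past both inputs */
  __m128i zeros = _mm_setzero_si128 ();

  return _mm_movemask_epi8 (_mm_cmpeq_epi8 (z, zeros)) == 0xffff ? 0 : 1;
}
```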
diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
index ae1a33e8d95b..ba311cbf2df5 100644
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -127,14 +127,14 @@  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_loadr_ps (float const *__P)
 {
   __v4sf   __tmp;
-  __m128 result;
-  static const __vector unsigned char permute_vector =
+  __m128 __result;
+  static const __vector unsigned char __permute_vector =
     { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
 	0x17, 0x10, 0x11, 0x12, 0x13 };
 
   __tmp = vec_ld (0, (__v4sf *) __P);
-  result = (__m128) vec_perm (__tmp, __tmp, permute_vector);
-  return result;
+  __result = (__m128) vec_perm (__tmp, __tmp, __permute_vector);
+  return __result;
 }
 
 /* Create a vector with all four elements equal to F.  */
@@ -184,11 +184,11 @@  extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artific
 _mm_storer_ps (float *__P, __m128 __A)
 {
   __v4sf   __tmp;
-  static const __vector unsigned char permute_vector =
+  static const __vector unsigned char __permute_vector =
     { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
 	0x17, 0x10, 0x11, 0x12, 0x13 };
 
-  __tmp = (__m128) vec_perm (__A, __A, permute_vector);
+  __tmp = (__m128) vec_perm (__A, __A, __permute_vector);
 
   _mm_store_ps (__P, __tmp);
 }
@@ -218,9 +218,9 @@  _mm_set_ss (float __F)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_move_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
 
-  return (vec_sel ((__v4sf)__A, (__v4sf)__B, mask));
+  return (vec_sel ((__v4sf)__A, (__v4sf)__B, __mask));
 }
 
 /* Create a vector with element 0 as *P and the rest zero.  */
@@ -245,18 +245,18 @@  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_add_ss (__m128 __A, __m128 __B)
 {
 #ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __b, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
      results. So to insure we don't generate spurious exceptions
      (from the upper double values) we splat the lower double
      before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a + b;
+  __a = vec_splat (__A, 0);
+  __b = vec_splat (__B, 0);
+  __c = __a + __b;
   /* Then we merge the lower float result with the original upper
      float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 #else
   __A[0] = __A[0] + __B[0];
   return (__A);
@@ -267,18 +267,18 @@  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_sub_ss (__m128 __A, __m128 __B)
 {
 #ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __b, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
      results. So to insure we don't generate spurious exceptions
      (from the upper double values) we splat the lower double
      before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a - b;
+  __a = vec_splat (__A, 0);
+  __b = vec_splat (__B, 0);
+  __c = __a - __b;
   /* Then we merge the lower float result with the original upper
      float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 #else
   __A[0] = __A[0] - __B[0];
   return (__A);
@@ -289,18 +289,18 @@  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_mul_ss (__m128 __A, __m128 __B)
 {
 #ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __b, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
      results. So to insure we don't generate spurious exceptions
      (from the upper double values) we splat the lower double
      before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a * b;
+  __a = vec_splat (__A, 0);
+  __b = vec_splat (__B, 0);
+  __c = __a * __b;
   /* Then we merge the lower float result with the original upper
      float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 #else
   __A[0] = __A[0] * __B[0];
   return (__A);
@@ -311,18 +311,18 @@  extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_div_ss (__m128 __A, __m128 __B)
 {
 #ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __b, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
      results. So to insure we don't generate spurious exceptions
      (from the upper double values) we splat the lower double
      before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a / b;
+  __a = vec_splat (__A, 0);
+  __b = vec_splat (__B, 0);
+  __c = __a / __b;
   /* Then we merge the lower float result with the original upper
      float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 #else
   __A[0] = __A[0] / __B[0];
   return (__A);
@@ -332,17 +332,17 @@  _mm_div_ss (__m128 __A, __m128 __B)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sqrt_ss (__m128 __A)
 {
-  __m128 a, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
    * results. So to insure we don't generate spurious exceptions
    * (from the upper double values) we splat the lower double
    * before we to the operation. */
-  a = vec_splat (__A, 0);
-  c = vec_sqrt (a);
+  __a = vec_splat (__A, 0);
+  __c = vec_sqrt (__a);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 }
 
 /* Perform the respective operation on the four SPFP values in A and B.  */
@@ -391,81 +391,81 @@  _mm_rsqrt_ps (__m128 __A)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp_ss (__m128 __A)
 {
-  __m128 a, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
    * results. So to insure we don't generate spurious exceptions
    * (from the upper double values) we splat the lower double
    * before we to the operation. */
-  a = vec_splat (__A, 0);
-  c = _mm_rcp_ps (a);
+  __a = vec_splat (__A, 0);
+  __c = _mm_rcp_ps (__a);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt_ss (__m128 __A)
 {
-  __m128 a, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __m128 __a, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower double)
    * results. So to insure we don't generate spurious exceptions
    * (from the upper double values) we splat the lower double
    * before we to the operation. */
-  a = vec_splat (__A, 0);
-  c = vec_rsqrte (a);
+  __a = vec_splat (__A, 0);
+  __c = vec_rsqrte (__a);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return (vec_sel (__A, c, mask));
+  return (vec_sel (__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ss (__m128 __A, __m128 __B)
 {
-  __v4sf a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __v4sf __a, __b, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower float)
    * results. So to insure we don't generate spurious exceptions
    * (from the upper float values) we splat the lower float
    * before we to the operation. */
-  a = vec_splat ((__v4sf)__A, 0);
-  b = vec_splat ((__v4sf)__B, 0);
-  c = vec_min (a, b);
+  __a = vec_splat ((__v4sf)__A, 0);
+  __b = vec_splat ((__v4sf)__B, 0);
+  __c = vec_min (__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return (vec_sel ((__v4sf)__A, c, mask));
+  return (vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ss (__m128 __A, __m128 __B)
 {
-  __v4sf a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  __v4sf __a, __b, __c;
+  static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0};
   /* PowerISA VSX does not allow partial (for just lower float)
    * results. So to insure we don't generate spurious exceptions
    * (from the upper float values) we splat the lower float
    * before we to the operation. */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = vec_max (a, b);
+  __a = vec_splat (__A, 0);
+  __b = vec_splat (__B, 0);
+  __c = vec_max (__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return (vec_sel ((__v4sf)__A, c, mask));
+  return (vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ps (__m128 __A, __m128 __B)
 {
-  __vector __bool int m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
-  return vec_sel (__B, __A, m);
+  __vector __bool int __m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, __m);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ps (__m128 __A, __m128 __B)
 {
-  __vector __bool int m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
-  return vec_sel (__B, __A, m);
+  __vector __bool int __m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, __m);
 }
 
 /* Perform logical bit-wise operations on 128-bit values.  */
@@ -530,8 +530,8 @@  _mm_cmpge_ps (__m128 __A, __m128 __B)
 extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpneq_ps (__m128  __A, __m128  __B)
 {
-  __v4sf temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B);
-  return ((__m128)vec_nor (temp, temp));
+  __v4sf __temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B);
+  return ((__m128)vec_nor (__temp, __temp));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -561,31 +561,31 @@  _mm_cmpnge_ps (__m128 __A, __m128 __B)
 extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpord_ps (__m128  __A, __m128  __B)
 {
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
+  __vector unsigned int __a, __b;
+  __vector unsigned int __c, __d;
+  static const __vector unsigned int __float_exp_mask =
     { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
 
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
-  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
-  return ((__m128 ) vec_and (c, d));
+  __a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  __b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  __c = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __a);
+  __d = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __b);
+  return ((__m128 ) vec_and (__c, __d));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpunord_ps (__m128 __A, __m128 __B)
 {
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
+  __vector unsigned int __a, __b;
+  __vector unsigned int __c, __d;
+  static const __vector unsigned int __float_exp_mask =
     { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
 
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
-  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
-  return ((__m128 ) vec_or (c, d));
+  __a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  __b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  __c = (__vector unsigned int) vec_cmpgt (__a, __float_exp_mask);
+  __d = (__vector unsigned int) vec_cmpgt (__b, __float_exp_mask);
+  return ((__m128 ) vec_or (__c, __d));
 }
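
The ordered/unordered compares above classify a lane as NaN purely from its bit pattern: `vec_abs` clears the sign bit, and a magnitude strictly greater than `0x7f800000` (all-ones exponent with a non-zero mantissa) can only be a NaN. A small stand-alone sketch of that test for one IEEE-754 binary32 value (function name invented here):

```
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* True iff X is a NaN, judged from its encoding alone.  */
static int is_nan_bits (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);               /* reinterpret the float bits */
  return (u & 0x7fffffffu) > 0x7f800000u;  /* magnitude above +inf => NaN */
}

int main (void)
{
  printf ("%d %d %d\n",
          is_nan_bits (1.0f),              /* 0 */
          is_nan_bits (0.0f / 0.0f),       /* 1: quiet NaN */
          is_nan_bits (1.0f / 0.0f));      /* 0: infinity is not a NaN */
  return 0;
}
```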
 
 /* Perform a comparison on the lower SPFP values of A and B.  If the
@@ -594,222 +594,222 @@  _mm_cmpunord_ps (__m128 __A, __m128 __B)
 extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpeq_ss (__m128  __A, __m128  __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpeq(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmpeq (__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmplt_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmplt(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmplt(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmple_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmple(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmple(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpgt_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpgt(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmpgt(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpge_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpge(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmpge(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpneq_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpeq(a, b);
-  c = vec_nor (c, c);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmpeq(__a, __b);
+  __c = vec_nor (__c, __c);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpnlt_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpge(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmpge(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpnle_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpgt(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmpgt(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpngt_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmple(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmple(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpnge_ss (__m128 __A, __m128 __B)
 {
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
+  __v4sf __a, __b, __c;
   /* PowerISA VMX does not allow partial (for just element 0)
    * results. So to ensure we don't generate spurious exceptions
    * (from the upper elements) we splat the lower float
    * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmplt(a, b);
+  __a = vec_splat ((__v4sf) __A, 0);
+  __b = vec_splat ((__v4sf) __B, 0);
+  __c = (__v4sf) vec_cmplt(__a, __b);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, __c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpord_ss (__m128 __A, __m128 __B)
 {
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
+  __vector unsigned int __a, __b;
+  __vector unsigned int __c, __d;
+  static const __vector unsigned int __float_exp_mask =
     { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
 
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
-  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
-  c = vec_and (c, d);
+  __a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  __b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  __c = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __a);
+  __d = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __b);
+  __c = vec_and (__c, __d);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)__c, __mask));
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpunord_ss (__m128 __A, __m128 __B)
 {
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
+  __vector unsigned int __a, __b;
+  __vector unsigned int __c, __d;
+  static const __vector unsigned int __float_exp_mask =
     { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
-  static const __vector unsigned int mask =
+  static const __vector unsigned int __mask =
     { 0xffffffff, 0, 0, 0 };
 
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
-  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
-  c = vec_or (c, d);
+  __a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  __b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  __c = (__vector unsigned int) vec_cmpgt (__a, __float_exp_mask);
+  __d = (__vector unsigned int) vec_cmpgt (__b, __float_exp_mask);
+  __c = vec_or (__c, __d);
   /* Then we merge the lower float result with the original upper
    * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
+  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)__c, __mask));
 }
 
 /* Compare the lower SPFP values of A and B and return 1 if true
@@ -905,9 +905,9 @@  _mm_cvtss_f32 (__m128 __A)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtss_si32 (__m128 __A)
 {
-  int res;
+  int __res;
 #ifdef _ARCH_PWR8
-  double dtmp;
+  double __dtmp;
   __asm__(
 #ifdef __LITTLE_ENDIAN__
       "xxsldwi %x0,%x0,%x0,3;\n"
@@ -916,13 +916,13 @@  _mm_cvtss_si32 (__m128 __A)
       "fctiw  %2,%2;\n"
       "mfvsrd  %1,%x2;\n"
       : "+wa" (__A),
-        "=r" (res),
-        "=f" (dtmp)
+        "=r" (__res),
+        "=f" (__dtmp)
       : );
 #else
-  res = __builtin_rint(__A[0]);
+  __res = __builtin_rint(__A[0]);
 #endif
-  return (res);
+  return __res;
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -938,9 +938,9 @@  _mm_cvt_ss2si (__m128 __A)
 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtss_si64 (__m128 __A)
 {
-  long long res;
+  long long __res;
 #if defined (_ARCH_PWR8) && defined (__powerpc64__)
-  double dtmp;
+  double __dtmp;
   __asm__(
 #ifdef __LITTLE_ENDIAN__
       "xxsldwi %x0,%x0,%x0,3;\n"
@@ -949,13 +949,13 @@  _mm_cvtss_si64 (__m128 __A)
       "fctid  %2,%2;\n"
       "mfvsrd  %1,%x2;\n"
       : "+wa" (__A),
-        "=r" (res),
-        "=f" (dtmp)
+        "=r" (__res),
+        "=f" (__dtmp)
       : );
 #else
-  res = __builtin_llrint(__A[0]);
+  __res = __builtin_llrint(__A[0]);
 #endif
-  return (res);
+  return __res;
 }
 
 /* Microsoft intrinsic.  */
@@ -992,15 +992,15 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_cvtps_pi32 (__m128 __A)
 {
   /* Splat two lower SPFP values to both halves.  */
-  __v4sf temp, rounded;
-  __vector unsigned long long result;
+  __v4sf __temp, __rounded;
+  __vector unsigned long long __result;
 
   /* Splat two lower SPFP values to both halves.  */
-  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  rounded = vec_rint(temp);
-  result = (__vector unsigned long long) vec_cts (rounded, 0);
+  __temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
+  __rounded = vec_rint (__temp);
+  __result = (__vector unsigned long long) vec_cts (__rounded, 0);
 
-  return (__m64) ((__vector long long) result)[0];
+  return (__m64) ((__vector long long) __result)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1014,9 +1014,9 @@  extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artifici
 _mm_cvttss_si32 (__m128 __A)
 {
   /* Extract the lower float element.  */
-  float temp = __A[0];
+  float __temp = __A[0];
   /* truncate to 32-bit integer and return.  */
-  return temp;
+  return __temp;
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1030,9 +1030,9 @@  extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __ar
 _mm_cvttss_si64 (__m128 __A)
 {
   /* Extract the lower float element.  */
-  float temp = __A[0];
+  float __temp = __A[0];
   /* truncate to 64-bit integer and return.  */
-  return temp;
+  return __temp;
 }
 
 /* Microsoft intrinsic.  */
@@ -1040,9 +1040,9 @@  extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __ar
 _mm_cvttss_si64x (__m128 __A)
 {
   /* Extract the lower float element.  */
-  float temp = __A[0];
+  float __temp = __A[0];
   /* truncate to 64-bit integer and return.  */
-  return temp;
+  return __temp;
 }
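
The two conversion families above differ only in rounding: `_mm_cvtss_si32`/`_mm_cvtss_si64` round with the current rounding mode (via `fctiw`/`fctid` or `__builtin_rint`/`__builtin_llrint`), while the `_mm_cvttss_*` forms rely on C's float-to-integer conversion, which truncates toward zero. A tiny stand-alone illustration of the difference (compile with `-lm` for `rintf`):

```
#include <math.h>
#include <stdio.h>

int main (void)
{
  float x = 1.5f;

  /* Round-to-nearest-even (the default rounding mode) vs. truncation.  */
  printf ("%d %d\n", (int) rintf (x), (int) x);   /* 2 1 */
  return 0;
}
```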
 
 /* Truncate the two lower SPFP values to 32-bit integers.  Return the
@@ -1050,14 +1050,14 @@  _mm_cvttss_si64x (__m128 __A)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvttps_pi32 (__m128 __A)
 {
-  __v4sf temp;
-  __vector unsigned long long result;
+  __v4sf __temp;
+  __vector unsigned long long __result;
 
   /* Splat two lower SPFP values to both halves.  */
-  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  result = (__vector unsigned long long) vec_cts (temp, 0);
+  __temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
+  __result = (__vector unsigned long long) vec_cts (__temp, 0);
 
-  return (__m64) ((__vector long long) result)[0];
+  return (__m64) ((__vector long long) __result)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1070,8 +1070,8 @@  _mm_cvtt_ps2pi (__m128 __A)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsi32_ss (__m128 __A, int __B)
 {
-  float temp = __B;
-  __A[0] = temp;
+  float __temp = __B;
+  __A[0] = __temp;
 
   return __A;
 }
@@ -1087,8 +1087,8 @@  _mm_cvt_si2ss (__m128 __A, int __B)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsi64_ss (__m128 __A, long long __B)
 {
-  float temp = __B;
-  __A[0] = temp;
+  float __temp = __B;
+  __A[0] = __temp;
 
   return __A;
 }
@@ -1105,14 +1105,14 @@  _mm_cvtsi64x_ss (__m128 __A, long long __B)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpi32_ps (__m128        __A, __m64        __B)
 {
-  __vector signed int vm1;
-  __vector float vf1;
+  __vector signed int __vm1;
+  __vector float __vf1;
 
-  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
-  vf1 = (__vector float) vec_ctf (vm1, 0);
+  __vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
+  __vf1 = (__vector float) vec_ctf (__vm1, 0);
 
   return ((__m128) (__vector unsigned long long)
-    { ((__vector unsigned long long)vf1) [0],
+    { ((__vector unsigned long long)__vf1) [0],
 	((__vector unsigned long long)__A) [1]});
 }
 
@@ -1126,54 +1126,54 @@  _mm_cvt_pi2ps (__m128 __A, __m64 __B)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpi16_ps (__m64 __A)
 {
-  __vector signed short vs8;
-  __vector signed int vi4;
-  __vector float vf1;
+  __vector signed short __vs8;
+  __vector signed int __vi4;
+  __vector float __vf1;
 
-  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
-  vi4 = vec_vupklsh (vs8);
-  vf1 = (__vector float) vec_ctf (vi4, 0);
+  __vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
+  __vi4 = vec_vupklsh (__vs8);
+  __vf1 = (__vector float) vec_ctf (__vi4, 0);
 
-  return (__m128) vf1;
+  return (__m128) __vf1;
 }
 
 /* Convert the four unsigned 16-bit values in A to SPFP form.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpu16_ps (__m64 __A)
 {
-  const __vector unsigned short zero =
+  const __vector unsigned short __zero =
     { 0, 0, 0, 0, 0, 0, 0, 0 };
-  __vector unsigned short vs8;
-  __vector unsigned int vi4;
-  __vector float vf1;
+  __vector unsigned short __vs8;
+  __vector unsigned int __vi4;
+  __vector float __vf1;
 
-  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
-  vi4 = (__vector unsigned int) vec_mergel
+  __vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
+  __vi4 = (__vector unsigned int) vec_mergel
 #ifdef __LITTLE_ENDIAN__
-                                           (vs8, zero);
+                                           (__vs8, __zero);
 #else
-                                           (zero, vs8);
+                                           (__zero, __vs8);
 #endif
-  vf1 = (__vector float) vec_ctf (vi4, 0);
+  __vf1 = (__vector float) vec_ctf (__vi4, 0);
 
-  return (__m128) vf1;
+  return (__m128) __vf1;
 }
 
 /* Convert the low four signed 8-bit values in A to SPFP form.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpi8_ps (__m64 __A)
 {
-  __vector signed char vc16;
-  __vector signed short vs8;
-  __vector signed int vi4;
-  __vector float vf1;
+  __vector signed char __vc16;
+  __vector signed short __vs8;
+  __vector signed int __vi4;
+  __vector float __vf1;
 
-  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
-  vs8 = vec_vupkhsb (vc16);
-  vi4 = vec_vupkhsh (vs8);
-  vf1 = (__vector float) vec_ctf (vi4, 0);
+  __vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
+  __vs8 = vec_vupkhsb (__vc16);
+  __vi4 = vec_vupkhsh (__vs8);
+  __vf1 = (__vector float) vec_ctf (__vi4, 0);
 
-  return (__m128) vf1;
+  return (__m128) __vf1;
 }
 
 /* Convert the low four unsigned 8-bit values in A to SPFP form.  */
@@ -1181,70 +1181,70 @@  extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __art
 
 _mm_cvtpu8_ps (__m64  __A)
 {
-  const __vector unsigned char zero =
+  const __vector unsigned char __zero =
     { 0, 0, 0, 0, 0, 0, 0, 0 };
-  __vector unsigned char vc16;
-  __vector unsigned short vs8;
-  __vector unsigned int vi4;
-  __vector float vf1;
+  __vector unsigned char __vc16;
+  __vector unsigned short __vs8;
+  __vector unsigned int __vi4;
+  __vector float __vf1;
 
-  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
+  __vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
 #ifdef __LITTLE_ENDIAN__
-  vs8 = (__vector unsigned short) vec_mergel (vc16, zero);
-  vi4 = (__vector unsigned int) vec_mergeh (vs8,
-					    (__vector unsigned short) zero);
+  __vs8 = (__vector unsigned short) vec_mergel (__vc16, __zero);
+  __vi4 = (__vector unsigned int) vec_mergeh (__vs8,
+					    (__vector unsigned short) __zero);
 #else
-  vs8 = (__vector unsigned short) vec_mergel (zero, vc16);
-  vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero,
-                                            vs8);
+  __vs8 = (__vector unsigned short) vec_mergel (__zero, __vc16);
+  __vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) __zero,
+                                            __vs8);
 #endif
-  vf1 = (__vector float) vec_ctf (vi4, 0);
+  __vf1 = (__vector float) vec_ctf (__vi4, 0);
 
-  return (__m128) vf1;
+  return (__m128) __vf1;
 }
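
The `_mm_cvtpi8_ps`/`_mm_cvtpu8_ps` pair above differ only in whether the 8-bit elements are sign-extended (`vec_vupkhsb`/`vec_vupkhsh`) or zero-extended (merged with a zero vector) before the integer-to-float conversion. Per element, the effect is simply the following stand-alone scalar illustration:

```
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uint8_t byte = 0xf0;
  float as_signed   = (float) (int8_t) byte;   /* sign-extend:  -16.0 */
  float as_unsigned = (float) byte;            /* zero-extend:  240.0 */

  printf ("%g %g\n", as_signed, as_unsigned);
  return 0;
}
```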
 
 /* Convert the four signed 32-bit values in A and B to SPFP form.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtpi32x2_ps (__m64 __A, __m64 __B)
 {
-  __vector signed int vi4;
-  __vector float vf4;
+  __vector signed int __vi4;
+  __vector float __vf4;
 
-  vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B };
-  vf4 = (__vector float) vec_ctf (vi4, 0);
-  return (__m128) vf4;
+  __vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B };
+  __vf4 = (__vector float) vec_ctf (__vi4, 0);
+  return (__m128) __vf4;
 }
 
 /* Convert the four SPFP values in A to four signed 16-bit integers.  */
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtps_pi16 (__m128 __A)
 {
-  __v4sf rounded;
-  __vector signed int temp;
-  __vector unsigned long long result;
+  __v4sf __rounded;
+  __vector signed int __temp;
+  __vector unsigned long long __result;
 
-  rounded = vec_rint(__A);
-  temp = vec_cts (rounded, 0);
-  result = (__vector unsigned long long) vec_pack (temp, temp);
+  __rounded = vec_rint(__A);
+  __temp = vec_cts (__rounded, 0);
+  __result = (__vector unsigned long long) vec_pack (__temp, __temp);
 
-  return (__m64) ((__vector long long) result)[0];
+  return (__m64) ((__vector long long) __result)[0];
 }
 
 /* Convert the four SPFP values in A to four signed 8-bit integers.  */
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtps_pi8 (__m128 __A)
 {
-  __v4sf rounded;
-  __vector signed int tmp_i;
-  static const __vector signed int zero = {0, 0, 0, 0};
-  __vector signed short tmp_s;
-  __vector signed char res_v;
+  __v4sf __rounded;
+  __vector signed int __tmp_i;
+  static const __vector signed int __zero = {0, 0, 0, 0};
+  __vector signed short __tmp_s;
+  __vector signed char __res_v;
 
-  rounded = vec_rint(__A);
-  tmp_i = vec_cts (rounded, 0);
-  tmp_s = vec_pack (tmp_i, zero);
-  res_v = vec_pack (tmp_s, tmp_s);
-  return (__m64) ((__vector long long) res_v)[0];
+  __rounded = vec_rint(__A);
+  __tmp_i = vec_cts (__rounded, 0);
+  __tmp_s = vec_pack (__tmp_i, __zero);
+  __res_v = vec_pack (__tmp_s, __tmp_s);
+  return (__m64) ((__vector long long) __res_v)[0];
 }
 
 /* Selects four specific SPFP values from A and B based on MASK.  */
@@ -1252,11 +1252,11 @@  extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __art
 
 _mm_shuffle_ps (__m128  __A, __m128  __B, int const __mask)
 {
-  unsigned long element_selector_10 = __mask & 0x03;
-  unsigned long element_selector_32 = (__mask >> 2) & 0x03;
-  unsigned long element_selector_54 = (__mask >> 4) & 0x03;
-  unsigned long element_selector_76 = (__mask >> 6) & 0x03;
-  static const unsigned int permute_selectors[4] =
+  unsigned long __element_selector_10 = __mask & 0x03;
+  unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
+  unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
+  unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
+  static const unsigned int __permute_selectors[4] =
     {
 #ifdef __LITTLE_ENDIAN__
       0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
@@ -1264,13 +1264,13 @@  _mm_shuffle_ps (__m128  __A, __m128  __B, int const __mask)
       0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
 #endif
     };
-  __vector unsigned int t;
+  __vector unsigned int __t;
 
-  t[0] = permute_selectors[element_selector_10];
-  t[1] = permute_selectors[element_selector_32];
-  t[2] = permute_selectors[element_selector_54] + 0x10101010;
-  t[3] = permute_selectors[element_selector_76] + 0x10101010;
-  return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t);
+  __t[0] = __permute_selectors[__element_selector_10];
+  __t[1] = __permute_selectors[__element_selector_32];
+  __t[2] = __permute_selectors[__element_selector_54] + 0x10101010;
+  __t[3] = __permute_selectors[__element_selector_76] + 0x10101010;
+  return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)__t);
 }
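
`_mm_shuffle_ps` above decodes the 8-bit mask into four 2-bit lane selectors and builds a `vec_perm` control from them (the `+ 0x10101010` offsets steer the upper two result lanes at `__B`). A stand-alone scalar sketch of the selection it implements, with plain float arrays in place of `__m128` and an invented helper name:

```
#include <stdio.h>

static void shuffle_ps_model (const float a[4], const float b[4],
                              unsigned mask, float r[4])
{
  r[0] = a[mask & 3];          /* bits 1:0 pick the lane of A for lane 0 */
  r[1] = a[(mask >> 2) & 3];   /* bits 3:2 pick the lane of A for lane 1 */
  r[2] = b[(mask >> 4) & 3];   /* bits 5:4 pick the lane of B for lane 2 */
  r[3] = b[(mask >> 6) & 3];   /* bits 7:6 pick the lane of B for lane 3 */
}

int main (void)
{
  float a[4] = { 0, 1, 2, 3 }, b[4] = { 4, 5, 6, 7 }, r[4];

  shuffle_ps_model (a, b, 0x1b, r);                  /* 0x1b = 0b00011011 */
  printf ("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 3 2 5 4 */
  return 0;
}
```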
 
 /* Selects and interleaves the upper two SPFP values from A and B.  */
@@ -1352,8 +1352,8 @@  _mm_storel_pi (__m64 *__P, __m128 __A)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_ps (__m128  __A)
 {
-  __vector unsigned long long result;
-  static const __vector unsigned int perm_mask =
+  __vector unsigned long long __result;
+  static const __vector unsigned int __perm_mask =
     {
 #ifdef __LITTLE_ENDIAN__
 	0x00204060, 0x80808080, 0x80808080, 0x80808080
@@ -1362,14 +1362,14 @@  _mm_movemask_ps (__m128  __A)
 #endif
     };
 
-  result = ((__vector unsigned long long)
+  __result = ((__vector unsigned long long)
 	    vec_vbpermq ((__vector unsigned char) __A,
-			 (__vector unsigned char) perm_mask));
+			 (__vector unsigned char) __perm_mask));
 
 #ifdef __LITTLE_ENDIAN__
-  return result[1];
+  return __result[1];
 #else
-  return result[0];
+  return __result[0];
 #endif
 }
 #endif /* _ARCH_PWR8 */
@@ -1391,12 +1391,12 @@  _mm_load_ps1 (float const *__P)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_extract_pi16 (__m64 const __A, int const __N)
 {
-  unsigned int shiftr = __N & 3;
+  unsigned int __shiftr = __N & 3;
 #ifdef __BIG_ENDIAN__
-  shiftr = 3 - shiftr;
+  __shiftr = 3 - __shiftr;
 #endif
 
-  return ((__A >> (shiftr * 16)) & 0xffff);
+  return ((__A >> (__shiftr * 16)) & 0xffff);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1410,12 +1410,12 @@  _m_pextrw (__m64 const __A, int const __N)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
 {
-  const int shiftl = (__N & 3) * 16;
-  const __m64 shiftD = (const __m64) __D << shiftl;
-  const __m64 mask = 0xffffUL << shiftl;
-  __m64 result = (__A & (~mask)) | (shiftD & mask);
+  const int __shiftl = (__N & 3) * 16;
+  const __m64 __shiftD = (const __m64) __D << __shiftl;
+  const __m64 __mask = 0xffffUL << __shiftl;
+  __m64 __result = (__A & (~__mask)) | (__shiftD & __mask);
 
-  return (result);
+  return __result;
 }
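
`_mm_insert_pi16` above works directly on the `__m64` as a 64-bit integer: it clears the selected 16-bit field with a mask and ORs in the shifted replacement. The same update written for a plain `uint64_t` (names invented here):

```
#include <stdint.h>
#include <stdio.h>

static uint64_t insert_u16 (uint64_t v, uint16_t d, unsigned n)
{
  unsigned shift = (n & 3) * 16;                   /* which 16-bit field  */
  uint64_t mask  = (uint64_t) 0xffff << shift;
  return (v & ~mask) | ((uint64_t) d << shift);    /* clear, then insert  */
}

int main (void)
{
  uint64_t v = 0x0001000200030004ULL;

  /* Field 2 counts from the low end: result is 0x0001beef00030004.  */
  printf ("%016llx\n", (unsigned long long) insert_u16 (v, 0xbeef, 2));
  return 0;
}
```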
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1430,30 +1430,30 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_max_pi16 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
-  __vector signed short a, b, r;
-  __vector __bool short c;
-
-  a = (__vector signed short)vec_splats (__A);
-  b = (__vector signed short)vec_splats (__B);
-  c = (__vector __bool short)vec_cmpgt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
+  __vector signed short __a, __b, __r;
+  __vector __bool short __c;
+
+  __a = (__vector signed short)vec_splats (__A);
+  __b = (__vector signed short)vec_splats (__B);
+  __c = (__vector __bool short)vec_cmpgt (__a, __b);
+  __r = vec_sel (__b, __a, __c);
+  return (__m64) ((__vector long long) __r)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __m1, __m2, __res;
 
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
+  __m1.as_m64 = __A;
+  __m2.as_m64 = __B;
 
-  res.as_short[0] =
-      (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
-  res.as_short[1] =
-      (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
-  res.as_short[2] =
-      (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
-  res.as_short[3] =
-      (m1.as_short[3] > m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
+  __res.as_short[0] =
+      (__m1.as_short[0] > __m2.as_short[0]) ? __m1.as_short[0] : __m2.as_short[0];
+  __res.as_short[1] =
+      (__m1.as_short[1] > __m2.as_short[1]) ? __m1.as_short[1] : __m2.as_short[1];
+  __res.as_short[2] =
+      (__m1.as_short[2] > __m2.as_short[2]) ? __m1.as_short[2] : __m2.as_short[2];
+  __res.as_short[3] =
+      (__m1.as_short[3] > __m2.as_short[3]) ? __m1.as_short[3] : __m2.as_short[3];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -1468,28 +1468,27 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_max_pu8 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
-  __vector unsigned char a, b, r;
-  __vector __bool char c;
-
-  a = (__vector unsigned char)vec_splats (__A);
-  b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector __bool char)vec_cmpgt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
+  __vector unsigned char __a, __b, __r;
+  __vector __bool char __c;
+
+  __a = (__vector unsigned char)vec_splats (__A);
+  __b = (__vector unsigned char)vec_splats (__B);
+  __c = (__vector __bool char)vec_cmpgt (__a, __b);
+  __r = vec_sel (__b, __a, __c);
+  return (__m64) ((__vector long long) __r)[0];
 #else
-  __m64_union m1, m2, res;
-  long i;
+  __m64_union __m1, __m2, __res;
+  long __i;
 
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
+  __m1.as_m64 = __A;
+  __m2.as_m64 = __B;
 
+  for (__i = 0; __i < 8; __i++)
+    __res.as_char[__i] =
+      ((unsigned char) __m1.as_char[__i] > (unsigned char) __m2.as_char[__i]) ?
+	  __m1.as_char[__i] : __m2.as_char[__i];
 
-  for (i = 0; i < 8; i++)
-  res.as_char[i] =
-      ((unsigned char) m1.as_char[i] > (unsigned char) m2.as_char[i]) ?
-	  m1.as_char[i] : m2.as_char[i];
-
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -1504,30 +1503,30 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_min_pi16 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
-  __vector signed short a, b, r;
-  __vector __bool short c;
-
-  a = (__vector signed short)vec_splats (__A);
-  b = (__vector signed short)vec_splats (__B);
-  c = (__vector __bool short)vec_cmplt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
+  __vector signed short __a, __b, __r;
+  __vector __bool short __c;
+
+  __a = (__vector signed short)vec_splats (__A);
+  __b = (__vector signed short)vec_splats (__B);
+  __c = (__vector __bool short)vec_cmplt (__a, __b);
+  __r = vec_sel (__b, __a, __c);
+  return (__m64) ((__vector long long) __r)[0];
 #else
-  __m64_union m1, m2, res;
+  __m64_union __m1, __m2, __res;
 
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
+  __m1.as_m64 = __A;
+  __m2.as_m64 = __B;
 
-  res.as_short[0] =
-      (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
-  res.as_short[1] =
-      (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
-  res.as_short[2] =
-      (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
-  res.as_short[3] =
-      (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
+  __res.as_short[0] =
+      (__m1.as_short[0] < __m2.as_short[0]) ? __m1.as_short[0] : __m2.as_short[0];
+  __res.as_short[1] =
+      (__m1.as_short[1] < __m2.as_short[1]) ? __m1.as_short[1] : __m2.as_short[1];
+  __res.as_short[2] =
+      (__m1.as_short[2] < __m2.as_short[2]) ? __m1.as_short[2] : __m2.as_short[2];
+  __res.as_short[3] =
+      (__m1.as_short[3] < __m2.as_short[3]) ? __m1.as_short[3] : __m2.as_short[3];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -1542,28 +1541,28 @@  extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi
 _mm_min_pu8 (__m64 __A, __m64 __B)
 {
 #if _ARCH_PWR8
-  __vector unsigned char a, b, r;
-  __vector __bool char c;
-
-  a = (__vector unsigned char)vec_splats (__A);
-  b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector __bool char)vec_cmplt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
+  __vector unsigned char __a, __b, __r;
+  __vector __bool char __c;
+
+  __a = (__vector unsigned char)vec_splats (__A);
+  __b = (__vector unsigned char)vec_splats (__B);
+  __c = (__vector __bool char)vec_cmplt (__a, __b);
+  __r = vec_sel (__b, __a, __c);
+  return (__m64) ((__vector long long) __r)[0];
 #else
-  __m64_union m1, m2, res;
-  long i;
+  __m64_union __m1, __m2, __res;
+  long __i;
 
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
+  __m1.as_m64 = __A;
+  __m2.as_m64 = __B;
 
 
-  for (i = 0; i < 8; i++)
-  res.as_char[i] =
-      ((unsigned char) m1.as_char[i] < (unsigned char) m2.as_char[i]) ?
-	  m1.as_char[i] : m2.as_char[i];
+  for (__i = 0; __i < 8; __i++)
+    __res.as_char[__i] =
+      ((unsigned char) __m1.as_char[__i] < (unsigned char) __m2.as_char[__i]) ?
+	  __m1.as_char[__i] : __m2.as_char[__i];
 
-  return (__m64) res.as_m64;
+  return (__m64) __res.as_m64;
 #endif
 }
 
@@ -1578,24 +1577,24 @@  extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artifici
 _mm_movemask_pi8 (__m64 __A)
 {
 #ifdef __powerpc64__
-  unsigned long long p =
+  unsigned long long __p =
 #ifdef __LITTLE_ENDIAN__
                          0x0008101820283038UL; // permute control for sign bits
 #else
                          0x3830282018100800UL; // permute control for sign bits
 #endif
-  return __builtin_bpermd (p, __A);
+  return __builtin_bpermd (__p, __A);
 #else
 #ifdef __LITTLE_ENDIAN__
-  unsigned int mask = 0x20283038UL;
-  unsigned int r1 = __builtin_bpermd (mask, __A) & 0xf;
-  unsigned int r2 = __builtin_bpermd (mask, __A >> 32) & 0xf;
+  unsigned int __mask = 0x20283038UL;
+  unsigned int __r1 = __builtin_bpermd (__mask, __A) & 0xf;
+  unsigned int __r2 = __builtin_bpermd (__mask, __A >> 32) & 0xf;
 #else
-  unsigned int mask = 0x38302820UL;
-  unsigned int r1 = __builtin_bpermd (mask, __A >> 32) & 0xf;
-  unsigned int r2 = __builtin_bpermd (mask, __A) & 0xf;
+  unsigned int __mask = 0x38302820UL;
+  unsigned int __r1 = __builtin_bpermd (__mask, __A >> 32) & 0xf;
+  unsigned int __r2 = __builtin_bpermd (__mask, __A) & 0xf;
 #endif
-  return (r2 << 4) | r1;
+  return (__r2 << 4) | __r1;
 #endif
 }
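
`_mm_movemask_pi8` above gathers the most-significant bit of each byte into an 8-bit result using `__builtin_bpermd` with an endian-specific permute-control constant (or two 4-bit gathers on 32-bit targets). A portable scalar model of the semantics being emulated, where bit i of the result is the sign bit of byte i and byte 0 is the least significant byte (function name invented here):

```
#include <stdint.h>
#include <stdio.h>

static unsigned movemask_pi8_model (uint64_t m)
{
  unsigned r = 0;
  for (int i = 0; i < 8; i++)
    r |= (unsigned) ((m >> (8 * i + 7)) & 1) << i;   /* sign bit of byte i */
  return r;
}

int main (void)
{
  /* Bytes, low to high: 0x80 0x01 0xff 0x00 0x7f 0x90 0x00 0x80.  */
  uint64_t m = 0x8000907f00ff0180ULL;

  printf ("0x%02x\n", movemask_pi8_model (m));   /* 0xa5 */
  return 0;
}
```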
 
@@ -1610,10 +1609,10 @@  _m_pmovmskb (__m64 __A)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mulhi_pu16 (__m64 __A, __m64 __B)
 {
-  __vector unsigned short a, b;
-  __vector unsigned short c;
-  __vector unsigned int w0, w1;
-  __vector unsigned char xform1 = {
+  __vector unsigned short __a, __b;
+  __vector unsigned short __c;
+  __vector unsigned int __w0, __w1;
+  __vector unsigned char __xform1 = {
 #ifdef __LITTLE_ENDIAN__
       0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
       0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
@@ -1623,14 +1622,14 @@  _mm_mulhi_pu16 (__m64 __A, __m64 __B)
 #endif
     };
 
-  a = (__vector unsigned short)vec_splats (__A);
-  b = (__vector unsigned short)vec_splats (__B);
+  __a = (__vector unsigned short)vec_splats (__A);
+  __b = (__vector unsigned short)vec_splats (__B);
 
-  w0 = vec_vmuleuh (a, b);
-  w1 = vec_vmulouh (a, b);
-  c = (__vector unsigned short)vec_perm (w0, w1, xform1);
+  __w0 = vec_vmuleuh (__a, __b);
+  __w1 = vec_vmulouh (__a, __b);
+  __c = (__vector unsigned short)vec_perm (__w0, __w1, __xform1);
 
-  return (__m64) ((__vector long long) c)[0];
+  return (__m64) ((__vector long long) __c)[0];
 }
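
`_mm_mulhi_pu16` above forms the even and odd 32-bit products (`vec_vmuleuh`/`vec_vmulouh`) and then permutes out their high halves. Per lane, the result is just the top 16 bits of an unsigned 16x16 multiply:

```
#include <stdint.h>
#include <stdio.h>

static uint16_t mulhi_u16 (uint16_t a, uint16_t b)
{
  return (uint16_t) (((uint32_t) a * b) >> 16);   /* keep bits 16..31 */
}

int main (void)
{
  printf ("%u\n", mulhi_u16 (0xffff, 0xffff));    /* 0xfffe0001 >> 16 = 65534 */
  return 0;
}
```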
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1644,11 +1643,11 @@  _m_pmulhuw (__m64 __A, __m64 __B)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_shuffle_pi16 (__m64 __A, int const __N)
 {
-  unsigned long element_selector_10 = __N & 0x03;
-  unsigned long element_selector_32 = (__N >> 2) & 0x03;
-  unsigned long element_selector_54 = (__N >> 4) & 0x03;
-  unsigned long element_selector_76 = (__N >> 6) & 0x03;
-  static const unsigned short permute_selectors[4] =
+  unsigned long __element_selector_10 = __N & 0x03;
+  unsigned long __element_selector_32 = (__N >> 2) & 0x03;
+  unsigned long __element_selector_54 = (__N >> 4) & 0x03;
+  unsigned long __element_selector_76 = (__N >> 6) & 0x03;
+  static const unsigned short __permute_selectors[4] =
     {
 #ifdef __LITTLE_ENDIAN__
 	      0x0908, 0x0B0A, 0x0D0C, 0x0F0E
@@ -1656,24 +1655,24 @@  _mm_shuffle_pi16 (__m64 __A, int const __N)
 	      0x0607, 0x0405, 0x0203, 0x0001
 #endif
     };
-  __m64_union t;
-  __vector unsigned long long a, p, r;
+  __m64_union __t;
+  __vector unsigned long long __a, __p, __r;
 
 #ifdef __LITTLE_ENDIAN__
-  t.as_short[0] = permute_selectors[element_selector_10];
-  t.as_short[1] = permute_selectors[element_selector_32];
-  t.as_short[2] = permute_selectors[element_selector_54];
-  t.as_short[3] = permute_selectors[element_selector_76];
+  __t.as_short[0] = __permute_selectors[__element_selector_10];
+  __t.as_short[1] = __permute_selectors[__element_selector_32];
+  __t.as_short[2] = __permute_selectors[__element_selector_54];
+  __t.as_short[3] = __permute_selectors[__element_selector_76];
 #else
-  t.as_short[3] = permute_selectors[element_selector_10];
-  t.as_short[2] = permute_selectors[element_selector_32];
-  t.as_short[1] = permute_selectors[element_selector_54];
-  t.as_short[0] = permute_selectors[element_selector_76];
+  __t.as_short[3] = __permute_selectors[__element_selector_10];
+  __t.as_short[2] = __permute_selectors[__element_selector_32];
+  __t.as_short[1] = __permute_selectors[__element_selector_54];
+  __t.as_short[0] = __permute_selectors[__element_selector_76];
 #endif
-  p = vec_splats (t.as_m64);
-  a = vec_splats (__A);
-  r = vec_perm (a, a, (__vector unsigned char)p);
-  return (__m64) ((__vector long long) r)[0];
+  __p = vec_splats (__t.as_m64);
+  __a = vec_splats (__A);
+  __r = vec_perm (__a, __a, (__vector unsigned char)__p);
+  return (__m64) ((__vector long long) __r)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1688,14 +1687,14 @@  _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
-  __m64 hibit = 0x8080808080808080UL;
-  __m64 mask, tmp;
-  __m64 *p = (__m64*)__P;
+  __m64 __hibit = 0x8080808080808080UL;
+  __m64 __mask, __tmp;
+  __m64 *__p = (__m64*)__P;
 
-  tmp = *p;
-  mask = _mm_cmpeq_pi8 ((__N & hibit), hibit);
-  tmp = (tmp & (~mask)) | (__A & mask);
-  *p = tmp;
+  __tmp = *__p;
+  __mask = _mm_cmpeq_pi8 ((__N & __hibit), __hibit);
+  __tmp = (__tmp & (~__mask)) | (__A & __mask);
+  *__p = __tmp;
 }
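
`_mm_maskmove_si64` above turns the per-byte sign bits of `__N` into an all-ones/all-zeros byte mask (via `_mm_cmpeq_pi8` against `0x80` after ANDing with `hibit`) and then merges `__A` into the destination with `(tmp & ~mask) | (A & mask)`. A stand-alone scalar model of that read-modify-write (helper name invented):

```
#include <stdint.h>
#include <stdio.h>

static void maskmove_model (uint64_t a, uint64_t n, uint64_t *p)
{
  uint64_t mask = 0;
  for (int i = 0; i < 8; i++)
    if ((n >> (8 * i + 7)) & 1)                /* byte i selected?       */
      mask |= (uint64_t) 0xff << (8 * i);      /* widen bit to the byte  */
  *p = (*p & ~mask) | (a & mask);              /* merge selected bytes   */
}

int main (void)
{
  uint64_t mem = 0x1111111111111111ULL;

  maskmove_model (0xaabbccddeeff0011ULL, 0x8000008000000080ULL, &mem);
  printf ("%016llx\n", (unsigned long long) mem);   /* aa1111dd11111111 */
  return 0;
}
```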
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1708,12 +1707,12 @@  _m_maskmovq (__m64 __A, __m64 __N, char *__P)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_avg_pu8 (__m64 __A, __m64 __B)
 {
-  __vector unsigned char a, b, c;
+  __vector unsigned char __a, __b, __c;
 
-  a = (__vector unsigned char)vec_splats (__A);
-  b = (__vector unsigned char)vec_splats (__B);
-  c = vec_avg (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned char)vec_splats (__A);
+  __b = (__vector unsigned char)vec_splats (__B);
+  __c = vec_avg (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1726,12 +1725,12 @@  _m_pavgb (__m64 __A, __m64 __B)
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_avg_pu16 (__m64 __A, __m64 __B)
 {
-  __vector unsigned short a, b, c;
+  __vector unsigned short __a, __b, __c;
 
-  a = (__vector unsigned short)vec_splats (__A);
-  b = (__vector unsigned short)vec_splats (__B);
-  c = vec_avg (a, b);
-  return (__m64) ((__vector long long) c)[0];
+  __a = (__vector unsigned short)vec_splats (__A);
+  __b = (__vector unsigned short)vec_splats (__B);
+  __c = vec_avg (__a, __b);
+  return (__m64) ((__vector long long) __c)[0];
 }
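
`_mm_avg_pu8`/`_mm_avg_pu16` above map directly onto `vec_avg`, which computes the rounded average `(a + b + 1) >> 1` per lane without losing the carry. A scalar equivalent for one unsigned byte lane:

```
#include <stdint.h>
#include <stdio.h>

static uint8_t avg_u8 (uint8_t a, uint8_t b)
{
  return (uint8_t) (((unsigned) a + b + 1) >> 1);   /* rounds .5 upward */
}

int main (void)
{
  printf ("%u %u\n", avg_u8 (1, 2), avg_u8 (254, 255));   /* 2 255 */
  return 0;
}
```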
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1746,26 +1745,26 @@  _m_pavgw (__m64 __A, __m64 __B)
 extern __inline    __m64    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sad_pu8 (__m64  __A, __m64  __B)
 {
-  __vector unsigned char a, b;
-  __vector unsigned char vmin, vmax, vabsdiff;
-  __vector signed int vsum;
-  const __vector unsigned int zero =
+  __vector unsigned char __a, __b;
+  __vector unsigned char __vmin, __vmax, __vabsdiff;
+  __vector signed int __vsum;
+  const __vector unsigned int __zero =
     { 0, 0, 0, 0 };
-  __m64_union result = {0};
+  __m64_union __result = {0};
 
-  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
-  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
-  vmin = vec_min (a, b);
-  vmax = vec_max (a, b);
-  vabsdiff = vec_sub (vmax, vmin);
+  __a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
+  __b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
+  __vmin = vec_min (__a, __b);
+  __vmax = vec_max (__a, __b);
+  __vabsdiff = vec_sub (__vmax, __vmin);
   /* Sum four groups of bytes into integers.  */
-  vsum = (__vector signed int) vec_sum4s (vabsdiff, zero);
+  __vsum = (__vector signed int) vec_sum4s (__vabsdiff, __zero);
   /* Sum across four integers with integer result.  */
-  vsum = vec_sums (vsum, (__vector signed int) zero);
+  __vsum = vec_sums (__vsum, (__vector signed int) __zero);
   /* The sum is in the right most 32-bits of the vector result.
      Transfer to a GPR and truncate to 16 bits.  */
-  result.as_short[0] = vsum[3];
-  return result.as_m64;
+  __result.as_short[0] = __vsum[3];
+  return __result.as_m64;
 }
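
`_mm_sad_pu8` above computes per-byte absolute differences as `max - min` (which cannot overflow for unsigned bytes), reduces them with `vec_sum4s`/`vec_sums`, and keeps the low 16 bits of the sum. A scalar model of the whole reduction (names invented here):

```
#include <stdint.h>
#include <stdio.h>

static uint16_t sad_pu8_model (const uint8_t a[8], const uint8_t b[8])
{
  unsigned sum = 0;
  for (int i = 0; i < 8; i++)
    {
      uint8_t lo = a[i] < b[i] ? a[i] : b[i];
      uint8_t hi = a[i] < b[i] ? b[i] : a[i];
      sum += hi - lo;                       /* |a[i] - b[i]|        */
    }
  return (uint16_t) sum;                    /* truncate to 16 bits  */
}

int main (void)
{
  uint8_t a[8] = { 0, 10, 20, 255, 4, 5, 6, 7 };
  uint8_t b[8] = { 5, 10, 25,   0, 4, 5, 6, 7 };

  printf ("%u\n", sad_pu8_model (a, b));    /* 5 + 0 + 5 + 255 = 265 */
  return 0;
}
```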
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))