[PATCHv2,rs6000] Add V1TI into vector comparison expand [PR103316]

Message ID 3f35ec32-cb71-d827-02da-e4042091b8e5@linux.ibm.com
State New
Headers
Series [PATCHv2,rs6000] Add V1TI into vector comparison expand [PR103316] |

Commit Message

HAO CHEN GUI March 17, 2022, 5:35 a.m. UTC
  Hi,
   This patch adds V1TI mode into a new mode iterator used in vector
comparison expands. With the patch, both built-ins and direct comparison
could generate P10 new V1TI comparison instructions.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-03-16 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	PR target/103316
	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
	* config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10
	V1TI instructions.
	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
	(vec_cmpu<mode><mode>): Likewise.

gcc/testsuite/
	PR target/103316
	* gcc.target/powerpc/pr103316.c: New.
	* gcc.target/powerpc/fold-vec-cmp-int128.c: New cases for vector
	__int128.

patch.diff
  

Comments

will schmidt March 17, 2022, 10:03 p.m. UTC | #1
On Thu, 2022-03-17 at 13:35 +0800, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
>    This patch adds V1TI mode into a new mode iterator used in vector
> comparison expands. With the patch, both built-ins and direct
> comparison
> could generate P10 new V1TI comparison instructions.

Hi,

<snip from below>
-    /* We deliberately omit RS6000_BIF_CMPGE_1TI ...
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */


I assume it is the case, but don't see a before/after example to
clarify the situation. A clear statement that the 'worse code'
situation has been resolved with this addition of TI modes into the
iterators, would be good.

Otherwise lgtm.  :-)

Thanks,
-Will


> 
>    Bootstrapped and tested on ppc64 Linux BE and LE with no
> regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-03-16 Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
> 	PR target/103316
> 	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
> 	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
> 	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
> 	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
> 	* config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10
> 	V1TI instructions.
> 	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
> 	(vec_cmpu<mode><mode>): Likewise.
> 
> gcc/testsuite/
> 	PR target/103316
> 	* gcc.target/powerpc/pr103316.c: New.
> 	* gcc.target/powerpc/fold-vec-cmp-int128.c: New cases for vector
> 	__int128.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc
> b/gcc/config/rs6000/rs6000-builtin.cc
> index 5d34c1bcfc9..fac7f43f438 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1994,16 +1994,14 @@ rs6000_gimple_fold_builtin
> (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPEQUH:
>      case RS6000_BIF_VCMPEQUW:
>      case RS6000_BIF_VCMPEQUD:
> -    /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because
> gimple
> -       folding produces worse code for 128-bit compares.  */
> +    case RS6000_BIF_VCMPEQUT:
>        fold_compare_helper (gsi, EQ_EXPR, stmt);
>        return true;
> 
>      case RS6000_BIF_VCMPNEB:
>      case RS6000_BIF_VCMPNEH:
>      case RS6000_BIF_VCMPNEW:
> -    /* We deliberately omit RS6000_BIF_VCMPNET for now, because
> gimple
> -       folding produces worse code for 128-bit compares.  */
> +    case RS6000_BIF_VCMPNET:
>        fold_compare_helper (gsi, NE_EXPR, stmt);
>        return true;
> 
> @@ -2015,9 +2013,8 @@ rs6000_gimple_fold_builtin
> (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPGE_U4SI:
>      case RS6000_BIF_CMPGE_2DI:
>      case RS6000_BIF_CMPGE_U2DI:
> -    /* We deliberately omit RS6000_BIF_CMPGE_1TI and
> RS6000_BIF_CMPGE_U1TI
> -       for now, because gimple folding produces worse code for 128-
> bit
> -       compares.  */
> +    case RS6000_BIF_CMPGE_1TI:
> +    case RS6000_BIF_CMPGE_U1TI:
>        fold_compare_helper (gsi, GE_EXPR, stmt);
>        return true;
> 
> @@ -2029,9 +2026,8 @@ rs6000_gimple_fold_builtin
> (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPGTUW:
>      case RS6000_BIF_VCMPGTUD:
>      case RS6000_BIF_VCMPGTSD:
> -    /* We deliberately omit RS6000_BIF_VCMPGTUT and
> RS6000_BIF_VCMPGTST
> -       for now, because gimple folding produces worse code for 128-
> bit
> -       compares.  */
> +    case RS6000_BIF_VCMPGTUT:
> +    case RS6000_BIF_VCMPGTST:
>        fold_compare_helper (gsi, GT_EXPR, stmt);
>        return true;
> 
> @@ -2043,9 +2039,8 @@ rs6000_gimple_fold_builtin
> (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPLE_U4SI:
>      case RS6000_BIF_CMPLE_2DI:
>      case RS6000_BIF_CMPLE_U2DI:
> -    /* We deliberately omit RS6000_BIF_CMPLE_1TI and
> RS6000_BIF_CMPLE_U1TI
> -       for now, because gimple folding produces worse code for 128-
> bit
> -       compares.  */
> +    case RS6000_BIF_CMPLE_1TI:
> +    case RS6000_BIF_CMPLE_U1TI:
>        fold_compare_helper (gsi, LE_EXPR, stmt);
>        return true;
> 
> diff --git a/gcc/config/rs6000/vector.md
> b/gcc/config/rs6000/vector.md
> index b87a742cca8..d88869cc8d0 100644
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -26,6 +26,9 @@
>  ;; Vector int modes
>  (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
> 
> +;; Vector int modes for comparison
> +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI
> "TARGET_POWER10")])
> +
>  ;; 128-bit int modes
>  (define_mode_iterator VEC_TI [V1TI TI])
> 
> @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>"
> 
>  ;; For signed integer vectors comparison.
>  (define_expand "vec_cmp<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "signed_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    enum rtx_code code = GET_CODE (operands[1]);
> @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>"
> 
>  ;; For unsigned integer vectors comparison.
>  (define_expand "vec_cmpu<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "unsigned_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    enum rtx_code code = GET_CODE (operands[1]);
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
> new file mode 100644
> index 00000000000..1a4db0f45d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
> @@ -0,0 +1,86 @@
> +/* Verify that overloaded built-ins for vec_cmp with __int128
> +   inputs produce the right code.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>
> +
> +vector bool __int128
> +test3_eq (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpeq (x, y);
> +}
> +
> +vector bool __int128
> +test6_eq (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpeq (x, y);
> +}
> +
> +vector bool __int128
> +test3_ge (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpge (x, y);
> +}
> +
> +vector bool __int128
> +test6_ge (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpge (x, y);
> +}
> +
> +vector bool __int128
> +test3_gt (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpgt (x, y);
> +}
> +
> +vector bool __int128
> +test6_gt (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpgt (x, y);
> +}
> +
> +vector bool __int128
> +test3_le (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmple (x, y);
> +}
> +
> +vector bool __int128
> +test6_le (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmple (x, y);
> +}
> +
> +vector bool __int128
> +test3_lt (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmplt (x, y);
> +}
> +
> +vector bool __int128
> +test6_lt (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmplt (x, y);
> +}
> +
> +vector bool __int128
> +test3_ne (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpne (x, y);
> +}
> +
> +vector bool __int128
> +test6_ne (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpne (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
> +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
> +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
> +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c
> b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> new file mode 100644
> index 00000000000..02f7dc5ca1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> @@ -0,0 +1,80 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +vector bool __int128
> +test_eq (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a == b;
> +}
> +
> +vector bool __int128
> +test_ne (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a != b;
> +}
> +
> +vector bool __int128
> +test_gt (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a > b;
> +}
> +
> +vector bool __int128
> +test_ge (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a >= b;
> +}
> +
> +vector bool __int128
> +test_lt (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a < b;
> +}
> +
> +vector bool __int128
> +test_le (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a <= b;
> +}
> +
> +vector bool __int128
> +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a == b;
> +}
> +
> +vector bool __int128
> +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a != b;
> +}
> +
> +vector bool __int128
> +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a > b;
> +}
> +
> +vector bool __int128
> +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a >= b;
> +}
> +
> +vector bool __int128
> +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a < b;
> +}
> +
> +vector bool __int128
> +testu_le (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a <= b;
> +}
> +
> +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
> +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
> +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
> +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
  

Patch

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 5d34c1bcfc9..fac7f43f438 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1994,16 +1994,14 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPEQUH:
     case RS6000_BIF_VCMPEQUW:
     case RS6000_BIF_VCMPEQUD:
-    /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
-       folding produces worse code for 128-bit compares.  */
+    case RS6000_BIF_VCMPEQUT:
       fold_compare_helper (gsi, EQ_EXPR, stmt);
       return true;

     case RS6000_BIF_VCMPNEB:
     case RS6000_BIF_VCMPNEH:
     case RS6000_BIF_VCMPNEW:
-    /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
-       folding produces worse code for 128-bit compares.  */
+    case RS6000_BIF_VCMPNET:
       fold_compare_helper (gsi, NE_EXPR, stmt);
       return true;

@@ -2015,9 +2013,8 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_CMPGE_U4SI:
     case RS6000_BIF_CMPGE_2DI:
     case RS6000_BIF_CMPGE_U2DI:
-    /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */
+    case RS6000_BIF_CMPGE_1TI:
+    case RS6000_BIF_CMPGE_U1TI:
       fold_compare_helper (gsi, GE_EXPR, stmt);
       return true;

@@ -2029,9 +2026,8 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPGTUW:
     case RS6000_BIF_VCMPGTUD:
     case RS6000_BIF_VCMPGTSD:
-    /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */
+    case RS6000_BIF_VCMPGTUT:
+    case RS6000_BIF_VCMPGTST:
       fold_compare_helper (gsi, GT_EXPR, stmt);
       return true;

@@ -2043,9 +2039,8 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_CMPLE_U4SI:
     case RS6000_BIF_CMPLE_2DI:
     case RS6000_BIF_CMPLE_U2DI:
-    /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */
+    case RS6000_BIF_CMPLE_1TI:
+    case RS6000_BIF_CMPLE_U1TI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index b87a742cca8..d88869cc8d0 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@ 
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])

+;; Vector int modes for comparison
+(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
+
 ;; 128-bit int modes
 (define_mode_iterator VEC_TI [V1TI TI])

@@ -533,10 +536,10 @@  (define_expand "vcond_mask_<mode><VEC_int>"

 ;; For signed integer vectors comparison.
 (define_expand "vec_cmp<mode><mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
 	(match_operator 1 "signed_or_equality_comparison_operator"
-	  [(match_operand:VEC_I 2 "vint_operand")
-	   (match_operand:VEC_I 3 "vint_operand")]))]
+	  [(match_operand:VEC_IC 2 "vint_operand")
+	   (match_operand:VEC_IC 3 "vint_operand")]))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
@@ -573,10 +576,10 @@  (define_expand "vec_cmp<mode><mode>"

 ;; For unsigned integer vectors comparison.
 (define_expand "vec_cmpu<mode><mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
 	(match_operator 1 "unsigned_or_equality_comparison_operator"
-	  [(match_operand:VEC_I 2 "vint_operand")
-	   (match_operand:VEC_I 3 "vint_operand")]))]
+	  [(match_operand:VEC_IC 2 "vint_operand")
+	   (match_operand:VEC_IC 3 "vint_operand")]))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
new file mode 100644
index 00000000000..1a4db0f45d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
@@ -0,0 +1,86 @@ 
+/* Verify that overloaded built-ins for vec_cmp with __int128
+   inputs produce the right code.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector bool __int128
+test3_eq (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpeq (x, y);
+}
+
+vector bool __int128
+test6_eq (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpeq (x, y);
+}
+
+vector bool __int128
+test3_ge (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpge (x, y);
+}
+
+vector bool __int128
+test6_ge (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpge (x, y);
+}
+
+vector bool __int128
+test3_gt (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpgt (x, y);
+}
+
+vector bool __int128
+test6_gt (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpgt (x, y);
+}
+
+vector bool __int128
+test3_le (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmple (x, y);
+}
+
+vector bool __int128
+test6_le (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmple (x, y);
+}
+
+vector bool __int128
+test3_lt (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmplt (x, y);
+}
+
+vector bool __int128
+test6_lt (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmplt (x, y);
+}
+
+vector bool __int128
+test3_ne (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpne (x, y);
+}
+
+vector bool __int128
+test6_ne (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c
new file mode 100644
index 00000000000..02f7dc5ca1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
@@ -0,0 +1,80 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+vector bool __int128
+test_eq (vector signed __int128 a, vector signed __int128 b)
+{
+  return a == b;
+}
+
+vector bool __int128
+test_ne (vector signed __int128 a, vector signed __int128 b)
+{
+  return a != b;
+}
+
+vector bool __int128
+test_gt (vector signed __int128 a, vector signed __int128 b)
+{
+  return a > b;
+}
+
+vector bool __int128
+test_ge (vector signed __int128 a, vector signed __int128 b)
+{
+  return a >= b;
+}
+
+vector bool __int128
+test_lt (vector signed __int128 a, vector signed __int128 b)
+{
+  return a < b;
+}
+
+vector bool __int128
+test_le (vector signed __int128 a, vector signed __int128 b)
+{
+  return a <= b;
+}
+
+vector bool __int128
+testu_eq (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a == b;
+}
+
+vector bool __int128
+testu_ne (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a != b;
+}
+
+vector bool __int128
+testu_gt (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a > b;
+}
+
+vector bool __int128
+testu_ge (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a >= b;
+}
+
+vector bool __int128
+testu_lt (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a < b;
+}
+
+vector bool __int128
+testu_le (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a <= b;
+}
+
+/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */