[rs6000] Optimization for vec_xl_sext

Message ID e319e90f-817f-55c0-4f10-c48e116212a3@linux.ibm.com
State New
Headers
Series [rs6000] Optimization for vec_xl_sext |

Commit Message

HAO CHEN GUI Nov. 16, 2021, 2:16 a.m. UTC
  Hi,

   The patch optimizes the code generation for the vec_xl_sext builtin. All sign extensions are now done directly on VSX registers.

   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog

2021-11-16 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
        * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
        the expansion for sign extension. All extensions are done on VSX
        registers.

gcc/testsuite/
        * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
 
   if (sign_extend)
     {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
       rtx tiscratch = gen_reg_rtx (TImode);
 
       /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
 	return 0;
       emit_insn (pat);
 
-      /* Emit a sign extension from QI,HI,WI to double (DI).  */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+      rtx temp1, temp2;
       if (icode == CODE_FOR_vsx_lxvrbx)
-	emit_insn (gen_extendqidi2 (discratch, scratch));
+	{
+	  temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+	  emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+	}
       else if (icode == CODE_FOR_vsx_lxvrhx)
-	emit_insn (gen_extendhidi2 (discratch, scratch));
+	{
+	  temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+	  emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+	}
       else if (icode == CODE_FOR_vsx_lxvrwx)
-	emit_insn (gen_extendsidi2 (discratch, scratch));
-      /*  Assign discratch directly if scratch is already DI.  */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-	discratch = scratch;
+	{
+	  temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+	  emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+	}
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+	discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+	gcc_unreachable ();
 
-      /* Emit the sign extension from DI (double) to TI (quad).  */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));
 
       return target;
     }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
2021-11-16 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
	the expansion for sign extension. All extensions are done on VSX
	registers.

gcc/testsuite/
	* gcc.target/powerpc/p10_vec_xl_sext.c: New test.
  

Comments

Bill Schmidt via Gcc-patches Nov. 16, 2021, 1:10 p.m. UTC | #1
Hi Hao Chen,

I don't understand.  This patch was already approved and you committed it. :-)  I know
because I needed to make corresponding adjustments to the new builtins code.

Thanks,
Bill

On 11/15/21 8:16 PM, HAO CHEN GUI wrote:
> Hi,
>
>    The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly.
>
>    Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>
> ChangeLog
>
> 2021-11-16 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
>         * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
>         the expansion for sign extension. All extensions are done on VSX
>         registers.
>
> gcc/testsuite/
>         * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b4e13af4dc6..587e9fa2a2a 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
>
>    if (sign_extend)
>      {
> -      rtx discratch = gen_reg_rtx (DImode);
> +      rtx discratch = gen_reg_rtx (V2DImode);
>        rtx tiscratch = gen_reg_rtx (TImode);
>
>        /* Emit the lxvr*x insn.  */
> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
>         return 0;
>        emit_insn (pat);
>
> -      /* Emit a sign extension from QI,HI,WI to double (DI).  */
> -      rtx scratch = gen_lowpart (smode, tiscratch);
> +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
> +      rtx temp1, temp2;
>        if (icode == CODE_FOR_vsx_lxvrbx)
> -       emit_insn (gen_extendqidi2 (discratch, scratch));
> +       {
> +         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
> +         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
> +       }
>        else if (icode == CODE_FOR_vsx_lxvrhx)
> -       emit_insn (gen_extendhidi2 (discratch, scratch));
> +       {
> +         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
> +         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
> +       }
>        else if (icode == CODE_FOR_vsx_lxvrwx)
> -       emit_insn (gen_extendsidi2 (discratch, scratch));
> -      /*  Assign discratch directly if scratch is already DI.  */
> -      if (icode == CODE_FOR_vsx_lxvrdx)
> -       discratch = scratch;
> +       {
> +         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
> +         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
> +       }
> +      else if (icode == CODE_FOR_vsx_lxvrdx)
> +       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
> +      else
> +       gcc_unreachable ();
>
> -      /* Emit the sign extension from DI (double) to TI (quad).  */
> -      emit_insn (gen_extendditi2 (target, discratch));
> +      /* Emit the sign extension from V2DI (double) to TI (quad).  */
> +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
> +      emit_insn (gen_extendditi2_vector (target, temp2));
>
>        return target;
>      }
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> new file mode 100644
> index 00000000000..78e72ac5425
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed __int128
> +foo1 (signed long a, signed char *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo2 (signed long a, signed short *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo3 (signed long a, signed int *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo4 (signed long a, signed long *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>
  
HAO CHEN GUI Nov. 17, 2021, 1:44 a.m. UTC | #2
Bill,

    Sorry, I mixed up the patches. There is one vec_reve patch which hasn't gotten approval for a long time. I will re-send it.  Thanks a lot.

On 16/11/2021 下午 9:10, Bill Schmidt wrote:
> Hi Hao Chen,
>
> I don't understand.  This patch was already approved and you committed it. :-)  I know
> because I needed to make corresponding adjustments to the new builtins code.
>
> Thanks,
> Bill
>
> On 11/15/21 8:16 PM, HAO CHEN GUI wrote:
>> Hi,
>>
>>    The patch optimizes the code generation for vec_xl_sext builtin. Now all the sign extensions are done on VSX registers directly.
>>
>>    Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>>
>> 2021-11-16 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>>         * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
>>         the expansion for sign extension. All extensions are done on VSX
>>         registers.
>>
>> gcc/testsuite/
>>         * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>
>> patch.diff
>>
>> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
>> index b4e13af4dc6..587e9fa2a2a 100644
>> --- a/gcc/config/rs6000/rs6000-call.c
>> +++ b/gcc/config/rs6000/rs6000-call.c
>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
>>
>>    if (sign_extend)
>>      {
>> -      rtx discratch = gen_reg_rtx (DImode);
>> +      rtx discratch = gen_reg_rtx (V2DImode);
>>        rtx tiscratch = gen_reg_rtx (TImode);
>>
>>        /* Emit the lxvr*x insn.  */
>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
>>         return 0;
>>        emit_insn (pat);
>>
>> -      /* Emit a sign extension from QI,HI,WI to double (DI).  */
>> -      rtx scratch = gen_lowpart (smode, tiscratch);
>> +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
>> +      rtx temp1, temp2;
>>        if (icode == CODE_FOR_vsx_lxvrbx)
>> -       emit_insn (gen_extendqidi2 (discratch, scratch));
>> +       {
>> +         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>> +         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>> +       }
>>        else if (icode == CODE_FOR_vsx_lxvrhx)
>> -       emit_insn (gen_extendhidi2 (discratch, scratch));
>> +       {
>> +         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>> +         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>> +       }
>>        else if (icode == CODE_FOR_vsx_lxvrwx)
>> -       emit_insn (gen_extendsidi2 (discratch, scratch));
>> -      /*  Assign discratch directly if scratch is already DI.  */
>> -      if (icode == CODE_FOR_vsx_lxvrdx)
>> -       discratch = scratch;
>> +       {
>> +         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>> +         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>> +       }
>> +      else if (icode == CODE_FOR_vsx_lxvrdx)
>> +       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>> +      else
>> +       gcc_unreachable ();
>>
>> -      /* Emit the sign extension from DI (double) to TI (quad).  */
>> -      emit_insn (gen_extendditi2 (target, discratch));
>> +      /* Emit the sign extension from V2DI (double) to TI (quad).  */
>> +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>> +      emit_insn (gen_extendditi2_vector (target, temp2));
>>
>>        return target;
>>      }
>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> new file mode 100644
>> index 00000000000..78e72ac5425
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> @@ -0,0 +1,35 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target int128 } */
>> +/* { dg-require-effective-target power10_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>> +
>> +#include <altivec.h>
>> +
>> +vector signed __int128
>> +foo1 (signed long a, signed char *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo2 (signed long a, signed short *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo3 (signed long a, signed int *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo4 (signed long a, signed long *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
>> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
>> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>>
  

Patch

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@  altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl

   if (sign_extend)
     {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
       rtx tiscratch = gen_reg_rtx (TImode);

       /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@  altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
        return 0;
       emit_insn (pat);

-      /* Emit a sign extension from QI,HI,WI to double (DI).  */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+      rtx temp1, temp2;
       if (icode == CODE_FOR_vsx_lxvrbx)
-       emit_insn (gen_extendqidi2 (discratch, scratch));
+       {
+         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+       }
       else if (icode == CODE_FOR_vsx_lxvrhx)
-       emit_insn (gen_extendhidi2 (discratch, scratch));
+       {
+         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+       }
       else if (icode == CODE_FOR_vsx_lxvrwx)
-       emit_insn (gen_extendsidi2 (discratch, scratch));
-      /*  Assign discratch directly if scratch is already DI.  */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-       discratch = scratch;
+       {
+         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+       }
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+       gcc_unreachable ();

-      /* Emit the sign extension from DI (double) to TI (quad).  */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));

       return target;
     }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */