[rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

Message ID 787f0d5d-a8f5-c513-997f-a5b906951da4@linux.ibm.com
State Under Review
Headers
Series [rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set |

Commit Message

HAO CHEN GUI Oct. 20, 2021, 9:04 a.m. UTC
  Hi,

This patch disables gimple folding for float or double vec_min/max when fast-math is not set. It makes vec_min/max conform with the guide.

Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

   I refined the patch according to reviewers' advice. The attachments are the ChangeLog and patch diff in case the email body is messed up.


ChangeLog

2021-10-20 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
         * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
         Disable gimple fold for VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
         VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math is not
         set.

gcc/testsuite/
         * gcc.target/powerpc/vec-minmax-1.c: New test.
         * gcc.target/powerpc/vec-minmax-2.c: Likewise.


patch.diff
2021-10-20 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	* config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
	Disable gimple fold for VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
	VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math is not
	set.

gcc/testsuite/
	* gcc.target/powerpc/vec-minmax-1.c: New test.
	* gcc.target/powerpc/vec-minmax-2.c: Likewise.
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..51c7ba447c3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12159,6 +12159,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       return true;
     /* flavors of vec_min.  */
     case VSX_BUILTIN_XVMINDP:
+    case ALTIVEC_BUILTIN_VMINFP:
+      {
+	lhs = gimple_call_lhs (stmt);
+	tree type = TREE_TYPE (lhs);
+	if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+	  return false;
+	gcc_fallthrough ();
+      }
     case P8V_BUILTIN_VMINSD:
     case P8V_BUILTIN_VMINUD:
     case ALTIVEC_BUILTIN_VMINSB:
@@ -12167,7 +12175,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case ALTIVEC_BUILTIN_VMINUB:
     case ALTIVEC_BUILTIN_VMINUH:
     case ALTIVEC_BUILTIN_VMINUW:
-    case ALTIVEC_BUILTIN_VMINFP:
       arg0 = gimple_call_arg (stmt, 0);
       arg1 = gimple_call_arg (stmt, 1);
       lhs = gimple_call_lhs (stmt);
@@ -12177,6 +12184,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       return true;
     /* flavors of vec_max.  */
     case VSX_BUILTIN_XVMAXDP:
+    case ALTIVEC_BUILTIN_VMAXFP:
+      {
+	lhs = gimple_call_lhs (stmt);
+	tree type = TREE_TYPE (lhs);
+	if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+	  return false;
+	gcc_fallthrough ();
+      }
     case P8V_BUILTIN_VMAXSD:
     case P8V_BUILTIN_VMAXUD:
     case ALTIVEC_BUILTIN_VMAXSB:
@@ -12185,7 +12200,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case ALTIVEC_BUILTIN_VMAXUB:
     case ALTIVEC_BUILTIN_VMAXUH:
     case ALTIVEC_BUILTIN_VMAXUW:
-    case ALTIVEC_BUILTIN_VMAXFP:
       arg0 = gimple_call_arg (stmt, 0);
       arg1 = gimple_call_arg (stmt, 1);
       lhs = gimple_call_lhs (stmt);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
new file mode 100644
index 00000000000..e238659c9be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-times {\mxvmaxdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmindp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvminsp\M} 1 } } */
+
+/* This test verifies that float or double vec_min/max are bound to
+   xv[min|max][d|s]p instructions when fast-math is not set.  */
+
+
+#include <altivec.h>
+
+#ifdef _BIG_ENDIAN
+   const int PREF_D = 0;
+#else
+   const int PREF_D = 1;
+#endif
+
+double vmaxd (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_max (va, vb), PREF_D);
+}
+
+double vmind (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_min (va, vb), PREF_D);
+}
+
+#ifdef _BIG_ENDIAN
+   const int PREF_F = 0;
+#else
+   const int PREF_F = 3;
+#endif
+
+float vmaxf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_max (va, vb), PREF_F);
+}
+
+float vminf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_min (va, vb), PREF_F);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
new file mode 100644
index 00000000000..149275d8709
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -ffast-math" } */
+/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxsmincdp\M} 2 } } */
+
+/* This test verifies that float or double vec_min/max can be converted
+   to scalar comparison when fast-math is set.  */
+
+
+#include <altivec.h>
+
+#ifdef _BIG_ENDIAN
+   const int PREF_D = 0;
+#else
+   const int PREF_D = 1;
+#endif
+
+double vmaxd (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_max (va, vb), PREF_D);
+}
+
+double vmind (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_min (va, vb), PREF_D);
+}
+
+#ifdef _BIG_ENDIAN
+   const int PREF_F = 0;
+#else
+   const int PREF_F = 3;
+#endif
+
+float vmaxf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_max (va, vb), PREF_F);
+}
+
+float vminf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_min (va, vb), PREF_F);
+}
  

Comments

Segher Boessenkool Oct. 20, 2021, 4:19 p.m. UTC | #1
Hi!

On Wed, Oct 20, 2021 at 05:04:56PM +0800, HAO CHEN GUI wrote:
> This patch disables gimple folding for float or double vec_min/max when fast-math is not set. It makes vec_min/max conform with the guide.
> 
> Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
> 
>   I refined the patch according to reviewers' advice. The attachments are 
> the ChangeLog and patch diff in case the email body is messed up.
> 
> 
> ChangeLog
> 
> 2021-10-20 Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
>         * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
>         Disable gimple fold for VSX_BUILTIN_XVMINDP, 
> ALTIVEC_BUILTIN_VMINFP,
>         VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math 
> is not
>         set.

Content-Type: text/plain; charset=UTF-8; format=flowed

Please don't use flowed.  It makes patches unreadable and unusable if
you do (they will not apply anymore).

Also, the left border should be one tab, not eight spaces, and the right
border is at 80 chars (so there are 72 usable chars on a line).  Don't
end a line in ":" if you don't overflow a line if you put text after it.

> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -12159,6 +12159,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
> *gsi)
>        return true;
>      /* flavors of vec_min.  */
>      case VSX_BUILTIN_XVMINDP:
> +    case ALTIVEC_BUILTIN_VMINFP:
> +      {
> +       lhs = gimple_call_lhs (stmt);
> +       tree type = TREE_TYPE (lhs);
> +       if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
> +         return false;
> +       gcc_fallthrough ();
> +      }

Both vminfp anf xvmindp (and xvminsp and xsmindp) return -0 or the
minimum of +0 and -0, that is okay even with HONOR_SIGNED_ZEROS, I
think?

x[sv]min[sd]p returns the number for the minimum of a NaN and a number,
but vminfp returns a NaN.  Do you really want to make the xvmindp
builtin handle less than it does currently?  And, what about vminfp?
Did tht do the wrong thing before?

There are no tests for any of that apparently.  Hrm.


Segher
  
HAO CHEN GUI Oct. 21, 2021, 6:25 a.m. UTC | #2
On 21/10/2021 上午 12:19, Segher Boessenkool wrote:
> Hi!
>
> On Wed, Oct 20, 2021 at 05:04:56PM +0800, HAO CHEN GUI wrote:
>> This patch disables gimple folding for float or double vec_min/max when fast-math is not set. It makes vec_min/max conform with the guide.
>>
>> Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>>   I refined the patch according to reviewers' advice. The attachments are 
>> the ChangeLog and patch diff in case the email body is messed up.
>>
>>
>> ChangeLog
>>
>> 2021-10-20 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>>         * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
>>         Disable gimple fold for VSX_BUILTIN_XVMINDP, 
>> ALTIVEC_BUILTIN_VMINFP,
>>         VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math 
>> is not
>>         set.
> Content-Type: text/plain; charset=UTF-8; format=flowed
>
> Please don't use flowed.  It makes patches unreadable and unusable if
> you do (they will not apply anymore).
>
> Also, the left border should be one tab, not eight spaces, and the right
> border is at 80 chars (so there are 72 usable chars on a line).  Don't
> end a line in ":" if you don't overflow a line if you put text after it.
I found the settings of the format in my client configuration. Thanks for reminder.
>> --- a/gcc/config/rs6000/rs6000-call.c
>> +++ b/gcc/config/rs6000/rs6000-call.c
>> @@ -12159,6 +12159,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
>> *gsi)
>>        return true;
>>      /* flavors of vec_min.  */
>>      case VSX_BUILTIN_XVMINDP:
>> +    case ALTIVEC_BUILTIN_VMINFP:
>> +      {
>> +       lhs = gimple_call_lhs (stmt);
>> +       tree type = TREE_TYPE (lhs);
>> +       if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
>> +         return false;
>> +       gcc_fallthrough ();
>> +      }
> Both vminfp anf xvmindp (and xvminsp and xsmindp) return -0 or the
> minimum of +0 and -0, that is okay even with HONOR_SIGNED_ZEROS, I
> think?
Yes, the HONOR_SIGNED_ZEROS is unnecessary.
> x[sv]min[sd]p returns the number for the minimum of a NaN and a number,
> but vminfp returns a NaN.  Do you really want to make the xvmindp
> builtin handle less than it does currently?  And, what about vminfp?
> Did tht do the wrong thing before?
x[sv]min[sd]p meets the requirement of IEEE std 754-2008 while xsmincdp doesn't. This patch prevents the builtin to be converted to xsmincdp. 

If the two elements in the vectors are the same, the vector comparison is optimized to scalar comparison.
MAX_EXPR <va_3, vb_5>
at vector low pass,
MAX_EXPR <a_2(D), b_4(D)>

On P9, the scalar comparison is implemented by xs[min|max]cdp.It doesn't conform with IEEE std 754-2008.
The puzzle here is that both x[sv]min[sd]p and xs[min|max]cdp doesn't conform the latest IEEE standard 754-2019 which says "return a quiet NaN if either operand is a NaN". 


The builtin would never be implemented by vminfp, I think.

Gui Haochen

>
> There are no tests for any of that apparently.  Hrm.
>
>
> Segher
  

Patch

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..51c7ba447c3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12159,6 +12159,14 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
        return true;
      /* flavors of vec_min.  */
      case VSX_BUILTIN_XVMINDP:
+    case ALTIVEC_BUILTIN_VMINFP:
+      {
+       lhs = gimple_call_lhs (stmt);
+       tree type = TREE_TYPE (lhs);
+       if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+         return false;
+       gcc_fallthrough ();
+      }
      case P8V_BUILTIN_VMINSD:
      case P8V_BUILTIN_VMINUD:
      case ALTIVEC_BUILTIN_VMINSB:
@@ -12167,7 +12175,6 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
      case ALTIVEC_BUILTIN_VMINUB:
      case ALTIVEC_BUILTIN_VMINUH:
      case ALTIVEC_BUILTIN_VMINUW:
-    case ALTIVEC_BUILTIN_VMINFP:
        arg0 = gimple_call_arg (stmt, 0);
        arg1 = gimple_call_arg (stmt, 1);
        lhs = gimple_call_lhs (stmt);
@@ -12177,6 +12184,14 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
        return true;
      /* flavors of vec_max.  */
      case VSX_BUILTIN_XVMAXDP:
+    case ALTIVEC_BUILTIN_VMAXFP:
+      {
+       lhs = gimple_call_lhs (stmt);
+       tree type = TREE_TYPE (lhs);
+       if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+         return false;
+       gcc_fallthrough ();
+      }
      case P8V_BUILTIN_VMAXSD:
      case P8V_BUILTIN_VMAXUD:
      case ALTIVEC_BUILTIN_VMAXSB:
@@ -12185,7 +12200,6 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
      case ALTIVEC_BUILTIN_VMAXUB:
      case ALTIVEC_BUILTIN_VMAXUH:
      case ALTIVEC_BUILTIN_VMAXUW:
-    case ALTIVEC_BUILTIN_VMAXFP:
        arg0 = gimple_call_arg (stmt, 0);
        arg1 = gimple_call_arg (stmt, 1);
        lhs = gimple_call_lhs (stmt);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
new file mode 100644
index 00000000000..e238659c9be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
@@ -0,0 +1,52 @@ 
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-times {\mxvmaxdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmindp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvminsp\M} 1 } } */
+
+/* This test verifies that float or double vec_min/max are bound to
+   xv[min|max][d|s]p instructions when fast-math is not set.  */
+
+
+#include <altivec.h>
+
+#ifdef _BIG_ENDIAN
+   const int PREF_D = 0;
+#else
+   const int PREF_D = 1;
+#endif
+
+double vmaxd (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_max (va, vb), PREF_D);
+}
+
+double vmind (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_min (va, vb), PREF_D);
+}
+
+#ifdef _BIG_ENDIAN
+   const int PREF_F = 0;
+#else
+   const int PREF_F = 3;
+#endif
+
+float vmaxf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_max (va, vb), PREF_F);
+}
+
+float vminf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_min (va, vb), PREF_F);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
new file mode 100644
index 00000000000..149275d8709
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
@@ -0,0 +1,50 @@ 
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -ffast-math" } */
+/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxsmincdp\M} 2 } } */
+
+/* This test verifies that float or double vec_min/max can be converted
+   to scalar comparison when fast-math is set.  */
+
+
+#include <altivec.h>
+
+#ifdef _BIG_ENDIAN
+   const int PREF_D = 0;
+#else
+   const int PREF_D = 1;
+#endif
+
+double vmaxd (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_max (va, vb), PREF_D);
+}
+
+double vmind (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_min (va, vb), PREF_D);
+}
+
+#ifdef _BIG_ENDIAN
+   const int PREF_F = 0;
+#else
+   const int PREF_F = 3;
+#endif
+
+float vmaxf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_max (va, vb), PREF_F);
+}
+
+float vminf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_min (va, vb), PREF_F);
+}