[i386] Prefer INT_SSE_REGS for SSE_FLOAT_MODE_P in preferred_reload_class.

Message ID 20211203061855.32518-1-hongtao.liu@intel.com
State New
Headers
Series [i386] Prefer INT_SSE_REGS for SSE_FLOAT_MODE_P in preferred_reload_class. |

Commit Message

Liu, Hongtao Dec. 3, 2021, 6:18 a.m. UTC
  Hi:
> Please also consider TARGET_INTER_UNIT_MOVES_TO_VEC and
> TARGET_INTER_UNIT_MOVES_FROM_VEC.
Here's updated patch.

Also honor TARGET_INTER_UNIT_MOVES_TO/FROM_VEC in
preferred_{,output_}reload_class.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32\ -march=k8,\ -march=k8}.
Ok?

gcc/ChangeLog:

	PR target/95740
	* config/i386/i386.c (ix86_preferred_output_reload_class):
	Don't reload integer register to/from SSE register when tune
	"inter_unit_moves_to/from_vec" is off.
	(ix86_preferred_reload_class): Ditto, also prefer
	INT_SSE_REGS for SSE_FLOAT_MODE_P.
	* config/i386/i386.h (INT_SSE_CLASS_P): New.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr95740.c: New test.
---
 gcc/config/i386/i386.c                  | 32 +++++++++++++++++++++++--
 gcc/config/i386/i386.h                  |  2 ++
 gcc/testsuite/gcc.target/i386/pr95740.c | 26 ++++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95740.c
  

Comments

Uros Bizjak Dec. 3, 2021, 9:43 a.m. UTC | #1
On Fri, Dec 3, 2021 at 7:19 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Hi:
> > Please also consider TARGET_INTER_UNIT_MOVES_TO_VEC and
> > TARGET_INTER_UNIT_MOVES_FROM_VEC.
> Here's updated patch.
>
> Also honor TARGET_INTER_UNIT_MOVES_TO/FROM_VEC and in
> preferred_{,out_}reload_class.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32\ -march=k8,\ -march=k8}.
> Ok?
>
> gcc/ChangeLog:
>
>         PR target/95740
>         * config/i386/i386.c (ix86_preferred_output_reload_class):
>         don't reload integer register to/from sse register when tune
>         "inter_unit_moves_to/from_vec" is off.
>         (ix86_preferred_reload_class): Ditto, also prefer
>         INT_SSE_REGS for SSE_FLOAT_MODE_P.
>         * config/i386/i386.h (INT_SSE_CLASS_P): New.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr95740.c: New test.

I was thinking about:

--cut here--
@@ -19194,9 +19194,17 @@ ix86_preferred_reload_class (rtx x,
reg_class_t regclass)
      return NO_REGS;
    }

-  /* Prefer SSE regs only, if we can use them for math.  */
+  /* Prefer SSE if we can use them for math.  Also allow integer regs
+     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
-    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
+    {
+      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
+         && TARGET_INTER_UNIT_MOVES_TO_VEC
+         && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
+       return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
+      else
+       return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
+    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
--cut here--

So, INT_SSE class is allowed when interunit moves are enabled. The
patch also takes care of 64-bit moves, which are expensive on 32-bit
targets.

Uros.

> ---
>  gcc/config/i386/i386.c                  | 32 +++++++++++++++++++++++--
>  gcc/config/i386/i386.h                  |  2 ++
>  gcc/testsuite/gcc.target/i386/pr95740.c | 26 ++++++++++++++++++++
>  3 files changed, 58 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95740.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 80fee627358..5b90c09a0ba 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19194,9 +19194,24 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
>        return NO_REGS;
>      }
>
> -  /* Prefer SSE regs only, if we can use them for math.  */
> +  /* Unless hard register REGNO is known, it is hard to to tell whether a movd
> +     instruction will be generated based on MODE and REGCLASS, because for
> +     pseudo-registers, even SFmode could be assigned to INTGER_CLASS_P.  */
> +  if (GENERAL_REG_P (x)
> +      && !TARGET_INTER_UNIT_MOVES_TO_VEC
> +      && MAYBE_SSE_CLASS_P (regclass))
> +    return NO_REGS;
> +
> +  if (SSE_REG_P (x)
> +      && !TARGET_INTER_UNIT_MOVES_FROM_VEC
> +      && MAYBE_INTEGER_CLASS_P (regclass))
> +    return NO_REGS;
> +
> +  /* Prefer INT_SSE_REGS, enable reload from SSE register to GENERAL_REGS,
> +     MAYBE_SSE_CLASS_P is too broad, for sse math, FLOAT_SSE_REGS,
> +     FLOAT_INT_SSE_REGS should be disliked.  */
>    if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> -    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
> +    return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
>
>    /* Generally when we see PLUS here, it's the function invariant
>       (plus soft-fp const_int).  Which can only be computed into general
> @@ -19226,6 +19241,19 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
>  static reg_class_t
>  ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
>  {
> +
> +  /* Handle movement between integer and sse register like
> +     ix86_preferred_reload_class.  */
> +  if (GENERAL_REG_P (x)
> +      && !TARGET_INTER_UNIT_MOVES_TO_VEC
> +      && MAYBE_SSE_CLASS_P (regclass))
> +    return NO_REGS;
> +
> +  if (SSE_REG_P (x)
> +      && !TARGET_INTER_UNIT_MOVES_FROM_VEC
> +      && MAYBE_INTEGER_CLASS_P (regclass))
> +    return NO_REGS;
> +
>    /* Restrict the output reload class to the register bank that we are doing
>       math on.  If we would like not to return a subset of CLASS, reject this
>       alternative: if reload cannot do this, it will still use its choice.  */
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 2fda1e0686e..ec90e47904b 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1283,6 +1283,8 @@ enum reg_class
>    reg_class_subset_p ((CLASS), FLOAT_REGS)
>  #define SSE_CLASS_P(CLASS) \
>    reg_class_subset_p ((CLASS), ALL_SSE_REGS)
> +#define INT_SSE_CLASS_P(CLASS) \
> +  reg_class_subset_p ((CLASS), INT_SSE_REGS)
>  #define MMX_CLASS_P(CLASS) \
>    ((CLASS) == MMX_REGS)
>  #define MASK_CLASS_P(CLASS) \
> diff --git a/gcc/testsuite/gcc.target/i386/pr95740.c b/gcc/testsuite/gcc.target/i386/pr95740.c
> new file mode 100644
> index 00000000000..9bc7b862787
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95740.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-msse2 -O2 -mtune-ctrl=use_incdec -masm=att -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-times {(?n)movd[\t ]*%xmm0.*%eax} 1 } } */
> +/* { dg-final { scan-assembler-times {(?n)incl[\t ]*%eax} 1 } } */
> +/* { dg-final { scan-assembler-times {(?n)movq[\t ]*%xmm0.*%rax} 1 } } */
> +/* { dg-final { scan-assembler-times {(?n)incq[\t ]*%rax} 1 } } */
> +
> +int
> +foo (float a)
> +{
> +  union{
> +    int b;
> +    float a;}u;
> +  u.a = a;
> +  return u.b + 1;
> +}
> +
> +long long
> +foo1 (double a)
> +{
> +  union{
> +    long long b;
> +    double a;}u;
> +  u.a = a;
> +  return u.b + 1;
> +}
> --
> 2.18.1
>
  

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 80fee627358..5b90c09a0ba 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19194,9 +19194,24 @@  ix86_preferred_reload_class (rtx x, reg_class_t regclass)
       return NO_REGS;
     }
 
-  /* Prefer SSE regs only, if we can use them for math.  */
+  /* Unless hard register REGNO is known, it is hard to tell whether a movd
+     instruction will be generated based on MODE and REGCLASS, because for
+     pseudo-registers, even SFmode could be assigned to INTEGER_CLASS_P.  */
+  if (GENERAL_REG_P (x)
+      && !TARGET_INTER_UNIT_MOVES_TO_VEC
+      && MAYBE_SSE_CLASS_P (regclass))
+    return NO_REGS;
+
+  if (SSE_REG_P (x)
+      && !TARGET_INTER_UNIT_MOVES_FROM_VEC
+      && MAYBE_INTEGER_CLASS_P (regclass))
+    return NO_REGS;
+
+  /* Prefer INT_SSE_REGS; this enables reload from SSE register to
+     GENERAL_REGS.  MAYBE_SSE_CLASS_P is too broad: for SSE math,
+     FLOAT_SSE_REGS and FLOAT_INT_SSE_REGS should be disliked.  */
   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
-    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
+    return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
 
   /* Generally when we see PLUS here, it's the function invariant
      (plus soft-fp const_int).  Which can only be computed into general
@@ -19226,6 +19241,19 @@  ix86_preferred_reload_class (rtx x, reg_class_t regclass)
 static reg_class_t
 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
 {
+
+  /* Handle movement between integer and sse register like
+     ix86_preferred_reload_class.  */
+  if (GENERAL_REG_P (x)
+      && !TARGET_INTER_UNIT_MOVES_TO_VEC
+      && MAYBE_SSE_CLASS_P (regclass))
+    return NO_REGS;
+
+  if (SSE_REG_P (x)
+      && !TARGET_INTER_UNIT_MOVES_FROM_VEC
+      && MAYBE_INTEGER_CLASS_P (regclass))
+    return NO_REGS;
+
   /* Restrict the output reload class to the register bank that we are doing
      math on.  If we would like not to return a subset of CLASS, reject this
      alternative: if reload cannot do this, it will still use its choice.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2fda1e0686e..ec90e47904b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1283,6 +1283,8 @@  enum reg_class
   reg_class_subset_p ((CLASS), FLOAT_REGS)
 #define SSE_CLASS_P(CLASS) \
   reg_class_subset_p ((CLASS), ALL_SSE_REGS)
+#define INT_SSE_CLASS_P(CLASS) \
+  reg_class_subset_p ((CLASS), INT_SSE_REGS)
 #define MMX_CLASS_P(CLASS) \
   ((CLASS) == MMX_REGS)
 #define MASK_CLASS_P(CLASS) \
diff --git a/gcc/testsuite/gcc.target/i386/pr95740.c b/gcc/testsuite/gcc.target/i386/pr95740.c
new file mode 100644
index 00000000000..9bc7b862787
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95740.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse2 -O2 -mtune-ctrl=use_incdec -masm=att -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)movd[\t ]*%xmm0.*%eax} 1 } } */
+/* { dg-final { scan-assembler-times {(?n)incl[\t ]*%eax} 1 } } */
+/* { dg-final { scan-assembler-times {(?n)movq[\t ]*%xmm0.*%rax} 1 } } */
+/* { dg-final { scan-assembler-times {(?n)incq[\t ]*%rax} 1 } } */
+
+int
+foo (float a)
+{
+  union{
+    int b;
+    float a;}u;
+  u.a = a;
+  return u.b + 1;
+}
+
+long long
+foo1 (double a)
+{
+  union{
+    long long b;
+    double a;}u;
+  u.a = a;
+  return u.b + 1;
+}