i386: Improve HImode interunit moves

Message ID CAFULd4bBD7UB9sxAiyFNwvOPAN0ynGjrerUq4GtrOXiSA4mHGQ@mail.gmail.com
State Committed
Commit 0d03db068c89d46bc62ec79395708fd19f09d3d8
Headers
Series i386: Improve HImode interunit moves |

Commit Message

Uros Bizjak Jan. 6, 2022, 7:04 p.m. UTC
  Currently, the compiler moves HImode values between GPR and XMM registers with:

    %vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}
    %vpextrw\t{$0, %1, %k0|%k0, %1, 0}

but it could use the slightly faster and shorter instructions:

    %vmovd\t{%k1, %0|%0, %k1}
    %vmovd\t{%1, %k0|%k0, %1}

2022-01-06  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    * config/i386/i386.c (ix86_output_ssemov) <MODE_DI>:
    Add %q modifier for operands in general registers.
    <MODE_SI>: Add %q modifier for operands in general registers.
    * config/i386/i386.md (*movhi_internal): Change type attribute of
    xmm-gpr interunit alternatives 9,10 to ssemov and mode attribute
    to SImode for non-avx512fp16 targets.
    (*movhf_internal): Ditto for xmm-gpr interunit alternatives 6,8.
    * config/i386/mmx.md (*movv2qi_internal):
    Ditto for xmm-gpr interunit alternatives 8,9.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr102811-2.c (dg-final):
    Update scan-assembler-times directives.
    * gcc.target/i386/sse2-float16-2.c (dg-final):
    Update scan-assembler directives.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
  

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1a964fe00f4..aeb7db5a5e3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5535,15 +5535,30 @@  ix86_output_ssemov (rtx_insn *insn, rtx *operands)
 
     case MODE_DI:
       /* Handle broken assemblers that require movd instead of movq. */
-      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	  && (GENERAL_REG_P (operands[0])
-	      || GENERAL_REG_P (operands[1])))
-	return "%vmovd\t{%1, %0|%0, %1}";
+      if (GENERAL_REG_P (operands[0]))
+	{
+	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
+	    return "%vmovq\t{%1, %q0|%q0, %1}";
+	  else
+	    return "%vmovd\t{%1, %q0|%q0, %1}";
+	}
+      else if (GENERAL_REG_P (operands[1]))
+	{
+	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
+	    return "%vmovq\t{%q1, %0|%0, %q1}";
+	  else
+	    return "%vmovd\t{%q1, %0|%0, %q1}";
+	}
       else
 	return "%vmovq\t{%1, %0|%0, %1}";
 
     case MODE_SI:
-      return "%vmovd\t{%1, %0|%0, %1}";
+      if (GENERAL_REG_P (operands[0]))
+	return "%vmovd\t{%1, %k0|%k0, %1}";
+      else if (GENERAL_REG_P (operands[1]))
+	return "%vmovd\t{%k1, %0|%0, %k1}";
+      else
+	return "%vmovd\t{%1, %0|%0, %1}";
 
     case MODE_HI:
       if (GENERAL_REG_P (operands[0]))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9b424a3935b..376df1d51d1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2580,13 +2580,9 @@ 
 	return standard_sse_constant_opcode (insn, operands);
 
       if (SSE_REG_P (operands[0]))
-	return MEM_P (operands[1])
-	  ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
-	  : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
+	return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
       else
-	return MEM_P (operands[0])
-	  ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
-	  : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
+	return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
 
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
@@ -2614,13 +2610,13 @@ 
 	      (const_string "mskmov")
 	    (eq_attr "alternative" "8")
 	      (const_string "msklog")
-	    (eq_attr "alternative" "9,10,13,14")
+	    (eq_attr "alternative" "13,14")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "ssemov")
 		(const_string "sselog1"))
 	    (eq_attr "alternative" "11")
 	      (const_string "sselog1")
-	    (eq_attr "alternative" "12")
+	    (eq_attr "alternative" "9,10,12")
 	      (const_string "ssemov")
 	    (match_test "optimize_function_for_size_p (cfun)")
 	      (const_string "imov")
@@ -2644,7 +2640,11 @@ 
 	      ]
 	      (const_string "orig")))
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "9,10,13,14")
+     (cond [(eq_attr "alternative" "9,10")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "HI")
+		(const_string "SI"))
+	    (eq_attr "alternative" "13,14")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "HI")
 		(const_string "TI"))
@@ -3876,13 +3876,9 @@ 
 	return standard_sse_constant_opcode (insn, operands);
 
       if (SSE_REG_P (operands[0]))
-	return MEM_P (operands[1])
-	       ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
-	       : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
+	return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
       else
-	return MEM_P (operands[0])
-	       ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
-	       : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
+	return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
 
     default:
       if (get_attr_mode (insn) == MODE_SI)
@@ -3901,9 +3897,9 @@ 
    (set (attr "type")
 	(cond [(eq_attr "alternative" "4")
 		 (const_string "sselog1")
-	       (eq_attr "alternative" "5")
+	       (eq_attr "alternative" "5,6,8")
 		 (const_string "ssemov")
-	       (eq_attr "alternative" "6,7,8,9")
+	       (eq_attr "alternative" "7,9")
 		 (if_then_else
 		   (match_test ("TARGET_AVX512FP16"))
 		   (const_string "ssemov")
@@ -3930,7 +3926,12 @@ 
    (set (attr "mode")
 	(cond [(eq_attr "alternative" "4")
 		 (const_string "V4SF")
-	       (eq_attr "alternative" "6,7,8,9")
+	       (eq_attr "alternative" "6,8")
+		 (if_then_else
+		   (match_test "TARGET_AVX512FP16")
+		   (const_string "HI")
+		   (const_string "SI"))
+	       (eq_attr "alternative" "7,9")
 		 (if_then_else
 		   (match_test "TARGET_AVX512FP16")
 		   (const_string "HI")
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a409bb7c6c6..8e0a6490b7b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -389,13 +389,9 @@ 
 	return standard_sse_constant_opcode (insn, operands);
 
       if (SSE_REG_P (operands[0]))
-	return MEM_P (operands[1])
-	  ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
-	  : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
+	return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
       else
-	return MEM_P (operands[0])
-	  ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
-	  : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
+	return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
 
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
@@ -412,13 +408,13 @@ 
 	       ]
 	       (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "6,7,8,9")
+     (cond [(eq_attr "alternative" "6,7")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "ssemov")
 		(const_string "sselog1"))
 	    (eq_attr "alternative" "4")
 	      (const_string "sselog1")
-	    (eq_attr "alternative" "5")
+	    (eq_attr "alternative" "5,8,9")
 	      (const_string "ssemov")
 	    (match_test "optimize_function_for_size_p (cfun)")
 	      (const_string "imov")
@@ -440,10 +436,14 @@ 
 	      ]
 	      (const_string "orig")))
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "6,7,8,9")
+     (cond [(eq_attr "alternative" "6,7")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "HI")
 		(const_string "TI"))
+	    (eq_attr "alternative" "8,9")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "HI")
+		(const_string "SI"))
 	    (eq_attr "alternative" "4")
 	      (cond [(match_test "TARGET_AVX")
 		       (const_string "TI")
diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c
index e511c665ae8..97bc9b14e7f 100644
--- a/gcc/testsuite/gcc.target/i386/pr102811-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c
@@ -1,7 +1,6 @@ 
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
-/* { dg-final { scan-assembler-times "pextrw" 1 } } */
-/* { dg-final { scan-assembler-times "pinsrw" 1 } } */
+/* { dg-final { scan-assembler-times "vmovd" 2 } } */
 /* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */
 short test (_Float16 a)
 {
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
index 3da7683fc31..25e17231c1a 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c
@@ -13,4 +13,5 @@  foo (union flt x)
   return x.flt;
 }
 
-/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */
+/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} { target ia32 } } } */
+/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} { target { ! ia32 } } } } */