mips: Improved RTL representation of wsbh/dsbh/dshd

Message ID 003a01d7edd8$4c4b2170$e4e16450$@nextmovesoftware.com
State New
Headers
Series mips: Improved RTL representation of wsbh/dsbh/dshd |

Commit Message

Roger Sayle Dec. 10, 2021, 3:12 p.m. UTC
  This patch to the mips backend updates the representations used
internally for MIPS' wsbh, dsbh and dshd instructions.  These were
previously described using an UNSPEC rtx, which prevents simplification
at the RTL level.  In addition to now being able to eliminate rotate
instructions before/after wsbh, allowing a wsbh to be emitted without
a backend builtin, these new representations also allow dsbh and dshd
to be synthesized from standard C/C++ vector idioms.

This patch has been tested by a make bootstrap and make -k check
on mips-unknown-linux-gnu, with --enable-multiarch --enable-multilib
--enable-targets=all --with-arch-32=mips32r2 --with-arch-64=mips64r2
(thanks to the compile farm's gcc230.fsffrance.org) and a cross-compiler
to mips64-linux-gnu hosted on x86_64-pc-linux-gnu.

Ok for mainline?


2021-12-10  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/mips/mips.c (mips_vector_mode_supported_p): Allow
	V4HI and V8QI modes on TARGET_64BIT with ISA_HAS_WSBH.
	(mips_expand_vec_perm_const_1): With one_vector_p, try
	expanding a vselect directly before an interleaved variant.
	* mips.md (UNSPEC_WSBH, UNSPEC_DSBH, UNSPEC_DSHD): Delete.
	(bswapsi2): Change from define_insn_and_split to an expander.
	(bswapdi2): Change from define_insn_and_split to an expander.
	(wsbh): Represent as SI rotate by 16 of a bswap32.
	(wsbh_2): Also recognize as a bswap32 of a rotatert:SI ... 16.
	(wsbh_v4qi): Recognize wsbh from a vec_select:V4QI.
	(wsbh_v2hi): Recognize wsbh from a vec_select:V2HI.
	(dsbh): Represent as a DImode cast of a vec_select:V8QI.
	(dsbh_v8qi): Recognize dsbh from a vec_select:V8QI.
	(dshd): Represent as a DImode cast of a vec_select:V8QI.
	(dshd_v8qi): Recognize dshd from a vec_select:V8QI.
	(dshd_v4hi): Recognize dshd from a vec_select:V4HI.

gcc/testsuite/ChangeLog
	* gcc.target/mips/dsbh-v8qi.c: New test case.
	* gcc.target/mips/dshd-v4hi.c: New test case.
	* gcc.target/mips/dshd-v8qi.c: New test case.
	* gcc.target/mips/wsbh.c: New test case.


Thanks in advance,
Roger
--
  

Comments

Maciej W. Rozycki Jan. 19, 2022, 9:43 p.m. UTC | #1
Hi Roger,

> This patch to the mips backend updates the representations used
> internally for MIPS' wsbh, dsbh and dshd instructions.  These were
> previously described using an UNSPEC rtx, which prevents simplification
> at the RTL level.  In addition to now being able to eliminate rotate
> instructions before/after wsbh, allowing a wsbh to be emitted without
> a backend builtin, these new representations also allow dsbh and dshd
> to be synthesized from standard C/C++ vector idioms.

 I came across your submission and while it is not a proper review I've 
noticed a couple of issues as below.

> diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
> index 455b9b8..21364d6 100644
> --- a/gcc/config/mips/mips.md
> +++ b/gcc/config/mips/mips.md
[...]
> +
> +;; Non-canonical variant of wsbh
> +(define_insn "wsbh_2"

 This insn is only ever matched by its RTL pattern and is nowhere referred 
to by its name, but giving it a callable one makes extra code produced to 
make a call possible (via `gen_wsbh_2').  Can you prevent useless callable 
code from being produced by giving the pattern a debug name instead such 
as `*wsbh_2'?

> +;; V4QI form of wsbh
> +(define_insn "wsbh_v4qi"

 Likewise here and throughout for other new insns.

 Please note that you need a full stop at the end of sentences in comments 
to comply with the GNU coding style.

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/mips/dsbh-v8qi.c
> @@ -0,0 +1,12 @@
> +/* { dg-options "isa_rev>=2 -mgp64" } */
> +
> +typedef char v8qi __attribute__((vector_size (8)));
> +
> +long long foo(long long x)
> +{
> +  v8qi t = (v8qi)x;
> +  t = __builtin_shufflevector (t, t, 1, 0, 3, 2, 5, 4, 7, 6);
> +  return (long long)t;
> +}
> +
> +/* { dg-final { scan-assembler "\tdsbh\t" } } */

 Your new test cases do not follow the GNU coding style, can you please 
run them through `indent -gnu'?

  Maciej
  

Patch

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 807bf1a..c5699d6 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -13407,9 +13407,12 @@  mips_vector_mode_supported_p (machine_mode mode)
       return TARGET_DSP;
 
     case E_V2SImode:
+      return TARGET_LOONGSON_MMI;
+
     case E_V4HImode:
     case E_V8QImode:
-      return TARGET_LOONGSON_MMI;
+      return TARGET_LOONGSON_MMI
+	     || (TARGET_64BIT && ISA_HAS_WSBH);
 
     default:
       return MSA_SUPPORTED_MODE_P (mode);
@@ -21608,6 +21611,9 @@  mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 
   if (d->one_vector_p)
     {
+      if (mips_expand_vselect (d->target, d->op0, d->perm, nelt))
+	return true;
+
       /* Try interleave with alternating operands.  */
       memcpy (perm2, d->perm, sizeof(perm2));
       for (i = 1; i < nelt; i += 2)
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 455b9b8..21364d6 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -81,11 +81,6 @@ 
   UNSPEC_STORE_LEFT
   UNSPEC_STORE_RIGHT
 
-  ;; Integer operations that are too cumbersome to describe directly.
-  UNSPEC_WSBH
-  UNSPEC_DSBH
-  UNSPEC_DSHD
-
   ;; Floating-point moves.
   UNSPEC_LOAD_LOW
   UNSPEC_LOAD_HIGH
@@ -5866,45 +5861,139 @@ 
   "wsbh\t%0,%1"
   [(set_attr "type" "shift")])
 
-(define_insn_and_split "bswapsi2"
+(define_expand "bswapsi2"
   [(set (match_operand:SI 0 "register_operand" "=d")
 	(bswap:SI (match_operand:SI 1 "register_operand" "d")))]
   "ISA_HAS_WSBH && ISA_HAS_ROR"
-  "#"
-  "&& 1"
-  [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH))
-   (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))]
-  ""
-  [(set_attr "insn_count" "2")])
+{
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_insn (gen_wsbh (tmp, operands[1]));
+  emit_insn (gen_rotrsi3 (operands[0], tmp, GEN_INT (16)));
+  DONE;
+})
 
-(define_insn_and_split "bswapdi2"
+(define_expand "bswapdi2"
   [(set (match_operand:DI 0 "register_operand" "=d")
 	(bswap:DI (match_operand:DI 1 "register_operand" "d")))]
   "TARGET_64BIT && ISA_HAS_WSBH"
-  "#"
-  "&& 1"
-  [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH))
-   (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))]
-  ""
-  [(set_attr "insn_count" "2")])
+{
+  rtx tmp = gen_reg_rtx (DImode);
+  emit_insn (gen_dsbh (tmp, operands[1]));
+  emit_insn (gen_dshd (operands[0], tmp));
+  DONE;
+})
 
 (define_insn "wsbh"
   [(set (match_operand:SI 0 "register_operand" "=d")
-	(unspec:SI [(match_operand:SI 1 "register_operand" "d")] UNSPEC_WSBH))]
+        (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "d"))
+		     (const_int 16)))]
+  "ISA_HAS_WSBH"
+  "wsbh\t%0,%1"
+  [(set_attr "type" "shift")])
+
+;; Non-canonical variant of wsbh
+(define_insn "wsbh_2"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+	(bswap:SI (rotatert:SI (match_operand:SI 1 "register_operand" "d")
+			       (const_int 16))))]
+  "ISA_HAS_WSBH"
+  "wsbh\t%0,%1"
+  [(set_attr "type" "shift")])
+
+;; V4QI form of wsbh
+(define_insn "wsbh_v4qi"
+  [(set (match_operand:V4QI 0 "register_operand")
+	(vec_select:V4QI (match_operand:V4QI 1 "register_operand")
+			 (parallel [(const_int 1)
+				    (const_int 0)
+				    (const_int 3)
+				    (const_int 2)])))]
+  "ISA_HAS_WSBH"
+  "wsbh\t%0,%1"
+  [(set_attr "type" "shift")])
+
+;; V2HI form of wsbh
+(define_insn "wsbh_v2hi"
+  [(set (match_operand:V2HI 0 "register_operand")
+	(vec_select:V2HI (match_operand:V2HI 1 "register_operand")
+			 (parallel [(const_int 1)
+				    (const_int 0)])))]
   "ISA_HAS_WSBH"
   "wsbh\t%0,%1"
   [(set_attr "type" "shift")])
 
 (define_insn "dsbh"
-  [(set (match_operand:DI 0 "register_operand" "=d")
-	(unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSBH))]
+  [(set (subreg:V8QI (match_operand:DI 0 "register_operand" "=d") 0)
+	(vec_select:V8QI
+	 (subreg:V8QI (match_operand:DI 1 "register_operand" "d") 0)
+	 (parallel [(const_int 1)
+		    (const_int 0)
+		    (const_int 3)
+		    (const_int 2)
+		    (const_int 5)
+		    (const_int 4)
+		    (const_int 7)
+		    (const_int 6)])))]
+  "TARGET_64BIT && ISA_HAS_WSBH"
+  "dsbh\t%0,%1"
+  [(set_attr "type" "shift")])
+
+;; V8QI form of dsbh
+(define_insn "dsbh_v8qi"
+  [(set (match_operand:V8QI 0 "register_operand" "=d")
+	(vec_select:V8QI (match_operand:V8QI 1 "register_operand")
+			 (parallel [(const_int 1)
+				    (const_int 0)
+				    (const_int 3)
+				    (const_int 2)
+				    (const_int 5)
+				    (const_int 4)
+				    (const_int 7)
+				    (const_int 6)])))]
   "TARGET_64BIT && ISA_HAS_WSBH"
   "dsbh\t%0,%1"
   [(set_attr "type" "shift")])
 
 (define_insn "dshd"
-  [(set (match_operand:DI 0 "register_operand" "=d")
-	(unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSHD))]
+  [(set (subreg:V8QI (match_operand:DI 0 "register_operand" "=d") 0)
+	(vec_select:V8QI
+	 (subreg:V8QI (match_operand:DI 1 "register_operand" "d") 0)
+	 (parallel [(const_int 6)
+		    (const_int 7)
+		    (const_int 4)
+		    (const_int 5)
+		    (const_int 2)
+		    (const_int 3)
+		    (const_int 0)
+		    (const_int 1)])))]
+  "TARGET_64BIT && ISA_HAS_WSBH"
+  "dshd\t%0,%1"
+  [(set_attr "type" "shift")])
+
+;; V8QI form of dshd
+(define_insn "dshd_v8qi"
+  [(set (match_operand:V8QI 0 "register_operand" "=d")
+	(vec_select:V8QI (match_operand:V8QI 1 "register_operand")
+			 (parallel [(const_int 6)
+				    (const_int 7)
+				    (const_int 4)
+				    (const_int 5)
+				    (const_int 2)
+				    (const_int 3)
+				    (const_int 0)
+				    (const_int 1)])))]
+  "TARGET_64BIT && ISA_HAS_WSBH"
+  "dshd\t%0,%1"
+  [(set_attr "type" "shift")])
+
+;; V4HI form of dshd
+(define_insn "dshd_v4hi"
+  [(set (match_operand:V4HI 0 "register_operand" "=d")
+	(vec_select:V4HI (match_operand:V4HI 1 "register_operand")
+			 (parallel [(const_int 3)
+				    (const_int 2)
+				    (const_int 1)
+				    (const_int 0)])))]
   "TARGET_64BIT && ISA_HAS_WSBH"
   "dshd\t%0,%1"
   [(set_attr "type" "shift")])
diff --git a/gcc/testsuite/gcc.target/mips/dsbh-v8qi.c b/gcc/testsuite/gcc.target/mips/dsbh-v8qi.c
new file mode 100644
index 0000000..0bb7a1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/dsbh-v8qi.c
@@ -0,0 +1,12 @@ 
+/* { dg-options "isa_rev>=2 -mgp64" } */
+
+typedef char v8qi __attribute__((vector_size (8)));
+
+long long foo(long long x)
+{
+  v8qi t = (v8qi)x;
+  t = __builtin_shufflevector (t, t, 1, 0, 3, 2, 5, 4, 7, 6);
+  return (long long)t;
+}
+
+/* { dg-final { scan-assembler "\tdsbh\t" } } */
diff --git a/gcc/testsuite/gcc.target/mips/dshd-v4hi.c b/gcc/testsuite/gcc.target/mips/dshd-v4hi.c
new file mode 100644
index 0000000..309468f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/dshd-v4hi.c
@@ -0,0 +1,13 @@ 
+/* { dg-options "isa_rev>=2 -mgp64" } */
+
+typedef short v4hi __attribute__((vector_size (8)));
+
+long long foo(long long x)
+{
+  v4hi t = (v4hi)x;
+  t = __builtin_shufflevector (t, t, 3, 2, 1, 0);
+  return (long long)t;
+}
+
+/* { dg-final { scan-assembler "\tdshd\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/mips/dshd-v8qi.c b/gcc/testsuite/gcc.target/mips/dshd-v8qi.c
new file mode 100644
index 0000000..f07a735
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/dshd-v8qi.c
@@ -0,0 +1,12 @@ 
+/* { dg-options "isa_rev>=2 -mgp64" } */
+
+typedef char v8qi __attribute__((vector_size (8)));
+
+long long foo(long long x)
+{
+  v8qi t = (v8qi)x;
+  t = __builtin_shufflevector (t, t, 6, 7, 4, 5, 2, 3, 0, 1);
+  return (long long)t;
+}
+
+/* { dg-final { scan-assembler "\tdshd\t" } } */
diff --git a/gcc/testsuite/gcc.target/mips/wsbh.c b/gcc/testsuite/gcc.target/mips/wsbh.c
new file mode 100644
index 0000000..5f6c37b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/wsbh.c
@@ -0,0 +1,18 @@ 
+/* { dg-options "isa_rev>=2" } */
+/* { dg-skip-if "bswap recognition needs expensive optimizations" { *-*-* } { "-O0" "-O1" } { "" } } */
+
+unsigned int foo(unsigned int x)
+{
+  unsigned int t = __builtin_bswap32(x);
+  return (t>>16) | (t<<16);
+}
+
+unsigned int bar(unsigned int x)
+{
+  unsigned int t = (x>>16) | (x<<16);
+  return __builtin_bswap32(t);
+}
+
+/* { dg-final { scan-assembler-times "\twsbh\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tror\t" } } */
+