[1/4] xtensa: Improve shift operations more

Message ID 196a3880-b4ee-6c6c-bdc1-b99800c43705@yahoo.co.jp
State New
Headers
Series [1/4] xtensa: Improve shift operations more |

Commit Message

Takayuki 'January June' Suwa June 12, 2022, 6:31 a.m. UTC
  This patch introduces funnel shifter utilization, and rearranges existing
"per-byte shift" insn patterns.

gcc/ChangeLog:

	* config/xtensa/predicates.md (logical_shift_operator,
	xtensa_shift_per_byte_operator): New predicates.
	* config/xtensa/xtensa-protos.h (xtensa_shlrd_which_direction):
	New prototype.
	* config/xtensa/xtensa.cc (xtensa_shlrd_which_direction):
	New helper function for funnel shift patterns.
	* config/xtensa/xtensa.md (ior_op): New code iterator.
	(*ashlsi3_1): Replace with new split pattern.
	(*shift_per_byte): Unify *ashlsi3_3x, *ashrsi3_3x and *lshrsi3_3x.
	(*shift_per_byte_omit_AND_0, *shift_per_byte_omit_AND_1):
	New insn-and-split patterns that redirect to *xtensa_shift_per_byte,
	in order to omit unnecessary bitwise AND operation.
	(*shlrd_reg_<code>, *shlrd_const_<code>, *shlrd_per_byte_<code>,
	*shlrd_per_byte_<code>_omit_AND):
	New insn patterns for funnel shifts.

gcc/testsuite/ChangeLog:

	* gcc.target/xtensa/funnel_shifter.c: New.
---
  gcc/config/xtensa/predicates.md               |   6 +
  gcc/config/xtensa/xtensa-protos.h             |   1 +
  gcc/config/xtensa/xtensa.cc                   |  14 ++
  gcc/config/xtensa/xtensa.md                   | 212 ++++++++++++++----
  .../gcc.target/xtensa/funnel_shifter.c        |  17 ++
  5 files changed, 212 insertions(+), 38 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/xtensa/funnel_shifter.c
  

Patch

diff --git a/gcc/config/xtensa/predicates.md 
b/gcc/config/xtensa/predicates.md
index a912e6d8bb2..bcc83ada0ae 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -164,9 +164,15 @@ 
  (define_predicate "boolean_operator"
    (match_code "eq,ne"))

+(define_predicate "logical_shift_operator"
+  (match_code "ashift,lshiftrt"))
+
  (define_predicate "xtensa_cstoresi_operator"
    (match_code "eq,ne,gt,ge,lt,le"))

+(define_predicate "xtensa_shift_per_byte_operator"
+  (match_code "ashift,ashiftrt,lshiftrt"))
+
  (define_predicate "tls_symbol_operand"
    (and (match_code "symbol_ref")
         (match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
diff --git a/gcc/config/xtensa/xtensa-protos.h 
b/gcc/config/xtensa/xtensa-protos.h
index c2fd750cd3a..2c08ed4992d 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -56,6 +56,7 @@  extern char *xtensa_emit_bit_branch (bool, bool, rtx *);
  extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
  extern char *xtensa_emit_call (int, rtx *);
  extern bool xtensa_tls_referenced_p (rtx);
+extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);

  #ifdef TREE_CODE
  extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3477e983592..df78af66714 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2403,6 +2403,20 @@  xtensa_tls_referenced_p (rtx x)
  }


+/* Helper function for "*shlrd_..." patterns.  */
+
+enum rtx_code
+xtensa_shlrd_which_direction (rtx op0, rtx op1)
+{
+  if (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
+    return ASHIFT;	/* shld  */
+  if (GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
+    return LSHIFTRT;	/* shrd  */
+
+  return UNKNOWN;
+}
+
+
  /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

  static bool
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d806d43d129..bf79099f21b 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -83,6 +83,9 @@ 
  ;; the same template.
  (define_mode_iterator HQI [HI QI])

+;; This code iterator is for *shlrd and its variants.
+(define_code_iterator ior_op [ior plus])
+
  
  ;; Attributes.

@@ -1267,16 +1270,6 @@ 
    operands[1] = xtensa_copy_incoming_a7 (operands[1]);
  })

-(define_insn "*ashlsi3_1"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(ashift:SI (match_operand:SI 1 "register_operand" "r")
-		   (const_int 1)))]
-  "TARGET_DENSITY"
-  "add.n\t%0, %1, %1"
-  [(set_attr "type"	"arith")
-   (set_attr "mode"	"SI")
-   (set_attr "length"	"2")])
-
  (define_insn "ashlsi3_internal"
    [(set (match_operand:SI 0 "register_operand" "=a,a")
  	(ashift:SI (match_operand:SI 1 "register_operand" "r,r")
@@ -1289,16 +1282,14 @@ 
     (set_attr "mode"	"SI")
     (set_attr "length"	"3,6")])

-(define_insn "*ashlsi3_3x"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(ashift:SI (match_operand:SI 1 "register_operand" "r")
-		   (ashift:SI (match_operand:SI 2 "register_operand" "r")
-			      (const_int 3))))]
-  ""
-  "ssa8b\t%2\;sll\t%0, %1"
-  [(set_attr "type"	"arith")
-   (set_attr "mode"	"SI")
-   (set_attr "length"	"6")])
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+	(ashift:SI (match_operand:SI 1 "register_operand")
+		   (const_int 1)))]
+  "TARGET_DENSITY"
+  [(set (match_dup 0)
+	(plus:SI (match_dup 1)
+		 (match_dup 1)))])

  (define_insn "ashrsi3"
    [(set (match_operand:SI 0 "register_operand" "=a,a")
@@ -1312,17 +1303,6 @@ 
     (set_attr "mode"	"SI")
     (set_attr "length"	"3,6")])

-(define_insn "*ashrsi3_3x"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
-		     (ashift:SI (match_operand:SI 2 "register_operand" "r")
-				(const_int 3))))]
-  ""
-  "ssa8l\t%2\;sra\t%0, %1"
-  [(set_attr "type"	"arith")
-   (set_attr "mode"	"SI")
-   (set_attr "length"	"6")])
-
  (define_insn "lshrsi3"
    [(set (match_operand:SI 0 "register_operand" "=a,a")
  	(lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
@@ -1332,9 +1312,9 @@ 
    if (which_alternative == 0)
      {
        if ((INTVAL (operands[2]) & 0x1f) < 16)
-        return "srli\t%0, %1, %R2";
+	return "srli\t%0, %1, %R2";
        else
-      	return "extui\t%0, %1, %R2, %L2";
+	return "extui\t%0, %1, %R2, %L2";
      }
    return "ssr\t%2\;srl\t%0, %1";
  }
@@ -1342,13 +1322,169 @@ 
     (set_attr "mode"	"SI")
     (set_attr "length"	"3,6")])

-(define_insn "*lshrsi3_3x"
+(define_insn "*shift_per_byte"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(match_operator:SI 3 "xtensa_shift_per_byte_operator"
+		[(match_operand:SI 1 "register_operand" "r")
+		 (ashift:SI (match_operand:SI 2 "register_operand" "r")
+			    (const_int 3))]))]
+  "!optimize_debug && optimize"
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case ASHIFT:	return "ssa8b\t%2\;sll\t%0, %1";
+    case ASHIFTRT:	return "ssa8l\t%2\;sra\t%0, %1";
+    case LSHIFTRT:	return "ssa8l\t%2\;srl\t%0, %1";
+    default:		gcc_unreachable ();
+    }
+}
+  [(set_attr "type"	"arith")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
+(define_insn_and_split "*shift_per_byte_omit_AND_0"
    [(set (match_operand:SI 0 "register_operand" "=a")
-	(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
-		     (ashift:SI (match_operand:SI 2 "register_operand" "r")
-				(const_int 3))))]
+	(match_operator:SI 4 "xtensa_shift_per_byte_operator"
+		[(match_operand:SI 1 "register_operand" "r")
+		 (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+				    (const_int 3))
+			 (match_operand:SI 3 "const_int_operand" "i"))]))]
+  "!optimize_debug && optimize
+   && (INTVAL (operands[3]) & 0x1f) == 3 << 3"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(match_op_dup 4
+		[(match_dup 1)
+		 (ashift:SI (match_dup 2)
+			    (const_int 3))]))]
+  ""
+  [(set_attr "type"	"arith")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
+(define_insn_and_split "*shift_per_byte_omit_AND_1"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(match_operator:SI 5 "xtensa_shift_per_byte_operator"
+		[(match_operand:SI 2 "register_operand" "r")
+		 (neg:SI (and:SI (ashift:SI (match_operand:SI 3 "register_operand" "r")
+					    (const_int 3))
+				 (match_operand:SI 4 "const_int_operand" "i")))]))
+   (clobber (match_scratch:SI 1 "=&a"))]
+  "!optimize_debug && optimize
+   && (INTVAL (operands[4]) & 0x1f) == 3 << 3"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1)
+	(neg:SI (match_dup 3)))
+   (set (match_dup 0)
+	(match_op_dup 5
+		[(match_dup 2)
+		 (ashift:SI (match_dup 1)
+			    (const_int 3))]))]
+  ""
+  [(set_attr "type"	"arith")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"9")])
+
+(define_insn "*shlrd_reg_<code>"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(ior_op:SI (match_operator:SI 4 "logical_shift_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (match_operand:SI 2 "register_operand" "r")])
+		   (match_operator:SI 5 "logical_shift_operator"
+			[(match_operand:SI 3 "register_operand" "r")
+			 (neg:SI (match_dup 2))])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN"
+{
+  switch (xtensa_shlrd_which_direction (operands[4], operands[5]))
+    {
+    case ASHIFT:	return "ssl\t%2\;src\t%0, %1, %3";
+    case LSHIFTRT:	return "ssr\t%2\;src\t%0, %3, %1";
+    default:		gcc_unreachable ();
+    }
+}
+  [(set_attr "type"	"arith")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
+(define_insn "*shlrd_const_<code>"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(ior_op:SI (match_operator:SI 5 "logical_shift_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (match_operand:SI 3 "const_int_operand" "i")])
+		   (match_operator:SI 6 "logical_shift_operator"
+			[(match_operand:SI 2 "register_operand" "r")
+			 (match_operand:SI 4 "const_int_operand" "i")])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN
+   && IN_RANGE (INTVAL (operands[3]), 1, 31)
+   && IN_RANGE (INTVAL (operands[4]), 1, 31)
+   && INTVAL (operands[3]) + INTVAL (operands[4]) == 32"
+{
+  switch (xtensa_shlrd_which_direction (operands[5], operands[6]))
+    {
+    case ASHIFT:	return "ssai\t%L3\;src\t%0, %1, %2";
+    case LSHIFTRT:	return "ssai\t%R3\;src\t%0, %2, %1";
+    default:		gcc_unreachable ();
+    }
+}
+  [(set_attr "type"	"arith")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
+(define_insn "*shlrd_per_byte_<code>"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(ior_op:SI (match_operator:SI 4 "logical_shift_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (ashift:SI (match_operand:SI 2 "register_operand" "r")
+				    (const_int 3))])
+		   (match_operator:SI 5 "logical_shift_operator"
+			[(match_operand:SI 3 "register_operand" "r")
+			 (neg:SI (ashift:SI (match_dup 2)
+					    (const_int 3)))])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN"
+{
+  switch (xtensa_shlrd_which_direction (operands[4], operands[5]))
+    {
+    case ASHIFT:	return "ssa8b\t%2\;src\t%0, %1, %3";
+    case LSHIFTRT:	return "ssa8l\t%2\;src\t%0, %3, %1";
+    default:		gcc_unreachable ();
+    }
+}
+  [(set_attr "type"	"arith")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
+(define_insn_and_split "*shlrd_per_byte_<code>_omit_AND"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(ior_op:SI (match_operator:SI 5 "logical_shift_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+					    (const_int 3))
+				 (match_operand:SI 4 "const_int_operand" "i"))])
+		   (match_operator:SI 6 "logical_shift_operator"
+			[(match_operand:SI 3 "register_operand" "r")
+			 (neg:SI (and:SI (ashift:SI (match_dup 2)
+						    (const_int 3))
+					 (match_dup 4)))])))]
+  "!optimize_debug && optimize
+   && xtensa_shlrd_which_direction (operands[5], operands[6]) != UNKNOWN
+   && (INTVAL (operands[4]) & 0x1f) == 3 << 3"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(ior_op:SI (match_op_dup 5
+			[(match_dup 1)
+			 (ashift:SI (match_dup 2)
+				    (const_int 3))])
+		   (match_op_dup 6
+			[(match_dup 3)
+			 (neg:SI (ashift:SI (match_dup 2)
+					    (const_int 3)))])))]
    ""
-  "ssa8l\t%2\;srl\t%0, %1"
    [(set_attr "type"	"arith")
     (set_attr "mode"	"SI")
     (set_attr "length"	"6")])
diff --git a/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c 
b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c
new file mode 100644
index 00000000000..c8f987ccda9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/funnel_shifter.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int test_0(const void *addr)
+{
+  unsigned int n = (unsigned int)addr;
+  const unsigned int *a = (const unsigned int*)(n & ~3);
+  n = (n & 3) * 8;
+  return (a[0] >> n) | (a[1] << (32 - n));
+}
+
+unsigned int test_1(unsigned int a, unsigned int b)
+{
+  return (a >> 16) + (b << 16);
+}
+
+/* { dg-final { scan-assembler-times "src" 2 } } */