[v2,1/2] RISC-V: Use bseti/bclri/binvi to extend reach of ori/andi/xori

Message ID 20221118111001.1488517-2-philipp.tomsich@vrull.eu
State Deferred, archived
Headers
Series Use Zbs with xori/ori/andi and polarity-reversed twobit-tests |

Commit Message

Philipp Tomsich Nov. 18, 2022, 11:10 a.m. UTC
  Sequences of the form "a | C" and "a ^ C", where C lies in the positive
half of a signed immediate's range with one additional bit set, are
mapped to ori/xori plus one bseti/binvi to avoid using a temporary
(and a multi-insn sequence to load C into that temporary).

Something similar holds for "a & ~C" being representable as either
bclri + bclri or bclri + andi.

gcc/ChangeLog:

	* config/riscv/bitmanip.md (*<or_optab>i<mode>_extrabit):
	New pattern for binvi+binvi/xori and bseti+bseti/ori
	(*andi<mode>_extrabit): New pattern for bclri+bclri/andi
	* config/riscv/iterators.md (any_or): Match ior and xor.
	(or_optab): New code attribute.
	* config/riscv/predicates.md (const_twobits_operand):
	New predicate.
	(uimm_extra_bit_operand): New predicate.
	(uimm_extra_bit_or_twobits): New predicate.
	(not_uimm_extra_bit_operand): New predicate.
	(not_uimm_extra_bit_or_nottwobits): New predicate.
	* config/riscv/riscv.h (UIMM_EXTRA_BIT_OPERAND):
	Helper for the uimm_extra_bit_operand and
	not_uimm_extra_bit_operand predicates.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/zbs-bclri-02.c: New test.
	* gcc.target/riscv/zbs-binvi.c: New test.
	* gcc.target/riscv/zbs-bseti.c: New test.

Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
---
- This no longer depends on "RISC-V: Optimize branches testing a
  bit-range or a shifted immediate".  The other series now needs to be
  adjusted before merging.

Changes in v2:
- Collects already approved changes for v2 for (a | C) and (a ^ C).
- Pulls in the (already) approved branch on polarity-reversed bits
  for v2, as it shares predicates with the other changes.
- Newly adds support for the (a & ~C) case.
- Use an iterator for the ori/xori case and share one pattern
- Adds the andi (a & ~C) case, expanding to bclri/andi.
- Cleans up the predicates (incl. removing the non-intuitive inclusion
  of two-bits-set under the uimm_extra_bits)

 gcc/config/riscv/bitmanip.md                  | 37 +++++++++++++++++++
 gcc/config/riscv/iterators.md                 |  8 ++++
 gcc/config/riscv/predicates.md                | 28 ++++++++++++++
 gcc/config/riscv/riscv.h                      |  8 ++++
 .../riscv/{zbs-bclri.c => zbs-bclri-01.c}     |  0
 gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c | 27 ++++++++++++++
 gcc/testsuite/gcc.target/riscv/zbs-binvi.c    | 22 +++++++++++
 gcc/testsuite/gcc.target/riscv/zbs-bseti.c    | 27 ++++++++++++++
 8 files changed, 157 insertions(+)
 rename gcc/testsuite/gcc.target/riscv/{zbs-bclri.c => zbs-bclri-01.c} (100%)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-binvi.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bseti.c
  

Patch

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 2175c626ee5..d7c64270c00 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -480,3 +480,40 @@  (define_split
   "TARGET_ZBS"
   [(set (match_dup 0) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2)))
    (set (match_dup 0) (plus:GPR (match_dup 0) (const_int -1)))])
+
+;; Catch the cases where bseti/binvi + ori/xori, or bseti/binvi + bseti/binvi,
+;; can replace a lui + addi + or/xor sequence; the split runs after reload.
+(define_insn_and_split "*<or_optab>i<mode>_extrabit"
+  [(set (match_operand:X 0 "register_operand" "=r")
+	(any_or:X (match_operand:X 1 "register_operand" "r")
+	          (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))]
+  "TARGET_ZBS"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (<or_optab>:X (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (<or_optab>:X (match_dup 0) (match_dup 4)))]
+{
+	unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
+	unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
+
+	operands[3] = GEN_INT (bits &~ topbit);  /* C without its top set bit.  */
+	operands[4] = GEN_INT (topbit);  /* Only the top set bit of C.  */
+})
+
+;; Likewise for (a & C): use bclri + andi or bclri + bclri.
+(define_insn_and_split "*andi<mode>_extrabit"
+  [(set (match_operand:X 0 "register_operand" "=r")
+	(and:X (match_operand:X 1 "register_operand" "r")
+	       (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))]
+  "TARGET_ZBS"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))]
+{
+	unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
+	unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (~bits);
+
+	operands[3] = GEN_INT (bits | topbit);  /* Mask with its top clear bit set.  */
+	operands[4] = GEN_INT (~topbit);  /* Mask clearing only that bit.  */
+})
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 50380ecfac9..ab1f4ee8d34 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -136,6 +136,10 @@  (define_code_iterator any_shift [ashift ashiftrt lshiftrt])
 ;; from the same template.
 (define_code_iterator any_bitwise [and ior xor])
 
+;; This code iterator allows ior and xor instructions to be generated
+;; from the same template.
+(define_code_iterator any_or [ior xor])
+
 ;; This code iterator allows unsigned and signed division to be generated
 ;; from the same template.
 (define_code_iterator any_div [div udiv mod umod])
@@ -194,6 +198,10 @@  (define_code_attr optab [(ashift "ashl")
 			 (plus "add")
 			 (minus "sub")])
 
+;; <or_optab> code attributes
+(define_code_attr or_optab [(ior "ior")
+			    (xor "xor")])
+
 ;; <insn> expands to the name of the insn that implements a particular code.
 (define_code_attr insn [(ashift "sll")
 			(ashiftrt "sra")
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index ffb3fca2ac3..3300c0e36eb 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -290,3 +290,31 @@  (define_predicate "vector_merge_operand"
 (define_predicate "const_nottwobits_operand"
   (and (match_code "const_int")
        (match_test "popcount_hwi (~UINTVAL (op)) == 2")))
+
+;; A CONST_INT operand that has exactly two bits set.
+(define_predicate "const_twobits_operand"
+  (and (match_code "const_int")
+       (match_test "popcount_hwi (UINTVAL (op)) == 2")))
+
+;; A CONST_INT operand that is not itself a signed-immediate, but fits into
+;; the positive half of a signed-immediate once its top set bit is cleared
+(define_predicate "uimm_extra_bit_operand"
+  (and (match_code "const_int")
+       (match_test "UIMM_EXTRA_BIT_OPERAND (UINTVAL (op))")))
+
+(define_predicate "uimm_extra_bit_or_twobits"
+  (and (match_code "const_int")
+       (ior (match_operand 0 "uimm_extra_bit_operand")
+	    (match_operand 0 "const_twobits_operand"))))
+
+;; A CONST_INT operand that fits into the negative half of a
+;; signed-immediate after a single cleared top bit has been
+;; set: i.e., a bitwise-negated uimm_extra_bit_operand
+(define_predicate "not_uimm_extra_bit_operand"
+  (and (match_code "const_int")
+       (match_test "UIMM_EXTRA_BIT_OPERAND (~UINTVAL (op))")))
+
+(define_predicate "not_uimm_extra_bit_or_nottwobits"
+  (and (match_code "const_int")
+       (ior (match_operand 0 "not_uimm_extra_bit_operand")
+	    (match_operand 0 "const_nottwobits_operand"))))
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 2d0d170645c..b05c3c1545c 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -593,6 +593,14 @@  enum reg_class
 		? (VALUE)						\
 		: ((VALUE) & ((HOST_WIDE_INT_1U << 32)-1))))
 
+/* True if VALUE can be represented as an immediate with 1 extra bit
+   set: VALUE itself must not be a SMALL_OPERAND (otherwise this would
+   be true for all small operands), but must turn into a small operand
+   once its top set bit is cleared.  VALUE is evaluated more than once.  */
+#define UIMM_EXTRA_BIT_OPERAND(VALUE)					\
+  (!SMALL_OPERAND (VALUE)						\
+   && SMALL_OPERAND ((VALUE) & ~(HOST_WIDE_INT_1U << floor_log2 (VALUE))))
+
 /* Stack layout; function entry, exit and calling.  */
 
 #define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bclri.c b/gcc/testsuite/gcc.target/riscv/zbs-bclri-01.c
similarity index 100%
rename from gcc/testsuite/gcc.target/riscv/zbs-bclri.c
rename to gcc/testsuite/gcc.target/riscv/zbs-bclri-01.c
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c b/gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c
new file mode 100644
index 00000000000..61254844a4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-bclri-02.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f3(long long a)
+{
+  return a & ~0x1100;
+}
+
+long long f4 (long long a)
+{
+  return a & ~0x80000000000000ffull;
+}
+
+long long f5 (long long a)
+{
+  return a & ~0x8000001000000000ull;
+}
+
+long long f6 (long long a)
+{
+  return a & ~0xff7ffffffffffffull;
+}
+
+/* { dg-final { scan-assembler-times "bclri\t" 4 } } */
+/* { dg-final { scan-assembler-times "andi\t" 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-binvi.c b/gcc/testsuite/gcc.target/riscv/zbs-binvi.c
new file mode 100644
index 00000000000..c2d6725b53b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-binvi.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long f3(long long a)
+{
+  return a ^ 0x1100;
+}
+
+long long f4 (long long a)
+{
+  return a ^ 0x80000000000000ffull;
+}
+
+long long f5 (long long a)
+{
+  return a ^ 0x8000001000000000ull;
+}
+
+/* { dg-final { scan-assembler-times "binvi\t" 4 } } */
+/* { dg-final { scan-assembler-times "xori\t" 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bseti.c b/gcc/testsuite/gcc.target/riscv/zbs-bseti.c
new file mode 100644
index 00000000000..5738add6348
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-bseti.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+long long foo1 (long long a)
+{
+  return a | 0x1100;
+}
+
+long long foo2 (long long a)
+{
+  return a | 0x80000000000000ffull;
+}
+
+long long foo3 (long long a)
+{
+  return a | 0x8000000100000000ull;
+}
+
+long long foo4 (long long a)
+{
+  return a | 0xfff;
+}
+
+/* { dg-final { scan-assembler-times "bseti\t" 5 } } */
+/* { dg-final { scan-assembler-times "ori\t" 3 } } */
+