[COMMITTED,4/9] pru: Add pattern variants for zero extending destination
Commit Message
The higher bits in the result of some ALU operations are inherently
always zero when all input operands are smaller than 32-bits.
Add pattern variants to match when the resulting value is zero
extended, so that all operations can be effectively executed in a
single instruction. For PRU this simply means using a wider register for
the destination.
ALU operations which cannot be presented as zero-extending their
destination are addition, subtraction and logical shift left. The PRU
ALU performs all operations in 32-bit mode, so the carry-out and
shifted-out bits would violate the assumption that the ALU operation was
performed in 16-bit or 8-bit mode, and the result was zero-extended.
gcc/ChangeLog:
* config/pru/alu-zext.md (_noz0): New subst attribute.
(<code>_impl): Allow zero-extending the destination.
(<shift_op>): Remove unified pattern.
(ashl_impl): New distinct pattern.
(lshr_impl): Ditto.
(alu3_zext_op0_subst): New subst iterator to zero-extend the
destination register.
gcc/testsuite/ChangeLog:
* gcc.target/pru/extzv-1.c: Update to mark the new more
efficient generated code sequence.
* gcc.target/pru/extzv-2.c: Ditto.
* gcc.target/pru/extzv-3.c: Ditto.
* gcc.target/pru/zero_extend-op0.c: New test.
Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
---
gcc/config/pru/alu-zext.md | 38 ++++++++++++++++---
gcc/testsuite/gcc.target/pru/extzv-1.c | 2 +-
gcc/testsuite/gcc.target/pru/extzv-2.c | 2 +-
gcc/testsuite/gcc.target/pru/extzv-3.c | 2 +-
.../gcc.target/pru/zero_extend-op0.c | 28 ++++++++++++++
5 files changed, 63 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/pru/zero_extend-op0.c
@@ -33,6 +33,7 @@
(define_subst_attr "alu2_zext" "alu2_zext_subst" "_z" "_noz")
+(define_subst_attr "alu3_zext_op0" "alu3_zext_op0_subst" "_z0" "_noz0")
(define_subst_attr "alu3_zext_op1" "alu3_zext_op1_subst" "_z1" "_noz1")
(define_subst_attr "alu3_zext_op2" "alu3_zext_op2_subst" "_z2" "_noz2")
(define_subst_attr "alu3_zext" "alu3_zext_subst" "_z" "_noz")
@@ -44,6 +45,7 @@ (define_subst_attr "lmbd_zext" "lmbd_zext_subst" "_z" "_noz")
(define_subst_attr "bitalu_zext" "bitalu_zext_subst" "_z" "_noz")
(define_code_iterator ALUOP3 [plus minus and ior xor umin umax ashift lshiftrt])
+(define_code_iterator ALUOP3_ZEXT0 [and ior xor umin umax lshiftrt])
(define_code_iterator ALUOP2 [neg not])
;; Arithmetic Operations
@@ -130,8 +132,9 @@ (define_insn "setbit_<EQD:mode><EQS0:mode>_<bitalu_zext>"
"set\\t%0, %1, %T2"
[(set_attr "type" "alu")])
-; Regular ALU ops
-(define_insn "<code>_impl<EQD:mode><EQS0:mode><EQS1:mode>_<alu3_zext><alu3_zext_op1><alu3_zext_op2>"
+; Regular ALU ops. For all of them it is safe to present the result as
+; zero-extended, because there are no carry or shifted-out bits.
+(define_insn "<code>_impl<EQD:mode><EQS0:mode><EQS1:mode>_<alu3_zext><alu3_zext_op0><alu3_zext_op1><alu3_zext_op2>"
[(set (match_operand:EQD 0 "register_operand" "=r")
(LOGICAL:EQD
(zero_extend:EQD
@@ -142,14 +145,25 @@ (define_insn "<code>_impl<EQD:mode><EQS0:mode><EQS1:mode>_<alu3_zext><alu3_zext_
"<logical_asm>\\t%0, %1, %u2"
[(set_attr "type" "alu")])
-; Shift ALU ops
-(define_insn "<shift_op>_impl<EQD:mode><EQS0:mode><EQS1:mode>_<alu3_zext><alu3_zext_op1><alu3_zext_op2>"
+; Shift left ALU op. Cannot present the result as zero-extended because
+; of the shifted-out bits.
+(define_insn "ashl_impl<EQD:mode><EQS0:mode><EQS1:mode>_<alu3_zext><alu3_zext_op1><alu3_zext_op2>"
[(set (match_operand:EQD 0 "register_operand" "=r")
- (SHIFT:EQD
+ (ashift:EQD
(zero_extend:EQD (match_operand:EQS0 1 "register_operand" "r"))
(zero_extend:EQD (match_operand:EQS1 2 "shift_operand" "rL"))))]
""
- "<shift_asm>\\t%0, %1, %2"
+ "lsl\\t%0, %1, %2"
+ [(set_attr "type" "alu")])
+
+; Shift right ALU op. The result can be presented as zero-extended.
+(define_insn "lshr_impl<EQD:mode><EQS0:mode><EQS1:mode>_<alu3_zext><alu3_zext_op0><alu3_zext_op1><alu3_zext_op2>"
+ [(set (match_operand:EQD 0 "register_operand" "=r")
+ (lshiftrt:EQD
+ (zero_extend:EQD (match_operand:EQS0 1 "register_operand" "r"))
+ (zero_extend:EQD (match_operand:EQS1 2 "shift_operand" "rL"))))]
+ ""
+ "lsr\\t%0, %1, %2"
[(set_attr "type" "alu")])
;; Substitutions
@@ -197,6 +211,18 @@ (define_subst "alu3_zext_op2_subst"
(ALUOP3:EQD (zero_extend:EQD (match_dup 1))
(match_dup 2)))])
+;; Some ALU operations with zero-extended inputs are
+;; equivalent to doing the same ALU operation in the
+;; smaller mode, and then zero-extending the output.
+(define_subst "alu3_zext_op0_subst"
+ [(set (match_operand:EQD 0)
+ (ALUOP3_ZEXT0:EQD (zero_extend:EQD (match_operand:EQS0 1))
+ (zero_extend:EQD (match_operand:EQS0 2))))]
+ "GET_MODE_SIZE (<EQS0:MODE>mode) < GET_MODE_SIZE (<EQD:MODE>mode)"
+ [(set (match_dup 0)
+ (zero_extend:EQD
+ (ALUOP3_ZEXT0:EQS0 (match_dup 1)
+ (match_dup 2))))])
(define_subst "lmbd_zext_subst"
[(set (match_operand:EQD 0)
@@ -1,6 +1,6 @@
/* { dg-do assemble } */
/* { dg-options "-Os" } */
-/* { dg-final { object-size text <= 12 } } */
+/* { dg-final { object-size text <= 8 } } */
struct S {
unsigned int a : 5;
@@ -1,6 +1,6 @@
/* { dg-do assemble } */
/* { dg-options "-Os" } */
-/* { dg-final { object-size text <= 12 } } */
+/* { dg-final { object-size text <= 8 } } */
struct S {
unsigned int a : 5;
@@ -1,6 +1,6 @@
/* { dg-do assemble } */
/* { dg-options "-Os" } */
-/* { dg-final { object-size text <= 16 } } */
+/* { dg-final { object-size text <= 12 } } */
struct S {
unsigned int a : 9;
new file mode 100644
@@ -0,0 +1,28 @@
+/* ALU operations with zero extended destination. */
+
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+/* -O1 in the options is significant. Without it the zero extension might not
+ be coalesced into the ALU instruction. */
+
+unsigned int
+test_zext_xor_hi (unsigned short val1, unsigned short val2)
+{
+ /* { dg-final { scan-assembler "xor\\tr14, r14.w0, r14.w2" } } */
+ return (unsigned short)(val1 ^ val2);
+}
+
+unsigned int
+test_zext_or_hi (unsigned short val1, unsigned short val2)
+{
+ /* { dg-final { scan-assembler "or\\tr14, r14.w0, r14.w2" } } */
+ return (unsigned short)(val1 | val2);
+}
+
+unsigned int
+test_zext_ashr_hi_const (unsigned short val1)
+{
+ /* { dg-final { scan-assembler "lsr\\tr14, r14.w0, 3" } } */
+ return (unsigned short)(val1 >> 3);
+}