[6/8] AArch64: Add peephole and scheduling logic for pairwise operations that appear late in RTL.
Commit Message
Hi All,
Does what it says on the tin.  If pairwise operations only form late in RTL,
as the result of a split, combine or any other RTL pass, still try to recognize them.
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md: Add new peepholes.
* config/aarch64/aarch64.cc (aarch_macro_fusion_pair_p): Schedule a
lane VEC_SELECT and the PLUS that follows it next to each other to increase
the chance of forming pairwise operations.
--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 93a2888f567460ad10ec050ea7d4f701df4729d1..20e9adbf7b9b484f9a19f0c62770930dc3941eb2 100644
--
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 93a2888f567460ad10ec050ea7d4f701df4729d1..20e9adbf7b9b484f9a19f0c62770930dc3941eb2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3425,6 +3425,29 @@ (define_insn "aarch64_faddp<mode>"
[(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
+;; Fold a lane extract followed by a scalar add into one FADDV reduction:
+;;   op0 = vec_select (op1, lane);  op3 = op0 + op4
+;; where op4 is the same hard register as the vector op1 and op0 is dead
+;; afterwards.  Only two-element vectors (V2F) are matched, because reducing
+;; the whole of op1 equals a sum of two lanes only then, and op0 must not be
+;; op1's register, otherwise op4 would read the extracted value instead.
+(define_peephole2
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (vec_select:<VEL>
+ (match_operand:V2F 1 "register_operand")
+ (parallel [(match_operand 2 "const_int_operand")])))
+ (set (match_operand:<VEL> 3 "register_operand")
+ (plus:<VEL>
+ (match_dup 0)
+ (match_operand:<VEL> 4 "register_operand")))]
+ "TARGET_SIMD
+ && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1
+ && REGNO (operands[4]) == REGNO (operands[1])
+ && REGNO (operands[0]) != REGNO (operands[1])
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3) (unspec:<VEL> [(match_dup 1)] UNSPEC_FADDV))]
+)
+
(define_insn "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f3bd71c9f10868f9e6ab50d8e36ed3ee3d48ac22..4023b1729d92bf37f5a2fc8fc8cd3a5194532079 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25372,6 +25372,29 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
}
}
+ /* Try to schedule vec_select and add together so the peephole works. */
+ if (simple_sets_p && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set))
+ && GET_CODE (SET_SRC (prev_set)) == VEC_SELECT && GET_CODE (SET_SRC (curr_set)) == PLUS)
+ {
+ /* We're trying to match:
+ prev (vec_select) == (set (reg r0)
+ (vec_select (reg r1) n))
+ curr (plus) == (set (reg r2)
+ (plus (reg r0) (reg r1))) */
+ rtx prev_src = SET_SRC (prev_set);
+ rtx curr_src = SET_SRC (curr_set);
+ rtx parallel = XEXP (prev_src, 1);
+ auto idx
+ = ENDIAN_LANE_N (GET_MODE_NUNITS (GET_MODE (XEXP (prev_src, 0))), 1);
+ if (GET_CODE (parallel) == PARALLEL
+ && XVECLEN (parallel, 0) == 1
+ && known_eq (INTVAL (XVECEXP (parallel, 0, 0)), idx)
+ && GET_MODE (SET_DEST (prev_set)) == GET_MODE (curr_src)
+ && GET_MODE_INNER (GET_MODE (XEXP (prev_src, 0)))
+ == GET_MODE (XEXP (curr_src, 1)))
+ return true;
+ }
+
/* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */
if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
&& prev_set && curr_set && any_condjump_p (curr)
@@ -3425,6 +3425,29 @@ (define_insn "aarch64_faddp<mode>"
[(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
+;; Fold a lane extract followed by a scalar add into one FADDV reduction:
+;;   op0 = vec_select (op1, lane);  op3 = op0 + op4
+;; where op4 is the same hard register as the vector op1 and op0 is dead
+;; afterwards.  Only two-element vectors (V2F) are matched, because reducing
+;; the whole of op1 equals a sum of two lanes only then, and op0 must not be
+;; op1's register, otherwise op4 would read the extracted value instead.
+(define_peephole2
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (vec_select:<VEL>
+ (match_operand:V2F 1 "register_operand")
+ (parallel [(match_operand 2 "const_int_operand")])))
+ (set (match_operand:<VEL> 3 "register_operand")
+ (plus:<VEL>
+ (match_dup 0)
+ (match_operand:<VEL> 4 "register_operand")))]
+ "TARGET_SIMD
+ && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1
+ && REGNO (operands[4]) == REGNO (operands[1])
+ && REGNO (operands[0]) != REGNO (operands[1])
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3) (unspec:<VEL> [(match_dup 1)] UNSPEC_FADDV))]
+)
+
(define_insn "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
@@ -25372,6 +25372,29 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
}
}
+ /* Try to schedule vec_select and add together so the peephole works. */
+ if (simple_sets_p && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set))
+ && GET_CODE (SET_SRC (prev_set)) == VEC_SELECT && GET_CODE (SET_SRC (curr_set)) == PLUS)
+ {
+ /* We're trying to match:
+ prev (vec_select) == (set (reg r0)
+ (vec_select (reg r1) n))
+ curr (plus) == (set (reg r2)
+ (plus (reg r0) (reg r1))) */
+ rtx prev_src = SET_SRC (prev_set);
+ rtx curr_src = SET_SRC (curr_set);
+ rtx parallel = XEXP (prev_src, 1);
+ auto idx
+ = ENDIAN_LANE_N (GET_MODE_NUNITS (GET_MODE (XEXP (prev_src, 0))), 1);
+ if (GET_CODE (parallel) == PARALLEL
+ && XVECLEN (parallel, 0) == 1
+ && known_eq (INTVAL (XVECEXP (parallel, 0, 0)), idx)
+ && GET_MODE (SET_DEST (prev_set)) == GET_MODE (curr_src)
+ && GET_MODE_INNER (GET_MODE (XEXP (prev_src, 0)))
+ == GET_MODE (XEXP (curr_src, 1)))
+ return true;
+ }
+
/* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */
if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
&& prev_set && curr_set && any_condjump_p (curr)