[i386] Add combine splitter to transform vashr/vlshr/vashl_optab to ashr/lshr/ashl_optab for const vector duplicate operand.
Commit Message
Hi,
This patch adds a combine splitter to transform vashr/vlshr/vashl_optab to ashr/lshr/ashl_optab for a const vector duplicate operand.
Regtested on x86_64-pc-linux-gnu. Ok for trunk?
BRs,
Haochen
gcc/ChangeLog:
PR target/101796
* config/i386/predicates.md (const_vector_duplicate_operand):
Add new predicate.
* config/i386/sse.md (<insn><mode>3<mask_name>):
Add new define_split below.
gcc/testsuite/ChangeLog:
PR target/101796
* gcc.target/i386/pr101796-1.c: New test.
---
gcc/config/i386/predicates.md | 13 +++++++++++++
gcc/config/i386/sse.md | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr101796-1.c | 20 ++++++++++++++++++++
3 files changed, 47 insertions(+)
create mode 100755 gcc/testsuite/gcc.target/i386/pr101796-1.c
Comments
On Wed, Dec 8, 2021 at 2:47 PM Haochen Jiang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> This patch adds a combine splitter to transform vashr/vlshr/vashl_optab to ashr/lshr/ashl_optab for a const vector duplicate operand.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
Ok.
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> PR target/101796
> * config/i386/predicates.md (const_vector_duplicate_operand):
> Add new predicate.
> * config/i386/sse.md (<insn><mode>3<mask_name>):
> Add new define_split below.
>
> gcc/testsuite/ChangeLog:
>
> PR target/101796
> * gcc.target/i386/pr101796-1.c: New test.
> ---
> gcc/config/i386/predicates.md | 13 +++++++++++++
> gcc/config/i386/sse.md | 14 ++++++++++++++
> gcc/testsuite/gcc.target/i386/pr101796-1.c | 20 ++++++++++++++++++++
> 3 files changed, 47 insertions(+)
> create mode 100755 gcc/testsuite/gcc.target/i386/pr101796-1.c
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 4ccbe11b842..770e2f0c0dd 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1844,6 +1844,19 @@
> return true;
> })
>
> +;; Return true if OP is a const vector with duplicate value.
> +(define_predicate "const_vector_duplicate_operand"
> + (match_code "const_vector")
> +{
> + rtx elt = XVECEXP (op, 0, 0);
> + int i, nelt = XVECLEN (op, 0);
> +
> + for (i = 1; i < nelt; ++i)
> + if (!rtx_equal_p (elt, XVECEXP (op, 0, i)))
> + return false;
> + return true;
> +})
> +
> ;; Return true if OP is a parallel for a vbroadcast permute.
> (define_predicate "avx_vbroadcast_operand"
> (and (match_code "parallel")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 08bdcddc111..a2c0c1209c7 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -15232,6 +15232,20 @@
> (const_string "0")))
> (set_attr "mode" "<sseinsnmode>")])
>
> +;; PR target/101796: Transform movl+vpbroadcastw+vpsravw to vpsraw
> +;; when COUNT is immediate.
> +(define_split
> + [(set (match_operand:VI248_AVX512BW 0 "register_operand")
> + (any_shift:VI248_AVX512BW
> + (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
> + (match_operand:VI248_AVX512BW 2 "const_vector_duplicate_operand")))]
> + "TARGET_AVX512F && GET_MODE_UNIT_BITSIZE (<MODE>mode)
> + > INTVAL (XVECEXP (operands[2], 0, 0))"
> + [(set (match_dup 0)
> + (any_shift:VI248_AVX512BW
> + (match_dup 1)
> + (match_dup 3)))]
> + "operands[3] = XVECEXP (operands[2], 0, 0);")
>
> (define_expand "vec_shl_<mode>"
> [(set (match_dup 3)
> diff --git a/gcc/testsuite/gcc.target/i386/pr101796-1.c b/gcc/testsuite/gcc.target/i386/pr101796-1.c
> new file mode 100755
> index 00000000000..32ae5909913
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101796-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512bw" } */
> +/* { dg-final {scan-assembler-times "vpsrlw\[ \\t\]" 1 } } */
> +/* { dg-final {scan-assembler-times "vpsllw\[ \\t\]" 1 } } */
> +/* { dg-final {scan-assembler-times "vpsraw\[ \\t\]" 1 } } */
> +/* { dg-final {scan-assembler-not "vpbroadcastw\[ \\t\]" } } */
> +/* { dg-final {scan-assembler-not "vpsrlvw\[ \\t\]" } } */
> +/* { dg-final {scan-assembler-not "vpsllvw\[ \\t\]" } } */
> +/* { dg-final {scan-assembler-not "vpsravw\[ \\t\]" } } */
> +#include <immintrin.h>
> +
> +volatile __m512i a, b;
> +
> +void
> +foo()
> +{
> + b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
> + b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (4));
> + b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (5));
> +}
> --
> 2.18.1
>
@@ -1844,6 +1844,19 @@
return true;
})
+;; Return true if OP is a const vector with duplicate value.
+(define_predicate "const_vector_duplicate_operand"
+ (match_code "const_vector")
+{
+ rtx elt = XVECEXP (op, 0, 0);
+ int i, nelt = XVECLEN (op, 0);
+
+ for (i = 1; i < nelt; ++i)
+ if (!rtx_equal_p (elt, XVECEXP (op, 0, i)))
+ return false;
+ return true;
+})
+
;; Return true if OP is a parallel for a vbroadcast permute.
(define_predicate "avx_vbroadcast_operand"
(and (match_code "parallel")
@@ -15232,6 +15232,20 @@
(const_string "0")))
(set_attr "mode" "<sseinsnmode>")])
+;; PR target/101796: Transform movl+vpbroadcastw+vpsravw to vpsraw
+;; when COUNT is immediate.
+(define_split
+ [(set (match_operand:VI248_AVX512BW 0 "register_operand")
+ (any_shift:VI248_AVX512BW
+ (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
+ (match_operand:VI248_AVX512BW 2 "const_vector_duplicate_operand")))]
+ "TARGET_AVX512F && GET_MODE_UNIT_BITSIZE (<MODE>mode)
+ > INTVAL (XVECEXP (operands[2], 0, 0))"
+ [(set (match_dup 0)
+ (any_shift:VI248_AVX512BW
+ (match_dup 1)
+ (match_dup 3)))]
+ "operands[3] = XVECEXP (operands[2], 0, 0);")
(define_expand "vec_shl_<mode>"
[(set (match_dup 3)
new file mode 100755
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-final {scan-assembler-times "vpsrlw\[ \\t\]" 1 } } */
+/* { dg-final {scan-assembler-times "vpsllw\[ \\t\]" 1 } } */
+/* { dg-final {scan-assembler-times "vpsraw\[ \\t\]" 1 } } */
+/* { dg-final {scan-assembler-not "vpbroadcastw\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpsrlvw\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpsllvw\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpsravw\[ \\t\]" } } */
+#include <immintrin.h>
+
+volatile __m512i a, b;
+
+void
+foo()
+{
+ b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
+ b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (4));
+ b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (5));
+}