[V2] RISC-V: Add vwadd<u>/vwsub<u>/vwmul<u>/vwmulsu.vv lowering optimization for RVV auto-vectorization

Message ID 20230531132049.4095145-1-juzhe.zhong@rivai.ai
State Committed
Headers
Series [V2] RISC-V: Add vwadd<u>/vwsub<u>/vwmul<u>/vwmulsu.vv lowering optimization for RVV auto-vectorization |

Commit Message

juzhe.zhong@rivai.ai May 31, 2023, 1:20 p.m. UTC
  From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

Based on the V1 patch, this version adds the following comment:
;; Use define_insn_and_split to define vsext.vf2/vzext.vf2 will help combine PASS
;; to combine instructions as below:
;;   vsext.vf2 + vsext.vf2 + vadd.vv ==> vwadd.vv

gcc/ChangeLog:

        * config/riscv/autovec.md (<optab><v_double_trunc><mode>2): Change expand into define_insn_and_split.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/rvv.exp: Run the autovec/widen tests.
        * gcc.target/riscv/rvv/autovec/widen/widen-1.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen-2.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen-3.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen-4.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen_run-1.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen_run-2.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: New test.
        * gcc.target/riscv/rvv/autovec/widen/widen_run-4.c: New test.

---
 gcc/config/riscv/autovec.md                   | 16 ++++++---
 .../riscv/rvv/autovec/widen/widen-1.c         | 27 +++++++++++++++
 .../riscv/rvv/autovec/widen/widen-2.c         | 27 +++++++++++++++
 .../riscv/rvv/autovec/widen/widen-3.c         | 27 +++++++++++++++
 .../riscv/rvv/autovec/widen/widen-4.c         | 23 +++++++++++++
 .../riscv/rvv/autovec/widen/widen_run-1.c     | 34 +++++++++++++++++++
 .../riscv/rvv/autovec/widen/widen_run-2.c     | 34 +++++++++++++++++++
 .../riscv/rvv/autovec/widen/widen_run-3.c     | 34 +++++++++++++++++++
 .../riscv/rvv/autovec/widen/widen_run-4.c     | 31 +++++++++++++++++
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    | 13 +++++++
 10 files changed, 262 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-4.c
  

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 4834bb4b412..2a21ce3f93c 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -401,16 +401,24 @@ 
 ;; - vsext.vf[2|4|8]
 ;; -------------------------------------------------------------------------
 
-(define_expand "<optab><v_double_trunc><mode>2"
-  [(set (match_operand:VWEXTI 0 "register_operand")
+;; Use define_insn_and_split to define vsext.vf2/vzext.vf2 will help combine PASS
+;; to combine instructions as below:
+;;   vsext.vf2 + vsext.vf2 + vadd.vv ==> vwadd.vv
+(define_insn_and_split "<optab><v_double_trunc><mode>2"
+  [(set (match_operand:VWEXTI 0 "register_operand" "=&vr")
     (any_extend:VWEXTI
-     (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")))]
+     (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand" "vr")))]
   "TARGET_VECTOR"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
 {
   insn_code icode = code_for_pred_vf2 (<CODE>, <MODE>mode);
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
   DONE;
-})
+}
+  [(set_attr "type" "vext")
+   (set_attr "mode" "<MODE>")])
 
 (define_expand "<optab><v_quad_trunc><mode>2"
   [(set (match_operand:VQEXTI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-1.c
new file mode 100644
index 00000000000..00edecab089
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-1.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2)                                                \
+  __attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 (TYPE1 *__restrict dst,   \
+						      TYPE2 *__restrict a,     \
+						      TYPE2 *__restrict b,     \
+						      int n)                   \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] + (TYPE1) b[i];                                    \
+  }
+
+#define TEST_ALL()                                                             \
+  TEST_TYPE (int16_t, int8_t)                                                  \
+  TEST_TYPE (uint16_t, uint8_t)                                                \
+  TEST_TYPE (int32_t, int16_t)                                                 \
+  TEST_TYPE (uint32_t, uint16_t)                                               \
+  TEST_TYPE (int64_t, int32_t)                                                 \
+  TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvwadd\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-2.c
new file mode 100644
index 00000000000..4d370f583b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-2.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2)                                                \
+  __attribute__ ((noipa)) void vwsub_##TYPE1_##TYPE2 (TYPE1 *__restrict dst,   \
+						      TYPE2 *__restrict a,     \
+						      TYPE2 *__restrict b,     \
+						      int n)                   \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] - (TYPE1) b[i];                                    \
+  }
+
+#define TEST_ALL()                                                             \
+  TEST_TYPE (int16_t, int8_t)                                                  \
+  TEST_TYPE (uint16_t, uint8_t)                                                \
+  TEST_TYPE (int32_t, int16_t)                                                 \
+  TEST_TYPE (uint32_t, uint16_t)                                               \
+  TEST_TYPE (int64_t, int32_t)                                                 \
+  TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvwsub\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwsubu\.vv} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
new file mode 100644
index 00000000000..609a5c09f70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2)                                                \
+  __attribute__ ((noipa)) void vwmul_##TYPE1_##TYPE2 (TYPE1 *__restrict dst,   \
+						      TYPE2 *__restrict a,     \
+						      TYPE2 *__restrict b,     \
+						      int n)                   \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] * (TYPE1) b[i];                                    \
+  }
+
+#define TEST_ALL()                                                             \
+  TEST_TYPE (int16_t, int8_t)                                                  \
+  TEST_TYPE (uint16_t, uint8_t)                                                \
+  TEST_TYPE (int32_t, int16_t)                                                 \
+  TEST_TYPE (uint32_t, uint16_t)                                               \
+  TEST_TYPE (int64_t, int32_t)                                                 \
+  TEST_TYPE (uint64_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-4.c
new file mode 100644
index 00000000000..c29a74c4f8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-4.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2, TYPE3)                                         \
+  __attribute__ ((noipa)) void vwmul_##TYPE1_##TYPE2 (TYPE1 *__restrict dst,   \
+						      TYPE2 *__restrict a,     \
+						      TYPE3 *__restrict b,     \
+						      int n)                   \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = (TYPE1) a[i] * (TYPE1) b[i];                                    \
+  }
+
+#define TEST_ALL()                                                             \
+  TEST_TYPE (int16_t, int8_t, uint8_t)                                         \
+  TEST_TYPE (int32_t, int16_t, uint16_t)                                       \
+  TEST_TYPE (int64_t, int32_t, uint32_t)
+
+TEST_ALL ()
+
+/* { dg-final { scan-assembler-times {\tvwmulsu\.vv} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-1.c
new file mode 100644
index 00000000000..6cdeb571711
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-1.c
@@ -0,0 +1,34 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "widen-1.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE2[i] = LIMIT + i & 1964;                                          \
+    }                                                                          \
+  vwadd_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] + (TYPE1) b##TYPE2[i]));
+
+#define RUN_ALL()                                                              \
+  RUN (int16_t, int8_t, -128)                                                  \
+  RUN (uint16_t, uint8_t, 255)                                                 \
+  RUN (int32_t, int16_t, -32768)                                               \
+  RUN (uint32_t, uint16_t, 65535)                                              \
+  RUN (int64_t, int32_t, -2147483648)                                          \
+  RUN (uint64_t, uint32_t, 4294967295)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-2.c
new file mode 100644
index 00000000000..84baa515610
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-2.c
@@ -0,0 +1,34 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "widen-2.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE2[i] = LIMIT + i & 1964;                                          \
+    }                                                                          \
+  vwsub_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] - (TYPE1) b##TYPE2[i]));
+
+#define RUN_ALL()                                                              \
+  RUN (int16_t, int8_t, -128)                                                  \
+  RUN (uint16_t, uint8_t, 255)                                                 \
+  RUN (int32_t, int16_t, -32768)                                               \
+  RUN (uint32_t, uint16_t, 65535)                                              \
+  RUN (int64_t, int32_t, -2147483648)                                          \
+  RUN (uint64_t, uint32_t, 4294967295)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
new file mode 100644
index 00000000000..beb0cc2b58b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-3.c
@@ -0,0 +1,34 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "widen-3.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, LIMIT)                                               \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE2 b##TYPE2[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE2[i] = LIMIT + i & 1964;                                          \
+    }                                                                          \
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]));
+
+#define RUN_ALL()                                                              \
+  RUN (int16_t, int8_t, -128)                                                  \
+  RUN (uint16_t, uint8_t, 255)                                                 \
+  RUN (int32_t, int16_t, -32768)                                               \
+  RUN (uint32_t, uint16_t, 65535)                                              \
+  RUN (int64_t, int32_t, -2147483648)                                          \
+  RUN (uint64_t, uint32_t, 4294967295)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-4.c
new file mode 100644
index 00000000000..a14539f72ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen_run-4.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include <assert.h>
+#include "widen-4.c"
+
+#define SZ 512
+
+#define RUN(TYPE1, TYPE2, TYPE3, LIMIT)                                        \
+  TYPE2 a##TYPE2[SZ];                                                          \
+  TYPE3 b##TYPE3[SZ];                                                          \
+  TYPE1 dst##TYPE1[SZ];                                                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE2[i] = LIMIT + i % 8723;                                          \
+      b##TYPE3[i] = LIMIT + i & 1964;                                          \
+    }                                                                          \
+  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE3, SZ);                  \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE3[i]));
+
+#define RUN_ALL()                                                              \
+  RUN (int16_t, int8_t, uint8_t, -128)                                         \
+  RUN (int32_t, int16_t, uint16_t, -32768)                                     \
+  RUN (int64_t, int32_t, uint32_t, -2147483648)
+
+int
+main ()
+{
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index bf03570b9cc..5e69235a268 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -73,6 +73,19 @@  foreach op $AUTOVEC_TEST_OPTS {
     "" "$op"
 }
 
+# Widening operations are only tested with LMUL < 8.
+set AUTOVEC_TEST_OPTS [list \
+  {-ftree-vectorize -O3 --param riscv-autovec-lmul=m1} \
+  {-ftree-vectorize -O3 --param riscv-autovec-lmul=m2} \
+  {-ftree-vectorize -O3 --param riscv-autovec-lmul=m4} \
+  {-ftree-vectorize -O2 --param riscv-autovec-lmul=m1} \
+  {-ftree-vectorize -O2 --param riscv-autovec-lmul=m2} \
+  {-ftree-vectorize -O2 --param riscv-autovec-lmul=m4} ]
+foreach op $AUTOVEC_TEST_OPTS {
+  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/widen/*.\[cS\]]] \
+    "" "$op"
+}
+
 # VLS-VLMAX tests
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
 	"-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS