[v1] RISC-V: Bugfix for the RVV const vector

Message ID 20231218070433.2000339-1-pan2.li@intel.com
State Superseded
Headers
Series [v1] RISC-V: Bugfix for the RVV const vector |

Checks

Context Check Description
rivoscibot/toolchain-ci-rivos-apply-patch success Patch applied
rivoscibot/toolchain-ci-rivos-lint success Lint passed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-multilib success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
rivoscibot/toolchain-ci-rivos-build--linux-rv32gc_zba_zbb_zbc_zbs-ilp32d-non-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-non-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-test fail Testing failed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed

Commit Message

Li, Pan2 Dec. 18, 2023, 7:04 a.m. UTC
  From: Pan Li <pan2.li@intel.com>

This patch would like to fix one bug of const vector for interleave.
Assume we need to generate interleave const vector like below.

 V = {{4, -4, 3, -3, 2, -2, 1, -1,}

Before this patch:
vsetvl a3, zero, e64, m8, ta, ma
vid.v       v8            v8 =  {0, 1, 2, 3, 4}
li          a6, -1
vmul.vx     v8, v8, a6    v8 =  {-0, -1, -2, -3, -4}
vadd.vi     v24, v8, 4    v24 = { 4,  3,  2,  1,  0}
vadd.vi     v8, v8, -4    v8 =  {-4, -5, -6, -7, -8}
li          a6, 32
vsll.vx     v8, v8, a6    v8 =  {0, -4, 0, -5, 0, -6, 0, -7,} for e32
vor         v24, v24, v8  v24 = {4, -4, 3, -5, 2, -6, 1, -7,} for e32

After this patch:
vsetvli a6,zero,e64,m8,ta,ma
vidv  v8                  v8 =  {0, 1, 2, 3, 4}
li a7,-1
vmul.vx v16,v8,a7         v16 = {-0, -1, -2, -3, -4}
vaddvi v16,v16,4          v16 = { 4,  3,  2,  1, 0}
vaddvi v8,v8,-4           v8 =  {-4, -3, -2, -1, 0}
li a7,32
vsll.vx v8,v8,a7          v8 =  {0, -4, 0, -3, 0, -2,} for e32
vor.vv v16,v16,v8         v8 =  {4, -4, 3, -3, 2, -2,} for e32

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Take step2
	instead of step1 for second series.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/const-vector-0.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   |  2 +-
 .../riscv/rvv/autovec/const-vector-0.c        | 39 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c
  

Comments

juzhe.zhong@rivai.ai Dec. 18, 2023, 7:17 a.m. UTC | #1
The fix is reasonable.

But the test ASM check is too fragile which will easily break in the feature.

The key of the check should be:

vid.v       v8 -> can be v0, v8, v16, v24 since LMUL = 8
....
vadd.vi     v8, v8, -4  -> should be using the result of vid.

I think you should adjust test check according to this suggestion.

Also, rename the test from const-vector-0.c into bug-7.c


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-12-18 15:04
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Bugfix for the RVV const vector
From: Pan Li <pan2.li@intel.com>
 
This patch would like to fix one bug of const vector for interleave.
Assume we need to generate interleave const vector like below.
 
V = {{4, -4, 3, -3, 2, -2, 1, -1,}
 
Before this patch:
vsetvl a3, zero, e64, m8, ta, ma
vid.v       v8            v8 =  {0, 1, 2, 3, 4}
li          a6, -1
vmul.vx     v8, v8, a6    v8 =  {-0, -1, -2, -3, -4}
vadd.vi     v24, v8, 4    v24 = { 4,  3,  2,  1,  0}
vadd.vi     v8, v8, -4    v8 =  {-4, -5, -6, -7, -8}
li          a6, 32
vsll.vx     v8, v8, a6    v8 =  {0, -4, 0, -5, 0, -6, 0, -7,} for e32
vor         v24, v24, v8  v24 = {4, -4, 3, -5, 2, -6, 1, -7,} for e32
 
After this patch:
vsetvli a6,zero,e64,m8,ta,ma
vidv  v8                  v8 =  {0, 1, 2, 3, 4}
li a7,-1
vmul.vx v16,v8,a7         v16 = {-0, -1, -2, -3, -4}
vaddvi v16,v16,4          v16 = { 4,  3,  2,  1, 0}
vaddvi v8,v8,-4           v8 =  {-4, -3, -2, -1, 0}
li a7,32
vsll.vx v8,v8,a7          v8 =  {0, -4, 0, -3, 0, -2,} for e32
vor.vv v16,v16,v8         v8 =  {4, -4, 3, -3, 2, -2,} for e32
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (expand_const_vector): Take step2
instead of step1 for second series.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/const-vector-0.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc                   |  2 +-
.../riscv/rvv/autovec/const-vector-0.c        | 39 +++++++++++++++++++
2 files changed, 40 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index eade8db4cf1..d1eb7a0a9a5 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1331,7 +1331,7 @@ expand_const_vector (rtx target, rtx src)
  rtx tmp2 = gen_reg_rtx (new_mode);
  base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
  expand_vec_series (tmp2, base2,
-      gen_int_mode (step1, new_smode));
+      gen_int_mode (step2, new_smode));
  rtx shifted_tmp2 = expand_simple_binop (
    new_mode, ASHIFT, tmp2,
    gen_int_mode (builder.inner_bits_size (), Pmode), NULL_RTX,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c
new file mode 100644
index 00000000000..4f83121c663
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl512b -mabi=lp64d --param=riscv-autovec-lmul=m8 -ftree-vectorize -fno-vect-cost-model -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 4
+struct C { int r, i; };
+
+/*
+** init_struct_data:
+** ...
+** vsetivli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m8,\s*ta,\s*ma
+** vid\.v\s+v8
+** li\s+[atx][0-9]+,\s*-1
+** vmul\.vx\s+v16,\s*v8,\s*[atx][0-9]+
+** vadd\.vi\s+v16,\s*v16,\s*4
+** vadd\.vi\s+v8,\s*v8,\s*-4
+** li\s+[axt][0-9]+,32
+** vsll\.vx\s+v8,\s*v8,\s*[atx][0-9]+
+** vor\.vv\s+v16,\s*v16,\s*v8
+** ...
+*/
+void
+init_struct_data (struct C * __restrict a, struct C * __restrict b,
+   struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].r = N - i;
+      a[i].i = i - N;
+
+      b[i].r = i - N;
+      b[i].i = i + N;
+
+      c[i].r = -1 - i;
+      c[i].i = 2 * N - 1 - i;
+    }
+}
-- 
2.34.1
  

Patch

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index eade8db4cf1..d1eb7a0a9a5 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1331,7 +1331,7 @@  expand_const_vector (rtx target, rtx src)
 		  rtx tmp2 = gen_reg_rtx (new_mode);
 		  base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
 		  expand_vec_series (tmp2, base2,
-				     gen_int_mode (step1, new_smode));
+				     gen_int_mode (step2, new_smode));
 		  rtx shifted_tmp2 = expand_simple_binop (
 		    new_mode, ASHIFT, tmp2,
 		    gen_int_mode (builder.inner_bits_size (), Pmode), NULL_RTX,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c
new file mode 100644
index 00000000000..4f83121c663
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/const-vector-0.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl512b -mabi=lp64d --param=riscv-autovec-lmul=m8 -ftree-vectorize -fno-vect-cost-model -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 4
+struct C { int r, i; };
+
+/*
+** init_struct_data:
+** ...
+** vsetivli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m8,\s*ta,\s*ma
+** vid\.v\s+v8
+** li\s+[atx][0-9]+,\s*-1
+** vmul\.vx\s+v16,\s*v8,\s*[atx][0-9]+
+** vadd\.vi\s+v16,\s*v16,\s*4
+** vadd\.vi\s+v8,\s*v8,\s*-4
+** li\s+[axt][0-9]+,32
+** vsll\.vx\s+v8,\s*v8,\s*[atx][0-9]+
+** vor\.vv\s+v16,\s*v16,\s*v8
+** ...
+*/
+void
+init_struct_data (struct C * __restrict a, struct C * __restrict b,
+		  struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].r = N - i;
+      a[i].i = i - N;
+
+      b[i].r = i - N;
+      b[i].i = i + N;
+
+      c[i].r = -1 - i;
+      c[i].i = 2 * N - 1 - i;
+    }
+}