RISC-V: Fix VSETVL PASS regression

Message ID 20231201005110.2689714-1-juzhe.zhong@rivai.ai
State Committed
Commit 923a67f17badcbe6e2b2e5d3570a265443258c8e
Headers
Series RISC-V: Fix VSETVL PASS regression |

Checks

Context Check Description
rivoscibot/toolchain-ci-rivos-apply-patch success Patch applied
rivoscibot/toolchain-ci-rivos-lint success Lint passed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv32gc_zba_zbb_zbc_zbs-ilp32d-non-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-non-multilib success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed
rivoscibot/toolchain-ci-rivos-test success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 warning Patch is already merged

Commit Message

juzhe.zhong@rivai.ai Dec. 1, 2023, 12:51 a.m. UTC
  This patch fix 2 regression (one is bug regression, the other is performance regression).
Those 2 regressions are both we are comparing ratio for same AVL in wrong place.

1. BUG regression:
avl_single-84.c:

f0:
        li      a5,999424
        add     a1,a1,a5
        li      a4,299008
        add     a5,a0,a5
        addi    a3,a4,992
        addi    a5,a5,576
        addi    a1,a1,576
        vsetvli a4,zero,e8,m2,ta,ma
        add     a0,a0,a3
        vlm.v   v1,0(a5)
        vsm.v   v1,0(a1)
        vl1re64.v       v1,0(a0)
        beq     a2,zero,.L10
        li      a5,0
        vsetvli zero,zero,e64,m1,tu,ma   --->  This is totally incorrect since the ratio above is 4, wheras it is demanding ratio = 64 here.
.L3:
        fcvt.d.lu       fa5,a5
        addi    a5,a5,1
        fadd.d  fa5,fa5,fa0
        vfmv.s.f        v1,fa5
        bne     a5,a2,.L3
        vfmv.f.s        fa0,v1
        ret
.L10:
        vsetvli zero,zero,e64,m1,ta,ma
        vfmv.f.s        fa0,v1
        ret

2. Performance regression:

before this patch:

        vsetvli a5,a4,e8,m1,ta,ma
        vsetvli zero,a5,e32,m1,tu,ma
        vmv.s.x v2,zero
        vmv.s.x v1,zero
        vsetvli zero,a5,e32,m4,tu,ma
        vle32.v v4,0(a1)
        vfmul.vv        v4,v4,v4
        vfredosum.vs    v1,v4,v2
        vfmv.f.s        fa5,v1
        fsw     fa5,0(a0)
        sub     a4,a4,a5
        bne     a4,zero,.L2
        ret

After this patch:

	vsetvli	a5,a4,e32,m4,tu,ma
	vle32.v	v4,0(a1)
	vmv.s.x	v2,zero
	vmv.s.x	v1,zero
	vfmul.vv	v4,v4,v4
	vfredosum.vs	v1,v4,v2
	vfmv.f.s	fa5,v1
	fsw	fa5,0(a0)
	sub	a4,a4,a5
	bne	a4,zero,.L2
	ret

Tested rv64gcv_zvfh_zfh passed no regression.

zvl256b/zvl512b/zvl1024b/zve64d is runing.

	PR target/112776

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test.
	* gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 13 ++++---
 .../riscv/rvv/vsetvl/avl_single-84.c          |  6 ++--
 .../gcc.target/riscv/rvv/vsetvl/pr111037-3.c  |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/pr112776.c    | 36 +++++++++++++++++++
 4 files changed, 46 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
  

Comments

juzhe.zhong@rivai.ai Dec. 1, 2023, 2:57 a.m. UTC | #1
All regressions (zve64d/zvl128b/zvl256b/zvl512b/zvl1024b) passed.


juzhe.zhong@rivai.ai
 
From: Juzhe-Zhong
Date: 2023-12-01 08:51
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Fix VSETVL PASS regression
This patch fix 2 regression (one is bug regression, the other is performance regression).
Those 2 regressions are both we are comparing ratio for same AVL in wrong place.
 
1. BUG regression:
avl_single-84.c:
 
f0:
        li      a5,999424
        add     a1,a1,a5
        li      a4,299008
        add     a5,a0,a5
        addi    a3,a4,992
        addi    a5,a5,576
        addi    a1,a1,576
        vsetvli a4,zero,e8,m2,ta,ma
        add     a0,a0,a3
        vlm.v   v1,0(a5)
        vsm.v   v1,0(a1)
        vl1re64.v       v1,0(a0)
        beq     a2,zero,.L10
        li      a5,0
        vsetvli zero,zero,e64,m1,tu,ma   --->  This is totally incorrect since the ratio above is 4, wheras it is demanding ratio = 64 here.
.L3:
        fcvt.d.lu       fa5,a5
        addi    a5,a5,1
        fadd.d  fa5,fa5,fa0
        vfmv.s.f        v1,fa5
        bne     a5,a2,.L3
        vfmv.f.s        fa0,v1
        ret
.L10:
        vsetvli zero,zero,e64,m1,ta,ma
        vfmv.f.s        fa0,v1
        ret
 
2. Performance regression:
 
before this patch:
 
        vsetvli a5,a4,e8,m1,ta,ma
        vsetvli zero,a5,e32,m1,tu,ma
        vmv.s.x v2,zero
        vmv.s.x v1,zero
        vsetvli zero,a5,e32,m4,tu,ma
        vle32.v v4,0(a1)
        vfmul.vv        v4,v4,v4
        vfredosum.vs    v1,v4,v2
        vfmv.f.s        fa5,v1
        fsw     fa5,0(a0)
        sub     a4,a4,a5
        bne     a4,zero,.L2
        ret
 
After this patch:
 
vsetvli a5,a4,e32,m4,tu,ma
vle32.v v4,0(a1)
vmv.s.x v2,zero
vmv.s.x v1,zero
vfmul.vv v4,v4,v4
vfredosum.vs v1,v4,v2
vfmv.f.s fa5,v1
fsw fa5,0(a0)
sub a4,a4,a5
bne a4,zero,.L2
ret
 
Tested rv64gcv_zvfh_zfh passed no regression.
 
zvl256b/zvl512b/zvl1024b/zve64d is runing.
 
PR target/112776
 
gcc/ChangeLog:
 
* config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test.
* gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto.
* gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.
 
---
gcc/config/riscv/riscv-vsetvl.cc              | 13 ++++---
.../riscv/rvv/vsetvl/avl_single-84.c          |  6 ++--
.../gcc.target/riscv/rvv/vsetvl/pr111037-3.c  |  2 +-
.../gcc.target/riscv/rvv/vsetvl/pr112776.c    | 36 +++++++++++++++++++
4 files changed, 46 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4c3aa..1da95daeeb0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@ private:
   {
     gcc_assert (prev.valid_p () && next.valid_p ());
-    if (prev.get_ratio () != next.get_ratio ())
-      return false;
-
     if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
       return false;
@@ -2188,7 +2185,7 @@ private:
     return true;
   }
-  bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+  bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
   {
     gcc_assert (
       !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@ private:
       {
const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
if (!prev_info.valid_p ()
-     || !m_dem.avl_available_p (prev_info, curr_info))
+     || !m_dem.avl_available_p (prev_info, curr_info)
+     || prev_info.get_ratio () != curr_info.get_ratio ())
  return false;
       }
@@ -3171,7 +3169,7 @@ pre_vsetvl::pre_global_vsetvl_info ()
  curr_info = block_info.local_infos[0];
}
       if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
-   && preds_has_same_avl_p (curr_info))
+   && preds_all_same_avl_and_ratio_p (curr_info))
curr_info.set_change_vtype_only ();
       vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@ pre_vsetvl::pre_global_vsetvl_info ()
       for (auto &curr_info : block_info.local_infos)
{
  if (prev_info.valid_p () && curr_info.valid_p ()
-       && m_dem.avl_available_p (prev_info, curr_info))
+       && m_dem.avl_available_p (prev_info, curr_info)
+       && prev_info.get_ratio () == curr_info.get_ratio ())
    curr_info.set_change_vtype_only ();
  prev_info = curr_info;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd97dc0..5cd0f285029 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@ double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642c8b6..13344ecdd3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@ void foo(_Float16 y, int16_t z, int64_t *i64p)
}
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 00000000000..853690178ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+  int i, k;
+
+  vfloat32m4_t x_vec;
+  vfloat32m4_t x_forward_vec;
+  vfloat32m4_t temp_vec;
+  vfloat32m1_t dst_vec;
+  vfloat32m1_t src_vec;
+
+  float result = 0.0f;
+  float shift_prev = 0.0f;
+
+  size_t n = 64;
+  for (size_t vl; n > 0; n -= vl)
+    {
+      vl = __riscv_vsetvl_e32m4 (n);
+      x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+      src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+      dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+      dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+      r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */
-- 
2.36.3
  

Patch

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4c3aa..1da95daeeb0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@  private:
   {
     gcc_assert (prev.valid_p () && next.valid_p ());
 
-    if (prev.get_ratio () != next.get_ratio ())
-      return false;
-
     if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
       return false;
 
@@ -2188,7 +2185,7 @@  private:
     return true;
   }
 
-  bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+  bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
   {
     gcc_assert (
       !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@  private:
       {
 	const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
 	if (!prev_info.valid_p ()
-	    || !m_dem.avl_available_p (prev_info, curr_info))
+	    || !m_dem.avl_available_p (prev_info, curr_info)
+	    || prev_info.get_ratio () != curr_info.get_ratio ())
 	  return false;
       }
 
@@ -3171,7 +3169,7 @@  pre_vsetvl::pre_global_vsetvl_info ()
 	  curr_info = block_info.local_infos[0];
 	}
       if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
-	  && preds_has_same_avl_p (curr_info))
+	  && preds_all_same_avl_and_ratio_p (curr_info))
 	curr_info.set_change_vtype_only ();
 
       vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@  pre_vsetvl::pre_global_vsetvl_info ()
       for (auto &curr_info : block_info.local_infos)
 	{
 	  if (prev_info.valid_p () && curr_info.valid_p ()
-	      && m_dem.avl_available_p (prev_info, curr_info))
+	      && m_dem.avl_available_p (prev_info, curr_info)
+	      && prev_info.get_ratio () == curr_info.get_ratio ())
 	    curr_info.set_change_vtype_only ();
 	  prev_info = curr_info;
 	}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd97dc0..5cd0f285029 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@  double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
 }
 
 /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642c8b6..13344ecdd3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@  void foo(_Float16 y, int16_t z, int64_t *i64p)
 }
 
 /* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 00000000000..853690178ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+  int i, k;
+
+  vfloat32m4_t x_vec;
+  vfloat32m4_t x_forward_vec;
+  vfloat32m4_t temp_vec;
+  vfloat32m1_t dst_vec;
+  vfloat32m1_t src_vec;
+
+  float result = 0.0f;
+  float shift_prev = 0.0f;
+
+  size_t n = 64;
+  for (size_t vl; n > 0; n -= vl)
+    {
+      vl = __riscv_vsetvl_e32m4 (n);
+      x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+      src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+      dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+      dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+      r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */