[GCC14,QUEUE] RISC-V: Eliminate redundant vsetvli for duplicate AVL def

Message ID 20230328010124.235703-1-juzhe.zhong@rivai.ai
State Committed
Headers
Series [GCC14,QUEUE] RISC-V: Eliminate redundant vsetvli for duplicate AVL def |

Commit Message

钟居哲 March 28, 2023, 1:01 a.m. UTC
  From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
{
	  vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
	  for (int i = 0; i < n; i++){
	    v = __riscv_vor_vx_i8mf4 (v, 101, 32);
	    v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
	  }
	  __riscv_vse8_v_i8mf4 (out, v, 32);
}

before this patch:
	f:
		li      a5,32
		vsetvli zero,a5,e8,mf4,tu,ma
		vle8.v  v1,0(a0)
		ble     a3,zero,.L2
		li      t0,0
		li      a0,101
	.L3:
		addiw   t0,t0,1
		vor.vx  v1,v1,a0
		vle8.v  v1,0(a1)
		bne     a3,t0,.L3
	.L2:
		vsetvli zero,zero,e8,mf4,tu,ma
		vse8.v  v1,0(a2)
		ret


afther this patch:

	f:
		li      a5,32
		vsetvli zero,a5,e8,mf4,tu,ma
		vle8.v  v1,0(a0)
		ble     a3,zero,.L2
		li      t0,0
		li      a0,101
	.L3:
		addiw   t0,t0,1
		vor.vx  v1,v1,a0
		vle8.v  v1,0(a1)
		bne     a3,t0,.L3
	.L2:
		vse8.v  v1,0(a2)
		ret

gcc/ChangeLog:

        * config/riscv/riscv-vsetvl.cc (vector_infos_manager::all_avail_in_compatible_p): New function.
        (pass_vsetvl::refine_vsetvls): Remove redundant vsetvli.
        * config/riscv/riscv-vsetvl.h: New function declare.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/vsetvl/avl_single-102.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 67 ++++++++++++++++++-
 gcc/config/riscv/riscv-vsetvl.h               |  1 +
 .../riscv/rvv/vsetvl/avl_single-102.c         | 16 +++++
 3 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c
  

Comments

Jeff Law April 22, 2023, 3:12 a.m. UTC | #1
On 3/27/23 19:01, juzhe.zhong@rivai.ai wrote:
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
> 
> void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
> {
> 	  vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
> 	  for (int i = 0; i < n; i++){
> 	    v = __riscv_vor_vx_i8mf4 (v, 101, 32);
> 	    v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
> 	  }
> 	  __riscv_vse8_v_i8mf4 (out, v, 32);
> }
> 
> before this patch:
> 	f:
> 		li      a5,32
> 		vsetvli zero,a5,e8,mf4,tu,ma
> 		vle8.v  v1,0(a0)
> 		ble     a3,zero,.L2
> 		li      t0,0
> 		li      a0,101
> 	.L3:
> 		addiw   t0,t0,1
> 		vor.vx  v1,v1,a0
> 		vle8.v  v1,0(a1)
> 		bne     a3,t0,.L3
> 	.L2:
> 		vsetvli zero,zero,e8,mf4,tu,ma
> 		vse8.v  v1,0(a2)
> 		ret
> 
> 
> afther this patch:
> 
> 	f:
> 		li      a5,32
> 		vsetvli zero,a5,e8,mf4,tu,ma
> 		vle8.v  v1,0(a0)
> 		ble     a3,zero,.L2
> 		li      t0,0
> 		li      a0,101
> 	.L3:
> 		addiw   t0,t0,1
> 		vor.vx  v1,v1,a0
> 		vle8.v  v1,0(a1)
> 		bne     a3,t0,.L3
> 	.L2:
> 		vse8.v  v1,0(a2)
> 		ret
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv-vsetvl.cc (vector_infos_manager::all_avail_in_compatible_p): New function.
>          (pass_vsetvl::refine_vsetvls): Remove redundant vsetvli.
>          * config/riscv/riscv-vsetvl.h: New function declare.
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/rvv/vsetvl/avl_single-102.c: New test.
> 
> ---
>   gcc/config/riscv/riscv-vsetvl.cc              | 67 ++++++++++++++++++-
>   gcc/config/riscv/riscv-vsetvl.h               |  1 +
>   .../riscv/rvv/vsetvl/avl_single-102.c         | 16 +++++
>   3 files changed, 81 insertions(+), 3 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c
> 
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
> index 4948e5d4c5e..58568b45010 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -2376,6 +2376,23 @@ vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
>     return true;
>   }
>   
> +bool
> +vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
This needs a function comment.  Perhaps:

/* Return TRUE if the incoming vector configuration state
    to CFG_BB is compatible with the vector configuration
    state in CFG_BB, FALSE otherwise.  */


> +
> +      /* Optimize such case:
> +	void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
> +	{
> +	  vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
> +	  for (int i = 0; i < n; i++){
> +	    v = __riscv_vor_vx_i8mf4 (v, 101, 32);
> +	    v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
> +	  }
> +	  __riscv_vse8_v_i8mf4 (out, v, 32);
> +	}
In general I would suggest rather than writing code like this in the 
comments, instead describe the properties you're looking for.  That way 
someone who may not be a RISC-V expert can more easily interpret the 
scenario you're looking for and what action you want to take when the 
scenario is discovered.

In this particular case it look like you're trying to describe the 
scenario where all incoming edges to a block have a vector state that is 
compatbile with the block.  In such a case we need not emit a vsetvl in 
the current block.

THe right place for code is in the testsuite.

So generally OK, though you do need to adjust the comments slightly. 
Please do that and repost for a final review/ACK.

Thanks,

Jeff
  

Patch

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 4948e5d4c5e..58568b45010 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2376,6 +2376,23 @@  vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
   return true;
 }
 
+bool
+vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
+{
+  const auto &info = vector_block_infos[cfg_bb->index].local_dem;
+  sbitmap avin = vector_avin[cfg_bb->index];
+  unsigned int bb_index;
+  sbitmap_iterator sbi;
+  EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
+  {
+    const auto &avin_info
+      = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
+    if (!info.compatible_p (avin_info))
+      return false;
+  }
+  return true;
+}
+
 bool
 vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
 				      sbitmap bitdata) const
@@ -3741,9 +3758,53 @@  pass_vsetvl::refine_vsetvls (void) const
 	  m_vector_manager->to_refine_vsetvls.add (rinsn);
 	  continue;
 	}
-      rinsn = PREV_INSN (rinsn);
-      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
-      change_insn (rinsn, new_pat);
+
+      /* Optimize such case:
+	void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
+	{
+	  vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
+	  for (int i = 0; i < n; i++){
+	    v = __riscv_vor_vx_i8mf4 (v, 101, 32);
+	    v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
+	  }
+	  __riscv_vse8_v_i8mf4 (out, v, 32);
+	}
+
+	f:
+		li      a5,32
+		vsetvli zero,a5,e8,mf4,tu,ma
+		vle8.v  v1,0(a0)
+		ble     a3,zero,.L2
+		li      t0,0
+		li      a0,101
+	.L3:
+		addiw   t0,t0,1
+		vor.vx  v1,v1,a0
+		vle8.v  v1,0(a1)
+		bne     a3,t0,.L3
+	.L2:
+		vsetvli zero,zero,e8,mf4,tu,ma
+		vse8.v  v1,0(a2)
+		ret
+
+	The second vsetvli is redundant.  */
+
+      gcc_assert (has_vtype_op (insn->rtl ()));
+      rinsn = PREV_INSN (insn->rtl ());
+      gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
+      if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
+	{
+	  size_t id = m_vector_manager->get_expr_id (info);
+	  if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
+	    continue;
+	  eliminate_insn (rinsn);
+	}
+      else
+	{
+	  rtx new_pat
+	    = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
+	  change_insn (rinsn, new_pat);
+	}
     }
 }
 
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index eec03d35071..d05472c86a0 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -451,6 +451,7 @@  public:
   bool all_same_ratio_p (sbitmap) const;
 
   bool all_empty_predecessor_p (const basic_block) const;
+  bool all_avail_in_compatible_p (const basic_block) const;
 
   void release (void);
   void create_bitmap_vectors (void);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c
new file mode 100644
index 00000000000..8236d4e7f18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-102.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize -frename-registers" } */
+
+#include "riscv_vector.h"
+
+void f (int8_t* base1,int8_t* base2,int8_t* out,int n)
+{
+  vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
+  for (int i = 0; i < n; i++){
+    v = __riscv_vor_vx_i8mf4 (v, 101, 32);
+    v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
+  }
+  __riscv_vse8_v_i8mf4 (out, v, 32);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */