[06/21] Add insert-vsetvl pass

Message ID: 20220531085012.269719-7-juzhe.zhong@rivai.ai
State: Committed
Series: *** Add RVV (RISC-V 'V' Extension) support ***

Commit Message

juzhe.zhong@rivai.ai May 31, 2022, 8:49 a.m. UTC
  From: zhongjuzhe <juzhe.zhong@rivai.ai>

gcc/ChangeLog:

        * config.gcc: Add riscv-insert-vsetvl.o extra_objs for RVV support.
        * config/riscv/constraints.md (Ws5): New constraint.
        * config/riscv/predicates.md (p_reg_or_const_csr_operand): New predicate.
        (vector_reg_or_const0_operand): New predicate.
        (vector_move_operand): New predicate.
        (reg_or_mem_operand): New predicate.
        (reg_or_simm5_operand): New predicate.
        (reg_or_const_int_operand): New predicate.
        * config/riscv/riscv-opts.h (enum vsew_field_enum): New enum.
        * config/riscv/riscv-passes.def (INSERT_PASS_AFTER): Run insert vsetvl pass after pass_split_all_insns.
        (INSERT_PASS_BEFORE): Run insert vsetvl pass before pass_sched2.
        * config/riscv/riscv-protos.h (make_pass_insert_vsetvl): New function.
        (make_pass_insert_vsetvl2): New function.
        (rvv_mask_mode_p): New function.
        (rvv_classify_vsew_field): New function.
        (rvv_gen_policy): New function.
        (rvv_get_mask_mode): New function.
        (rvv_translate_attr_mode): New function.
        * config/riscv/riscv-vector-builtins-iterators.def (V): New iterator.
        (VF): New iterator.
        (VB): New iterator.
        (VFULL): New iterator.
        (VPARTIAL): New iterator.
        (V64BITI): New iterator.
        (VM): New iterator.
        (VSUB): New iterator.
        (VDI_TO_VSI): New iterator.
        (VDI_TO_VSI_VM): New iterator.
        * config/riscv/riscv-vector.cc (enum vsew_field_enum): New enum.
        (rvv_classify_vsew_field): New function.
        (rvv_gen_policy): New function.
        (rvv_translate_attr_mode): New function.
        (TRANSLATE_VECTOR_MODE): New macro define.
        (classify_vtype_field): New function.
        (get_lmulx8): New function.
        (force_reg_for_over_uimm): New function.
        (gen_vlx2): New function.
        (emit_int64_to_vector_32bit): New function.
        (imm32_p): New function.
        (imm_p): New function.
        (gen_3): New function.
        (gen_4): New function.
        (gen_5): New function.
        (gen_6): New function.
        (gen_7): New function.
        (enum GEN_CLASS): New enum.
        (modify_operands): New function.
        (emit_op5_vmv_v_x): New function.
        (emit_op5): New function.
        * config/riscv/riscv-vector.h (riscv_vector_mode_p): New function. 
        (rvv_legitimate_poly_int_p): New function.
        (rvv_offset_temporaries): New function.
        (rvv_classify_vlmul_field): New function.
        (rvv_parse_vsew_field): New function.
        (rvv_parse_vlmul_field): New function.
        (rvv_parse_vta_field): New function.
        (rvv_parse_vma_field): New function.
        (rvv_regsize): New function.
        (rvv_get_mask_mode): New function.
        * config/riscv/riscv.md: Add RVV modes.
        * config/riscv/t-riscv: New object.
        * config/riscv/vector-iterators.md: New iterators and attributes.
        * config/riscv/vector.md (@vec_duplicate<mode>): New pattern.
        (@vle<mode>): New pattern.
        (@vse<mode>): New pattern.
        (@vlm<mode>): New pattern.
        (@vsm<mode>): New pattern.
        (@v<vxoptab><mode>_v_x): New pattern.
        (@vmv<mode>_v_x_internal): New pattern.
        (@vmv<mode>_v_x_32bit): New pattern.
        (@vfmv<mode>_v_f): New pattern.
        (@vmerge<mode>_vxm_internal): New pattern.
        * config/riscv/riscv-insert-vsetvl.cc: New file.
        
---
 gcc/config.gcc                                |    2 +-
 gcc/config/riscv/constraints.md               |    5 +
 gcc/config/riscv/predicates.md                |   31 +
 gcc/config/riscv/riscv-insert-vsetvl.cc       | 2312 +++++++++++++++++
 gcc/config/riscv/riscv-opts.h                 |   12 +
 gcc/config/riscv/riscv-passes.def             |    2 +
 gcc/config/riscv/riscv-protos.h               |   19 +
 .../riscv/riscv-vector-builtins-iterators.def |  236 ++
 gcc/config/riscv/riscv-vector.cc              |  368 +++
 gcc/config/riscv/riscv-vector.h               |   10 -
 gcc/config/riscv/riscv.md                     |   67 +-
 gcc/config/riscv/t-riscv                      |    4 +
 gcc/config/riscv/vector-iterators.md          |  129 +-
 gcc/config/riscv/vector.md                    |  235 +-
 14 files changed, 3417 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-insert-vsetvl.cc
  

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 042a7a17737..1592e344531 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -518,7 +518,7 @@  pru-*-*)
 riscv*)
 	cpu_type=riscv
 	extra_headers="riscv_vector.h"
-	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o riscv-vector-builtins.o"
+	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o riscv-vector-builtins-functions.o riscv-vector-builtins.o riscv-insert-vsetvl.o"
 	d_target_objs="riscv-d.o"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-builtins.cc \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins-functions.cc"
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 7fd61a04216..114878130bb 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -97,3 +97,8 @@ 
   (and (match_code "const_poly_int")
        (match_test "CONST_POLY_INT_COEFFS (op)[0] == UNITS_PER_V_REG.coeffs[0]
         && CONST_POLY_INT_COEFFS (op)[1] == UNITS_PER_V_REG.coeffs[1]")))
+
+(define_constraint "Ws5"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -16, 15)")))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 6328cfff367..7a101676538 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -246,3 +246,34 @@ 
 (define_predicate "imm5_operand"
   (and (match_code "const_int")
        (match_test "INTVAL (op) < 5")))
+
+;; Vector Predicates.
+
+(define_special_predicate "p_reg_or_const_csr_operand"
+  (match_code "reg, subreg, const_int")
+{
+  if (CONST_INT_P (op))
+    return satisfies_constraint_K (op);
+  return GET_MODE (op) == Pmode;
+})
+
+(define_predicate "vector_reg_or_const0_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_test "op == const0_rtx && !VECTOR_MODE_P (GET_MODE (op))")))
+
+(define_predicate "vector_move_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+      (match_code "const_vector")))
+
+(define_predicate "reg_or_mem_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "memory_operand")))
+
+(define_predicate "reg_or_simm5_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_operand 0 "const_int_operand")
+	    (match_test "!FLOAT_MODE_P (GET_MODE (op)) && IN_RANGE (INTVAL (op), -16, 15)"))))
+
+(define_predicate "reg_or_const_int_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_code "const_wide_int, const_int")))
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-insert-vsetvl.cc b/gcc/config/riscv/riscv-insert-vsetvl.cc
new file mode 100644
index 00000000000..939927c5775
--- /dev/null
+++ b/gcc/config/riscv/riscv-insert-vsetvl.cc
@@ -0,0 +1,2312 @@ 
+/* Insert-vsetvli pass for RISC-V 'V' Extension for GNU compiler.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_ALGORITHM 1
+#define INCLUDE_FUNCTIONAL 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "backend.h"
+#include "regs.h"
+#include "target.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "df.h"
+#include "rtl-ssa.h"
+#include "predict.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "cfgrtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "langhooks.h"
+#include "tree-iterator.h"
+#include "gimplify.h"
+#include "explow.h"
+#include "cfgcleanup.h"
+
+#include <map>
+#include <vector>
+#include <queue>
+#include <set>
+#include <tuple>
+
+#include "riscv-protos.h"
+#include "riscv-vector-builtins-functions.h"
+#include "riscv-vector-builtins.h"
+
+using namespace riscv_vector;
+using namespace rtl_ssa;
+
+/*  This pass inserts vsetvli instructions for RVV instructions that depend on vtype or vl.
+    Because the Clang+LLVM compiler already has a mature, well-tested pass for inserting
+    vsetvli instructions, the algorithm here follows that Clang+LLVM pass.
+
+    This pass consists of 3 phases:
+
+    Phase 1 collects how each basic block affects VL/VTYPE.
+
+    Phase 2 uses the information from phase 1 to do a data flow analysis to
+    propagate the VL/VTYPE changes through the function. This gives us the
+    VL/VTYPE at the start of each basic block.
+
+    Phase 3 inserts vsetvli instructions in each basic block. Information from
+    phase 2 is used to prevent inserting a vsetvli before the first vector
+    instruction in the block if possible.  */
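+
+/* For example (an illustrative sketch, not output copied from this pass),
+   given a block whose vector instructions all run at VLMAX with e8/m1:
+
+       vle8.v  v1, (a0)
+       vadd.vv v1, v1, v1
+
+   phase 3 materializes the implied configuration before the first vector
+   instruction only:
+
+       vsetvli t0, zero, e8, m1, ta, mu
+       vle8.v  v1, (a0)
+       vadd.vv v1, v1, v1  */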
+
+enum state_enum
+{
+  STATE_UNINITIALIZED,
+  STATE_KNOWN,
+  STATE_UNKNOWN
+};
+
+enum replace_enum
+{
+  REPLACE_VL,
+  REPLACE_VTYPE
+};
+
+enum clobber_pat_enum
+{
+  MOV_CLOBBER_MEM_REG,
+  MOV_CLOBBER_REG_MEM,
+  MOV_CLOBBER_REG_REG,
+  MOV_CLOBBER_REG_CONST,
+  OTHERS
+};
+
+/* Helper functions. */
+
+static unsigned int
+get_policy_offset (rtx_insn *insn)
+{
+  unsigned int offset = 1;
+  if (GET_CODE (PATTERN (insn)) == PARALLEL)
+    {
+      if (get_attr_type (insn) == TYPE_VCMP)
+        offset = 2;
+    }
+  return offset;
+}
+
+static unsigned int
+get_vl_offset (rtx_insn *insn)
+{
+  unsigned int offset = 2;
+  if (GET_CODE (PATTERN (insn)) == PARALLEL)
+    {
+      if (get_attr_type (insn) == TYPE_VCMP)
+        offset = 3;
+    }
+  return offset;
+}
+
+static enum clobber_pat_enum
+recog_clobber_vl_vtype (rtx_insn *insn)
+{
+  /*
+   [(set (match_operand 0 "reg_or_mem_operand" "=vr,m,vr")
+         (match_operand 1 "reg_or_mem_operand" "m,vr,vr"))
+         (clobber (match_scratch:SI 2 "=&r,&r,X"))
+         (clobber (reg:SI VL_REGNUM))
+         (clobber (reg:SI VTYPE_REGNUM))]
+  */
+  rtx pat = PATTERN (insn);
+  if (GET_CODE (pat) != PARALLEL)
+    return OTHERS;
+
+  unsigned int len = XVECLEN (pat, 0);
+  if (len < 3)
+    return OTHERS;
+
+  if (!rtx_equal_p (
+          XVECEXP (pat, 0, len - 1),
+          gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, VTYPE_REGNUM))))
+    return OTHERS;
+
+  if (!rtx_equal_p (XVECEXP (pat, 0, len - 2),
+                    gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, VL_REGNUM))))
+    return OTHERS;
+
+  extract_insn_cached (insn);
+  rtx mov_pat = gen_rtx_SET (recog_data.operand[0], recog_data.operand[1]);
+  if (!rtx_equal_p (XVECEXP (pat, 0, 0), mov_pat))
+    return OTHERS;
+  
+  if (MEM_P (recog_data.operand[0]))
+    return MOV_CLOBBER_MEM_REG;
+ 
+  if (MEM_P (recog_data.operand[1]))
+    return MOV_CLOBBER_REG_MEM;
+  
+  if (REG_P (recog_data.operand[1]))
+    return MOV_CLOBBER_REG_REG;
+  
+  if (CONST_VECTOR_P (recog_data.operand[1]))
+    return MOV_CLOBBER_REG_CONST;
+      
+  return OTHERS;
+}
+
+static bool
+is_vector_config_instr (rtx_insn *insn)
+{
+  return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+         get_attr_type (insn) == TYPE_VSETVL;
+}
+
+/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
+/// VL and only sets VTYPE.
+static bool
+is_vl_preserving_config (rtx_insn *insn)
+{
+  if (is_vector_config_instr (insn))
+    {
+      extract_insn_cached (insn);
+      return recog_data.n_operands == 1;
+    }
+  return false;
+}
+
+static bool
+rvv_insn_p (rtx_insn *insn, rtx *src)
+{
+  *src = NULL_RTX;
+  if (!insn)
+    return false;
+  
+  if (!INSN_P (insn))
+    return false;
+  
+  if (recog_memoized (insn) < 0)
+    return false;
+  
+  if (!rvv_mode_p (rvv_translate_attr_mode (insn)))
+    return false;
+  
+  if (recog_clobber_vl_vtype (insn) != OTHERS)
+    {
+      if (reload_completed)
+        {
+          *src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+          return true;
+        }
+      else
+        return false;
+    }
+  
+  if (GET_CODE (PATTERN (insn)) == PARALLEL)
+    *src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+
+  if (GET_CODE (PATTERN (insn)) == SET)
+    *src = SET_SRC (PATTERN (insn));
+
+  if (!*src)
+    return false;
+
+  if (GET_CODE (*src) != UNSPEC)
+    return false;
+
+  if (XINT (*src, 1) != UNSPEC_RVV)
+    return false;
+
+  return true;
+}
+
+static bool
+use_vl_p (rtx_insn *insn)
+{
+  rtx src = NULL_RTX;
+  if (!rvv_insn_p (insn, &src))
+    return false;
+  
+  if (recog_clobber_vl_vtype (insn) != OTHERS)
+    return true;
+
+  if (rtx_equal_p (XVECEXP (src, 0, XVECLEN (src, 0) - 1),
+                   gen_rtx_REG (SImode, VL_REGNUM)))
+    return true;
+
+  if (XVECLEN (src, 0) > 1 &&
+      rtx_equal_p (XVECEXP (src, 0, XVECLEN (src, 0) - 2),
+                   gen_rtx_REG (SImode, VL_REGNUM)))
+    return true;
+
+  return false;
+}
+
+static bool
+use_vtype_p (rtx_insn *insn)
+{ 
+  rtx src = NULL_RTX;
+  if (!rvv_insn_p (insn, &src))
+    return false;
+  
+  if (recog_clobber_vl_vtype (insn) != OTHERS)
+    return true;
+    
+  if (rtx_equal_p (XVECEXP (src, 0, XVECLEN (src, 0) - 1),
+                   gen_rtx_REG (SImode, VTYPE_REGNUM)))
+    return true;
+
+  return false;
+}
+
+static bool
+use_vlmax_p (rtx_insn *insn)
+{
+  rtx src = NULL_RTX;
+  unsigned int length = 0;
+  
+  if (recog_clobber_vl_vtype (insn) != OTHERS)
+    return true;
+    
+  if (rvv_insn_p (insn, &src))
+    length = XVECLEN (src, 0);
+  
+  if (length < 2)
+    return false;
+
+  if (rtx_equal_p (XVECEXP (src, 0, length - 1),
+                   gen_rtx_REG (SImode, VL_REGNUM)))
+    return rtx_equal_p (XVECEXP (src, 0, length - 2),
+                        gen_rtx_REG (Pmode, X0_REGNUM));
+
+  if (length < 3)
+    return false;
+
+  return rtx_equal_p (XVECEXP (src, 0, length - 3),
+                      gen_rtx_REG (Pmode, X0_REGNUM));
+}
+
+static bool
+need_vsetvli_p (rtx_insn *insn)
+{
+  rtx src = NULL_RTX;
+  if (!rvv_insn_p (insn, &src))
+    return false;
+  return true;
+}
+
+static void
+replace_op (rtx_insn *insn, rtx x, unsigned int replace)
+{
+  extract_insn_cached (insn);
+  if (replace == REPLACE_VTYPE)
+    validate_change (insn, recog_data.operand_loc[recog_data.n_operands - 1], x, false);
+
+  if (replace == REPLACE_VL && !use_vlmax_p (insn))
+    {
+      unsigned int offset = get_vl_offset (insn);
+      validate_change (insn,
+                       recog_data.operand_loc[recog_data.n_operands - offset],
+                       x, false);
+    }
+}
+
+static bool
+update_vl_vtype_p (rtx_insn *insn)
+{
+  if (insn && NONDEBUG_INSN_P (insn))
+    {
+      if (recog_memoized (insn) >= 0 &&
+          (get_attr_type (insn) == TYPE_VLEFF))
+        {
+          extract_insn_cached (insn);
+          if (INTVAL (recog_data.operand[recog_data.n_operands - 1]) ==
+              DO_NOT_UPDATE_VL_VTYPE)
+            return false;
+          return true;
+        }
+      if (CALL_P (insn))
+        return true;
+      if (PATTERN (insn) && (GET_CODE (PATTERN (insn)) == ASM_INPUT ||
+                             GET_CODE (PATTERN (insn)) == ASM_OPERANDS ||
+                             asm_noperands (PATTERN (insn)) >= 0))
+        return true;
+    }
+  return false;
+}
+
+static rtx
+get_avl_source (rtx avl, rtx_insn *rtl)
+{
+  if (!rtl || !avl)
+    return NULL_RTX;
+
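+  /* Tracing the AVL definition below uses the RTL-SSA def chains, which
+     we only rely on at optimization level 2 or above.  */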
+  if (optimize < 2)
+    return NULL_RTX;
+
+  insn_info *next;
+  rtx avl_source = NULL_RTX;
+
+  if (!REG_P (avl))
+    return NULL_RTX;
+
+  for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
+    {
+      next = insn->next_any_insn ();
+      if (insn->rtl () == rtl)
+        {
+          resource_info resource{GET_MODE (avl), REGNO (avl)};
+          def_lookup dl = crtl->ssa->find_def (resource, insn);
+          def_info *def = dl.prev_def (insn);
+
+          if (!def)
+            return NULL_RTX;
+
+          if (!is_a<set_info *> (def))
+            return NULL_RTX;
+
+          insn_info *def_insn = def->insn ();
+
+          if (!def_insn)
+            return NULL_RTX;
+          rtx_insn *def_rtl = def_insn->rtl ();
+
+          if (!def_rtl)
+            return NULL_RTX;
+
+          if (INSN_P (def_rtl) && single_set (def_rtl))
+            {
+              avl_source = SET_SRC (single_set (def_rtl));
+              break;
+            }
+        }
+    }
+
+  return avl_source;
+}
+
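+/* Map a vsew encoding to the corresponding scalar integer mode:
+   0/1/2/3 -> QI/HI/SI/DImode, i.e. SEW = 8/16/32/64.  */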
+static machine_mode
+vsew_to_int_mode (unsigned vsew)
+{
+  return vsew == 0 ? QImode : vsew == 1 ? HImode : vsew == 2 ? SImode : DImode;
+}
+
+class vinfo
+{
+private:
+  state_enum state;
+  // Fields from VTYPE.
+  uint8_t vma : 1;
+  uint8_t vta : 1;
+  uint8_t vsew : 3;
+  uint8_t vlmul : 3;
+  uint8_t all_maskop_p : 1;
+  uint8_t store_p : 1;
+  uint8_t sew_lmul_ratio_only_p : 1;
+  uint8_t scalar_move_p : 1;
+  rtx avl;
+  rtx avl_source;
+
+public:
+  vinfo ()
+      : state (STATE_UNINITIALIZED), vma (false), vta (false), vsew (0),
+        vlmul (0), all_maskop_p (false), store_p (false), sew_lmul_ratio_only_p (false),
+        scalar_move_p (false), avl (NULL_RTX), avl_source (NULL_RTX)
+  {
+  }
+
+  ~vinfo () {}
+  
+  static vinfo
+  get_unknown ()
+  {
+    vinfo info;
+    info.set_unknown ();
+    return info;
+  }
+
+  bool
+  valid_p () const
+  {
+    return state != STATE_UNINITIALIZED;
+  }
+  void
+  set_unknown ()
+  {
+    state = STATE_UNKNOWN;
+  }
+  bool
+  unknown_p () const
+  {
+    return state == STATE_UNKNOWN;
+  }
+
+  bool
+  known_p () const
+  {
+    return state == STATE_KNOWN;
+  }
+
+  void
+  set_avl (rtx op)
+  {
+    avl = op;
+    state = STATE_KNOWN;
+  }
+
+  void
+  set_avl_source (rtx op)
+  {
+    avl_source = op;
+  }
+
+  bool
+  avl_const_p () const
+  {
+    return get_avl () && CONST_SCALAR_INT_P (get_avl ());
+  }
+  
+  bool
+  avl_reg_p () const
+  {
+    return get_avl () && REG_P (get_avl ());
+  }
+
+  rtx
+  get_avl () const
+  {
+    gcc_assert (known_p ());
+    return avl;
+  }
+
+  bool
+  has_zero_avl () const
+  {
+    if (!known_p ())
+      return false;
+    if (get_avl () == NULL_RTX)
+      return false;
+    if (avl_const_p ())
+      return INTVAL (get_avl ()) == 0; 
+    return false;
+  }
+
+  bool
+  has_nonzero_avl () const
+  {
+    if (!known_p ())
+      return false;
+    if (get_avl () == NULL_RTX)
+      return false;
+    if (avl_const_p ())
+      return INTVAL (get_avl ()) > 0; 
+    if (avl_reg_p ())
+      return rtx_equal_p (get_avl (), gen_rtx_REG (Pmode, X0_REGNUM));
+    return false;
+  }
+
+  rtx
+  get_avl_source () const
+  {
+    gcc_assert (known_p ());
+    return avl_source;
+  }
+
+  unsigned int
+  get_vsew () const
+  {
+    return vsew;
+  }
+  
+  enum vlmul_field_enum
+  get_vlmul () const
+  {
+    return (enum vlmul_field_enum) vlmul;
+  }
+  
+  unsigned int
+  get_vta () const
+  {
+    return vta;
+  }
+  
+  unsigned int
+  get_vma () const
+  {
+    return vma;
+  }
+
+  uint8_t
+  get_store_p () const
+  {
+    return store_p;
+  }
+  
+  bool
+  compare_vl (const vinfo &info) const
+  {
+    /* Optimize the code as follows
+       (assuming RVV has a fixed vector length, e.g. 128 bits):
+       vsetvli a5, 16, e8, m1 ......
+       .........
+       vsetvli a5, zero, e8, m1 ..... (not needed)
+    */
+    if (!get_avl () || !info.get_avl ())
+      return false;
+
+    if (REG_P (get_avl ()) && REGNO (get_avl ()) == X0_REGNUM)
+      {
+        unsigned int vsew = info.get_vsew ();
+        machine_mode inner = vsew_to_int_mode (vsew);
+        machine_mode mode = riscv_vector::vector_builtin_mode (
+            as_a<scalar_mode> (inner), info.get_vlmul ());
+        if (CONST_SCALAR_INT_P (info.get_avl ()))
+          {
+            if (GET_MODE_NUNITS (mode).is_constant () &&
+                INTVAL (info.get_avl ()) ==
+                    GET_MODE_NUNITS (mode).to_constant ())
+              return true;
+          }
+
+        if (REG_P (info.get_avl ()))
+          {
+            if (info.get_avl_source ())
+              {
+                if (CONST_SCALAR_INT_P (info.get_avl_source ()) &&
+                    GET_MODE_NUNITS (mode).is_constant () &&
+                    INTVAL (info.get_avl_source ()) ==
+                        GET_MODE_NUNITS (mode).to_constant ())
+                  return true;
+                if (CONST_POLY_INT_P (info.get_avl_source ()) &&
+                    !GET_MODE_NUNITS (mode).is_constant () &&
+                    known_eq (rtx_to_poly_int64 (info.get_avl_source ()),
+                              GET_MODE_NUNITS (mode)))
+                  return true;
+              }
+          }
+      }
+
+    return false;
+  }
+
+  bool
+  avl_equal_p (const vinfo &other) const
+  {
+    gcc_assert (valid_p () && other.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!unknown_p () && !other.unknown_p () &&
+                "Can't compare AVL in unknown state.");
+
+    if (compare_vl (other))
+      return true;
+      
+    if (other.compare_vl (*this))
+      return true;
+    
+    if (rtx_equal_p (get_avl (), other.get_avl ()))
+      return true;
+
+    if (!get_avl_source () && !other.get_avl_source ())
+      return false;
+
+    if (get_avl_source () && rtx_equal_p (get_avl_source (), other.get_avl ()))
+      return true;
+
+    if (other.get_avl_source () &&
+        rtx_equal_p (other.get_avl_source (), get_avl ()))
+      return true;
+
+    return rtx_equal_p (get_avl_source (), other.get_avl_source ());
+  }
+  
+  void
+  set_vma (unsigned int vma)
+  {
+    gcc_assert (valid_p () && !unknown_p () &&
+                "Can't set VTYPE for uninitialized or unknown.");
+    this->vma = vma;
+  }
+  
+  void
+  set_vta (unsigned int vta)
+  {
+    gcc_assert (valid_p () && !unknown_p () &&
+                "Can't set VTYPE for uninitialized or unknown.");
+    this->vta = vta;
+  }
+  
+  void
+  set_vtype (unsigned int vtype)
+  {
+    gcc_assert (valid_p () && !unknown_p () &&
+                "Can't set VTYPE for uninitialized or unknown.");
+    vma = rvv_parse_vma_field (vtype);
+    vta = rvv_parse_vta_field (vtype);
+    vsew = rvv_parse_vsew_field (vtype);
+    vlmul = rvv_parse_vlmul_field (vtype);
+  }
+
+  void
+  set_vtype (unsigned vl, unsigned vs, bool vt, bool vm, bool m_p,
+             bool st_p, bool is_scalar_move_op)
+  {
+    gcc_assert (valid_p () && !unknown_p () &&
+                "Can't set VTYPE for uninitialized or unknown.");
+    vma = vm;
+    vta = vt;
+    vsew = vs;
+    vlmul = vl;
+    all_maskop_p = m_p;
+    store_p = st_p;
+    scalar_move_p = is_scalar_move_op;
+  }
+
+  // Encode VTYPE into the binary format used by the VSETVLI instruction
+  // which is used by our MC layer representation.
+  //
+  // Bits | Name       | Description
+  // -----+------------+------------------------------------------------
+  // 7    | vma        | Vector mask agnostic
+  // 6    | vta        | Vector tail agnostic
+  // 5:3  | vsew[2:0]  | Standard element width (SEW) setting
+  // 2:0  | vlmul[2:0] | Vector register group multiplier (LMUL) setting
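+  //
+  // For example, e32 (vsew = 2) with m1 (vlmul = 0), tail agnostic and
+  // mask agnostic encodes as (2 << 3) | 0 | 0x40 | 0x80 == 0xd0.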
+  unsigned
+  encode_vtype () const
+  {
+    gcc_assert (valid_p () && !unknown_p () && !sew_lmul_ratio_only_p &&
+                "Can't set VTYPE for uninitialized or unknown.");
+    gcc_assert (vsew >= 0 && vsew <= 7 && "Invalid SEW.");
+    unsigned vtype = (vsew << 3) | (vlmul & 0x7);
+    if (vta)
+      vtype |= 0x40;
+    if (vma)
+      vtype |= 0x80;
+
+    return vtype;
+  }
+
+  bool
+  get_sew_lmul_ratio_only_p () const
+  {
+    return sew_lmul_ratio_only_p;
+  }
+
+  bool
+  sew_equal_p (const vinfo &other) const
+  {
+    gcc_assert (valid_p () && other.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!unknown_p () && !other.unknown_p () &&
+                "Can't compare VTYPE in unknown state.");
+    gcc_assert (!sew_lmul_ratio_only_p && !other.sew_lmul_ratio_only_p &&
+                "Can't compare when only LMUL/SEW ratio is valid.");
+    return vsew == other.vsew;
+  }
+
+  bool
+  vtype_equal_p (const vinfo &other) const
+  {
+    gcc_assert (valid_p () && other.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!unknown_p () && !other.unknown_p () &&
+                "Can't compare VTYPE in unknown state.");
+    gcc_assert (!sew_lmul_ratio_only_p && !other.sew_lmul_ratio_only_p &&
+                "Can't compare when only LMUL/SEW ratio is valid.");
+    return std::tie (vma, vta, vsew, vlmul) ==
+           std::tie (other.vma, other.vta, other.vsew, other.vlmul);
+  }
+
+  bool
+  policy_equal_p (const vinfo &other) const
+  {
+    gcc_assert (valid_p () && other.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!unknown_p () && !other.unknown_p () &&
+                "Can't compare VTYPE in unknown state.");
+    
+    return vta == other.vta && vma == other.vma;
+  }
+
+  unsigned
+  calc_sew_lmul_ratio (unsigned int vsew_arg, unsigned int vlmul_arg) const
+  {
+    gcc_assert (valid_p () && !unknown_p () &&
+                "Can't use VTYPE for uninitialized or unknown.");
+
+    unsigned lmul;
+    unsigned sew;
+    bool fractional;
+
+    switch (vsew_arg)
+      {
+      default:
+        gcc_unreachable ();
+      case 0:
+        sew = 8;
+        break;
+      case 1:
+        sew = 16;
+        break;
+      case 2:
+        sew = 32;
+        break;
+      case 3:
+        sew = 64;
+        break;
+      case 4:
+        sew = 128;
+        break;
+      case 5:
+        sew = 256;
+        break;
+      case 6:
+        sew = 512;
+        break;
+      case 7:
+        sew = 1024;
+        break;
+      }
+
+    switch (vlmul_arg)
+      {
+      default:
+        gcc_unreachable ();
+      case 0:
+        lmul = 1;
+        fractional = false;
+        break;
+      case 1:
+        lmul = 2;
+        fractional = false;
+        break;
+      case 2:
+        lmul = 4;
+        fractional = false;
+        break;
+      case 3:
+        lmul = 8;
+        fractional = false;
+        break;
+      case 5:
+        lmul = 8;
+        fractional = true;
+        break;
+      case 6:
+        lmul = 4;
+        fractional = true;
+        break;
+      case 7:
+        lmul = 2;
+        fractional = true;
+        break;
+      }
+
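+    // For example, SEW=32 with LMUL=4 (vsew_arg=2, vlmul_arg=2) gives a
+    // ratio of 32/4 = 8, while SEW=8 with fractional LMUL=1/8 (vsew_arg=0,
+    // vlmul_arg=5) gives 8*8 = 64.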
+    gcc_assert (sew >= 8 && "Unexpected SEW value.");
+    unsigned int sew_mul_ratio = fractional ? sew * lmul : sew / lmul;
+
+    return sew_mul_ratio;
+  }
+
+  unsigned
+  calc_sew_lmul_ratio () const
+  {
+    return calc_sew_lmul_ratio (vsew, vlmul);
+  }
+
+  // Check if the VTYPEs for these two VSETVLI infos produce the same VLMAX.
+  bool
+  vlmax_equal_p (const vinfo &other) const
+  {
+    gcc_assert (valid_p () && other.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!unknown_p () && !other.unknown_p () &&
+                "Can't compare AVL in unknown state.");
+    return calc_sew_lmul_ratio () == other.calc_sew_lmul_ratio ();
+  }
+
+  bool
+  compatible_vtype_p (const vinfo &info) const
+  {
+    // Simple case, see if full VTYPE matches.
+    if (vtype_equal_p (info))
+      return true;
+
+    // If this is a mask reg operation, it only cares about VLMAX.
+    // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
+    // than "InstrInfo".
+    // FIXME: The policy bits can probably be ignored for mask reg operations.
+    if (info.all_maskop_p && vlmax_equal_p (info) && vta == info.vta &&
+        vma == info.vma)
+      return true;
+
+    return false;
+  }
+
+  // Determine whether the vector instruction's requirements represented by
+  // InstrInfo are compatible with the previous vsetvli instruction represented
+  // by this.
+  bool
+  compatible_p (const vinfo &require) const
+  {
+    gcc_assert (valid_p () && require.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!require.sew_lmul_ratio_only_p &&
+                "Expected a valid VTYPE for instruction.");
+
+    // Nothing is compatible with Unknown.
+    if (unknown_p () || require.unknown_p ())
+      return false;
+
+    // If only our VLMAX ratio is valid, then this isn't compatible.
+    if (sew_lmul_ratio_only_p)
+      return false;
+
+    // If the instruction doesn't need an AVLReg and the SEW matches, consider
+    // it compatible.
+    if (require.known_p () && require.avl == NULL_RTX
+      && vsew == require.vsew)
+      return true;
+    
+    // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and
+    // VL > 0.  So they are compatible when we can prove that both VLs fall
+    // into the same situation.
+    if (require.scalar_move_p && require.get_avl () &&
+        CONST_SCALAR_INT_P (require.get_avl ()) &&
+        ((has_nonzero_avl () && require.has_nonzero_avl ()) ||
+         (has_zero_avl () && require.has_zero_avl ())) &&
+        sew_equal_p (require) && policy_equal_p (require))
+      return true;
+    
+    // The AVL must match.
+    if (!avl_equal_p (require))
+      return false;
+    
+    if (compatible_vtype_p (require))
+      return true;
+      
+    // Store instructions don't use the policy fields.
+    // TODO: Move into compatible_vtype_p?
+    if (require.store_p && vlmul == require.vlmul && vsew == require.vsew)
+      return true;
+      
+    // Anything else is not compatible.
+    return false;
+  }
+
+  bool
+  load_store_compatible_p (unsigned vsew_arg, const vinfo &info) const
+  {
+    gcc_assert (valid_p () && info.valid_p () &&
+                "Can't compare invalid VSETVLI Infos.");
+    gcc_assert (!info.sew_lmul_ratio_only_p &&
+                "Expected a valid VTYPE for instruction.");
+    gcc_assert (vsew_arg == info.vsew && "Mismatched EEW/SEW for store.");
+    
+    if (unknown_p () || get_sew_lmul_ratio_only_p ())
+      return false;
+
+    if (!avl_equal_p (info))
+      return false;
+
+    // Stores can ignore the tail and mask policies.
+    if (!info.store_p && (vta != info.vta || vma != info.vma))
+      return false;
+
+    return calc_sew_lmul_ratio () == calc_sew_lmul_ratio (vsew_arg, info.vlmul);
+  }
+
+  bool
+  operator== (const vinfo &other) const
+  {
+    // Uninitialized is only equal to another Uninitialized.
+    if (!valid_p ())
+      return !other.valid_p ();
+
+    if (!other.valid_p ())
+      return !valid_p ();
+
+    // Unknown is only equal to another Unknown.
+    if (unknown_p ())
+      return other.unknown_p ();
+
+    if (other.unknown_p ())
+      return unknown_p ();
+
+    if (!avl_equal_p (other))
+      return false;
+
+    // If only the VLMAX is valid, check that it is the same.
+    if (sew_lmul_ratio_only_p && other.sew_lmul_ratio_only_p)
+      return vlmax_equal_p (other);
+
+    // If the full VTYPE is valid, check that it is the same.
+    if (!sew_lmul_ratio_only_p && !other.sew_lmul_ratio_only_p)
+      return vtype_equal_p (other);
+
+    // If the sew_lmul_ratio_only bits are different, then they aren't equal.
+    return false;
+  }
+
+  bool
+  operator!= (const vinfo &Other) const
+  {
+    return !(*this == Other);
+  }
+
+  vinfo &
+  operator= (const vinfo &other)
+  {
+    state = other.state;
+    vma = other.vma;
+    vta = other.vta;
+    vsew = other.vsew;
+    vlmul = other.vlmul;
+    all_maskop_p = other.all_maskop_p;
+    store_p = other.store_p;
+    scalar_move_p = other.scalar_move_p;
+    sew_lmul_ratio_only_p = other.sew_lmul_ratio_only_p;
+    avl = other.avl;
+    avl_source = other.avl_source;
+    return *this;
+  }
+
+  // Calculate the vinfo visible to a block assuming this and other are
+  // both predecessors.
+  vinfo
+  intersect (const vinfo &other) const
+  {
+    // If the new value isn't valid, ignore it.
+    if (!other.valid_p ())
+      return *this;
+
+    // If this value isn't valid, this must be the first predecessor, use it.
+    if (!valid_p ())
+      return other;
+
+    // If either is unknown, the result is unknown.
+    if (unknown_p () || other.unknown_p ())
+      return vinfo::get_unknown ();
+
+    // If we have an exact match, return this.
+    if (*this == other)
+      return *this;
+
+    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
+    // return an SEW/LMUL ratio only value.
+    if (avl_equal_p (other) && vlmax_equal_p (other))
+      {
+        vinfo merge_info = *this;
+        merge_info.sew_lmul_ratio_only_p = true;
+        return merge_info;
+      }
+
+    // Otherwise the result is unknown.
+    return vinfo::get_unknown ();
+  }
+  
+  // Print debug info into the RTL dump file.
+  void
+  print () const
+  {
+    fprintf (dump_file, "{\n");
+    if (known_p ())
+      fprintf (dump_file, "  Known\n");
+    else if (unknown_p ())
+      fprintf (dump_file, "  Unknown\n");
+    else
+      fprintf (dump_file, "  Uninitialized\n");
+    
+    if (known_p () && get_avl ())
+      {
+        fprintf (dump_file, "  Avl=");
+        print_rtl_single (dump_file, get_avl ());
+        if (get_avl_source ())
+          {
+            fprintf (dump_file, "  Avl Source=");
+            print_rtl_single (dump_file, get_avl_source ());
+          }
+        else
+          fprintf (dump_file, "  Avl Source=(nil)\n");
+      }
+    else
+      fprintf (dump_file, "  Avl=(nil)\n  Avl Source=(nil)\n");
+    fprintf (dump_file, "  Vsew=%d\n", (unsigned int)vsew);
+    fprintf (dump_file, "  Vlmul=%d\n", (unsigned int)vlmul);
+    fprintf (dump_file, "  TailAgnostic=%d\n", (unsigned int)vta);
+    fprintf (dump_file, "  MaskAgnostic=%d\n", (unsigned int)vma);
+    fprintf (dump_file, "  MaskOp=%d\n", (unsigned int)all_maskop_p);
+    fprintf (dump_file, "  Store_p=%d\n", (unsigned int)store_p);
+    fprintf (dump_file, "  Scalar_move_p=%d\n", (unsigned int)scalar_move_p);
+    fprintf (dump_file, "  Sew_lmul_ratio_only_p=%d\n", (unsigned int)sew_lmul_ratio_only_p);
+    fprintf (dump_file, "}\n");
+  }
+};
+
+struct bb_vinfo
+{
+  // The vinfo that represents the net changes to the VL/VTYPE registers
+  // made by this block. Calculated in Phase 1.
+  vinfo change;
+
+  // The vinfo that represents the VL/VTYPE settings on exit from this
+  // block. Calculated in Phase 2.
+  vinfo exit;
+
+  // The vinfo that represents the VL/VTYPE settings from all predecessor
+  // blocks. Calculated in Phase 2, and used by Phase 3.
+  vinfo pred;
+
+  // Keeps track of whether the block is already in the queue.
+  bool inqueue = false;
+
+  bb_vinfo () {}
+};
+
+static std::map<unsigned int, bb_vinfo> bb_vinfo_map;
+static std::deque<basic_block> bb_queue;
+
+static rtx_insn *
+fetch_def_insn (rtx_insn *rtl, const vinfo info)
+{
+  /* We need the RTL-SSA def_info to optimize, which requires the
+     optimization level to be greater than or equal to 2.  */
+  if (optimize < 2)
+    return NULL;
+  
+  // We didn't find a compatible value. If our AVL is a virtual register,
+  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+  // and the last VL/VTYPE we observed is the same, we don't need a
+  // VSETVLI here.
+  if (!info.known_p ())
+    return NULL;
+  if (!info.get_avl ())
+    return NULL;
+
+  rtx avl = info.get_avl ();
+
+  if (!REG_P (avl))
+    return NULL;
+
+  insn_info *next;
+  for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
+    {
+      next = insn->next_any_insn ();
+      if (insn->rtl () == rtl)
+        {
+          resource_info resource{GET_MODE (avl), REGNO (avl)};
+          def_lookup dl = crtl->ssa->find_def (resource, insn);
+          def_info *def = dl.prev_def (insn);
+
+          if (!def)
+            return NULL;
+
+          if (!is_a<set_info *> (def))
+            return NULL;
+
+          insn_info *def_insn = def->insn ();
+          rtx_insn *def_rtl = def_insn->rtl ();
+
+          if (!def_rtl)
+            return NULL;
+          if (!INSN_P (def_rtl))
+            return NULL;
+            
+          return def_rtl;
+        }
+    }
+
+  return NULL;
+}
+
+static void
+emit_vsetvl_insn (rtx op0, rtx op1, rtx op2, rtx_insn *insn)
+{
+  if (dump_file)
+    {
+      fprintf (dump_file, "insert vsetvli for insn %d\n\n", INSN_UID (insn));
+      print_rtl_single (dump_file, insn);
+    }
+
+  if (rtx_equal_p (op0, gen_rtx_REG (Pmode, X0_REGNUM)) &&
+      rtx_equal_p (op1, gen_rtx_REG (Pmode, X0_REGNUM)))
+    emit_insn_before (gen_vsetvl_zero_zero (op2), insn);
+  else if (rtx_equal_p (op0, gen_rtx_REG (Pmode, X0_REGNUM)))
+    emit_insn_before (gen_vsetvl_zero (Pmode, op1, op2), insn);
+  else
+    emit_insn_before (gen_vsetvl (Pmode, op0, op1, op2), insn);
+}
+
+static vinfo 
+compute_info_for_instr (rtx_insn *, vinfo);
+
+// Return a vinfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction.
+static vinfo
+get_info_for_vsetvli (rtx_insn *insn, vinfo curr_info)
+{
+  vinfo new_info;
+  extract_insn_cached (insn);
+
+  if (recog_data.n_operands == 1)
+    {
+      gcc_assert (CONST_INT_P (recog_data.operand[0]) &&
+                  "Invalid vtype in vsetvli instruction.");
+      if (curr_info.valid_p () && !curr_info.unknown_p ())
+        {
+          new_info.set_avl (curr_info.get_avl ());
+          new_info.set_avl_source (curr_info.get_avl_source ());
+          new_info.set_vtype (INTVAL (recog_data.operand[0]));
+          /* If this X0, X0 vsetvli is redundant,
+             remove it.  */
+          if (curr_info.compatible_vtype_p (new_info))
+            remove_insn (insn);
+        }
+      else
+        {
+          /* vsetvli X0, X0 means that the following instruction
+             uses the same vl as before.  */
+          basic_block bb = BLOCK_FOR_INSN (insn);
+          rtx_insn *next_insn;
+          bool find_vl_p = false;
+          for (next_insn = NEXT_INSN (insn); next_insn != NEXT_INSN (BB_END (bb));
+               next_insn = NEXT_INSN (next_insn))
+            {
+              if (use_vtype_p (next_insn))
+                {
+                  vinfo next_info = compute_info_for_instr (next_insn, curr_info);
+                  new_info.set_avl (next_info.get_avl ());
+                  new_info.set_avl_source (next_info.get_avl_source ());
+                  extract_insn_cached (insn);
+                  new_info.set_vtype (INTVAL (recog_data.operand[0]));
+                  
+                  if (recog_clobber_vl_vtype (next_insn) != MOV_CLOBBER_REG_REG &&
+                      recog_clobber_vl_vtype (next_insn) != OTHERS)
+                    new_info = vinfo::get_unknown ();
+                    
+                  find_vl_p = true;
+                  break;
+                }
+            }
+          gcc_assert (find_vl_p);
+        }
+      return new_info;
+    }
+  if (recog_data.n_operands == 2)
+    {
+      gcc_assert (CONST_INT_P (recog_data.operand[1]) &&
+                  "Invalid vtype in vsetvli instruction.");
+      new_info.set_avl (recog_data.operand[0]);
+      new_info.set_avl_source (get_avl_source (recog_data.operand[0], insn));
+      new_info.set_vtype (INTVAL (recog_data.operand[1]));
+      return new_info;
+    }
+  
+  gcc_assert (recog_data.n_operands == 3);
+  rtx vl = recog_data.operand[1];
+  rtx vtype = recog_data.operand[2];
+  gcc_assert (CONST_INT_P (vtype) && "Invalid vtype in vsetvli instruction.");
+  new_info.set_avl (vl);
+  new_info.set_avl_source (get_avl_source (vl, insn));
+  new_info.set_vtype (INTVAL (vtype));
+  return new_info;
+}
+
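+/* Return the mask/tail policy for INSN packed as (vma << 1) | vta,
+   falling back to CURR_INFO (or the defaults) when the instruction's
+   policy operand leaves a field unspecified.  */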
+static unsigned int
+analyze_vma_vta (rtx_insn *insn, vinfo curr_info)
+{
+  if (!use_vl_p (insn))
+    return 1;
+  
+  if (recog_clobber_vl_vtype (insn) != OTHERS)
+    return 1;
+    
+  if (use_vlmax_p (insn))
+    return 1;
+  unsigned int offset = get_policy_offset (insn);
+  extract_insn_cached (insn);
+  vector_policy vma =
+      riscv_vector::get_vma (INTVAL (recog_data.operand[recog_data.n_operands - offset]));
+  vector_policy vta =
+      riscv_vector::get_vta (INTVAL (recog_data.operand[recog_data.n_operands - offset]));
+  unsigned int vma_p = 0;
+  unsigned int vta_p = 0;
+  if (vma == vector_policy::agnostic)
+    vma_p = 1;
+  else if (vma == vector_policy::undisturbed)
+    vma_p = 0;
+  else
+    {
+      /* For N/A vma we retain the last vma if it is valid.  */
+      if (curr_info.valid_p () && !curr_info.unknown_p ())
+        vma_p = curr_info.get_vma ();
+      else
+        vma_p = 0;
+    }
+  
+  if (vta == vector_policy::agnostic)
+    vta_p = 1;
+  else if (vta == vector_policy::undisturbed)
+    vta_p = 0;
+  else
+    {
+      /* For N/A vta we retain the last vta if it is valid.  */
+      if (curr_info.valid_p () && !curr_info.unknown_p ())
+        vta_p = curr_info.get_vta ();
+      else
+        vta_p = 1;
+    }
+  return (vma_p << 1) | vta_p;
+}
+
+static bool
+scalar_move_insn_p (rtx_insn *insn)
+{
+  return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+         (get_attr_type (insn) == TYPE_VMV_S_X ||
+          get_attr_type (insn) == TYPE_VFMV_S_F);
+}
+
+static bool
+store_insn_p (rtx_insn *insn)
+{
+  return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+         (get_attr_type (insn) == TYPE_VSE ||
+          get_attr_type (insn) == TYPE_VSSE);
+}
+
+static bool
+can_skip_load_store_insn_p (rtx_insn *insn)
+{
+  return insn && INSN_P (insn) && recog_memoized (insn) >= 0 &&
+         (get_attr_type (insn) == TYPE_VSE ||
+          get_attr_type (insn) == TYPE_VSSE ||
+          get_attr_type (insn) == TYPE_VLE ||
+          get_attr_type (insn) == TYPE_VLSE);
+}
+
+static vinfo
+compute_info_for_instr (rtx_insn *insn, vinfo curr_info)
+{
+  vinfo info;
+
+  extract_insn_cached (insn);
+  
+  if (use_vl_p (insn))
+    {
+      if (recog_clobber_vl_vtype (insn) != OTHERS)
+        info.set_avl (gen_rtx_REG (Pmode, X0_REGNUM));
+      else if (use_vlmax_p (insn))
+        info.set_avl (gen_rtx_REG (Pmode, X0_REGNUM));
+      else
+        {
+          unsigned int offset = get_vl_offset (insn);
+          info.set_avl_source (get_avl_source (
+              recog_data.operand[recog_data.n_operands - offset], insn));
+          info.set_avl (recog_data.operand[recog_data.n_operands - offset]);
+        }
+    }
+  else
+    info.set_avl (NULL_RTX);
+
+  machine_mode mode = rvv_translate_attr_mode (insn);
+  bool st_p = store_insn_p (insn);
+  bool scalar_move_p = scalar_move_insn_p (insn);
+
+  unsigned int vma_vta = analyze_vma_vta (insn, curr_info);
+  unsigned int vta = vma_vta & 0x1;
+  unsigned int vma = (vma_vta >> 1) & 0x1;
+  info.set_vtype (rvv_classify_vlmul_field (mode),
+                  rvv_classify_vsew_field (mode),
+                  /*TailAgnostic*/ vta, /*MaskAgnostic*/ vma,
+                  rvv_mask_mode_p (mode), st_p, scalar_move_p);
+  
+  return info;
+}
+
+static bool
+can_skip_vsetvli_for_load_store_p (rtx_insn *insn, const vinfo &new_info, const vinfo &curr_info)
+{
+  gcc_assert (recog_memoized (insn) >= 0);
+  if (!can_skip_load_store_insn_p (insn))
+    return false;
+  machine_mode mode = rvv_translate_attr_mode (insn);
+  unsigned vsew = rvv_classify_vsew_field (mode);
+  gcc_assert (store_insn_p (insn) == new_info.get_store_p ());
+  return curr_info.load_store_compatible_p (vsew, new_info);
+}
+
+static bool
+need_vsetvli (rtx_insn *insn, const vinfo &require, const vinfo &curr_info)
+{
+  if (!need_vsetvli_p (insn))
+    return false;
+    
+  if (curr_info.compatible_p (require))
+    return false;
+
+  // We didn't find a compatible value. If our AVL is a virtual register,
+  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+  // and the last VL/VTYPE we observed is the same, we don't need a
+  // VSETVLI here.
+  if (!curr_info.unknown_p () && require.avl_reg_p () &&
+      REGNO (require.get_avl ()) >= FIRST_PSEUDO_REGISTER && 
+      !curr_info.get_sew_lmul_ratio_only_p () &&
+      curr_info.compatible_vtype_p (require))
+    {
+      rtx_insn *def_rtl = fetch_def_insn (insn, require);
+      if (def_rtl != NULL)
+        {
+          if (is_vector_config_instr (def_rtl))
+            {
+              vinfo def_info = get_info_for_vsetvli (def_rtl, curr_info);
+              if (def_info.avl_equal_p (curr_info) &&
+                  def_info.vlmax_equal_p (curr_info))
+                return false;
+            }
+        }
+    }
+
+  return true;
+}
+
+static bool
+need_vsetvli_phi (const vinfo &new_info, rtx_insn *rtl)
+{
+  /* Optimize the case as follows:
+  void foo (int8_t *base, int8_t* out, size_t vl, unsigned int m)
+  {
+    vint8mf8_t v0;
+    size_t avl;
+    if (m > 1000)
+      avl = vsetvl_e8mf8 (vl);
+    else
+      avl = vsetvl_e8mf8 (vl << 2);
+    for (int i = 0; i < m; i++)
+      {
+        v0 = vle8_v_i8mf8 (base + i * 32, avl);
+        v0 = vadd_vv_i8mf8 (v0, v0, avl);
+      }
+    *(vint8mf8_t*)out = v0;
+  } */
+
+  /* We need the RTL-SSA PHI information to optimize, which requires the
+     optimization level to be greater than or equal to 2.  */
+  if (optimize < 2)
+    return true;
+
+  if (!(!new_info.unknown_p () && new_info.get_avl () &&
+        GET_CODE (new_info.get_avl ()) == REG))
+    return true;
+
+  rtx avl = new_info.get_avl ();
+
+  insn_info *next;
+  /* fetch phi_node.  */
+  for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
+    {
+      next = insn->next_any_insn ();
+      if (insn->rtl () == rtl)
+        {
+          bb_info *bb = insn->bb ();
+          ebb_info *ebb = bb->ebb ();
+          resource_info resource{GET_MODE (avl), REGNO (avl)};
+          insn_info *phi_insn = ebb->phi_insn ();
+          phi_info *phi;
+          def_lookup dl = crtl->ssa->find_def (resource, phi_insn);
+          def_info *set = dl.prev_def (phi_insn);
+
+          if (!set)
+            return true;
+
+          if (!is_a<phi_info *> (set))
+            return true;
+
+          // There is an existing phi.
+          phi = as_a<phi_info *> (set);
+          for (unsigned int i = 0; i < phi->num_inputs (); i++)
+            {
+              def_info *def = phi->input_value (i);
+              if (!def)
+                return true;
+              insn_info *def_insn = def->insn ();
+              rtx_insn *def_rtl = def_insn->rtl ();
+
+              if (!def_rtl)
+                return true;
+              if (!INSN_P (def_rtl))
+                return true;
+              extract_insn_cached (def_rtl);
+              if (recog_data.n_operands > 0 &&
+                  rtx_equal_p (recog_data.operand[0], avl))
+                {
+                  if (get_attr_type (def_rtl) &&
+                      get_attr_type (def_rtl) == TYPE_VSETVL)
+                    {
+                      basic_block def_bb = BLOCK_FOR_INSN (def_rtl);
+                      bb_vinfo info = bb_vinfo_map.at(def_bb->index);
+                      // If the exit from the predecessor has the VTYPE
+                      // we are looking for we might be able to avoid a
+                      // VSETVLI.
+                      if (info.exit.unknown_p () ||
+                          !info.exit.vtype_equal_p (new_info))
+                        return true;
+                      // We found a VSET(I)VLI make sure it matches the
+                      // output of the predecessor block.
+                      vinfo curr_info;
+                      vinfo avl_def_info =
+                          get_info_for_vsetvli (def_rtl, curr_info);
+                      if (!avl_def_info.vtype_equal_p (info.exit) ||
+                          !avl_def_info.avl_equal_p (info.exit))
+                        return true;
+                    }
+                  else
+                    return true;
+                }
+            }
+        }
+    }
+
+  // If all the incoming values to the PHI checked out, we don't need
+  // to insert a VSETVLI.
+  return false;
+}
+
+static bool
+compute_vl_vtype_changes (basic_block bb)
+{
+  bool vector_p = false;
+
+  bb_vinfo &info = bb_vinfo_map[bb->index];
+  info.change = info.pred;
+  rtx_insn *insn = NULL;
+  vinfo curr_info;
+
+  FOR_BB_INSNS (bb, insn)
+  {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (is_vector_config_instr (insn))
+      {
+        vector_p = true;
+        info.change = get_info_for_vsetvli (insn, curr_info);
+        curr_info = info.change;
+        continue;
+      }
+    
+    /*  According to vector.md, each instruction pattern is a PARALLEL.
+        It should have at least 2 side effects, and the last 2 side effects
+        are the uses of vl and vtype.  */
+    if (use_vtype_p (insn))
+      {
+        vector_p = true;
+
+        vinfo new_info = compute_info_for_instr (insn, curr_info);
+        curr_info = new_info;
+        if (!info.change.valid_p ())
+          info.change = new_info;
+        else
+          {
+            // If this instruction isn't compatible with the previous VL/VTYPE
+            // we need to insert a VSETVLI.
+            // If this is a unit-stride or strided load/store, we may be able
+            // to use the EMUL=(EEW/SEW)*LMUL relationship to avoid changing
+            // vtype. NOTE: We only do this if the vtype we're comparing
+            // against was created in this block. We need the first and third
+            // phase to treat the store the same way.
+            if (!can_skip_vsetvli_for_load_store_p (insn, new_info, info.change) &&
+                need_vsetvli (insn, new_info, info.change))
+              info.change = new_info;
+          }
+      }
+    // If this is something that updates VL/VTYPE that we don't know about, set
+    // the state to unknown.
+    if (update_vl_vtype_p (insn))
+      {
+        curr_info = vinfo::get_unknown ();
+        info.change = vinfo::get_unknown ();
+      }
+  }
+
+  return vector_p;
+}
+
+static void
+compute_incoming_vl_vtype (const basic_block bb)
+{
+  bb_vinfo &info = bb_vinfo_map[bb->index];
+  info.inqueue = false;
+
+  vinfo in_info;
+  if (EDGE_COUNT (bb->preds) == 0)
+    {
+      // There are no predecessors, so use the default starting status.
+      in_info.set_unknown ();
+    }
+  else
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+      {
+        basic_block ancestor = e->src;
+        in_info = in_info.intersect (bb_vinfo_map.at(ancestor->index).exit);
+      }
+    }
+
+  // If we don't have any valid predecessor value, wait until we do.
+  if (!in_info.valid_p ())
+    return;
+  
+  // If no change, no need to rerun block
+  if (in_info == info.pred)
+    return;
+
+  info.pred = in_info;
+  if (dump_file)
+    {
+      fprintf (dump_file, "Entry state of bb %d changed to\n", bb->index);
+      info.pred.print ();
+    }
+  
+  // Note: It's tempting to cache the state changes here, but due to the
+  // compatibility checks performed, a block's output state can change based on
+  // the input state.  To cache, we'd have to add logic for finding
+  // never-compatible state changes.
+  compute_vl_vtype_changes (bb);
+  vinfo tmpstatus = info.change;
+
+  // If the new exit value matches the old exit value, we don't need to revisit
+  // any blocks.
+  if (info.exit == tmpstatus)
+    return;
+
+  info.exit = tmpstatus;
+  
+  if (dump_file)
+    {
+      fprintf (dump_file, "Exit state of bb %d changed to\n", bb->index);
+      info.exit.print ();
+    }
+  // Add the successors to the work list so we can propagate the changed exit
+  // status.
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, bb->succs)
+  {
+    basic_block succ = e->dest;
+    if (!bb_vinfo_map[succ->index].inqueue)
+      bb_queue.push_back (succ);
+  }
+}
+
+static void
+insert_vsetvl (rtx_insn *insn, const vinfo &curr_info, const vinfo &prev_info)
+{
+  extract_insn_cached (insn);
+  rtx avl = curr_info.get_avl ();
+  rtx vtype = GEN_INT (curr_info.encode_vtype ());
+  rtx zero = gen_rtx_REG (Pmode, X0_REGNUM);
+
+  if (recog_clobber_vl_vtype (insn) == MOV_CLOBBER_REG_MEM
+    || recog_clobber_vl_vtype (insn) == MOV_CLOBBER_MEM_REG)
+    {
+      gcc_assert (
+          reload_completed &&
+          rtx_equal_p (curr_info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM)));
+      avl = recog_data.operand[2];
+      PUT_MODE (avl, Pmode);
+      emit_vsetvl_insn (avl, gen_rtx_REG (Pmode, X0_REGNUM), vtype, insn);
+      return;
+    }
+
+  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
+  // VLMAX
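+  // (i.e. "vsetvli x0, x0, vtype", which sets VTYPE while preserving the
+  // current VL).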
+  if (prev_info.valid_p () && !prev_info.unknown_p () &&
+      curr_info.avl_equal_p (prev_info) && curr_info.vlmax_equal_p (prev_info))
+    {
+      emit_vsetvl_insn (zero, zero, vtype, insn);
+      return;
+    }
+
+  if (curr_info.get_avl () == NULL_RTX)
+    {
+      if (prev_info.valid_p () && !prev_info.unknown_p () &&
+          curr_info.vlmax_equal_p (prev_info))
+        {
+          emit_vsetvl_insn (zero, zero, vtype, insn);
+          return;
+        }
+      // Otherwise use an AVL of 0 to avoid depending on previous vl.
+      emit_vsetvl_insn (zero, GEN_INT (0), vtype, insn);
+      return;
+    }
+
+  if (rtx_equal_p (curr_info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM)))
+    {
+      if (reload_completed)
+        avl = gen_rtx_REG (Pmode, X0_REGNUM);
+      else
+        avl = gen_reg_rtx (Pmode);
+      emit_vsetvl_insn (avl, gen_rtx_REG (Pmode, X0_REGNUM), vtype, insn);
+      return;
+    }
+
+  emit_vsetvl_insn (zero, avl, vtype, insn);
+}
+
+static void
+cleanup_insn_op (rtx_insn *insn)
+{
+  if (!reload_completed)
+    return;
+
+  /* 1. Remove the vl operand from every RVV instruction.
+     2. Rewrite every RVV register-spilling instruction emitted by reload.  */
+  rtx pat;
+  extract_insn_cached (insn);
+  machine_mode mode = rvv_translate_attr_mode (insn);
+  if (recog_clobber_vl_vtype (insn) == MOV_CLOBBER_REG_MEM)
+    {
+      if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+        pat =
+              gen_vlm (mode, recog_data.operand[0],
+                       XEXP (recog_data.operand[1], 0), const0_rtx, const0_rtx);
+      else
+        pat = gen_vle (mode, recog_data.operand[0], const0_rtx, const0_rtx,
+                       XEXP (recog_data.operand[1], 0), const0_rtx, const0_rtx);
+
+      validate_change (insn, &PATTERN (insn), pat, false);
+    }
+  else if (recog_clobber_vl_vtype (insn) == MOV_CLOBBER_MEM_REG)
+    {
+      if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+        pat = gen_vsm (mode, XEXP (recog_data.operand[0], 0),
+                       recog_data.operand[1], const0_rtx, const0_rtx);
+      else
+        pat = gen_vse (mode, const0_rtx, XEXP (recog_data.operand[0], 0),
+                       recog_data.operand[1], const0_rtx, const0_rtx);
+
+      validate_change (insn, &PATTERN (insn), pat, false);
+    }
+  else
+    replace_op (insn, const0_rtx, REPLACE_VL);
+}
+
+static void
+emit_vsetvlis (const basic_block bb)
+{
+  vinfo curr_info;
+  rtx_insn *insn = NULL;
+
+  FOR_BB_INSNS (bb, insn)
+  {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (is_vector_config_instr (insn))
+      {
+        curr_info = get_info_for_vsetvli (insn, curr_info);
+        continue;
+      }
+
+    if (use_vtype_p (insn))
+      {
+        vinfo new_info = compute_info_for_instr (insn, curr_info);
+        
+        if (!curr_info.valid_p ())
+          {
+            // We haven't found any vector instructions or VL/VTYPE changes
+            // yet, use the predecessor information.
+            curr_info = bb_vinfo_map[bb->index].pred;
+            gcc_assert (curr_info.valid_p () &&
+                        "Expected a valid predecessor state.");
+            if (need_vsetvli (insn, new_info, curr_info))
+              {
+                // If this is the first implicit state change, and the state
+                // change requested can be proven to produce the same register
+                // contents, we can skip emitting the actual state change and
+                // continue as if we had emitted it, since we know the GPR
+                // result of the implicit state change wouldn't be used and
+                // the VL/VTYPE registers are already correct.  Note that we
+                // *do* need to model the state as if it changed: while the
+                // register contents are unchanged, the abstract model can
+                // change.
+                if (need_vsetvli_phi (new_info, insn))
+                  insert_vsetvl (insn, new_info, curr_info);
+                curr_info = new_info;
+              }
+          }
+        else
+          {
+            // If this instruction isn't compatible with the previous VL/VTYPE
+            // we need to insert a VSETVLI.
+            // If this is a unit-stride or strided load/store, we may be able
+            // to use the EMUL=(EEW/SEW)*LMUL relationship to avoid changing
+            // vtype. NOTE: We can't use predecessor information for the store.
+            // We must treat it the same as the first phase so that we produce
+            // the correct vl/vtype for successor blocks.
+            if (!can_skip_vsetvli_for_load_store_p (insn, new_info,
+                                                    curr_info) &&
+                need_vsetvli (insn, new_info, curr_info))
+              {
+                insert_vsetvl (insn, new_info, curr_info);
+                curr_info = new_info;
+              }
+          }
+        cleanup_insn_op (insn);
+      }
+    // If this is something that updates VL/VTYPE that we don't know about,
+    // set the state to unknown.
+    if (update_vl_vtype_p (insn))
+      curr_info = vinfo::get_unknown ();
+
+    // If we reach the end of the block and our current info doesn't match the
+    // expected info, insert a vsetvli to correct.
+    if (insn == BB_END (bb))
+      {
+        const vinfo exit_info = bb_vinfo_map.at(bb->index).exit;
+        if (curr_info.valid_p () && exit_info.valid_p () &&
+            !exit_info.unknown_p () && curr_info != exit_info)
+          {
+            insert_vsetvl (insn, exit_info, curr_info);
+            curr_info = exit_info;
+          }
+      }
+  }
+}
+
+static void
+dolocalprepass (const basic_block bb)
+{
+  rtx_insn *insn = NULL;
+  vinfo curr_info = vinfo::get_unknown ();
+  FOR_BB_INSNS (bb, insn)
+  {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (is_vector_config_instr (insn))
+      {
+        curr_info = get_info_for_vsetvli (insn, curr_info);
+        continue;
+      }
+
+    if (scalar_move_insn_p (insn))
+      {
+        gcc_assert (use_vtype_p (insn) && use_vl_p (insn));
+        const vinfo new_info = compute_info_for_instr (insn, curr_info);
+
+        // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0
+        // and VL > 0.  We can discard the user-requested AVL and just use
+        // the previous one if we can prove both fall in the same class
+        // (both zero or both nonzero).  This removes a vsetvli entirely if
+        // the types match, or allows use of the cheaper AVL-preserving
+        // variant if VLMAX doesn't change.  If VLMAX might change, we
+        // couldn't use the 'vsetvli x0, x0, vtype' variant, so we avoid
+        // the transform to prevent extending the live range of an AVL
+        // register operand.
+        // TODO: We can probably relax this for immediates.
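+        // E.g. after `vsetivli x0, 4, e32,m1', a vmv.s.x requesting AVL = 1
+        // behaves identically under AVL = 4 (both are nonzero, so element 0
+        // is written either way), and the existing AVL can simply be reused.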
+        if (((curr_info.has_nonzero_avl () && new_info.has_nonzero_avl ()) ||
+             (curr_info.has_zero_avl () && new_info.has_zero_avl ())) &&
+            new_info.vlmax_equal_p (curr_info))
+          {
+            replace_op (insn, curr_info.get_avl (), REPLACE_VL);
+            curr_info = compute_info_for_instr (insn, curr_info);
+            continue;
+          }
+      }
+
+    if (use_vtype_p (insn))
+      {
+        if (use_vl_p (insn))
+          {
+            const auto require = compute_info_for_instr (insn, curr_info);
+            // If the AVL is the result of a previous vsetvli which has the
+            // same AVL and VLMAX as our current state, we can reuse the AVL
+            // from the current state for the new one.  This allows us to
+            // generate 'vsetvli x0, x0, vtype' or possibly skip the
+            // transition entirely.
+            if (!curr_info.unknown_p () && require.get_avl () &&
+                REG_P (require.get_avl ()) &&
+                REGNO (require.get_avl ()) >= FIRST_PSEUDO_REGISTER)
+              {
+                rtx_insn *def_rtl = fetch_def_insn (insn, require);
+                
+                if (def_rtl != NULL)
+                  {
+                    if (is_vector_config_instr (def_rtl))
+                      {
+                        vinfo def_info = get_info_for_vsetvli (def_rtl, curr_info);
+                        if (def_info.avl_equal_p (curr_info) &&
+                            def_info.vlmax_equal_p (curr_info))
+                          {
+                            replace_op (insn, curr_info.get_avl (), REPLACE_VL);
+                            curr_info = compute_info_for_instr (insn, curr_info);
+                            continue;
+                          }
+                      }
+                  }
+              }
+
+            // If AVL is defined by a vsetvli with the same vtype, we can
+            // replace the AVL operand with the AVL of the defining vsetvli.
+            // We avoid general register AVLs to avoid extending live ranges
+            // without being sure we can kill the original source reg entirely.
+            // TODO: We can ignore policy bits here, we only need VL to be the
+            // same.
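+            // E.g. if this insn's AVL register was defined by
+            // `vsetivli a3, 4, e32,m1' and the insn itself requires e32,m1,
+            // its AVL operand can be rewritten to the constant 4: the same
+            // vtype implies the same VLMAX, hence the same resulting vl.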
+            if (!curr_info.unknown_p () && require.get_avl () &&
+                REG_P (require.get_avl ()) &&
+                REGNO (require.get_avl ()) >= FIRST_PSEUDO_REGISTER)
+              {
+                rtx_insn *def_rtl = fetch_def_insn (insn, require);
+                if (def_rtl != NULL)
+                  {
+                    if (is_vector_config_instr (def_rtl))
+                      {
+                        vinfo def_info = get_info_for_vsetvli (def_rtl, curr_info);
+                        if (def_info.vtype_equal_p (require) &&
+                            (def_info.avl_const_p () ||
+                            (def_info.avl_reg_p () &&
+                            rtx_equal_p (def_info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM)))))
+                          {
+                            replace_op (insn, def_info.get_avl (), REPLACE_VL);
+                            curr_info = compute_info_for_instr (insn, curr_info);
+                            continue;
+                          }
+                      }
+                  }
+              }
+          }
+        curr_info = compute_info_for_instr (insn, curr_info);
+        continue;
+      }
+
+    // If this is something that updates VL/VTYPE that we don't know about,
+    // set the state to unknown.
+    if (update_vl_vtype_p (insn))
+      curr_info = vinfo::get_unknown ();
+  }
+}
+
+static void
+dolocalpostpass (const basic_block bb)
+{
+  rtx_insn *prev_insn = nullptr;
+  rtx_insn *insn = nullptr;
+  bool used_vl = false, used_vtype = false;
+  std::vector<rtx_insn *> to_delete;
+  FOR_BB_INSNS (bb, insn)
+  {
+    // Note: Must be *before* vsetvli handling to account for config cases
+    // which only change some subfields.
+    if (update_vl_vtype_p (insn) || use_vl_p (insn))
+      used_vl = true;
+    if (update_vl_vtype_p (insn) || use_vtype_p (insn))
+      used_vtype = true;
+
+    if (!is_vector_config_instr (insn))
+      continue;
+    
+    extract_insn_cached (insn);
+    if (prev_insn)
+      {
+        if (!used_vl && !used_vtype)
+          {
+            to_delete.push_back (prev_insn);
+            // fallthrough
+          }
+        else if (!used_vtype && is_vl_preserving_config (insn))
+          {
+            // Note: `vsetvli x0, x0, vtype' is the canonical instruction
+            // for this case.  If you find yourself wanting to add other forms
+            // to this "unused VTYPE" case, we're probably missing a
+            // canonicalization earlier.
+            // Note: We don't need to explicitly check vtype compatibility
+            // here because this form is only legal (per ISA) when not
+            // changing VL.
+            rtx new_vtype = recog_data.operand[recog_data.n_operands - 1];
+            replace_op (prev_insn, new_vtype, REPLACE_VTYPE);
+            to_delete.push_back (insn);
+            // Leave prev_insn unchanged
+            continue;
+          }
+      }
+    prev_insn = insn;
+    used_vl = false;
+    used_vtype = false;
+    
+    rtx vdef = recog_data.operand[0];
+    if (!rtx_equal_p (vdef, gen_rtx_REG (Pmode, X0_REGNUM)) &&
+        !(REGNO (vdef) >= FIRST_PSEUDO_REGISTER &&
+          (find_reg_note (insn, REG_UNUSED, vdef) ||
+           find_reg_note (insn, REG_DEAD, vdef))))
+      used_vl = true;
+  }
+
+  for (auto *to_remove : to_delete)
+    remove_insn (to_remove);
+}
+
+/// Return true if the VL value configured must be equal to the requested one.
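+/// E.g. with VLEN fixed at 128 bits, SEW = 32 and LMUL = 1 give VLMAX = 4,
+/// so any constant AVL <= 4 is guaranteed to produce vl == AVL; for larger
+/// AVLs the configured vl is implementation-defined.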
+static bool
+has_fixed_result (const vinfo &info)
+{
+  if (!info.avl_const_p ())
+    // VLMAX is always the same value.
+    // TODO: Could extend to other registers by looking at the associated
+    // vreg def placement.
+    return rtx_equal_p (info.get_avl (), gen_rtx_REG (Pmode, X0_REGNUM));
+
+  if (VLMUL_FIELD_000 != info.get_vlmul ())
+    // TODO: Generalize the code below to account for LMUL
+    return false;
+  
+  if (!BYTES_PER_RISCV_VECTOR.is_constant ())
+    return false;
+    
+  unsigned int avl = INTVAL (info.get_avl ());
+  unsigned int vsew = info.get_vsew ();
+  machine_mode inner = vsew_to_int_mode (vsew);
+  unsigned int sew = GET_MODE_BITSIZE (as_a<scalar_mode> (inner));
+  unsigned avl_in_bits = avl * sew;
+  machine_mode mode = riscv_vector::vector_builtin_mode (
+            as_a<scalar_mode> (inner), info.get_vlmul ());
+  return GET_MODE_BITSIZE (mode).to_constant () >= avl_in_bits;
+}
+
+/// Perform simple partial redundancy elimination of the VSETVLI instructions
+/// we're about to insert by looking for cases where we can PRE from the
+/// beginning of one block to the end of one of its predecessors.  Specifically,
+/// this is geared to catch the common case of a fixed length vsetvl in a single
+/// block loop when it could execute once in the preheader instead.
+static void
+dopre (const basic_block bb)
+{
+  if (!bb_vinfo_map[bb->index].pred.unknown_p ())
+    return;
+
+  basic_block unavailable_pred = nullptr;
+  vinfo available_info;
+  
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, bb->preds)
+  {
+    basic_block predecessor = e->src;
+    const vinfo &pred_info = bb_vinfo_map[predecessor->index].exit;
+    if (pred_info.unknown_p ())
+      {
+        if (unavailable_pred)
+          return;
+        unavailable_pred = predecessor;
+      }
+    else if (!available_info.valid_p ())
+      available_info = pred_info;
+    else if (available_info != pred_info)
+      return;
+  }
+      
+  // unreachable, single pred, or full redundancy.  Note that FRE
+  // is handled by phase 3.
+  if (!unavailable_pred || !available_info.valid_p ())
+    return;
+
+  // critical edge - TODO: consider splitting?
+  if (EDGE_COUNT (unavailable_pred->succs) != 1)
+    return;
+
+  // If VL can be less than AVL, then we can't reduce the frequency of
+  // execution.
+  if (!has_fixed_result (available_info))
+    return;
+
+  // Does it actually let us remove an implicit transition in this block?
+  bool found = false;
+  rtx_insn *insn;
+  vinfo curr_info;
+  FOR_BB_INSNS (bb, insn)
+  {
+    if (is_vector_config_instr (insn))
+      return;
+    
+    if (use_vtype_p (insn))
+      {
+        if (available_info != compute_info_for_instr (insn, curr_info))
+          return;
+        found = true;
+        break;
+      }
+  }
+      
+  if (!found)
+    return;
+
+  // Finally, update both data flow state and insert the actual vsetvli.
+  // Doing both keeps the code in sync with the dataflow results, which
+  // is critical for correctness of phase 3.
+  auto old_info = bb_vinfo_map[unavailable_pred->index].exit;
+  if (dump_file)
+    {
+      fprintf (dump_file, "PRE VSETVLI from bb %d changed to bb %d\n", bb->index, unavailable_pred->index);
+      available_info.print ();
+    }
+  bb_vinfo_map[unavailable_pred->index].exit = available_info;
+  bb_vinfo_map[bb->index].pred = available_info;
+
+  // Note there's an implicit assumption here that terminators never use
+  // or modify VL or VTYPE.
+  auto insert_pt = BB_END (unavailable_pred);
+  insert_vsetvl (insert_pt, available_info, old_info);
+}
+
+static unsigned int
+rest_of_handle_insert_vsetvl (function *fn)
+{
+  basic_block bb;
+
+  if (n_basic_blocks_for_fn (fn) <= 0)
+    return 0;
+
+  gcc_assert (bb_vinfo_map.empty () && "Expect empty block infos.");
+
+  if (optimize >= 2)
+    {
+      // Initialization.
+      calculate_dominance_info (CDI_DOMINATORS);
+      df_analyze ();
+      crtl->ssa = new rtl_ssa::function_info (cfun);
+    }
+
+  if (dump_file)
+    fprintf (dump_file, "\nEntering InsertVSETVLI for %s\n\n",
+             current_function_name ());
+
+  /* Initialize Basic Block Map */
+  FOR_ALL_BB_FN (bb, fn)
+  {
+    bb_vinfo bb_init;
+    bb_vinfo_map.insert (std::pair<uint8_t, bb_vinfo> (bb->index, bb_init));
+  }
+
+  // Scan the block locally for cases where we can mutate the operands
+  // of the instructions to reduce state transitions.  Critically, this
+  // must be done before we start propagating data flow states as these
+  // transforms are allowed to change the contents of VTYPE and VL so
+  // long as the semantics of the program stays the same.
+  FOR_ALL_BB_FN (bb, fn)
+    dolocalprepass (bb);
+
+  bool vector_p = false;
+
+  if (dump_file)
+    fprintf (
+        dump_file,
+        "Phase 1 determine how VL/VTYPE are affected by the each block:\n");
+
+  // Phase 1 - determine how VL/VTYPE are affected by each block.
+  FOR_ALL_BB_FN (bb, fn)
+  {
+    vector_p |= compute_vl_vtype_changes (bb);
+    bb_vinfo &info = bb_vinfo_map[bb->index];
+    info.exit = info.change;
+    if (dump_file)
+      {
+        fprintf (dump_file, "Initial exit state of bb %d\n", bb->index);
+        info.exit.print ();
+      }
+  }
+
+  if (!vector_p)
+    {
+      bb_vinfo_map.clear ();
+      bb_queue.clear ();
+      if (optimize >= 2)
+        {
+          // Finalization.
+          free_dominance_info (CDI_DOMINATORS);
+          if (crtl->ssa->perform_pending_updates ())
+            cleanup_cfg (0);
+    
+          delete crtl->ssa;
+          crtl->ssa = nullptr;
+        }
+      return 0;
+    }
+
+  if (dump_file)
+    fprintf (dump_file,
+             "Phase 2 determine the exit VL/VTYPE from each block:\n");
+  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+  // blocks to the list here, but will also add any that need to be
+  // revisited during Phase 2 processing.
+  FOR_ALL_BB_FN (bb, fn)
+  {
+    bb_queue.push_back (bb);
+    bb_vinfo_map[bb->index].inqueue = true;
+  }
+  while (!bb_queue.empty ())
+    {
+      bb = bb_queue.front ();
+      bb_queue.pop_front ();
+      compute_incoming_vl_vtype (bb);
+    }
+   
+  // Perform partial redundancy elimination of vsetvli transitions.
+  FOR_ALL_BB_FN (bb, fn)
+    dopre (bb);
+    
+  if (dump_file)
+    fprintf (dump_file,
+             "Phase 3 add any vsetvli instructions needed in the block:\n");
+  // Phase 3 - add any vsetvli instructions needed in the block. Use the
+  // Phase 2 information to avoid adding vsetvlis before the first vector
+  // instruction in the block if the VL/VTYPE is satisfied by its
+  // predecessors.
+  FOR_ALL_BB_FN (bb, fn) 
+    emit_vsetvlis (bb); 
+  
+  // Now that all vsetvlis are explicit, go through and do block-local
+  // DSE and peephole transforms based on demanded fields.  Note that
+  // this *must* be done outside the main dataflow so long as we allow
+  // any cross-block analysis within the dataflow.  We can't have both
+  // demanded-fields-based mutation and non-local analysis in the
+  // dataflow at the same time without introducing inconsistencies.
+  FOR_ALL_BB_FN (bb, fn)
+    dolocalpostpass (bb);
+
+  // Once we're fully done rewriting all the instructions, do a final pass
+  // through to check for VSETVLIs which write to an unused destination.
+  // For the non X0, X0 variant, we can replace the destination register
+  // with X0 to reduce register pressure.  This is really a generic
+  // optimization which can be applied to any dead def (TODO: generalize).
+  if (!reload_completed)
+    {
+      FOR_ALL_BB_FN (bb, fn)
+      {
+        rtx_insn *insn = NULL;
+        FOR_BB_INSNS (bb, insn)
+        {
+          if (is_vector_config_instr (insn))
+            {
+              extract_insn_cached (insn);
+              if (recog_data.n_operands == 3 &&
+                  !rtx_equal_p (recog_data.operand[0],
+                                gen_rtx_REG (Pmode, X0_REGNUM)) &&
+                  !rtx_equal_p (recog_data.operand[1],
+                                gen_rtx_REG (Pmode, X0_REGNUM)) &&
+                  (find_reg_note (insn, REG_UNUSED, recog_data.operand[0]) ||
+                   find_reg_note (insn, REG_DEAD, recog_data.operand[0])))
+                {
+                  rtx pat = gen_vsetvl_zero (Pmode, recog_data.operand[1],
+                                             recog_data.operand[2]);
+                  validate_change (insn, &PATTERN (insn), pat, false);
+                }
+            }
+        }
+      }
+    }
+
+  bb_vinfo_map.clear ();
+  bb_queue.clear ();
+
+  if (optimize >= 2)
+    {
+      // Finalization.
+      free_dominance_info (CDI_DOMINATORS);
+      if (crtl->ssa->perform_pending_updates ())
+        cleanup_cfg (0);
+
+      delete crtl->ssa;
+      crtl->ssa = nullptr;
+    }
+
+  return 0;
+}
+
+const pass_data pass_data_insert_vsetvl = {
+    RTL_PASS,         /* type */
+    "insert_vsetvl", /* name */
+    OPTGROUP_NONE,    /* optinfo_flags */
+    TV_NONE,          /* tv_id */
+    0,                /* properties_required */
+    0,                /* properties_provided */
+    0,                /* properties_destroyed */
+    0,                /* todo_flags_start */
+    0,                /* todo_flags_finish */
+};
+
+class pass_insert_vsetvl : public rtl_opt_pass
+{
+public:
+  pass_insert_vsetvl (gcc::context *ctxt)
+      : rtl_opt_pass (pass_data_insert_vsetvl, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+  virtual bool
+  gate (function *)
+  {
+    return TARGET_VECTOR;
+  }
+  virtual unsigned int
+  execute (function *fn)
+  {
+    return rest_of_handle_insert_vsetvl (fn);
+  }
+
+}; // class pass_insert_vsetvl
+
+rtl_opt_pass *
+make_pass_insert_vsetvl (gcc::context *ctxt)
+{
+  return new pass_insert_vsetvl (ctxt);
+}
+
+const pass_data pass_data_insert_vsetvl2 = {
+    RTL_PASS,         /* type */
+    "insert_vsetvl2", /* name */
+    OPTGROUP_NONE,    /* optinfo_flags */
+    TV_NONE,          /* tv_id */
+    0,                /* properties_required */
+    0,                /* properties_provided */
+    0,                /* properties_destroyed */
+    0,                /* todo_flags_start */
+    0,                /* todo_flags_finish */
+};
+
+class pass_insert_vsetvl2 : public rtl_opt_pass
+{
+public:
+  pass_insert_vsetvl2 (gcc::context *ctxt)
+      : rtl_opt_pass (pass_data_insert_vsetvl2, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+  virtual bool
+  gate (function *)
+  {
+    return TARGET_VECTOR;
+  }
+  virtual unsigned int
+  execute (function *fn)
+  {
+    return rest_of_handle_insert_vsetvl (fn);
+  }
+
+}; // class pass_insert_vsetvl2
+
+rtl_opt_pass *
+make_pass_insert_vsetvl2 (gcc::context *ctxt)
+{
+  return new pass_insert_vsetvl2 (ctxt);
+}
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index d99b8dcbaf1..1c42f6297f9 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -81,6 +81,18 @@  enum riscv_vector_bits_enum
   RVV_4096 = 4096
 };
 
+enum vsew_field_enum
+{
+  VSEW_FIELD_000, /* SEW = 8 */
+  VSEW_FIELD_001, /* SEW = 16 */
+  VSEW_FIELD_010, /* SEW = 32 */
+  VSEW_FIELD_011, /* SEW = 64 */
+  VSEW_FIELD_100, /* SEW = 128 */
+  VSEW_FIELD_101, /* SEW = 256 */
+  VSEW_FIELD_110, /* SEW = 512 */
+  VSEW_FIELD_111  /* SEW = 1024 */
+};
+
 enum vlmul_field_enum
 {
   VLMUL_FIELD_000, /* LMUL = 1 */
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index 23ef8ac6114..282a0402485 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -18,3 +18,5 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
+INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_insert_vsetvl);
+INSERT_PASS_BEFORE (pass_sched2, 1, pass_insert_vsetvl2);
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index cae2974b54f..9a7e120854a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -96,6 +96,8 @@  extern std::string riscv_arch_str (bool version_p = true);
 extern bool riscv_hard_regno_rename_ok (unsigned, unsigned);
 
 rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
+rtl_opt_pass * make_pass_insert_vsetvl (gcc::context *ctxt);
+rtl_opt_pass * make_pass_insert_vsetvl2 (gcc::context *ctxt);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
@@ -112,15 +114,32 @@  struct riscv_cpu_info {
 extern const riscv_cpu_info *riscv_find_cpu (const char *);
 
 /* Routines implemented in riscv-vector.cc. */
+extern bool rvv_mask_mode_p (machine_mode);
 extern bool rvv_mode_p (machine_mode);
 extern bool rvv_legitimate_poly_int_p (rtx);
 extern unsigned int rvv_offset_temporaries (bool, poly_int64);
+extern enum vsew_field_enum rvv_classify_vsew_field (machine_mode);
 extern enum vlmul_field_enum rvv_classify_vlmul_field (machine_mode);
 extern unsigned int rvv_parse_vsew_field (unsigned int);
 extern unsigned int rvv_parse_vlmul_field (unsigned int);
 extern bool rvv_parse_vta_field (unsigned int);
 extern bool rvv_parse_vma_field (unsigned int);
 extern int rvv_regsize (machine_mode);
+extern rtx rvv_gen_policy (unsigned int rvv_policy = 0);
+extern opt_machine_mode rvv_get_mask_mode (machine_mode);
+extern machine_mode rvv_translate_attr_mode (rtx_insn *);
+extern void emit_op5 (unsigned int unspec, machine_mode Vmode,
+                      machine_mode VSImode, machine_mode VMSImode,
+                      machine_mode VSUBmode, rtx *operands,
+                      rtx (*gen_vx) (rtx, rtx, rtx, rtx, rtx),
+                      rtx (*gen_vx_32bit) (rtx, rtx, rtx, rtx, rtx),
+                      rtx (*gen_vv) (rtx, rtx, rtx, rtx, rtx),
+                      bool (*imm_p) (rtx), int i, bool reverse);
 
 /* We classify builtin types into two classes:
    1. General builtin class which is using the
diff --git a/gcc/config/riscv/riscv-vector-builtins-iterators.def b/gcc/config/riscv/riscv-vector-builtins-iterators.def
index cc968f5534f..77a391c7630 100644
--- a/gcc/config/riscv/riscv-vector-builtins-iterators.def
+++ b/gcc/config/riscv/riscv-vector-builtins-iterators.def
@@ -7,6 +7,38 @@ 
 #define DEF_RISCV_ARG_MODE_ATTR(A, B, C, D, E)
 #endif
 
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(V, 31)
+DEF_RISCV_ARG_MODE_ATTR(V, 0, VNx2QI, VNx2QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 1, VNx4QI, VNx4QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 2, VNx8QI, VNx8QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 3, VNx16QI, VNx16QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 4, VNx32QI, VNx32QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 5, VNx64QI, VNx64QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 6, VNx128QI, VNx128QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 7, VNx2HI, VNx2HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 8, VNx4HI, VNx4HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 9, VNx8HI, VNx8HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 10, VNx16HI, VNx16HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 11, VNx32HI, VNx32HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 12, VNx64HI, VNx64HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 13, VNx2SI, VNx2SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 14, VNx4SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 15, VNx8SI, VNx8SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 16, VNx16SI, VNx16SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 17, VNx32SI, VNx32SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 18, VNx2DI, VNx2DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 19, VNx4DI, VNx4DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 20, VNx8DI, VNx8DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 21, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V, 22, VNx2SF, VNx2SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 23, VNx4SF, VNx4SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 24, VNx8SF, VNx8SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 25, VNx16SF, VNx16SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 26, VNx32SF, VNx32SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 27, VNx2DF, VNx2DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 28, VNx4DF, VNx4DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 29, VNx8DF, VNx8DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(V, 30, VNx16DF, VNx16DF, TARGET_DOUBLE_FLOAT)
 DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VI, 22)
 DEF_RISCV_ARG_MODE_ATTR(VI, 0, VNx2QI, VNx2QI, TARGET_ANY)
 DEF_RISCV_ARG_MODE_ATTR(VI, 1, VNx4QI, VNx4QI, TARGET_ANY)
@@ -30,6 +62,210 @@  DEF_RISCV_ARG_MODE_ATTR(VI, 18, VNx2DI, VNx2DI, TARGET_ANY)
 DEF_RISCV_ARG_MODE_ATTR(VI, 19, VNx4DI, VNx4DI, TARGET_ANY)
 DEF_RISCV_ARG_MODE_ATTR(VI, 20, VNx8DI, VNx8DI, TARGET_ANY)
 DEF_RISCV_ARG_MODE_ATTR(VI, 21, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VF, 9)
+DEF_RISCV_ARG_MODE_ATTR(VF, 0, VNx2SF, VNx2SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 1, VNx4SF, VNx4SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 2, VNx8SF, VNx8SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 3, VNx16SF, VNx16SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 4, VNx32SF, VNx32SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 5, VNx2DF, VNx2DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 6, VNx4DF, VNx4DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 7, VNx8DF, VNx8DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VF, 8, VNx16DF, VNx16DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VB, 7)
+DEF_RISCV_ARG_MODE_ATTR(VB, 0, VNx2BI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 1, VNx4BI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 2, VNx8BI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 3, VNx16BI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 4, VNx32BI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 5, VNx64BI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VB, 6, VNx128BI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VFULL, 24)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 0, VNx16QI, VNx16QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 1, VNx32QI, VNx32QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 2, VNx64QI, VNx64QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 3, VNx128QI, VNx128QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 4, VNx8HI, VNx8HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 5, VNx16HI, VNx16HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 6, VNx32HI, VNx32HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 7, VNx64HI, VNx64HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 8, VNx4SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 9, VNx8SI, VNx8SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 10, VNx16SI, VNx16SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 11, VNx32SI, VNx32SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 12, VNx2DI, VNx2DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 13, VNx4DI, VNx4DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 14, VNx8DI, VNx8DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 15, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 16, VNx4SF, VNx4SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 17, VNx8SF, VNx8SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 18, VNx16SF, VNx16SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 19, VNx32SF, VNx32SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 20, VNx2DF, VNx2DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 21, VNx4DF, VNx4DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 22, VNx8DF, VNx8DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VFULL, 23, VNx16DF, VNx16DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VPARTIAL, 7)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 0, VNx2QI, VNx2QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 1, VNx4QI, VNx4QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 2, VNx8QI, VNx8QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 3, VNx2HI, VNx2HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 4, VNx4HI, VNx4HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 5, VNx2SI, VNx2SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VPARTIAL, 6, VNx2SF, VNx2SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(V64BITI, 4)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 0, VNx2DI, VNx2DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 1, VNx4DI, VNx4DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 2, VNx8DI, VNx8DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(V64BITI, 3, VNx16DI, VNx16DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VM, 69)
+DEF_RISCV_ARG_MODE_ATTR(VM, 0, VNx2BI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 1, VNx4BI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 2, VNx8BI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 3, VNx16BI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 4, VNx32BI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 5, VNx64BI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 6, VNx128BI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 7, VNx2QI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 8, VNx4QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 9, VNx8QI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 10, VNx16QI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 11, VNx32QI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 12, VNx64QI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 13, VNx128QI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 14, VNx2HI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 15, VNx4HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 16, VNx8HI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 17, VNx16HI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 18, VNx32HI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 19, VNx64HI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 20, VNx2SI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 21, VNx4SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 22, VNx8SI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 23, VNx16SI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 24, VNx32SI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 25, VNx2DI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 26, VNx4DI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 27, VNx8DI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 28, VNx16DI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 29, VNx2SF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 30, VNx4SF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 31, VNx8SF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 32, VNx16SF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 33, VNx32SF, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 34, VNx2DF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 35, VNx4DF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 36, VNx8DF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 37, VNx16DF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 38, VNx2QI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 39, VNx4QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 40, VNx8QI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 41, VNx16QI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 42, VNx32QI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 43, VNx64QI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 44, VNx128QI, VNx128BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 45, VNx2HI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 46, VNx4HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 47, VNx8HI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 48, VNx16HI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 49, VNx32HI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 50, VNx64HI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 51, VNx2SI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 52, VNx4SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 53, VNx8SI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 54, VNx16SI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 55, VNx32SI, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 56, VNx2DI, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 57, VNx4DI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 58, VNx8DI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 59, VNx16DI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 60, VNx2SF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 61, VNx4SF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 62, VNx8SF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 63, VNx16SF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 64, VNx32SF, VNx32BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 65, VNx2DF, VNx2BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 66, VNx4DF, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 67, VNx8DF, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VM, 68, VNx16DF, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VSUB, 31)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 0, VNx2QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 1, VNx4QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 2, VNx8QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 3, VNx16QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 4, VNx32QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 5, VNx64QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 6, VNx128QI, QI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 7, VNx2HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 8, VNx4HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 9, VNx8HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 10, VNx16HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 11, VNx32HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 12, VNx64HI, HI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 13, VNx2SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 14, VNx4SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 15, VNx8SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 16, VNx16SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 17, VNx32SI, SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 18, VNx2DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 19, VNx4DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 20, VNx8DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 21, VNx16DI, DI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 22, VNx2SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 23, VNx4SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 24, VNx8SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 25, VNx16SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 26, VNx32SF, SF, TARGET_HARD_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 27, VNx2DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 28, VNx4DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 29, VNx8DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR(VSUB, 30, VNx16DF, DF, TARGET_DOUBLE_FLOAT)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VDI_TO_VSI, 22)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 0, VNx2QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 1, VNx4QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 2, VNx8QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 3, VNx16QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 4, VNx32QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 5, VNx64QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 6, VNx128QI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 7, VNx2HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 8, VNx4HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 9, VNx8HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 10, VNx16HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 11, VNx32HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 12, VNx64HI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 13, VNx2SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 14, VNx4SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 15, VNx8SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 16, VNx16SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 17, VNx32SI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 18, VNx2DI, VNx4SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 19, VNx4DI, VNx8SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 20, VNx8DI, VNx16SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI, 21, VNx16DI, VNx32SI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR_VARIABLE(VDI_TO_VSI_VM, 22)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 0, VNx2QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 1, VNx4QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 2, VNx8QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 3, VNx16QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 4, VNx32QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 5, VNx64QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 6, VNx128QI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 7, VNx2HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 8, VNx4HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 9, VNx8HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 10, VNx16HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 11, VNx32HI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 12, VNx64HI, VNx64BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 13, VNx2SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 14, VNx4SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 15, VNx8SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 16, VNx16SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 17, VNx32SI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 18, VNx2DI, VNx4BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 19, VNx4DI, VNx8BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 20, VNx8DI, VNx16BI, TARGET_ANY)
+DEF_RISCV_ARG_MODE_ATTR(VDI_TO_VSI_VM, 21, VNx16DI, VNx32BI, TARGET_ANY)
 
 #undef DEF_RISCV_ARG_MODE_ATTR_VARIABLE
 #undef DEF_RISCV_ARG_MODE_ATTR
diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
index a9c8b290104..426490945dd 100644
--- a/gcc/config/riscv/riscv-vector.cc
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -66,6 +66,7 @@ 
 #include "tree-ssa-loop-niter.h"
 #include "rtx-vector-builder.h"
 #include "riscv-vector.h"
+#include "riscv-vector-builtins.h"
 /* This file should be included last.  */
 #include "target-def.h"
 
@@ -158,6 +159,38 @@  rvv_offset_temporaries (bool add_p, poly_int64 offset)
   return count + rvv_add_offset_1_temporaries (constant);
 }
 
+/* Return the vsew field for a specific machine mode. */
+
+enum vsew_field_enum
+rvv_classify_vsew_field (machine_mode mode)
+{
+  switch (GET_MODE_INNER (mode))
+    {
+    case E_QImode:
+      return VSEW_FIELD_000;
+
+    case E_HImode:
+      return VSEW_FIELD_001;
+
+    case E_SImode:
+    case E_SFmode:
+      return VSEW_FIELD_010;
+
+    case E_DImode:
+    case E_DFmode:
+      return VSEW_FIELD_011;
+
+    case E_TImode:
+      return VSEW_FIELD_100;
+
+    default:
+      break;
+    }
+
+  /* We don't care about VSEW for mask modes.  */
+  return VSEW_FIELD_000;
+}
+
 /* Return the vlmul field for a specific machine mode. */
 
 enum vlmul_field_enum
@@ -271,4 +304,339 @@  rvv_get_mask_mode (machine_mode mode)
       && rvv_mask_mode_p (mask_mode))
       return mask_mode;
   return default_get_mask_mode (mode);
+}
+
+/* Generate the policy bitmap for a specific rvv_policy.  Currently only
+   the "any" policy is supported.  */
+rtx
+rvv_gen_policy (unsigned int)
+{
+  return riscv_vector::gen_any_policy ();
+}
+
+/* Return the machine mode of an insn, derived from its mode attribute.  */
+machine_mode
+rvv_translate_attr_mode (rtx_insn *insn)
+{
+  gcc_assert (recog_memoized (insn) >= 0);
+
+  switch (get_attr_mode (insn))
+    {
+#define TRANSLATE_VECTOR_MODE(MODE)                                            \
+  case MODE_VNX##MODE:                                                         \
+    return VNx##MODE##mode;
+      TRANSLATE_VECTOR_MODE (8QI)
+      TRANSLATE_VECTOR_MODE (4HI)
+      TRANSLATE_VECTOR_MODE (2SI)
+      TRANSLATE_VECTOR_MODE (2SF)
+      TRANSLATE_VECTOR_MODE (8BI)
+      TRANSLATE_VECTOR_MODE (4QI)
+      TRANSLATE_VECTOR_MODE (2HI)
+      TRANSLATE_VECTOR_MODE (4BI)
+      TRANSLATE_VECTOR_MODE (2QI)
+      TRANSLATE_VECTOR_MODE (2BI)
+      TRANSLATE_VECTOR_MODE (16QI)
+      TRANSLATE_VECTOR_MODE (8HI)
+      TRANSLATE_VECTOR_MODE (4SI)
+      TRANSLATE_VECTOR_MODE (2DI)
+      TRANSLATE_VECTOR_MODE (4SF)
+      TRANSLATE_VECTOR_MODE (2DF)
+      TRANSLATE_VECTOR_MODE (16BI)
+      TRANSLATE_VECTOR_MODE (32QI)
+      TRANSLATE_VECTOR_MODE (16HI)
+      TRANSLATE_VECTOR_MODE (8SI)
+      TRANSLATE_VECTOR_MODE (4DI)
+      TRANSLATE_VECTOR_MODE (8SF)
+      TRANSLATE_VECTOR_MODE (4DF)
+      TRANSLATE_VECTOR_MODE (32BI)
+      TRANSLATE_VECTOR_MODE (64QI)
+      TRANSLATE_VECTOR_MODE (32HI)
+      TRANSLATE_VECTOR_MODE (16SI)
+      TRANSLATE_VECTOR_MODE (8DI)
+      TRANSLATE_VECTOR_MODE (16SF)
+      TRANSLATE_VECTOR_MODE (8DF)
+      TRANSLATE_VECTOR_MODE (64BI)
+      TRANSLATE_VECTOR_MODE (128QI)
+      TRANSLATE_VECTOR_MODE (64HI)
+      TRANSLATE_VECTOR_MODE (32SI)
+      TRANSLATE_VECTOR_MODE (16DI)
+      TRANSLATE_VECTOR_MODE (32SF)
+      TRANSLATE_VECTOR_MODE (16DF)
+      TRANSLATE_VECTOR_MODE (128BI)
+
+    default:
+      break;
+    }
+
+  return VOIDmode;
+}
+
+/* Return the vtype field for a specific machine mode. */
+static unsigned int
+classify_vtype_field (machine_mode mode)
+{
+  unsigned int vlmul = rvv_classify_vlmul_field (mode);
+  unsigned int vsew = rvv_classify_vsew_field (mode);
+  unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
+  return vtype;
+}
+
+/* Return lmul = real_lmul * 8, scaled so that fractional LMULs stay
+   integral, e.g.
+     1  => 1/8
+     2  => 1/4
+     4  => 1/2
+     8  => 1
+     16 => 2
+     32 => 4
+     64 => 8
+ */
+static unsigned int
+get_lmulx8 (machine_mode mode)
+{
+  unsigned int vlmul = rvv_classify_vlmul_field (mode);
+  switch (vlmul)
+    {
+      case VLMUL_FIELD_000:
+        return 8;
+      case VLMUL_FIELD_001:
+        return 16;
+      case VLMUL_FIELD_010:
+        return 32;
+      case VLMUL_FIELD_011:
+        return 64;
+      case VLMUL_FIELD_101:
+        return 1;
+      case VLMUL_FIELD_110:
+        return 2;
+      case VLMUL_FIELD_111:
+        return 4;
+      default:
+        gcc_unreachable ();
+    }
+}
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+static rtx
+force_reg_for_over_uimm (rtx vl)
+{
+  if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+    {
+      return force_reg (Pmode, vl);
+    }
+
+  return vl;
+}
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+static rtx
+gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+{
+  if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+    {
+      return avl;
+    }
+  rtx i32vl = NULL_RTX;
+  if (CONST_SCALAR_INT_P (avl))
+    {
+      unsigned int vlen_max;
+      unsigned int vlen_min;
+      if (riscv_vector_chunks.is_constant ())
+        {
+          vlen_max = riscv_vector_chunks.to_constant () * 64;
+          vlen_min = vlen_max;
+        }
+      else
+        {
+          /* TODO: vlen_max will be supported as 65536 in the future. */ 
+          vlen_max = RVV_4096;
+          vlen_min = RVV_128;
+        }
+      unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+      unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
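+      // E.g. with a fixed VLEN of 128, SEW = 64 and LMUL = 1:
+      // min_vlmax = max_vlmax = 128 / 64 * 8 / 8 = 2, so any constant
+      // AVL <= 2 is simply doubled to obtain the SEW = 32 vl.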
+      
+      unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
+      if (avl_int <= min_vlmax)
+        {
+          i32vl = gen_int_mode (2 * avl_int, SImode);
+        }
+      else if (avl_int >= 2 * max_vlmax)
+        {
+          // Just set i32vl to VLMAX in this situation
+          i32vl = gen_reg_rtx (Pmode);
+          unsigned int vtype = classify_vtype_field (VSImode);
+          emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), GEN_INT (vtype)));
+        }
+      else
+        {
+          // For AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working vl
+          // is implementation-defined, so let the fallback code below
+          // handle it.
+        }
+    }
+  if (!i32vl)
+    {
+      // Use a vsetvli instruction to get the vl actually used, which is
+      // implementation-defined.
+      rtx i64vl = gen_reg_rtx (Pmode);
+      unsigned int vtype = classify_vtype_field (Vmode);
+      emit_insn (gen_vsetvl (Pmode, i64vl, force_reg (Pmode, avl), GEN_INT (vtype)));
+      // Scale by 2: each 64-bit element is processed as two 32-bit elements.
+      i32vl = gen_reg_rtx (Pmode);
+      emit_insn (gen_rtx_SET (i32vl, gen_rtx_ASHIFT (Pmode, i64vl, const1_rtx)));
+    }
+
+  return force_reg_for_over_uimm (i32vl);
+}
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+static void
+emit_int64_to_vector_32bit (machine_mode Vmode, machine_mode VSImode,
+                            machine_mode VMSImode, rtx vd, rtx s, rtx vl,
+                            rtx tail)
+{
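+  /* Expansion sketch: splat the high 32 bits of S into every SEW = 32
+     element, then vmerge the low 32 bits into the even-numbered elements
+     under a "0101..." mask, so each aligned pair of 32-bit elements forms
+     one 64-bit element (low word first on little-endian RV32).  */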
+  if (CONST_SCALAR_INT_P (s))
+    {
+      s = force_reg (DImode, s);
+    }
+
+  rtx hi = gen_highpart (SImode, s);
+  rtx lo = gen_lowpart (SImode, s);
+
+  rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
+
+  /* make a "0101..." mask vector */
+  rtx vm1 = gen_reg_rtx (VNx4SImode);
+  emit_insn (gen_vmv_v_x_internal (VNx4SImode, vm1, const0_rtx,
+                                   force_reg (SImode, GEN_INT (0x55555555)),
+                                   zero, rvv_gen_policy ()));
+  rtx vm2 = gen_reg_rtx (VMSImode);
+  emit_insn (gen_rtx_SET (vm2, gen_lowpart (VMSImode, vm1)));
+
+  rtx vlx2 = gen_vlx2 (vl, Vmode, VSImode);
+  rtx v2 = gen_reg_rtx (VSImode);
+  emit_insn (gen_vmv_v_x_internal (VSImode, v2, const0_rtx, hi, vlx2,
+                                   rvv_gen_policy ()));
+
+  rtx vd_si = gen_reg_rtx (VSImode);
+  emit_insn (gen_vmerge_vxm_internal (VSImode, vd_si, vm2, const0_rtx, v2, lo,
+                                      vlx2, tail));
+
+  emit_insn (gen_rtx_SET (vd, gen_lowpart (Vmode, vd_si)));
+}
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+bool
+imm32_p (rtx a)
+{
+  if (!CONST_SCALAR_INT_P (a))
+    return false;
+  unsigned HOST_WIDE_INT val = UINTVAL (a);
+  return val <= 0x7FFFFFFFULL || val >= 0xFFFFFFFF80000000ULL;
+}
+
+typedef bool imm_p (rtx);
+typedef rtx gen_3 (rtx, rtx, rtx);
+typedef rtx gen_4 (rtx, rtx, rtx, rtx);
+typedef rtx gen_5 (rtx, rtx, rtx, rtx, rtx);
+typedef rtx gen_6 (rtx, rtx, rtx, rtx, rtx, rtx);
+typedef rtx gen_7 (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+enum GEN_CLASS
+{
+  GEN_VX,
+  GEN_VX_32BIT,
+  GEN_VV
+};
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+enum GEN_CLASS
+modify_operands (machine_mode Vmode, machine_mode VSImode,
+                 machine_mode VMSImode, machine_mode VSUBmode, rtx *operands,
+                 bool (*imm5_p) (rtx), int i, bool reverse, unsigned int unspec)
+{
+  if (!TARGET_64BIT && VSUBmode == DImode)
+    {
+      if (imm32_p (operands[i]))
+        {
+          if (!imm5_p (operands[i]))
+            operands[i] = force_reg (SImode, operands[i]);
+          return GEN_VX_32BIT;
+        }
+      else
+        {
+          rtx result = gen_reg_rtx (Vmode);
+          rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
+          rtx tail = rvv_gen_policy ();
+
+          emit_int64_to_vector_32bit (Vmode, VSImode, VMSImode, result,
+                                      operands[i], zero, tail);
+
+          operands[i] = result;
+
+          if (reverse)
+            {
+              rtx b = operands[i - 1];
+              operands[i - 1] = operands[i];
+              operands[i] = b;
+            }
+          return GEN_VV;
+        }
+    }
+  else
+    {
+      if (!imm5_p (operands[i]))
+        operands[i] = force_reg (VSUBmode, operands[i]);
+      return GEN_VX;
+    }
+}
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+bool
+emit_op5_vmv_v_x (machine_mode Vmode, machine_mode VSImode,
+                  machine_mode VMSImode, machine_mode VSUBmode, rtx *operands,
+                  int i)
+{
+  if (!TARGET_64BIT && VSUBmode == DImode)
+    {
+      if (!imm32_p (operands[i]))
+        {
+          rtx vd = operands[1];
+          if (rtx_equal_p (vd, const0_rtx))
+            {
+              vd = operands[0];
+            }
+          emit_int64_to_vector_32bit (Vmode, VSImode, VMSImode, vd, operands[i],
+                                      operands[3], operands[4]);
+
+          emit_insn (gen_rtx_SET (operands[0], vd));
+          return true;
+        }
+    }
+  return false;
+}
+
+/* Helper function for handling SEW = 64 on an RV32 system.  */
+void
+emit_op5 (unsigned int unspec, machine_mode Vmode, machine_mode VSImode,
+          machine_mode VMSImode, machine_mode VSUBmode, rtx *operands,
+          gen_5 *gen_vx, gen_5 *gen_vx_32bit, gen_5 *gen_vv, imm_p *imm5_p,
+          int i, bool reverse)
+{
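+  /* Dispatch sketch: a vmv.v.x of a 64-bit scalar on RV32 is expanded via
+     emit_int64_to_vector_32bit; otherwise modify_operands either keeps a
+     small immediate, forces the scalar into a register (vx form, possibly
+     the 32-bit variant), or materializes it as a vector (vv form).  */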
+  if (unspec == UNSPEC_VMV)
+    {
+      if (emit_op5_vmv_v_x (Vmode, VSImode, VMSImode, VSUBmode, operands, i))
+        {
+          return;
+        }
+    }
+
+  enum GEN_CLASS gen_class = modify_operands (
+      Vmode, VSImode, VMSImode, VSUBmode, operands, imm5_p, i, reverse, unspec);
+
+  gen_5 *gen = gen_class == GEN_VX   ? gen_vx
+               : gen_class == GEN_VV ? gen_vv
+                                     : gen_vx_32bit;
+
+  emit_insn (
+      (*gen) (operands[0], operands[1], operands[2], operands[3], operands[4]));
 }
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-vector.h b/gcc/config/riscv/riscv-vector.h
index 2c242959077..e93852e3e56 100644
--- a/gcc/config/riscv/riscv-vector.h
+++ b/gcc/config/riscv/riscv-vector.h
@@ -20,14 +20,4 @@ 
 
 #ifndef GCC_RISCV_VECTOR_H
 #define GCC_RISCV_VECTOR_H
-bool riscv_vector_mode_p (machine_mode);
-bool rvv_legitimate_poly_int_p (rtx);
-unsigned int rvv_offset_temporaries (bool, poly_int64);
-vlmul_field_enum rvv_classify_vlmul_field (machine_mode);
-extern unsigned int rvv_parse_vsew_field (unsigned int);
-extern unsigned int rvv_parse_vlmul_field (unsigned int);
-extern bool rvv_parse_vta_field (unsigned int);
-extern bool rvv_parse_vma_field (unsigned int);
-int rvv_regsize (machine_mode);
-opt_machine_mode rvv_get_mask_mode (machine_mode);
 #endif // GCC_RISCV_VECTOR_H
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 2b0b76458a7..238c972de09 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -107,6 +107,7 @@ 
    (VL_REGNUM			66)
    (VTYPE_REGNUM		67)
    (X0_REGNUM			0)
+   (DO_NOT_UPDATE_VL_VTYPE	21)
 ])
 
 (include "predicates.md")
@@ -138,7 +139,13 @@ 
   (const_string "unknown"))
 
 ;; Main data type used by the insn
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF"
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,
+  VNx8QI,VNx4HI,VNx2SI,VNx4HF,VNx2SF,VNx4QI,VNx2HI,VNx2HF,
+  VNx2QI,VNx16QI,VNx8HI,VNx4SI,VNx2DI,VNx8HF,VNx4SF,VNx2DF,
+  VNx32QI,VNx16HI,VNx8SI,VNx4DI,VNx16HF,VNx8SF,VNx4DF,
+  VNx64QI,VNx32HI,VNx16SI,VNx8DI,VNx32HF,VNx16SF,VNx8DF,
+  VNx128QI,VNx64HI,VNx32SI,VNx16DI,VNx64HF,VNx32SF,VNx16DF,
+  VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI,VNx128BI"
   (const_string "unknown"))
 
 ;; True if the main data type is twice the size of a word.
@@ -184,11 +191,67 @@ 
 ;; ghost	an instruction that produces no real code
 ;; bitmanip	bit manipulation instructions
 ;; vsetvl vector configuration setting
+;; vload vector whole register load
+;; vstore vector whole register store
+;; vcopy vector whole register copy
+;; vle vector unit-stride load
+;; vse vector unit-stride store
+;; vlse vector strided load
+;; vsse vector strided store
+;; vluxei vector unordered indexed load
+;; vloxei vector ordered indexed load
+;; vsuxei vector unordered indexed store
+;; vsoxei vector ordered indexed store
+;; vleff vector unit-stride fault-only-first load
+;; varith vector single-width integer and floating-point arithmetic instructions
+;; vadc vector single-width add-with-carry instructions with non-mask dest
+;; vmadc vector single-width add-with-carry instructions with mask dest
+;; vwarith vector widening integer and floating-point arithmetic instructions
+;; vlogical vector integer logical instructions
+;; vshift vector integer shift instructions
+;; vcmp vector integer and floating-point compare
+;; vmul vector integer and floating-point multiply
+;; vmulh vector integer highpart multiply
+;; vdiv vector integer and floating-point divide
+;; vwmul vector integer and floating-point widening multiply
+;; vmadd vector single-width integer and floating-point multiply-add/sub
+;; vwmadd vector widening integer and floating-point multiply-add/sub
+;; vmerge vector element data selection
+;; vmove vector register move
+;; vsarith vector saturating single-width arithmetic instructions
+;; vsmul vector saturating single-width multiply instructions
+;; vscaleshift vector scaling single-width shift instructions
+;; vclip vector saturating clip
+;; vfsqrt vector floating point square root
+;; vfsgnj vector floating-point sign-injection
+;; vfclass vector floating-point classify instructions
+;; vfcvt vector floating point convert
+;; vfwcvt vector widening floating point convert
+;; vfncvt vector narrowing floating point convert
+;; vwcvt vector widening only integer convert
+;; vncvt vector narrowing only integer convert
+;; vreduc vector single-width reduction operations
+;; vwreduc vector widening reduction operations
+;; vmask vector mask operations
+;; vcpop vector mask population count vpopc
+;; vmsetbit vector mask bit manipulation
+;; vid vector element index instruction
+;; vmv_x_s vmv.x.s instruction
+;; vmv_s_x vmv.s.x instruction
+;; vfmv_f_s vfmv.f.s instruction
+;; vfmv_s_f vfmv.s.f instruction
+;; vslide vector slide instructions
+;; vgather vector gather instructions
+;; vcompress vector compress instructions
 (define_attr "type"
   "unknown,branch,jump,call,load,fpload,store,fpstore,
    mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
    fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip,rotate,
-   vsetvl"
+   vsetvl,vload,vstore,vcopy,vle,vse,vlse,vsse,vluxei,vloxei,vsuxei,vsoxei,vleff,
+   varith,vadc,vmadc,vwarith,vlogical,vshift,vcmp,vmul,vmulh,vdiv,vwmul,vmadd,vwmadd,
+   vmerge,vmove,vsarith,vsmul,vscaleshift,vclip,vfsqrt,vfsgnj,vfclass,vfcvt,vfwcvt,vfncvt,
+   vwcvt,vncvt,vreduc,vwreduc,vmask,vcpop,vmsetbit,viota,vid,vmv_x_s,vmv_s_x,vfmv_f_s,vfmv_s_f,
+   vslide,vgather,vcompress"
   (cond [(eq_attr "got" "load") (const_string "load")
 
 	 ;; If a doubleword move uses these expensive instructions,
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 9b0da73f3b5..278f3a0ba82 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -63,6 +63,10 @@  riscv-vector-builtins.o: \
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
 		$(srcdir)/config/riscv/riscv-vector-builtins.cc
 
+riscv-insert-vsetvl.o: $(srcdir)/config/riscv/riscv-insert-vsetvl.cc
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
 PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def
 
 $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 3e0699de86c..9832d2adaa3 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -21,11 +21,138 @@ 
 (define_c_enum "unspec" [
   ;; vsetvli.
   UNSPEC_VSETVLI
+  ;; RVV instructions.
+  UNSPEC_RVV
+  ;; vector select
+  UNSPEC_SELECT
+  
+  ;; vle/vse
+  UNSPEC_UNIT_STRIDE_LOAD
+  UNSPEC_UNIT_STRIDE_STORE
+  
+  ;; unspec merge
+  UNSPEC_MERGE
+  
+  UNSPEC_VMV
 ])
 
+;; All vector modes supported.
+(define_mode_iterator V [
+  VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI VNx64QI VNx128QI
+  VNx2HI VNx4HI VNx8HI VNx16HI VNx32HI VNx64HI
+  VNx2SI VNx4SI VNx8SI VNx16SI VNx32SI
+  VNx2DI VNx4DI VNx8DI VNx16DI
+  (VNx2SF "TARGET_HARD_FLOAT") (VNx4SF "TARGET_HARD_FLOAT") (VNx8SF "TARGET_HARD_FLOAT")
+  (VNx16SF "TARGET_HARD_FLOAT") (VNx32SF "TARGET_HARD_FLOAT")
+  (VNx2DF "TARGET_DOUBLE_FLOAT") (VNx4DF "TARGET_DOUBLE_FLOAT") (VNx8DF "TARGET_DOUBLE_FLOAT")
+  (VNx16DF "TARGET_DOUBLE_FLOAT")])
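+
+;; A define_mode_iterator expands each pattern that uses it once per
+;; listed mode, and the leading '@' on a pattern name makes genemit also
+;; produce an overload taking the mode as a run-time argument.  A hedged
+;; sketch of reaching the '@vle<mode>' pattern from C++ (operand names
+;; here are illustrative):
+;;   /* Selects the VNx4SI instance of @vle<mode>.  */
+;;   emit_insn (gen_vle (VNx4SImode, dest, mask, merge, base, vl, policy));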
+
 ;; All integer vector modes supported for RVV.
 (define_mode_iterator VI [
   VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI VNx64QI VNx128QI
   VNx2HI VNx4HI VNx8HI VNx16HI VNx32HI VNx64HI
   VNx2SI VNx4SI VNx8SI VNx16SI VNx32SI
-  VNx2DI VNx4DI VNx8DI VNx16DI])
\ No newline at end of file
+  VNx2DI VNx4DI VNx8DI VNx16DI])
+
+;; All floating-point vector modes supported for load/store/alu.
+(define_mode_iterator VF [
+  (VNx2SF "TARGET_HARD_FLOAT") (VNx4SF "TARGET_HARD_FLOAT") (VNx8SF "TARGET_HARD_FLOAT")
+  (VNx16SF "TARGET_HARD_FLOAT") (VNx32SF "TARGET_HARD_FLOAT")
+  (VNx2DF "TARGET_DOUBLE_FLOAT") (VNx4DF "TARGET_DOUBLE_FLOAT") (VNx8DF "TARGET_DOUBLE_FLOAT")
+  (VNx16DF "TARGET_DOUBLE_FLOAT")])
+
+;; All vector masking modes.
+(define_mode_iterator VB [
+  VNx2BI VNx4BI VNx8BI VNx16BI
+  VNx32BI VNx64BI VNx128BI])
+
+;; Full vector modes supported.
+(define_mode_iterator VFULL [
+  VNx16QI VNx32QI VNx64QI VNx128QI
+  VNx8HI VNx16HI VNx32HI VNx64HI
+  VNx4SI VNx8SI VNx16SI VNx32SI
+  VNx2DI VNx4DI VNx8DI VNx16DI
+  (VNx4SF "TARGET_HARD_FLOAT") (VNx8SF "TARGET_HARD_FLOAT") (VNx16SF "TARGET_HARD_FLOAT") (VNx32SF "TARGET_HARD_FLOAT")
+  (VNx2DF "TARGET_DOUBLE_FLOAT") (VNx4DF "TARGET_DOUBLE_FLOAT") (VNx8DF "TARGET_DOUBLE_FLOAT") (VNx16DF "TARGET_DOUBLE_FLOAT")])
+
+;; Partial vector modes supported.
+(define_mode_iterator VPARTIAL [
+  VNx2QI VNx4QI VNx8QI
+  VNx2HI VNx4HI
+  VNx2SI
+  (VNx2SF "TARGET_HARD_FLOAT")])
+
+;; All vector modes supported for integer sew = 64.
+(define_mode_iterator V64BITI [VNx2DI VNx4DI VNx8DI VNx16DI])
+
+;; Map a vector int or float mode to its mask (compare result) mode.
+(define_mode_attr VM [
+  (VNx2BI "VNx2BI") (VNx4BI "VNx4BI") (VNx8BI "VNx8BI") (VNx16BI "VNx16BI")
+  (VNx32BI "VNx32BI") (VNx64BI "VNx64BI") (VNx128BI "VNx128BI")
+  (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI")
+  (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") (VNx128QI "VNx128BI") (VNx2HI "VNx2BI")
+  (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI")
+  (VNx64HI "VNx64BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI")
+  (VNx16SI "VNx16BI") (VNx32SI "VNx32BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI")
+  (VNx8DI "VNx8BI") (VNx16DI "VNx16BI")
+  (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI")
+  (VNx32SF "VNx32BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI")
+  (VNx16DF "VNx16BI")])
+
+;; Map a vector mode to its element mode.
+(define_mode_attr VSUB [
+  (VNx2QI "QI") (VNx4QI "QI") (VNx8QI "QI") (VNx16QI "QI")
+  (VNx32QI "QI") (VNx64QI "QI") (VNx128QI "QI") (VNx2HI "HI")
+  (VNx4HI "HI") (VNx8HI "HI") (VNx16HI "HI") (VNx32HI "HI")
+  (VNx64HI "HI") (VNx2SI "SI") (VNx4SI "SI") (VNx8SI "SI")
+  (VNx16SI "SI") (VNx32SI "SI") (VNx2DI "DI") (VNx4DI "DI")
+  (VNx8DI "DI") (VNx16DI "DI")
+  (VNx2SF "SF") (VNx4SF "SF") (VNx8SF "SF") (VNx16SF "SF")
+  (VNx32SF "SF") (VNx2DF "DF") (VNx4DF "DF") (VNx8DF "DF")
+  (VNx16DF "DF")])
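+
+;; Mode attributes substitute per iterator instance: in the VNx4SI copy
+;; of a pattern, <VSUB> resolves to SI and <VM> to VNx4BI, so e.g.
+;;   (match_operand:<VSUB> 1 "reg_or_simm5_operand")
+;; in vector.md takes an SI scalar for a VNx4SI destination.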
+
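+;; The three mappings below appear to support synthesizing 64-bit-element
+;; operations from 32-bit pieces on rv32 (see the *_32bit patterns in
+;; vector.md): every non-DI mode maps to a fixed VNx4SI (and its VNx4BI
+;; mask), while DI modes scale.  vi_to_v64biti is lower case because it
+;; is substituted into pattern *names*, which are lower case.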
+(define_mode_attr VDI_TO_VSI [
+  (VNx2QI "VNx4SI") (VNx4QI "VNx4SI") (VNx8QI "VNx4SI") (VNx16QI "VNx4SI") (VNx32QI "VNx4SI") (VNx64QI "VNx4SI") (VNx128QI "VNx4SI")
+  (VNx2HI "VNx4SI") (VNx4HI "VNx4SI") (VNx8HI "VNx4SI") (VNx16HI "VNx4SI") (VNx32HI "VNx4SI") (VNx64HI "VNx4SI")
+  (VNx2SI "VNx4SI") (VNx4SI "VNx4SI") (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI")
+  (VNx2DI "VNx4SI") (VNx4DI "VNx8SI") (VNx8DI "VNx16SI") (VNx16DI "VNx32SI")])
+
+(define_mode_attr VDI_TO_VSI_VM [
+  (VNx2QI "VNx4BI") (VNx4QI "VNx4BI") (VNx8QI "VNx4BI") (VNx16QI "VNx4BI")
+  (VNx32QI "VNx4BI") (VNx64QI "VNx4BI") (VNx128QI "VNx4BI")
+  (VNx2HI "VNx4BI") (VNx4HI "VNx4BI") (VNx8HI "VNx4BI") (VNx16HI "VNx4BI") (VNx32HI "VNx4BI")
+  (VNx64HI "VNx4BI")
+  (VNx2SI "VNx4BI") (VNx4SI "VNx4BI") (VNx8SI "VNx4BI") (VNx16SI "VNx4BI") (VNx32SI "VNx4BI")
+  (VNx2DI "VNx4BI") (VNx4DI "VNx8BI") (VNx8DI "VNx16BI") (VNx16DI "VNx32BI")
+])
+
+(define_mode_attr vi_to_v64biti [
+  (VNx2QI "vnx2di") (VNx4QI "vnx2di") (VNx8QI "vnx2di") (VNx16QI "vnx2di") (VNx32QI "vnx2di") (VNx64QI "vnx2di") (VNx128QI "vnx2di")
+  (VNx2HI "vnx2di") (VNx4HI "vnx2di") (VNx8HI "vnx2di") (VNx16HI "vnx2di") (VNx32HI "vnx2di") (VNx64HI "vnx2di")
+  (VNx2SI "vnx2di") (VNx4SI "vnx2di") (VNx8SI "vnx2di") (VNx16SI "vnx2di") (VNx32SI "vnx2di")
+  (VNx2DI "vnx2di") (VNx4DI "vnx4di") (VNx8DI "vnx8di") (VNx16DI "vnx16di")])
+
+(define_int_iterator VMVOP [
+  UNSPEC_VMV
+])
+
+(define_int_attr vxoptab [
+  (UNSPEC_VMV "mv")
+])
+
+(define_int_attr VXOPTAB [
+  (UNSPEC_VMV "UNSPEC_VMV")
+])
+
+(define_int_attr immptab [
+  (UNSPEC_VMV "Ws5")
+])
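+
+;; The int iterator/attribute trio above keeps name and constraint
+;; selection in one place: for UNSPEC_VMV, <vxoptab> is "mv", so
+;; @v<vxoptab><mode>_v_x instantiates as @vmv<mode>_v_x, and <immptab>
+;; selects the Ws5 immediate constraint tested through
+;; satisfies_constraint_Ws5 in the expander.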
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 31fdec981b9..4a9c6769812 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -26,6 +26,43 @@ 
 ;; - RVV intrinsic implementation (Document:https://github.com/riscv/rvv-intrinsic-doc)
 
 (include "vector-iterators.md")
+
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Vector Creation
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - Duplicate element to a vector
+;; - Initialize from individual elements
+;; -------------------------------------------------------------------------
+
+;; vector integer modes vec_duplicate.
+(define_expand "@vec_duplicate<mode>"
+  [(match_operand:VI 0 "register_operand")
+   (match_operand:<VSUB> 1 "reg_or_simm5_operand")]
+  "TARGET_VECTOR"
+{
+  emit_insn (gen_v_v_x (UNSPEC_VMV, <MODE>mode,
+      operands[0], const0_rtx, operands[1],
+      gen_rtx_REG (Pmode, X0_REGNUM), rvv_gen_policy ()));
+  DONE;
+})
+
+;; vector floating-point modes vec_duplicate.
+(define_expand "@vec_duplicate<mode>"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:<VSUB> 1 "register_operand")]
+  "TARGET_VECTOR"
+{
+  emit_insn (gen_vfmv_v_f (<MODE>mode, operands[0], const0_rtx,
+      operands[1], gen_rtx_REG (Pmode, X0_REGNUM),
+      rvv_gen_policy ()));
+  DONE;
+})
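+
+;; These expanders give generic code a way to broadcast a scalar; a
+;; hedged sketch of a caller (names illustrative):
+;;   emit_insn (gen_vec_duplicate (VNx4SImode, dest, scalar));
+;; Passing x0 as the AVL operand presumably requests vl = VLMAX, and
+;; rvv_gen_policy () supplies the default tail/mask policy.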
+
 ;; ===============================================================================
 ;; == Intrinsics
 ;; ===============================================================================
@@ -137,4 +174,200 @@ 
     return "";
   }
   [(set_attr "type" "vsetvl")
-   (set_attr "mode" "none")])
\ No newline at end of file
+   (set_attr "mode" "none")])
+
+;; -------------------------------------------------------------------------------
+;; ---- 7. Vector Loads and Stores
+;; -------------------------------------------------------------------------------
+;; Includes:
+;; - 7.4. Vector Unit-Stride Instructions
+;; - 7.5. Vector Strided Instructions
+;; - 7.6. Vector Indexed Instructions
+;; - 7.7. Unit-stride Fault-Only-First Instructions
+;; - 7.8. Vector Load/Store Segment Instructions
+;;  -  7.8.1. Vector Unit-Stride Segment Loads and Stores
+;;  -  7.8.2. Vector Strided Segment Loads and Stores
+;;  -  7.8.3. Vector Indexed Segment Loads and Stores
+;; -------------------------------------------------------------------------------
+
+;; Vector Unit-Stride Loads.
+(define_insn "@vle<mode>"
+  [(set (match_operand:V 0 "register_operand"                 "=vd,vd,  vr,vr")
+    (unspec:V
+      [(unspec:V
+        [(match_operand:<VM> 1 "vector_reg_or_const0_operand" "vm,vm,   J,J")
+          (unspec:V
+            [(match_operand 3 "pmode_register_operand"        "r,r,     r,r")
+            (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_LOAD)
+         (match_operand:V 2 "vector_reg_or_const0_operand"    "0,J,     0,J")] UNSPEC_SELECT)
+      (match_operand 4 "p_reg_or_const_csr_operand"           "rK,rK,   rK,rK")
+      (match_operand 5 "const_int_operand")
+      (reg:SI VL_REGNUM)
+      (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+  "TARGET_VECTOR"
+  "@
+   vle<sew>.v\t%0,(%3),%1.t
+   vle<sew>.v\t%0,(%3),%1.t
+   vle<sew>.v\t%0,(%3)
+   vle<sew>.v\t%0,(%3)"
+  [(set_attr "type" "vle")
+   (set_attr "mode" "<MODE>")])
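+
+;; Example expansion, assuming the <sew> attribute maps VNx4SI to 32:
+;; the masked alternatives emit "vle32.v v4,(a0),v0.t", while the
+;; unmasked ones (mask operand matching the "J" integer-zero constraint)
+;; emit "vle32.v v4,(a0)".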
+
+;; Vector Unit-Stride Stores.
+(define_insn "@vse<mode>"
+  [(set (mem:BLK (scratch))
+    (unspec:BLK
+      [(unspec:V
+        [(match_operand:<VM> 0 "vector_reg_or_const0_operand" "vm,J")
+         (unspec:BLK
+           [(match_operand 1 "pmode_register_operand"         "r,r")
+            (match_operand:V 2 "register_operand"             "vr,vr")
+            (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_STORE)
+         (match_dup 1)] UNSPEC_SELECT)
+      (match_operand 3 "p_reg_or_const_csr_operand"           "rK,rK")
+      (match_operand 4 "const_int_operand")
+      (reg:SI VL_REGNUM)
+      (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+  "TARGET_VECTOR"
+  "@
+   vse<sew>.v\t%2,(%1),%0.t
+   vse<sew>.v\t%2,(%1)"
+  [(set_attr "type" "vse")
+   (set_attr "mode" "<MODE>")])
+
+;; Vector Unit-Stride Mask Loads.
+(define_insn "@vlm<mode>"
+  [(set (match_operand:VB 0 "register_operand"    "=vr")
+  (unspec:VB
+    [(unspec:VB
+      [(match_operand 1 "pmode_register_operand"  "r")
+       (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_LOAD)
+    (match_operand 2 "p_reg_or_const_csr_operand" "rK")
+    (match_operand 3 "const_int_operand")
+    (reg:SI VL_REGNUM)
+    (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+  "TARGET_VECTOR"
+  "vlm.v\t%0,(%1)"
+  [(set_attr "type" "vle")
+   (set_attr "mode" "<MODE>")])
+
+;; Vector Unit-Stride Mask Stores.
+(define_insn "@vsm<mode>"
+  [(set (mem:BLK (scratch))
+  (unspec:BLK
+    [(unspec:BLK
+      [(match_operand 0 "pmode_register_operand"  "r")
+       (match_operand:VB 1 "register_operand"     "vr")
+       (mem:BLK (scratch))] UNSPEC_UNIT_STRIDE_STORE)
+    (match_operand 2 "p_reg_or_const_csr_operand" "rK")
+    (match_operand 3 "const_int_operand")
+    (reg:SI VL_REGNUM)
+    (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+  "TARGET_VECTOR"
+  "vsm.v\t%1,(%0)"
+  [(set_attr "type" "vse")
+   (set_attr "mode" "<MODE>")])
+
+;; vmv.v.x
+(define_expand "@v<vxoptab><mode>_v_x"
+  [(unspec [
+    (match_operand:VI 0 "register_operand")
+    (match_operand:VI 1 "vector_reg_or_const0_operand")
+    (match_operand:<VSUB> 2 "reg_or_const_int_operand")
+    (match_operand 3 "p_reg_or_const_csr_operand")
+    (match_operand 4 "const_int_operand")
+   ] VMVOP)]
+  "TARGET_VECTOR"
+  {
+    emit_op5 (
+      <VXOPTAB>,
+      <MODE>mode, <VDI_TO_VSI>mode, <VDI_TO_VSI_VM>mode,
+      <VSUB>mode,
+      operands,
+      gen_v<vxoptab><mode>_v_x_internal,
+      gen_v<vxoptab><vi_to_v64biti>_v_x_32bit,
+      NULL,
+      satisfies_constraint_<immptab>,
+      2, false
+    );
+    DONE;
+  }
+)
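+
+;; emit_op5 (riscv-vector.cc) is expected to dispatch between the two
+;; patterns below: the _internal form when the scalar element fits a
+;; GPR, and the _32bit form for 64-bit elements on rv32.  The
+;; satisfies_constraint_Ws5 callback apparently lets it route suitable
+;; constants (operand 2 here) to the vmv.v.i alternatives.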
+
+;; Vector-Scalar Integer Move.
+(define_insn "@vmv<mode>_v_x_internal"
+  [(set (match_operand:VI 0 "register_operand"          "=vr,vr,vr,vr")
+  (unspec:VI
+    [(match_operand:VI 1 "vector_reg_or_const0_operand" "0,0,J,J")
+     (vec_duplicate:VI
+      (match_operand:<VSUB> 2 "reg_or_simm5_operand"  "r,Ws5,r,Ws5"))
+     (match_operand 3 "p_reg_or_const_csr_operand"      "rK,rK,rK,rK")
+     (match_operand 4 "const_int_operand")
+     (reg:SI VL_REGNUM)
+     (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+  vmv.v.x\t%0,%2
+  vmv.v.i\t%0,%2
+  vmv.v.x\t%0,%2
+  vmv.v.i\t%0,%2"
+ [(set_attr "type" "vmove")
+  (set_attr "mode" "<MODE>")])
+
+(define_insn "@vmv<mode>_v_x_32bit"
+  [(set (match_operand:V64BITI 0 "register_operand"                     "=vr,vr,vr,vr")
+  (unspec:V64BITI
+    [(match_operand:V64BITI 1 "vector_reg_or_const0_operand"            "0,0,J,J")
+     (vec_duplicate:V64BITI
+        (sign_extend:<VSUB> (match_operand:SI 2 "reg_or_simm5_operand"  "r,Ws5,r,Ws5")))
+     (match_operand:SI 3 "csr_operand"                                  "rK,rK,rK,rK")
+     (match_operand:SI 4 "const_int_operand")
+     (reg:SI VL_REGNUM)
+     (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+  vmv.v.x\t%0,%2
+  vmv.v.i\t%0,%2
+  vmv.v.x\t%0,%2
+  vmv.v.i\t%0,%2"
+ [(set_attr "type" "vmove")
+  (set_attr "mode" "<MODE>")])
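+
+;; On rv32 a 64-bit scalar cannot be passed in one GPR, so this variant
+;; broadcasts an SImode value sign-extended into each 64-bit element,
+;; consistent with the spec's rule that vector-scalar operands are
+;; sign-extended to SEW.  E.g. "vmv.v.x v8,a0" with a0 = -1 writes
+;; 0xFFFFFFFFFFFFFFFF to every element.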
+
+;; Vector-Scalar Floating-Point Move.
+(define_insn "@vfmv<mode>_v_f"
+  [(set (match_operand:VF 0 "register_operand"          "=vr,vr")
+  (unspec:VF
+    [(match_operand:VF 1 "vector_reg_or_const0_operand" "0,J")
+     (vec_duplicate:VF
+       (match_operand:<VSUB> 2 "register_operand"       "f,f"))
+     (match_operand 3 "p_reg_or_const_csr_operand"      "rK,rK")
+     (match_operand 4 "const_int_operand")
+     (reg:SI VL_REGNUM)
+     (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vfmv.v.f\t%0,%2"
+ [(set_attr "type" "vmove")
+  (set_attr "mode" "<MODE>")])
+
+;; Vector-Scalar Integer Merge.
+(define_insn "@vmerge<mode>_vxm_internal"
+  [(set (match_operand:VI 0 "register_operand"              "=vd,vd,vd,vd")
+    (unspec:VI
+      [(match_operand:VI 2 "vector_reg_or_const0_operand"   "0,0,J,J")
+        (unspec:VI
+          [(match_operand:<VM> 1 "register_operand"         "vm,vm,vm,vm")
+           (match_operand:VI 3 "register_operand"           "vr,vr,vr,vr")
+           (vec_duplicate:VI
+             (match_operand:<VSUB> 4 "reg_or_simm5_operand" "r,Ws5,r,Ws5"))] UNSPEC_MERGE)
+       (match_operand 5 "p_reg_or_const_csr_operand"        "rK,rK,rK,rK")
+       (match_operand 6 "const_int_operand")
+       (reg:SI VL_REGNUM)
+       (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "@
+  vmerge.vxm\t%0,%3,%4,%1
+  vmerge.vim\t%0,%3,%4,%1
+  vmerge.vxm\t%0,%3,%4,%1
+  vmerge.vim\t%0,%3,%4,%1"
+ [(set_attr "type" "vmerge")
+  (set_attr "mode" "<MODE>")])
\ No newline at end of file