[01/21] Add RVV modes and support scalable vector

Message ID 20220531085012.269719-2-juzhe.zhong@rivai.ai
State Committed
Headers
Series *** Add RVV (RISC-V 'V' Extension) support *** |

Commit Message

钟居哲 May 31, 2022, 8:49 a.m. UTC
  From: zhongjuzhe <juzhe.zhong@rivai.ai>

gcc/ChangeLog:

        * config.gcc: Add riscv-vector.o extra_objs for RVV support.
        * config/riscv/constraints.md: New constraints.
        * config/riscv/predicates.md: New predicates.
        * config/riscv/riscv-modes.def: New machine mode.
        * config/riscv/riscv-opts.h: New enum.
        * config/riscv/riscv-protos.h: New function declarations.
        * config/riscv/riscv-sr.cc (riscv_remove_unneeded_save_restore_calls): Adjust for poly_int.
        * config/riscv/riscv.cc (struct riscv_frame_info): Change HOST_WIDE_INT to poly_int64.
        (poly_uint16 riscv_vector_chunks): New declaration.
        (riscv_legitimate_constant_p): Adjust for poly_int.
        (riscv_cannot_force_const_mem): Adjust for poly_int.
        (riscv_valid_offset_p): Adjust for poly_int.
        (riscv_valid_lo_sum_p): Adjust for poly_int.
        (riscv_classify_address): Disallow PLUS, LO_SUM and CONST_INT memory address for RVV.
        (riscv_address_insns): Adjust for poly_int.
        (riscv_const_insns): Adjust for poly_int.
        (riscv_load_store_insns): Adjust for poly_int.
        (riscv_legitimize_move): Adjust for poly_int.
        (riscv_binary_cost): Adjust for poly_int.
        (riscv_rtx_costs): Adjust for poly_int.
        (riscv_output_move): Adjust for poly_int.
        (riscv_extend_comparands): Adjust for poly_int.
        (riscv_flatten_aggregate_field): Adjust for poly_int.
        (riscv_get_arg_info): Adjust for poly_int.
        (riscv_pass_by_reference): Adjust for poly_int.
        (riscv_elf_select_rtx_section): Adjust for poly_int.
        (riscv_stack_align): Adjust for poly_int.
        (riscv_compute_frame_info): Adjust for poly_int.
        (riscv_initial_elimination_offset): Change HOST_WIDE_INT to poly_int64.
        (riscv_set_return_address): Adjust for poly_int.
        (riscv_for_each_saved_reg): Adjust for poly_int.
        (riscv_first_stack_step): Adjust for poly_int.
        (riscv_expand_prologue): Adjust for poly_int.
        (riscv_expand_epilogue): Adjust for poly_int.
        (riscv_can_use_return_insn): Adjust for poly_int.
        (riscv_secondary_memory_needed): Disable secondary memory for RVV.
        (riscv_hard_regno_nregs): Add RVV register allocation.
        (riscv_hard_regno_mode_ok): Add RVV register allocation.
        (riscv_convert_riscv_vector_bits): New function.
        (riscv_option_override): Add RVV vector bits parser.
        (riscv_promote_function_mode): Adjust for RVV modes.
        * config/riscv/riscv.h: New macro define.
        * config/riscv/riscv.md: Adjust for poly_int.
        * config/riscv/riscv.opt: New option.
        * config/riscv/t-riscv: New object.
        * config/riscv/riscv-vector.cc: New file.
        * config/riscv/riscv-vector.h: New file.
        
---
 gcc/config.gcc                   |   2 +-
 gcc/config/riscv/constraints.md  |  17 ++
 gcc/config/riscv/predicates.md   |   5 +-
 gcc/config/riscv/riscv-modes.def | 177 ++++++++++++++++++
 gcc/config/riscv/riscv-opts.h    |  27 +++
 gcc/config/riscv/riscv-protos.h  |   9 +-
 gcc/config/riscv/riscv-sr.cc     |   2 +-
 gcc/config/riscv/riscv-vector.cc | 229 +++++++++++++++++++++++
 gcc/config/riscv/riscv-vector.h  |  28 +++
 gcc/config/riscv/riscv.cc        | 302 +++++++++++++++++++++++--------
 gcc/config/riscv/riscv.h         |  84 +++++++--
 gcc/config/riscv/riscv.md        |  36 ++--
 gcc/config/riscv/riscv.opt       |  32 ++++
 gcc/config/riscv/t-riscv         |   4 +
 14 files changed, 849 insertions(+), 105 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-vector.cc
 create mode 100644 gcc/config/riscv/riscv-vector.h
  

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cdbefb5b4f5..50154c2eb3a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -517,7 +517,7 @@  pru-*-*)
 	;;
 riscv*)
 	cpu_type=riscv
-	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o"
+	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-vector.o"
 	d_target_objs="riscv-d.o"
 	;;
 rs6000*-*-*)
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index bafa4188ccb..7fd61a04216 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -80,3 +80,20 @@ 
    A constant @code{move_operand}."
   (and (match_operand 0 "move_operand")
        (match_test "CONSTANT_P (op)")))
+
+;; Vector constraints.
+
+(define_register_constraint "vr" "TARGET_VECTOR ? V_REGS : NO_REGS"
+  "A vector register (if available).")
+
+(define_register_constraint "vd" "TARGET_VECTOR ? VD_REGS : NO_REGS"
+  "A vector register except mask register (if available).")
+
+(define_register_constraint "vm" "TARGET_VECTOR ? VM_REGS : NO_REGS"
+  "A vector mask register (if available).")
+
+(define_constraint "vp"
+  "POLY_INT"
+  (and (match_code "const_poly_int")
+       (match_test "CONST_POLY_INT_COEFFS (op)[0] == UNITS_PER_V_REG.coeffs[0]
+        && CONST_POLY_INT_COEFFS (op)[1] == UNITS_PER_V_REG.coeffs[1]")))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index c37caa2502b..6328cfff367 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -71,7 +71,7 @@ 
 {
   /* Don't handle multi-word moves this way; we don't want to introduce
      the individual word-mode moves until after reload.  */
-  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
     return false;
 
   /* Check whether the constant can be loaded in a single
@@ -145,6 +145,9 @@ 
     {
     case CONST_INT:
       return !splittable_const_int_operand (op, mode);
+    
+    case CONST_POLY_INT:
+      return rvv_legitimate_poly_int_p (op);
 
     case CONST:
     case SYMBOL_REF:
diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index 653228409a4..88cf9551727 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -20,3 +20,180 @@  along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Vector modes.  */
+
+/* Encode the ratio of SEW/LMUL into the mask types. There are the following mask types.  */
+
+/* | Type      | Mode     | SEW/LMUL |
+   | vbool64_t | VNx2BI   | 64       |
+   | vbool32_t | VNx4BI   | 32       |
+   | vbool16_t | VNx8BI   | 16       |
+   | vbool8_t  | VNx16BI  | 8        |
+   | vbool4_t  | VNx32BI  | 4        |
+   | vbool2_t  | VNx64BI  | 2        |
+   | vbool1_t  | VNx128BI | 1        |  */
+
+VECTOR_BOOL_MODE (VNx2BI, 2, BI, 16);
+VECTOR_BOOL_MODE (VNx4BI, 4, BI, 16);
+VECTOR_BOOL_MODE (VNx8BI, 8, BI, 16);
+VECTOR_BOOL_MODE (VNx16BI, 16, BI, 16);
+VECTOR_BOOL_MODE (VNx32BI, 32, BI, 16);
+VECTOR_BOOL_MODE (VNx64BI, 64, BI, 16);
+VECTOR_BOOL_MODE (VNx128BI, 128, BI, 16);
+
+ADJUST_NUNITS (VNx2BI, riscv_vector_chunks * 1);
+ADJUST_NUNITS (VNx4BI, riscv_vector_chunks * 2);
+ADJUST_NUNITS (VNx8BI, riscv_vector_chunks * 4);
+ADJUST_NUNITS (VNx16BI, riscv_vector_chunks * 8);
+ADJUST_NUNITS (VNx32BI, riscv_vector_chunks * 16);
+ADJUST_NUNITS (VNx64BI, riscv_vector_chunks * 32);
+ADJUST_NUNITS (VNx128BI, riscv_vector_chunks * 64);
+
+ADJUST_ALIGNMENT (VNx2BI, 1);
+ADJUST_ALIGNMENT (VNx4BI, 1);
+ADJUST_ALIGNMENT (VNx8BI, 1);
+ADJUST_ALIGNMENT (VNx16BI, 1);
+ADJUST_ALIGNMENT (VNx32BI, 1);
+ADJUST_ALIGNMENT (VNx64BI, 1);
+ADJUST_ALIGNMENT (VNx128BI, 1);
+
+ADJUST_BYTESIZE (VNx2BI, riscv_vector_chunks * 8);
+ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * 8);
+ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * 8);
+ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 8);
+ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * 8);
+ADJUST_BYTESIZE (VNx64BI, riscv_vector_chunks * 8);
+ADJUST_BYTESIZE (VNx128BI, riscv_vector_chunks * 8);
+
+/* Define RVV modes for NVECS vectors.  VB, VH, VS and VD are the prefixes
+   for 8-bit, 16-bit, 32-bit and 64-bit elements respectively.  It isn't
+   strictly necessary to set the alignment here, since the default would
+   be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer.  */
+   
+/* TODO: Because 'Zfh' is not yet supported in upstream GCC, we will
+   support vector modes with 16-bit half-precision floating-point in the
+   next patch, once 'Zfh' is supported upstream.  */
+
+/* | Type                    | Mode     | SEW/LMUL |
+   | vint8m1_t/vuint8m1_t    | VNx16QI  | 8        |
+   | vint8m2_t/vuint8m2_t    | VNx32QI  | 4        |
+   | vint8m4_t/vuint8m4_t    | VNx64QI  | 2        |
+   | vint8m8_t/vuint8m8_t    | VNx128QI | 1        |
+   | vint16m1_t/vint16m1_t   | VNx8HI   | 16       |
+   | vint16m2_t/vint16m2_t   | VNx16HI  | 8        |
+   | vint16m4_t/vint16m4_t   | VNx32HI  | 4        |
+   | vint16m8_t/vint16m8_t   | VNx64HI  | 2        |
+   | vint32m1_t/vint32m1_t   | VNx4SI   | 32       |
+   | vint32m2_t/vint32m2_t   | VNx8SI   | 16       |
+   | vint32m4_t/vint32m4_t   | VNx16SI  | 8        |
+   | vint32m8_t/vint32m8_t   | VNx32SI  | 4        |
+   | vint64m1_t/vint64m1_t   | VNx2DI   | 64       |
+   | vint64m2_t/vint64m2_t   | VNx4DI   | 32       |
+   | vint64m4_t/vint64m4_t   | VNx8DI   | 16       |
+   | vint64m8_t/vint64m8_t   | VNx16DI  | 8        |
+   | vfloat32m1_t            | VNx4SF   | 32       |
+   | vfloat32m2_t            | VNx8SF   | 16       |
+   | vfloat32m4_t            | VNx16SF  | 8        |
+   | vfloat32m8_t            | VNx32SF  | 4        |
+   | vfloat64m1_t            | VNx2DF   | 64       |
+   | vfloat64m2_t            | VNx4DF   | 32       |
+   | vfloat64m4_t            | VNx8DF   | 16       |
+   | vfloat64m8_t            | VNx16DF  | 8        |  */
+
+#define RVV_MODES(NVECS, VB, VH, VS, VD) \
+  VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \
+  VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \
+  \
+  ADJUST_NUNITS (VB##QI, riscv_vector_chunks * NVECS * 8); \
+  ADJUST_NUNITS (VH##HI, riscv_vector_chunks * NVECS * 4); \
+  ADJUST_NUNITS (VS##SI, riscv_vector_chunks * NVECS * 2); \
+  ADJUST_NUNITS (VD##DI, riscv_vector_chunks * NVECS); \
+  ADJUST_NUNITS (VS##SF, riscv_vector_chunks * NVECS * 2); \
+  ADJUST_NUNITS (VD##DF, riscv_vector_chunks * NVECS); \
+  \
+  ADJUST_ALIGNMENT (VB##QI, 1); \
+  ADJUST_ALIGNMENT (VH##HI, 2); \
+  ADJUST_ALIGNMENT (VS##SI, 4); \
+  ADJUST_ALIGNMENT (VD##DI, 8); \
+  ADJUST_ALIGNMENT (VS##SF, 4); \
+  ADJUST_ALIGNMENT (VD##DF, 8);
+
+/* Give vectors the names normally used for 128-bit vectors.
+   The actual number depends on command-line flags.  */
+RVV_MODES (1, VNx16, VNx8, VNx4, VNx2)
+RVV_MODES (2, VNx32, VNx16, VNx8, VNx4)
+RVV_MODES (4, VNx64, VNx32, VNx16, VNx8)
+RVV_MODES (8, VNx128, VNx64, VNx32, VNx16)
+
+/* Partial RVV vectors:
+
+      VNx8QI VNx4HI VNx2SI VNx2SF
+      VNx4QI VNx2HI
+      VNx2QI
+
+   In memory they occupy contiguous locations, in the same way as fixed-length
+   vectors.  E.g. VNx8QImode is half the size of VNx16QImode.
+
+   Passing 1 as the final argument ensures that the modes come after all
+   other modes in the GET_MODE_WIDER chain, so that we never pick them
+   in preference to a full vector mode.  */
+
+/* TODO: Because 'Zfh' is not yet supported in upstream GCC, we will
+   support vector modes with 16-bit half-precision floating-point in a
+   separate patch, once 'Zfh' is supported upstream.  */
+
+/* | Type                     | Mode    | SEW/LMUL |
+   | vint8mf2_t/vuint8mf2_t   | VNx8QI  | 16       |
+   | vint8mf4_t/vuint8mf4_t   | VNx4QI  | 32       |
+   | vint8mf8_t/vuint8mf8_t   | VNx2QI  | 64       |
+   | vint16mf2_t/vuint16mf2_t | VNx4HI  | 32       |
+   | vint16mf4_t/vuint16mf4_t | VNx2HI  | 64       |
+   | vint32mf2_t/vuint32mf2_t | VNx2SI  | 64       |
+   | vfloat32mf2_t            | VNx2SF  | 64       |  */
+   
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 4, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8, 1);
+
+ADJUST_NUNITS (VNx2QI, riscv_vector_chunks);
+ADJUST_NUNITS (VNx2HI, riscv_vector_chunks);
+ADJUST_NUNITS (VNx2SI, riscv_vector_chunks);
+ADJUST_NUNITS (VNx2SF, riscv_vector_chunks);
+
+ADJUST_NUNITS (VNx4QI, riscv_vector_chunks * 2);
+ADJUST_NUNITS (VNx4HI, riscv_vector_chunks * 2);
+
+ADJUST_NUNITS (VNx8QI, riscv_vector_chunks * 4);
+
+ADJUST_ALIGNMENT (VNx2QI, 1);
+ADJUST_ALIGNMENT (VNx4QI, 1);
+ADJUST_ALIGNMENT (VNx8QI, 1);
+
+ADJUST_ALIGNMENT (VNx2HI, 2);
+ADJUST_ALIGNMENT (VNx4HI, 2);
+
+ADJUST_ALIGNMENT (VNx2SI, 4);
+ADJUST_ALIGNMENT (VNx2SF, 4);
+
+/* TODO: To support segment instructions, we need a new definition for
+   tuple modes in gcc/genmodes.cc, which is outside the RISC-V port.
+   We will support it in a separate patch.  */
+
+/* An 8-tuple of RVV vectors with the maximum -mriscv-vector-bits= setting.
+   Note that this is a limit only on the compile-time sizes of modes;
+   it is not a limit on the runtime sizes, since VL-agnostic code
+   must work with arbitrary vector lengths.  */
+/* TODO: According to the RISC-V 'V' ISA spec, the maximum vector length
+   can be 65536 for a single vector register, which means the vector mode
+   in GCC can be at most 65536 * 8 bits (nf=8).  However, 'GET_MODE_SIZE'
+   uses poly_uint16/unsigned short, which will overflow if we specify
+   vector-length = 65536.  To support this feature, we need to change
+   code outside the RISC-V port.  We will support it in another patch. */
+#define MAX_BITSIZE_MODE_ANY_MODE (4096 * 8)
+
+/* Coefficient 1 is multiplied by the number of 64-bit chunks in a vector
+   minus one.  */
+#define NUM_POLY_INT_COEFFS 2
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 1e153b3a6e7..d99b8dcbaf1 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -67,6 +67,33 @@  enum stack_protector_guard {
   SSP_GLOBAL			/* global canary */
 };
 
+/* RVV vector register sizes.  */
+enum riscv_vector_bits_enum
+{
+  RVV_SCALABLE,
+  RVV_NOT_IMPLEMENTED = RVV_SCALABLE,
+  RVV_64 = 64,
+  RVV_128 = 128,
+  RVV_256 = 256,
+  RVV_512 = 512,
+  RVV_1024 = 1024,
+  RVV_2048 = 2048,
+  RVV_4096 = 4096
+};
+
+enum vlmul_field_enum
+{
+  VLMUL_FIELD_000, /* LMUL = 1 */
+  VLMUL_FIELD_001, /* LMUL = 2 */
+  VLMUL_FIELD_010, /* LMUL = 4 */
+  VLMUL_FIELD_011, /* LMUL = 8 */
+  VLMUL_FIELD_100, /* RESERVED */
+  VLMUL_FIELD_101, /* LMUL = 1/8 */
+  VLMUL_FIELD_110, /* LMUL = 1/4 */
+  VLMUL_FIELD_111, /* LMUL = 1/2 */
+  MAX_VLMUL_FIELD
+};
+
 #define MASK_ZICSR    (1 << 0)
 #define MASK_ZIFENCEI (1 << 1)
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 20c2381c21a..19c50f0e702 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -64,7 +64,7 @@  extern rtx riscv_legitimize_call_address (rtx);
 extern void riscv_set_return_address (rtx, rtx);
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern rtx riscv_return_addr (int, rtx);
-extern HOST_WIDE_INT riscv_initial_elimination_offset (int, int);
+extern poly_int64 riscv_initial_elimination_offset (int, int);
 extern void riscv_expand_prologue (void);
 extern void riscv_expand_epilogue (int);
 extern bool riscv_epilogue_uses (unsigned int);
@@ -109,4 +109,11 @@  struct riscv_cpu_info {
 
 extern const riscv_cpu_info *riscv_find_cpu (const char *);
 
+/* Routines implemented in riscv-vector.cc. */
+extern bool rvv_mode_p (machine_mode);
+extern bool rvv_legitimate_poly_int_p (rtx);
+extern unsigned int rvv_offset_temporaries (bool, poly_int64);
+extern enum vlmul_field_enum riscv_classify_vlmul_field (machine_mode);
+extern int rvv_regsize (machine_mode);
+
 #endif /* ! GCC_RISCV_PROTOS_H */
diff --git a/gcc/config/riscv/riscv-sr.cc b/gcc/config/riscv/riscv-sr.cc
index 694f90c1583..7248f04d68f 100644
--- a/gcc/config/riscv/riscv-sr.cc
+++ b/gcc/config/riscv/riscv-sr.cc
@@ -247,7 +247,7 @@  riscv_remove_unneeded_save_restore_calls (void)
   /* We'll adjust stack size after this optimization, that require update every
      sp use site, which could be unsafe, so we decide to turn off this
      optimization if there are any arguments put on stack.  */
-  if (crtl->args.size != 0)
+  if (known_ne (crtl->args.size, 0))
     return;
 
   /* Will point to the first instruction of the function body, after the
diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
new file mode 100644
index 00000000000..e315b5d2cac
--- /dev/null
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -0,0 +1,229 @@ 
+/* Subroutines used for code generation for RISC-V 'V' Extension for GNU compiler.
+   Copyright (C) 2022-2022 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_STRING
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "regs.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "output.h"
+#include "alias.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "varasm.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "function.h"
+#include "explow.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "target.h"
+#include "basic-block.h"
+#include "expr.h"
+#include "optabs.h"
+#include "bitmap.h"
+#include "df.h"
+#include "diagnostic.h"
+#include "builtins.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "opts.h"
+#include "langhooks.h"
+#include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-loop-niter.h"
+#include "rtx-vector-builder.h"
+#include "riscv-vector.h"
+/* This file should be included last.  */
+#include "target-def.h"
+
+#include <string.h>
+/* Helper functions for RVV */
+
+/* Return true if it is a RVV mask mode. */
+bool
+rvv_mask_mode_p (machine_mode mode)
+{
+  if (VECTOR_MODE_P (mode) 
+	    && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+	    && strncmp (GET_MODE_NAME (mode), "VNx", 3) == 0)
+	  return true;
+  
+  return false;
+}
+
+/* Return true if it is a RVV vector mode. */
+bool
+rvv_vector_mode_p (machine_mode mode)
+{
+  if (VECTOR_MODE_P (mode) 
+	    && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+	    || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+	    && strncmp (GET_MODE_NAME (mode), "VNx", 3) == 0
+      /* So far we only support SEW <= 64 RVV mode. */
+      && GET_MODE_BITSIZE (GET_MODE_INNER (mode)) <= 64)
+	  return true;
+  
+  return false;
+}
+
+/* Return true if it is a RVV mode. */
+bool
+rvv_mode_p (machine_mode mode)
+{
+  return rvv_mask_mode_p (mode) || rvv_vector_mode_p (mode);
+}
+
+/* Return true if it is a const poly int whose size is equal to a LMUL = 1 RVV vector. */
+bool
+rvv_legitimate_poly_int_p (rtx x)
+{
+  poly_int64 value = rtx_to_poly_int64 (x);
+
+  HOST_WIDE_INT factor = value.coeffs[0];
+  return (value.coeffs[1] == factor && factor == UNITS_PER_V_REG.coeffs[0]);
+}
+
+/* Return the number of temporary registers that riscv_add_offset_1
+   would need to add OFFSET to a register.  */
+
+static unsigned int
+rvv_add_offset_1_temporaries (HOST_WIDE_INT offset)
+{
+  return SMALL_OPERAND (offset) ? 0 : 1;
+}
+
+/* Return the number of temporary registers that riscv_add_offset
+   would need to move OFFSET into a register or add OFFSET to a register;
+   ADD_P is true if we want the latter rather than the former.  */
+
+unsigned int
+rvv_offset_temporaries (bool add_p, poly_int64 offset)
+{
+  /* This follows the same structure as riscv_add_offset.  */
+  if (add_p && rvv_legitimate_poly_int_p (gen_int_mode (offset, Pmode)))
+    return 0;
+
+  unsigned int count = 0;
+  HOST_WIDE_INT factor = offset.coeffs[1];
+  HOST_WIDE_INT constant = offset.coeffs[0] - factor;
+  poly_int64 poly_offset (factor, factor);
+  if (add_p && rvv_legitimate_poly_int_p (gen_int_mode (offset, Pmode)))
+    /* Need one register for the csrr vlenb result.  */
+    count += 1;
+  else if (factor != 0)
+    {
+      factor = abs (factor);
+      if (!rvv_legitimate_poly_int_p (gen_int_mode (poly_offset, Pmode)))
+        /* Need one register for the CNT result and one for the multiplication
+           factor.  If necessary, the second temporary can be reused for the
+           constant part of the offset.  */
+        return 2;
+      /* Need one register for the CNT result (which might then
+         be shifted).  */
+      count += 1;
+    }
+  return count + rvv_add_offset_1_temporaries (constant);
+}
+
+/* Return the vlmul field for a specific machine mode. */
+
+enum vlmul_field_enum
+rvv_classify_vlmul_field (machine_mode mode)
+{
+	/* Case 1: LMUL = 1. */
+  if (known_eq (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
+    return VLMUL_FIELD_000;
+  
	/* Case 2: LMUL > 1.  */
+  if (known_gt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
+    {
+	unsigned int factor = exact_div (GET_MODE_SIZE (mode), 
+	                           BYTES_PER_RISCV_VECTOR).to_constant ();
+	switch (factor)
+	  {
+	  case 2:
+	    return VLMUL_FIELD_001;
+	  case 4:
+	    return VLMUL_FIELD_010;		
+	  case 8:
+	    return VLMUL_FIELD_011;
+	  default:
+	    gcc_unreachable ();
+	  }
+    }
+	
+	/* Case 3: Fractional LMUL. */
+  if (known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
+    {
+	unsigned int factor = exact_div (BYTES_PER_RISCV_VECTOR, 
+	                           GET_MODE_SIZE (mode)).to_constant ();
+	switch (factor)
+	  {
+	  case 2:
+	    return VLMUL_FIELD_111;
+	  case 4:
+	    return VLMUL_FIELD_110;		
+	  case 8:
+	    return VLMUL_FIELD_101;
+	  default:
+	    gcc_unreachable ();
+	  }
+    }
+	gcc_unreachable ();
+}
+
+/* Return vlmul register size for a machine mode. */
+
+int
+rvv_regsize (machine_mode mode)
+{
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+    return 1;
+		
+  switch (rvv_classify_vlmul_field (mode))
+    {
+    case VLMUL_FIELD_001:
+		  return 2;
+    case VLMUL_FIELD_010:
+		  return 4;
+    case VLMUL_FIELD_011:
+      return 8;
+    default:
+      break;
+    }
+	
+	return 1;
+}
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-vector.h b/gcc/config/riscv/riscv-vector.h
new file mode 100644
index 00000000000..b8d77ddb195
--- /dev/null
+++ b/gcc/config/riscv/riscv-vector.h
@@ -0,0 +1,28 @@ 
+/* Definition of RISC-V 'V' Extension for GNU compiler.
+   Copyright (C) 2022-2022 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_RISCV_VECTOR_H
+#define GCC_RISCV_VECTOR_H
+bool riscv_vector_mode_p (machine_mode);
+bool rvv_legitimate_poly_int_p (rtx);
+unsigned int rvv_offset_temporaries (bool, poly_int64);
+vlmul_field_enum rvv_classify_vlmul_field (machine_mode);
+int rvv_regsize (machine_mode);
+#endif // GCC_RISCV_VECTOR_H
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f83dc796d88..37d8f1271d4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -57,6 +57,16 @@  along with GCC; see the file COPYING3.  If not see
 #include "predict.h"
 #include "tree-pass.h"
 #include "opts.h"
+#include "langhooks.h"
+#include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-loop-niter.h"
+#include "rtx-vector-builder.h"
 
 /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
 #define UNSPEC_ADDRESS_P(X)					\
@@ -100,7 +110,7 @@  enum riscv_address_type {
 /* Information about a function's frame layout.  */
 struct GTY(())  riscv_frame_info {
   /* The size of the frame in bytes.  */
-  HOST_WIDE_INT total_size;
+  poly_int64 total_size;
 
   /* Bit X is set if the function saves or restores GPR X.  */
   unsigned int mask;
@@ -112,17 +122,20 @@  struct GTY(())  riscv_frame_info {
   unsigned save_libcall_adjustment;
 
   /* Offsets of fixed-point and floating-point save areas from frame bottom */
-  HOST_WIDE_INT gp_sp_offset;
-  HOST_WIDE_INT fp_sp_offset;
+  poly_int64 gp_sp_offset;
+  poly_int64 fp_sp_offset;
+  
+  /* Constant offset of the scalable frame.  */
+  HOST_WIDE_INT constant_offset;
 
   /* Offset of virtual frame pointer from stack pointer/frame bottom */
-  HOST_WIDE_INT frame_pointer_offset;
+  poly_int64 frame_pointer_offset;
 
   /* Offset of hard frame pointer from stack pointer/frame bottom */
-  HOST_WIDE_INT hard_frame_pointer_offset;
+  poly_int64 hard_frame_pointer_offset;
 
   /* The offset of arg_pointer_rtx from the bottom of the frame.  */
-  HOST_WIDE_INT arg_pointer_offset;
+  poly_int64 arg_pointer_offset;
 };
 
 enum riscv_privilege_levels {
@@ -255,6 +268,9 @@  static const struct riscv_tune_param *tune_param;
 /* Which automaton to use for tuning.  */
 enum riscv_microarchitecture_type riscv_microarchitecture;
 
+/* The number of 64-bit elements in an RVV vector.  */
+poly_uint16 riscv_vector_chunks;
+
 /* Index R is the smallest register class that contains register R.  */
 const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   GR_REGS,	GR_REGS,	GR_REGS,	GR_REGS,
@@ -273,7 +289,22 @@  const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
   FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
   FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
-  FRAME_REGS,	FRAME_REGS,
+  FRAME_REGS,	FRAME_REGS, VL_REGS,	VTYPE_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  NO_REGS,	NO_REGS,	NO_REGS,	NO_REGS,
+  VM_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
+  VD_REGS,	VD_REGS,	VD_REGS,	VD_REGS,
 };
 
 /* Costs to use when optimizing for rocket.  */
@@ -713,6 +744,16 @@  static int riscv_symbol_insns (enum riscv_symbol_type type)
 static bool
 riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
+	/* If an offset is being added to something else, we need to allow the
+     base to be moved into the destination register, meaning that there
+     are no free temporaries for the offset.  */
+  poly_int64 offset;
+  if (CONST_POLY_INT_P (x)
+    && poly_int_rtx_p (x, &offset)
+    && !offset.is_constant ()
+    && rvv_offset_temporaries (true, offset) > 0)
+    return false;
+		
   return riscv_const_insns (x) > 0;
 }
 
@@ -723,7 +764,13 @@  riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
   enum riscv_symbol_type type;
   rtx base, offset;
-
+  
+	/* There's no way to calculate VL-based values using relocations.  */
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, x, ALL)
+    if (GET_CODE (*iter) == CONST_POLY_INT)
+      return true;
+			
   /* There is no assembler syntax for expressing an address-sized
      high part.  */
   if (GET_CODE (x) == HIGH)
@@ -798,8 +845,8 @@  riscv_valid_offset_p (rtx x, machine_mode mode)
 
   /* We may need to split multiword moves, so make sure that every word
      is accessible.  */
-  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
-      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
+  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
+      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
     return false;
 
   return true;
@@ -863,7 +910,7 @@  riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
   else
     {
       align = GET_MODE_ALIGNMENT (mode);
-      size = GET_MODE_BITSIZE (mode);
+      size = GET_MODE_BITSIZE (mode).to_constant ();
     }
 
   /* We may need to split multiword moves, so make sure that each word
@@ -893,6 +940,9 @@  riscv_classify_address (struct riscv_address_info *info, rtx x,
       return riscv_valid_base_register_p (info->reg, mode, strict_p);
 
     case PLUS:
+      /* RVV load/store disallow any offset.  */
+      if (rvv_mode_p (mode))
+        return false;
       info->type = ADDRESS_REG;
       info->reg = XEXP (x, 0);
       info->offset = XEXP (x, 1);
@@ -900,6 +950,9 @@  riscv_classify_address (struct riscv_address_info *info, rtx x,
 	      && riscv_valid_offset_p (info->offset, mode));
 
     case LO_SUM:
+      /* RVV load/store disallow LO_SUM.  */
+      if (rvv_mode_p (mode))
+        return false;
       info->type = ADDRESS_LO_SUM;
       info->reg = XEXP (x, 0);
       info->offset = XEXP (x, 1);
@@ -918,6 +971,9 @@  riscv_classify_address (struct riscv_address_info *info, rtx x,
 	      && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));
 
     case CONST_INT:
+      /* RVV load/store disallow CONST_INT.  */
+      if (rvv_mode_p (mode))
+        return false;
       /* Small-integer addresses don't occur very often, but they
 	 are legitimate if x0 is a valid base register.  */
       info->type = ADDRESS_CONST_INT;
@@ -1003,8 +1059,8 @@  riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
 
   /* BLKmode is used for single unaligned loads and stores and should
      not count as a multiword mode. */
-  if (mode != BLKmode && might_split_p)
-    n += (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  if (mode != BLKmode && might_split_p && !rvv_mode_p (mode))
+    n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 
   if (addr.type == ADDRESS_LO_SUM)
     n += riscv_symbol_insns (addr.symbol_type) - 1;
@@ -1061,7 +1117,13 @@  riscv_const_insns (rtx x)
     case SYMBOL_REF:
     case LABEL_REF:
       return riscv_symbol_insns (riscv_classify_symbol (x));
-
+      
+    /* TODO: In RVV, a CONST_POLY_INT is materialized with a csrr vlenb
+       instruction plus several scalar shift or mult instructions; the
+       exact count is so far unknown.  We set it to 4 temporarily.  */
+    case CONST_POLY_INT:
+      return 4;
+      
     default:
       return 0;
     }
@@ -1097,9 +1159,9 @@  riscv_load_store_insns (rtx mem, rtx_insn *insn)
 
   /* Try to prove that INSN does not need to be split.  */
   might_split_p = true;
-  if (GET_MODE_BITSIZE (mode) <= 32)
+  if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
     might_split_p = false;
-  else if (GET_MODE_BITSIZE (mode) == 64)
+  else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
     {
       set = single_set (insn);
       if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
@@ -1616,7 +1678,7 @@  riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
        (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
      with auto-sign/zero extend.  */
   if (GET_MODE_CLASS (mode) == MODE_INT
-      && GET_MODE_SIZE (mode) < UNITS_PER_WORD
+      && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
       && can_create_pseudo_p ()
       && MEM_P (src))
     {
@@ -1641,7 +1703,7 @@  riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
 	     improve cse.  */
 	  machine_mode promoted_mode = mode;
 	  if (GET_MODE_CLASS (mode) == MODE_INT
-	      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+	      && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
 	    promoted_mode = word_mode;
 
 	  if (splittable_const_int_operand (src, mode))
@@ -1739,7 +1801,8 @@  riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
 static int
 riscv_binary_cost (rtx x, int single_insns, int double_insns)
 {
-  if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
+  if (!rvv_mode_p (GET_MODE (x))
+    && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
     return COSTS_N_INSNS (double_insns);
   return COSTS_N_INSNS (single_insns);
 }
@@ -1786,6 +1849,14 @@  static bool
 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
 		 int *total, bool speed)
 {
+  /* TODO: We set the cost of every RVV instruction to 1 by default.
+     The cost model needs to be properly analyzed and supported in the future.  */
+  if (rvv_mode_p (mode))
+    {
+      *total = COSTS_N_INSNS (1);
+      return true;      
+    }
+    
   bool float_mode_p = FLOAT_MODE_P (mode);
   int cost;
 
@@ -1845,7 +1916,7 @@  riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
       return false;
 
     case NOT:
-      *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
+      *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
       return false;
 
     case AND:
@@ -2092,7 +2163,7 @@  riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
       if (float_mode_p)
 	*total = tune_param->fp_add[mode == DFmode];
       else
-	*total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
+	*total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
       return false;
 
     case MULT:
@@ -2101,7 +2172,7 @@  riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
       else if (!TARGET_MUL)
 	/* Estimate the cost of a library call.  */
 	*total = COSTS_N_INSNS (speed ? 32 : 6);
-      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+      else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
 	*total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
       else if (!speed)
 	*total = COSTS_N_INSNS (1);
@@ -2264,7 +2335,7 @@  riscv_output_move (rtx dest, rtx src)
   dest_code = GET_CODE (dest);
   src_code = GET_CODE (src);
   mode = GET_MODE (dest);
-  dbl_p = (GET_MODE_SIZE (mode) == 8);
+  dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
 
   if (dbl_p && riscv_split_64bit_move_p (dest, src))
     return "#";
@@ -2275,7 +2346,7 @@  riscv_output_move (rtx dest, rtx src)
 	return dbl_p ? "fmv.x.d\t%0,%1" : "fmv.x.w\t%0,%1";
 
       if (src_code == MEM)
-	switch (GET_MODE_SIZE (mode))
+	switch (GET_MODE_SIZE (mode).to_constant ())
 	  {
 	  case 1: return "lbu\t%0,%1";
 	  case 2: return "lhu\t%0,%1";
@@ -2328,7 +2399,7 @@  riscv_output_move (rtx dest, rtx src)
 	    }
 	}
       if (dest_code == MEM)
-	switch (GET_MODE_SIZE (mode))
+	switch (GET_MODE_SIZE (mode).to_constant ())
 	  {
 	  case 1: return "sb\t%z1,%0";
 	  case 2: return "sh\t%z1,%0";
@@ -2349,6 +2420,17 @@  riscv_output_move (rtx dest, rtx src)
       if (src_code == MEM)
 	return dbl_p ? "fld\t%0,%1" : "flw\t%0,%1";
     }
+  if (dest_code == REG
+	    && GP_REG_P (REGNO (dest))
+	    && src_code == CONST_POLY_INT)
+    {
+	    /* We only want a single read of vlenb (the byte size of one
+	       LMUL = 1 RVV vector register) after reload.  */
+	    poly_int64 value = rtx_to_poly_int64 (src);
+	    gcc_assert (value.coeffs[0] == UNITS_PER_V_REG.coeffs[0]
+	                && value.coeffs[1] == UNITS_PER_V_REG.coeffs[1]);
+	    return "csrr\t%0,vlenb";
+    }
   gcc_unreachable ();
 }
 
@@ -2495,7 +2577,7 @@  static void
 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
 {
   /* Comparisons consider all XLEN bits, so extend sub-XLEN values.  */
-  if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
+  if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
     {
       /* It is more profitable to zero-extend QImode values.  But not if the
 	 first operand has already been sign-extended, and the second one is
@@ -2854,7 +2936,7 @@  riscv_flatten_aggregate_field (const_tree type,
 	if (n != 0)
 	  return -1;
 
-	HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));
+	HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
 
 	if (elt_size <= UNITS_PER_FP_ARG)
 	  {
@@ -2872,9 +2954,9 @@  riscv_flatten_aggregate_field (const_tree type,
     default:
       if (n < 2
 	  && ((SCALAR_FLOAT_TYPE_P (type)
-	       && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
+	       && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
 	      || (INTEGRAL_TYPE_P (type)
-		  && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
+		  && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
 	{
 	  fields[n].type = type;
 	  fields[n].offset = offset;
@@ -3110,7 +3192,7 @@  riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
     }
 
   /* Work out the size of the argument.  */
-  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
   num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 
   /* Doubleword-aligned varargs start on an even register boundary.  */
@@ -3204,7 +3286,7 @@  riscv_function_value (const_tree type, const_tree func, machine_mode mode)
 static bool
 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
 {
-  HOST_WIDE_INT size = arg.type_size_in_bytes ();
+  HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
   struct riscv_arg_info info;
   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
 
@@ -3833,7 +3915,7 @@  riscv_elf_select_rtx_section (machine_mode mode, rtx x,
 {
   section *s = default_elf_select_rtx_section (mode, x, align);
 
-  if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode)))
+  if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
     {
       if (startswith (s->named.name, ".rodata.cst"))
 	{
@@ -3941,6 +4023,19 @@  riscv_save_libcall_count (unsigned mask)
   abort ();
 }
 
+/* Handle stack align for poly_int.  */
+static poly_int64
+riscv_stack_align (poly_int64 value)
+{
+  return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
+}
+
+static HOST_WIDE_INT
+riscv_stack_align (HOST_WIDE_INT value)
+{
+  return RISCV_STACK_ALIGN (value);
+}
+
 /* Populate the current function's riscv_frame_info structure.
 
    RISC-V stack frames grown downward.  High addresses are at the top.
@@ -3989,7 +4084,7 @@  static void
 riscv_compute_frame_info (void)
 {
   struct riscv_frame_info *frame;
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
   bool interrupt_save_prologue_temp = false;
   unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;
 
@@ -4000,7 +4095,7 @@  riscv_compute_frame_info (void)
   if (cfun->machine->interrupt_handler_p)
     {
       HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
-      if (! SMALL_OPERAND (frame->total_size - step1))
+      if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
 	interrupt_save_prologue_temp = true;
     }
 
@@ -4030,23 +4125,23 @@  riscv_compute_frame_info (void)
     }
 
   /* At the bottom of the frame are any outgoing stack arguments. */
-  offset = RISCV_STACK_ALIGN (crtl->outgoing_args_size);
+  offset = riscv_stack_align (crtl->outgoing_args_size);
   /* Next are local stack variables. */
-  offset += RISCV_STACK_ALIGN (get_frame_size ());
+  offset += riscv_stack_align (get_frame_size ());
   /* The virtual frame pointer points above the local variables. */
   frame->frame_pointer_offset = offset;
   /* Next are the callee-saved FPRs. */
   if (frame->fmask)
-    offset += RISCV_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
+    offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
   frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
   /* Next are the callee-saved GPRs. */
   if (frame->mask)
     {
-      unsigned x_save_size = RISCV_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
+      unsigned x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
       unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
 
       /* Only use save/restore routines if they don't alter the stack size.  */
-      if (RISCV_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
+      if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size)
 	{
 	  /* Libcall saves/restores 3 registers at once, so we need to
 	     allocate 12 bytes for callee-saved register.  */
@@ -4062,17 +4157,18 @@  riscv_compute_frame_info (void)
   /* The hard frame pointer points above the callee-saved GPRs. */
   frame->hard_frame_pointer_offset = offset;
   /* Above the hard frame pointer is the callee-allocated varags save area. */
-  offset += RISCV_STACK_ALIGN (cfun->machine->varargs_size);
+  offset += riscv_stack_align (cfun->machine->varargs_size);
   /* Next is the callee-allocated area for pretend stack arguments.  */
-  offset += RISCV_STACK_ALIGN (crtl->args.pretend_args_size);
+  offset += riscv_stack_align (crtl->args.pretend_args_size);
   /* Arg pointer must be below pretend args, but must be above alignment
      padding.  */
   frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
   frame->total_size = offset;
+                        
   /* Next points the incoming stack pointer and any incoming arguments. */
 
   /* Only use save/restore routines when the GPRs are atop the frame.  */
-  if (frame->hard_frame_pointer_offset != frame->total_size)
+  if (known_ne (frame->hard_frame_pointer_offset, frame->total_size))
     frame->save_libcall_adjustment = 0;
 }
 
@@ -4089,10 +4185,10 @@  riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
    or argument pointer.  TO is either the stack pointer or hard frame
    pointer.  */
 
-HOST_WIDE_INT
+poly_int64
 riscv_initial_elimination_offset (int from, int to)
 {
-  HOST_WIDE_INT src, dest;
+  poly_int64 src, dest;
 
   riscv_compute_frame_info ();
 
@@ -4136,7 +4232,7 @@  riscv_set_return_address (rtx address, rtx scratch)
 
   gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
   slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
-				  cfun->machine->frame.gp_sp_offset);
+				  cfun->machine->frame.gp_sp_offset.to_constant());
   riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
 }
 
@@ -4163,13 +4259,13 @@  riscv_save_restore_reg (machine_mode mode, int regno,
    of the frame.  */
 
 static void
-riscv_for_each_saved_reg (HOST_WIDE_INT sp_offset, riscv_save_restore_fn fn,
+riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
 			  bool epilogue, bool maybe_eh_return)
 {
   HOST_WIDE_INT offset;
 
   /* Save the link register and s-registers. */
-  offset = cfun->machine->frame.gp_sp_offset - sp_offset;
+  offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ();
   for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
       {
@@ -4200,14 +4296,14 @@  riscv_for_each_saved_reg (HOST_WIDE_INT sp_offset, riscv_save_restore_fn fn,
 
   /* This loop must iterate over the same space as its companion in
      riscv_compute_frame_info.  */
-  offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+  offset = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
   for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
     if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
       {
 	machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
 
 	riscv_save_restore_reg (mode, regno, offset, fn);
-	offset -= GET_MODE_SIZE (mode);
+	offset -= GET_MODE_SIZE (mode).to_constant ();
       }
 }
 
@@ -4249,21 +4345,21 @@  riscv_restore_reg (rtx reg, rtx mem)
 static HOST_WIDE_INT
 riscv_first_stack_step (struct riscv_frame_info *frame)
 {
-  if (SMALL_OPERAND (frame->total_size))
-    return frame->total_size;
+  if (SMALL_OPERAND (frame->total_size.to_constant()))
+    return frame->total_size.to_constant ();
 
   HOST_WIDE_INT min_first_step =
-    RISCV_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
+    RISCV_STACK_ALIGN ((frame->total_size - frame->fp_sp_offset).to_constant());
   HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
-  HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
+  HOST_WIDE_INT min_second_step = frame->total_size.to_constant() - max_first_step;
   gcc_assert (min_first_step <= max_first_step);
 
   /* As an optimization, use the least-significant bits of the total frame
      size, so that the second adjustment step is just LUI + ADD.  */
   if (!SMALL_OPERAND (min_second_step)
-      && frame->total_size % IMM_REACH < IMM_REACH / 2
-      && frame->total_size % IMM_REACH >= min_first_step)
-    return frame->total_size % IMM_REACH;
+      && frame->total_size.to_constant() % IMM_REACH < IMM_REACH / 2
+      && frame->total_size.to_constant() % IMM_REACH >= min_first_step)
+    return frame->total_size.to_constant() % IMM_REACH;
 
   if (TARGET_RVC)
     {
@@ -4336,7 +4432,7 @@  void
 riscv_expand_prologue (void)
 {
   struct riscv_frame_info *frame = &cfun->machine->frame;
-  HOST_WIDE_INT size = frame->total_size;
+  HOST_WIDE_INT size = frame->total_size.to_constant ();
   unsigned mask = frame->mask;
   rtx insn;
 
@@ -4379,7 +4475,7 @@  riscv_expand_prologue (void)
   if (frame_pointer_needed)
     {
       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
-			    GEN_INT (frame->hard_frame_pointer_offset - size));
+			    GEN_INT ((frame->hard_frame_pointer_offset - size).to_constant ()));
       RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
 
       riscv_emit_stack_tie ();
@@ -4445,7 +4541,7 @@  riscv_expand_epilogue (int style)
      Start off by assuming that no registers need to be restored.  */
   struct riscv_frame_info *frame = &cfun->machine->frame;
   unsigned mask = frame->mask;
-  HOST_WIDE_INT step1 = frame->total_size;
+  HOST_WIDE_INT step1 = frame->total_size.to_constant ();
   HOST_WIDE_INT step2 = 0;
   bool use_restore_libcall = ((style == NORMAL_RETURN)
 			      && riscv_use_save_libcall (frame));
@@ -4453,8 +4549,8 @@  riscv_expand_epilogue (int style)
   rtx insn;
 
   /* We need to add memory barrier to prevent read from deallocated stack.  */
-  bool need_barrier_p = (get_frame_size ()
-			 + cfun->machine->frame.arg_pointer_offset) != 0;
+  bool need_barrier_p = known_ne (get_frame_size (),
+			 cfun->machine->frame.arg_pointer_offset);
 
   if (cfun->machine->naked_p)
     {
@@ -4481,7 +4577,7 @@  riscv_expand_epilogue (int style)
       riscv_emit_stack_tie ();
       need_barrier_p = false;
 
-      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
+      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset.to_constant ());
       if (!SMALL_OPERAND (INTVAL (adjust)))
 	{
 	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
@@ -4495,7 +4591,7 @@  riscv_expand_epilogue (int style)
       rtx dwarf = NULL_RTX;
       rtx cfa_adjust_value = gen_rtx_PLUS (
 			       Pmode, hard_frame_pointer_rtx,
-			       GEN_INT (-frame->hard_frame_pointer_offset));
+			       GEN_INT (-frame->hard_frame_pointer_offset.to_constant ()));
       rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
       dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
       RTX_FRAME_RELATED_P (insn) = 1;
@@ -4512,7 +4608,7 @@  riscv_expand_epilogue (int style)
     }
 
   /* Set TARGET to BASE + STEP1.  */
-  if (step1 > 0)
+  if (known_gt (step1, 0))
     {
       /* Emit a barrier to prevent loads from a deallocated stack.  */
       riscv_emit_stack_tie ();
@@ -4638,7 +4734,7 @@  riscv_epilogue_uses (unsigned int regno)
 bool
 riscv_can_use_return_insn (void)
 {
-  return (reload_completed && cfun->machine->frame.total_size == 0
+  return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
 	  && ! cfun->machine->interrupt_handler_p);
 }
 
@@ -4738,7 +4834,8 @@  static bool
 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
 			       reg_class_t class2)
 {
-  return (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+  return !rvv_mode_p (mode)
+	  && (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
 	  && (class1 == FP_REGS) != (class2 == FP_REGS));
 }
 
@@ -4760,11 +4857,33 @@  riscv_register_move_cost (machine_mode mode,
 static unsigned int
 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
 {
+  if (rvv_mode_p (mode))
+    {
+      /* TODO: Tuple mode register manipulation will be supported
+	 for segment instructions in the future.  */
+
+      /* Handle fractional LMUL: it occupies only part of a vector
+	 register but still needs one whole vector register to hold it.  */
+      if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
+	return 1;
+
+      return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
+    }
+  
+  /* The modes for VL and VTYPE are just markers and do not hold a
+     value, so they always consume exactly one register.  */
+  if (regno == VTYPE_REGNUM || regno == VL_REGNUM)
+    return 1;
+  
+  /* Assume every valid non-vector mode fits in one vector register.  */
+  if (V_REG_P (regno))
+    return 1;
+		
   if (FP_REG_P (regno))
-    return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
+    return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
 
   /* All other registers are word-sized.  */
-  return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 }
 
 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
@@ -4776,11 +4895,17 @@  riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 
   if (GP_REG_P (regno))
     {
+      if (rvv_mode_p (mode))
+	return false;
+	
       if (!GP_REG_P (regno + nregs - 1))
 	return false;
     }
   else if (FP_REG_P (regno))
     {
+      if (rvv_mode_p (mode))
+	return false;
+	
       if (!FP_REG_P (regno + nregs - 1))
 	return false;
 
@@ -4795,6 +4920,18 @@  riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 	      && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
 	return false;
     }
+	else if (V_REG_P (regno))
+    {
+      if (!rvv_mode_p (mode))
+	return false;
+
+      /* RVV spec 3.3.2: for LMUL = 2, 4 or 8, the register number must be
+	 a multiple of 2, 4 or 8; mask vector registers may use any number.  */
+      int regsize = rvv_regsize (mode);
+
+      if (regsize != 1)
+	return ((regno % regsize) == 0);		
+    }			
   else
     return false;
 
@@ -4971,6 +5108,26 @@  riscv_init_machine_status (void)
   return ggc_cleared_alloc<machine_function> ();
 }
 
+/* Return the Vlen value associated with -mriscv-vector-bits= value VALUE.  */
+
+static poly_uint16
+riscv_convert_riscv_vector_bits (riscv_vector_bits_enum value)
+{
+  /* 64-bit RVV modes use different register layouts
+     on big-endian targets, so we would need to forbid subregs that convert
+     from one to the other.  By default a reinterpret sequence would then
+     involve a store to memory in one mode and a load back in the other.
+     Even if we optimize that sequence using reverse instructions,
+     it would still be a significant potential overhead.
+
+     For now, it seems better to generate length-agnostic code for that
+     case instead.  */
+  if (value == RVV_SCALABLE)
+    return poly_uint16 (2, 2);
+  else
+    return (int) value / 64;
+}
+
 /* Implement TARGET_OPTION_OVERRIDE.  */
 
 static void
@@ -5116,7 +5273,9 @@  riscv_option_override (void)
 
       riscv_stack_protector_guard_offset = offs;
     }
-
+  
+  /* Convert -mriscv-vector-bits to a chunks count.  */
+  riscv_vector_chunks = riscv_convert_riscv_vector_bits (riscv_vector_bits);
 }
 
 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
@@ -5474,9 +5633,10 @@  riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
     return promote_mode (type, mode, punsignedp);
 
   unsignedp = *punsignedp;
-  PROMOTE_MODE (mode, unsignedp, type);
+  scalar_mode smode = as_a <scalar_mode> (mode);
+  PROMOTE_MODE (smode, unsignedp, type);
   *punsignedp = unsignedp;
-  return mode;
+  return smode;
 }
 
 /* Implement TARGET_MACHINE_DEPENDENT_REORG.  */
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 5083a1c24b0..8f56a5a4746 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -123,7 +123,7 @@  ASM_MISA_SPEC
 
 /* The mapping from gcc register number to DWARF 2 CFA column number.  */
 #define DWARF_FRAME_REGNUM(REGNO) \
-  (GP_REG_P (REGNO) || FP_REG_P (REGNO) ? REGNO : INVALID_REGNUM)
+  (GP_REG_P (REGNO) || FP_REG_P (REGNO) || (TARGET_VECTOR && V_REG_P (REGNO)) ? REGNO : INVALID_REGNUM)
 
 /* The DWARF 2 CFA column which tracks the return address.  */
 #define DWARF_FRAME_RETURN_COLUMN RETURN_ADDR_REGNUM
@@ -155,6 +155,7 @@  ASM_MISA_SPEC
 
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
+#define UNITS_PER_V_REG (GET_MODE_SIZE (VNx2DImode))
 
 /* The largest type that can be passed in floating-point registers.  */
 #define UNITS_PER_FP_ARG						\
@@ -289,9 +290,13 @@  ASM_MISA_SPEC
    - 32 floating point registers
    - 2 fake registers:
 	- ARG_POINTER_REGNUM
-	- FRAME_POINTER_REGNUM */
+	- FRAME_POINTER_REGNUM 
+   - 1 vl register 
+   - 1 vtype register
+   - 30 unused registers for future expansion
+   - 32 vector registers */
 
-#define FIRST_PSEUDO_REGISTER 66
+#define FIRST_PSEUDO_REGISTER 128
 
 /* x0, sp, gp, and tp are fixed.  */
 
@@ -303,7 +308,11 @@  ASM_MISA_SPEC
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
   /* Others.  */							\
-  1, 1									\
+  1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  /* Vector registers.  */							\
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0			\
 }
 
 /* a0-a7, t0-t6, fa0-fa7, and ft0-ft11 are volatile across calls.
@@ -317,7 +326,11 @@  ASM_MISA_SPEC
   1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,			\
   1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,			\
   /* Others.  */							\
-  1, 1									\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  /* Vector registers.  */							\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1			\
 }
 
 /* Select a register mode required for caller save of hard regno REGNO.
@@ -337,6 +350,10 @@  ASM_MISA_SPEC
 #define FP_REG_LAST  63
 #define FP_REG_NUM   (FP_REG_LAST - FP_REG_FIRST + 1)
 
+#define V_REG_FIRST 96
+#define V_REG_LAST  127
+#define V_REG_NUM   (V_REG_LAST - V_REG_FIRST + 1)
+
 /* The DWARF 2 CFA column which tracks the return address from a
    signal handler context.  This means that to maintain backwards
    compatibility, no hard register can be assigned this column if it
@@ -347,6 +364,8 @@  ASM_MISA_SPEC
   ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM)
 #define FP_REG_P(REGNO)  \
   ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM)
+#define V_REG_P(REGNO)  \
+  ((unsigned int) ((int) (REGNO) - V_REG_FIRST) < V_REG_NUM)
 
 /* True when REGNO is in SIBCALL_REGS set.  */
 #define SIBCALL_REG_P(REGNO)	\
@@ -430,6 +449,11 @@  enum reg_class
   GR_REGS,			/* integer registers */
   FP_REGS,			/* floating-point registers */
   FRAME_REGS,			/* arg pointer and frame pointer */
+  VL_REGS,			/* vl register */
+  VTYPE_REGS,			/* vype register */
+  VM_REGS,			/* v0.t registers */
+  VD_REGS,			/* vector registers except v0.t */
+  V_REGS,			/* vector registers */
   ALL_REGS,			/* all registers */
   LIM_REG_CLASSES		/* max value + 1 */
 };
@@ -450,6 +474,11 @@  enum reg_class
   "GR_REGS",								\
   "FP_REGS",								\
   "FRAME_REGS",								\
+  "VL_REGS",								\
+  "VTYPE_REGS",								\
+  "VM_REGS",								\
+  "VD_REGS",								\
+  "V_REGS",								\
   "ALL_REGS"								\
 }
 
@@ -466,13 +495,18 @@  enum reg_class
 
 #define REG_CLASS_CONTENTS						\
 {									\
-  { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
-  { 0xf003fcc0, 0x00000000, 0x00000000 },	/* SIBCALL_REGS */	\
-  { 0xffffffc0, 0x00000000, 0x00000000 },	/* JALR_REGS */		\
-  { 0xffffffff, 0x00000000, 0x00000000 },	/* GR_REGS */		\
-  { 0x00000000, 0xffffffff, 0x00000000 },	/* FP_REGS */		\
-  { 0x00000000, 0x00000000, 0x00000003 },	/* FRAME_REGS */	\
-  { 0xffffffff, 0xffffffff, 0x00000003 }	/* ALL_REGS */		\
+  { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
+  { 0xf003fcc0, 0x00000000, 0x00000000, 0x00000000 },	/* SIBCALL_REGS */	\
+  { 0xffffffc0, 0x00000000, 0x00000000, 0x00000000 },	/* JALR_REGS */		\
+  { 0xffffffff, 0x00000000, 0x00000000, 0x00000000 },	/* GR_REGS */		\
+  { 0x00000000, 0xffffffff, 0x00000000, 0x00000000 },	/* FP_REGS */		\
+  { 0x00000000, 0x00000000, 0x00000003, 0x00000000 },	/* FRAME_REGS */	\
+  { 0x00000000, 0x00000000, 0x00000004, 0x00000000 },	/* VL_REGS */		\
+  { 0x00000000, 0x00000000, 0x00000008, 0x00000000 },	/* VTYPE_REGS */	\
+  { 0x00000000, 0x00000000, 0x00000000, 0x00000001 },	/* VM_REGS */		\
+  { 0x00000000, 0x00000000, 0x00000000, 0xfffffffe },	/* VD_REGS */	\
+  { 0x00000000, 0x00000000, 0x00000000, 0xffffffff },	/* V_REGS */		\
+  { 0xffffffff, 0xffffffff, 0x0000000f, 0xffffffff }	/* ALL_REGS */		\
 }
 
 /* A C expression whose value is a register class containing hard
@@ -512,9 +546,16 @@  enum reg_class
   60, 61, 62, 63,							\
   /* Call-saved FPRs.  */						\
   40, 41, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,			\
+  /* V24 ~ V31.  */							\
+  120, 121, 122, 123, 124, 125, 126, 127,				\
+  /* V8 ~ V23.  */							\
+  104, 105, 106, 107, 108, 109, 110, 111,				\
+  112, 113, 114, 115, 116, 117, 118, 119,				\
+  /* V0 ~ V7.  */							\
+  96, 97, 98, 99, 100, 101, 102, 103,					\
   /* None of the remaining classes have defined call-saved		\
      registers.  */							\
-  64, 65								\
+  64, 65, 66, 67							\
 }
 
 /* True if VALUE is a signed 12-bit number.  */
@@ -522,6 +563,10 @@  enum reg_class
 #define SMALL_OPERAND(VALUE) \
   ((unsigned HOST_WIDE_INT) (VALUE) + IMM_REACH/2 < IMM_REACH)
 
+#define POLY_SMALL_OPERAND_P(POLY_VALUE)		\
+  (POLY_VALUE.is_constant () ?				\
+     SMALL_OPERAND (POLY_VALUE.to_constant ()) : false)
+
 /* True if VALUE can be loaded into a register using LUI.  */
 
 #define LUI_OPERAND(VALUE)						\
@@ -780,7 +825,14 @@  typedef struct {
   "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5",	\
   "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7",	\
   "fs8", "fs9", "fs10","fs11","ft8", "ft9", "ft10","ft11",	\
-  "arg", "frame", }
+  "arg", "frame", "vl", "vtype", "N/A", "N/A", "N/A", "N/A",	\
+  "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A",	\
+  "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A",	\
+  "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A",	\
+  "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",	\
+  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",	\
+  "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",	\
+  "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", }
 
 #define ADDITIONAL_REGISTER_NAMES					\
 {									\
@@ -955,6 +1007,10 @@  while (0)
 extern const enum reg_class riscv_regno_to_class[];
 extern bool riscv_slow_unaligned_access_p;
 extern unsigned riscv_stack_boundary;
+extern poly_uint16 riscv_vector_chunks;
+/* The number of bits and bytes in a RVV vector.  */
+#define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * 64))
+#define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * 8))
 #endif
 
 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b8ab0cf169a..8e880ba8599 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -102,6 +102,10 @@ 
    (NORMAL_RETURN		0)
    (SIBCALL_RETURN		1)
    (EXCEPTION_RETURN		2)
+   
+   ;; Constant helper for RVV 
+   (VL_REGNUM			66)
+   (VTYPE_REGNUM		67)
 ])
 
 (include "predicates.md")
@@ -1619,23 +1623,23 @@ 
 })
 
 (define_insn "*movdi_32bit"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,m,  *f,*f,*r,*f,*m")
-	(match_operand:DI 1 "move_operand"         " r,i,m,r,*J*r,*m,*f,*f,*f"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,m,  *f,*f,*r,*f,*m")
+	(match_operand:DI 1 "move_operand"         " vp,r,i,m,r,*J*r,*m,*f,*f,*f"))]
   "!TARGET_64BIT
    && (register_operand (operands[0], DImode)
        || reg_or_0_operand (operands[1], DImode))"
   { return riscv_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" "move,const,load,store,mtc,fpload,mfc,fmove,fpstore")
+  [(set_attr "move_type" "const,move,const,load,store,mtc,fpload,mfc,fmove,fpstore")
    (set_attr "mode" "DI")])
 
 (define_insn "*movdi_64bit"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m,  *f,*f,*r,*f,*m")
-	(match_operand:DI 1 "move_operand"         " r,T,m,rJ,*r*J,*m,*f,*f,*f"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r, m,  *f,*f,*r,*f,*m")
+	(match_operand:DI 1 "move_operand"         " vp,r,T,m,rJ,*r*J,*m,*f,*f,*f"))]
   "TARGET_64BIT
    && (register_operand (operands[0], DImode)
        || reg_or_0_operand (operands[1], DImode))"
   { return riscv_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" "move,const,load,store,mtc,fpload,mfc,fmove,fpstore")
+  [(set_attr "move_type" "const,move,const,load,store,mtc,fpload,mfc,fmove,fpstore")
    (set_attr "mode" "DI")])
 
 ;; 32-bit Integer moves
@@ -1650,12 +1654,12 @@ 
 })
 
 (define_insn "*movsi_internal"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,  *f,*f,*r,*m")
-	(match_operand:SI 1 "move_operand"         " r,T,m,rJ,*r*J,*m,*f,*f"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r, m,  *f,*f,*r,*m")
+	(match_operand:SI 1 "move_operand"         " vp,r,T,m,rJ,*r*J,*m,*f,*f"))]
   "(register_operand (operands[0], SImode)
     || reg_or_0_operand (operands[1], SImode))"
   { return riscv_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" "move,const,load,store,mtc,fpload,mfc,fpstore")
+  [(set_attr "move_type" "const,move,const,load,store,mtc,fpload,mfc,fpstore")
    (set_attr "mode" "SI")])
 
 ;; 16-bit Integer moves
@@ -1675,12 +1679,12 @@ 
 })
 
 (define_insn "*movhi_internal"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r, m,  *f,*r")
-	(match_operand:HI 1 "move_operand"	   " r,T,m,rJ,*r*J,*f"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r, m,  *f,*r")
+	(match_operand:HI 1 "move_operand"	   " vp,r,T,m,rJ,*r*J,*f"))]
   "(register_operand (operands[0], HImode)
     || reg_or_0_operand (operands[1], HImode))"
   { return riscv_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" "move,const,load,store,mtc,mfc")
+  [(set_attr "move_type" "const,move,const,load,store,mtc,mfc")
    (set_attr "mode" "HI")])
 
 ;; HImode constant generation; see riscv_move_integer for details.
@@ -1717,12 +1721,12 @@ 
 })
 
 (define_insn "*movqi_internal"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r, m,  *f,*r")
-	(match_operand:QI 1 "move_operand"         " r,I,m,rJ,*r*J,*f"))]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,r, m,  *f,*r")
+	(match_operand:QI 1 "move_operand"         " vp,r,I,m,rJ,*r*J,*f"))]
   "(register_operand (operands[0], QImode)
     || reg_or_0_operand (operands[1], QImode))"
   { return riscv_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" "move,const,load,store,mtc,mfc")
+  [(set_attr "move_type" "const,move,const,load,store,mtc,mfc")
    (set_attr "mode" "QI")])
 
 ;; 32-bit floating point moves
@@ -2095,7 +2099,7 @@ 
        (lshiftrt:GPR (match_dup 3) (match_dup 2)))]
 {
   /* Op2 is a VOIDmode constant, so get the mode size from op1.  */
-  operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[1]))
+  operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[1])).to_constant ()
 			 - exact_log2 (INTVAL (operands[2]) + 1));
 })
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 9e9fe6d8ccd..42bdca569cd 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -70,6 +70,38 @@  Enum(abi_type) String(lp64f) Value(ABI_LP64F)
 EnumValue
 Enum(abi_type) String(lp64d) Value(ABI_LP64D)
 
+Enum
+Name(riscv_vector_bits) Type(enum riscv_vector_bits_enum)
+The possible RVV vector lengths:
+
+EnumValue
+Enum(riscv_vector_bits) String(scalable) Value(RVV_SCALABLE)
+
+EnumValue
+Enum(riscv_vector_bits) String(64) Value(RVV_64)
+
+EnumValue
+Enum(riscv_vector_bits) String(128) Value(RVV_128)
+
+EnumValue
+Enum(riscv_vector_bits) String(256) Value(RVV_256)
+
+EnumValue
+Enum(riscv_vector_bits) String(512) Value(RVV_512)
+
+EnumValue
+Enum(riscv_vector_bits) String(1024) Value(RVV_1024)
+
+EnumValue
+Enum(riscv_vector_bits) String(2048) Value(RVV_2048)
+
+EnumValue
+Enum(riscv_vector_bits) String(4096) Value(RVV_4096)
+
+mriscv-vector-bits=
+Target RejectNegative Joined Enum(riscv_vector_bits) Var(riscv_vector_bits) Init(RVV_SCALABLE)
+-mriscv-vector-bits=<number>	Set the number of bits in an RVV vector register.
+
 mfdiv
 Target Mask(FDIV)
 Use hardware floating-point divide and square root instructions.
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 19736b3a38f..b5abf9c45d0 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -23,6 +23,11 @@  riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.cc
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+riscv-vector.o: $(srcdir)/config/riscv/riscv-vector.cc \
+  $(CONFIG_H) $(SYSTEM_H) $(TM_H)
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
 PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def
 
 $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \