[07/21] Add register spilling support

Message ID 20220531085012.269719-8-juzhe.zhong@rivai.ai
State Committed
Series *** Add RVV (RISC-V 'V' Extension) support ***

Commit Message

juzhe.zhong@rivai.ai May 31, 2022, 8:49 a.m. UTC
  From: zhongjuzhe <juzhe.zhong@rivai.ai>

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (rvv_expand_const_vector): New function.
        (rvv_expand_const_mask): New function.
        (rvv_const_vec_all_same_in_range_p): New function.
        * config/riscv/riscv-vector.cc (classify_vtype_field): Move earlier in the file.
        (get_lmulx8): Likewise.
        (force_reg_for_over_uimm): Likewise.
        (gen_vlx2): Likewise.
        (emit_int64_to_vector_32bit): Likewise.
        (rvv_expand_const_vector): New function.
        (rvv_expand_const_mask): New function.
        (rvv_const_vec_all_same_in_range_p): New function.
        * config/riscv/riscv.cc (riscv_const_insns): Add const vector cost.
        * config/riscv/vector-iterators.md: New iterators and attributes.
        * config/riscv/vector.md (mov<mode>): New pattern.
        (*mov<mode>): New pattern.
        (*mov<mode>_reg): New pattern.
        (@vmclr<mode>_m): New pattern.
        (@vmset<mode>_m): New pattern.
        
---
 gcc/config/riscv/riscv-protos.h      |   3 +
 gcc/config/riscv/riscv-vector.cc     | 349 ++++++++++++++++-----------
 gcc/config/riscv/riscv.cc            |  67 ++++-
 gcc/config/riscv/vector-iterators.md |  24 ++
 gcc/config/riscv/vector.md           | 201 +++++++++++++++
 5 files changed, 502 insertions(+), 142 deletions(-)
  

Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9a7e120854a..618eb746eaa 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -128,6 +128,9 @@  extern int rvv_regsize (machine_mode);
 extern rtx rvv_gen_policy (unsigned int rvv_policy = 0);
 extern opt_machine_mode rvv_get_mask_mode (machine_mode);
 extern machine_mode rvv_translate_attr_mode (rtx_insn *);
+extern bool rvv_expand_const_vector (rtx, rtx);
+extern bool rvv_expand_const_mask (rtx, rtx);
+extern bool rvv_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern void
 emit_op5 (
   unsigned int unspec,
diff --git a/gcc/config/riscv/riscv-vector.cc b/gcc/config/riscv/riscv-vector.cc
index 426490945dd..4b2fe2a8d11 100644
--- a/gcc/config/riscv/riscv-vector.cc
+++ b/gcc/config/riscv/riscv-vector.cc
@@ -71,7 +71,165 @@ 
 #include "target-def.h"
 
 #include <string.h>
-/* Helper functions for RVV */
+
+/* Internal helper functions for RVV */
+
+/* Return the vtype field for a specific machine mode. */
+static unsigned int
+classify_vtype_field (machine_mode mode)
+{
+  unsigned int vlmul = rvv_classify_vlmul_field (mode);
+  unsigned int vsew = rvv_classify_vsew_field (mode);
+  unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
+  return vtype;
+}
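As a side note, the field values plug in as follows (a minimal standalone sketch, not part of the patch; the numeric vsew/vlmul encodings are assumptions taken from the RVV vtype layout, with vlmul in bits 2:0, vsew in bits 5:3, and 0x40 setting the tail-agnostic bit):

  /* Sketch: recompute the vtype value for an e32/m1 mode.  */
  static unsigned int
  example_vtype_e32m1 (void)
  {
    unsigned int vsew  = 2;  /* SEW encoding: 8->0, 16->1, 32->2, 64->3.  */
    unsigned int vlmul = 0;  /* LMUL = 1, i.e. VLMUL_FIELD_000.  */
    return (vsew << 3) | (vlmul & 0x7) | 0x40;  /* == 0x50.  */
  }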
+
+/* lmul = real_lmul * 8
+   guarantee integer
+   e.g.
+     1  => 1/8
+     2  => 1/4
+     4  => 1/2
+     8  => 1
+     16 => 2
+     32 => 4
+     64 => 8
+ */
+static unsigned int
+get_lmulx8 (machine_mode mode)
+{
+  unsigned int vlmul = rvv_classify_vlmul_field (mode);
+  switch (vlmul)
+    {
+      case VLMUL_FIELD_000:
+        return 8;
+      case VLMUL_FIELD_001:
+        return 16;
+      case VLMUL_FIELD_010:
+        return 32;
+      case VLMUL_FIELD_011:
+        return 64;
+      case VLMUL_FIELD_101:
+        return 1;
+      case VLMUL_FIELD_110:
+        return 2;
+      case VLMUL_FIELD_111:
+        return 4;
+      default:
+        gcc_unreachable ();
+    }
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
+static rtx
+force_reg_for_over_uimm (rtx vl)
+{
+  if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+    {
+      return force_reg (Pmode, vl);
+    }
+
+  return vl;
+}
+
+/* Helper functions for handling sew=64 on RV32 system. */
+static rtx
+gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+{
+  if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+    {
+      return avl;
+    }
+  rtx i32vl = NULL_RTX;
+  if (CONST_SCALAR_INT_P (avl))
+    {
+      unsigned int vlen_max;
+      unsigned int vlen_min;
+      if (riscv_vector_chunks.is_constant ())
+        {
+          vlen_max = riscv_vector_chunks.to_constant () * 64;
+          vlen_min = vlen_max;
+        }
+      else
+        {
+          /* TODO: vlen_max will be supported as 65536 in the future. */ 
+          vlen_max = RVV_4096;
+          vlen_min = RVV_128;
+        }
+      unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+      unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
+      
+      unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
+      if (avl_int <= min_vlmax)
+        {
+          i32vl = gen_int_mode (2 * avl_int, SImode);
+        }
+      else if (avl_int >= 2 * max_vlmax)
+        {
+          // Just set i32vl to VLMAX in this situation
+          i32vl = gen_reg_rtx (Pmode);
+          unsigned int vtype = classify_vtype_field (VSImode);
+          emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), GEN_INT (vtype)));
+        }
+      else
+        {
+          // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
+          // is related to the hardware implementation.
+          // So let the following code handle it.
+        }
+    }
+  if (!i32vl)
+    {
+      // Use the vsetvli instruction to get the actual vector length, which
+      // depends on the hardware implementation.
+      rtx i64vl = gen_reg_rtx (Pmode);
+      unsigned int vtype = classify_vtype_field (Vmode);
+      emit_insn (gen_vsetvl (Pmode, i64vl, force_reg (Pmode, avl), GEN_INT (vtype)));
+      // scale 2 for 32-bit length
+      i32vl = gen_reg_rtx (Pmode);
+      emit_insn (gen_rtx_SET (i32vl, gen_rtx_ASHIFT (Pmode, i64vl, const1_rtx)));
+    }
+
+  return force_reg_for_over_uimm (i32vl);
+}
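A worked example of the three cases above (a sketch, not part of the patch; it assumes the same VLEN bounds used by the non-constant branch, 128 through 4096 bits, and a SEW=64, LMUL=1 mode, so get_lmulx8 returns 8):

  /* Sketch: gen_vlx2's decision for a SEW=64, LMUL=1 mode.  */
  enum vlx2_kind { VLX2_CONST, VLX2_VLMAX, VLX2_RUNTIME };

  static enum vlx2_kind
  classify_vlx2 (unsigned int avl)
  {
    unsigned int lmulx8    = 8;                         /* LMUL = 1.  */
    unsigned int min_vlmax = (128 / 64 * lmulx8) / 8;   /* == 2.   */
    unsigned int max_vlmax = (4096 / 64 * lmulx8) / 8;  /* == 64.  */
    if (avl <= min_vlmax)
      return VLX2_CONST;    /* Emit the constant 2 * avl directly.  */
    if (avl >= 2 * max_vlmax)
      return VLX2_VLMAX;    /* vsetvl with x0 clamps the vl to VLMAX.  */
    return VLX2_RUNTIME;    /* vsetvli on the SEW=64 vtype, then shift
                               the returned length left by one.  */
  }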
+
+/* Helper functions for handling sew=64 on RV32 system. */
+static void
+emit_int64_to_vector_32bit (machine_mode Vmode, machine_mode VSImode,
+                            machine_mode VMSImode, rtx vd, rtx s, rtx vl,
+                            rtx tail)
+{
+  if (CONST_SCALAR_INT_P (s))
+    {
+      s = force_reg (DImode, s);
+    }
+
+  rtx hi = gen_highpart (SImode, s);
+  rtx lo = gen_lowpart (SImode, s);
+
+  rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
+
+  /* make a "0101..." mask vector */
+  rtx vm1 = gen_reg_rtx (VNx4SImode);
+  emit_insn (gen_vmv_v_x_internal (VNx4SImode, vm1, const0_rtx,
+                                   force_reg (SImode, GEN_INT (0x55555555)),
+                                   zero, rvv_gen_policy ()));
+  rtx vm2 = gen_reg_rtx (VMSImode);
+  emit_insn (gen_rtx_SET (vm2, gen_lowpart (VMSImode, vm1)));
+
+  rtx vlx2 = gen_vlx2 (vl, Vmode, VSImode);
+  rtx v2 = gen_reg_rtx (VSImode);
+  emit_insn (gen_vmv_v_x_internal (VSImode, v2, const0_rtx, hi, vlx2,
+                                   rvv_gen_policy ()));
+
+  rtx vd_si = gen_reg_rtx (VSImode);
+  emit_insn (gen_vmerge_vxm_internal (VSImode, vd_si, vm2, const0_rtx, v2, lo,
+                                      vlx2, tail));
+
+  emit_insn (gen_rtx_SET (vd, gen_lowpart (Vmode, vd_si)));
+}
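The resulting lane layout can be summarised with a small host-side sketch (plain C, not part of the patch; it assumes the little-endian lane order of RISC-V, so the low 32-bit half of each 64-bit element lives in the even lane):

  #include <stdint.h>

  /* Sketch: splat a 64-bit scalar S by working at SEW=32, as the RTL
     above does with a vmv.v.x broadcast of HI followed by a vmerge
     under the 0101... mask that writes LO into the even lanes.  */
  static void
  splat_u64_via_u32 (uint64_t s, uint32_t *lanes, unsigned int nlanes32)
  {
    uint32_t hi = (uint32_t) (s >> 32);
    uint32_t lo = (uint32_t) s;
    for (unsigned int i = 0; i < nlanes32; i++)
      lanes[i] = (i & 1) ? hi : lo;
    /* Viewed as 64-bit elements, every element now equals S.  */
  }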
+
+/* Global RVV implementation. */
 
 /* Return true if it is a RVV mask mode. */
 bool
@@ -370,159 +528,68 @@  rvv_translate_attr_mode (rtx_insn *insn)
   return VOIDmode;
 }
 
-/* Return the vtype field for a specific machine mode. */
-static unsigned int
-classify_vtype_field (machine_mode mode)
-{
-  unsigned int vlmul = rvv_classify_vlmul_field (mode);
-  unsigned int vsew = rvv_classify_vsew_field (mode);
-  unsigned int vtype = (vsew << 3) | (vlmul & 0x7) | 0x40;
-  return vtype;
-}
-
-/* lmul = real_lmul * 8
-   guarantee integer
-   e.g.
-     1  => 1/8
-     2  => 1/4
-     4  => 1/2
-     8  => 1
-     16 => 2
-     32 => 4
-     64 => 8
- */
-static unsigned int
-get_lmulx8 (machine_mode mode)
-{
-  unsigned int vlmul = rvv_classify_vlmul_field (mode);
-  switch (vlmul)
-    {
-      case VLMUL_FIELD_000:
-        return 8;
-      case VLMUL_FIELD_001:
-        return 16;
-      case VLMUL_FIELD_010:
-        return 32;
-      case VLMUL_FIELD_011:
-        return 64;
-      case VLMUL_FIELD_101:
-        return 1;
-      case VLMUL_FIELD_110:
-        return 2;
-      case VLMUL_FIELD_111:
-        return 4;
-      default:
-        gcc_unreachable ();
-    }
-}
-
-/* Helper functions for handling sew=64 on RV32 system. */
-static rtx
-force_reg_for_over_uimm (rtx vl)
+/* Expand const vector using RVV instructions. */
+bool
+rvv_expand_const_vector (rtx target, rtx src)
 {
-  if (CONST_SCALAR_INT_P (vl) && INTVAL (vl) >= 32)
+  rtx x;
+  machine_mode mode = GET_MODE (target);
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  
+  /* Case 1: Handle const duplicate vector. */
+  if (const_vec_duplicate_p (src, &x))
     {
-      return force_reg (Pmode, vl);
+      if (FLOAT_MODE_P (mode))
+        x = force_reg (inner_mode, x);
+      emit_insn (gen_vec_duplicate (mode, target, x));
+      return true;
     }
-
-  return vl;
+  /* TODO: For intrinsic support, only const duplicate vectors need to be
+     handled here.  More cases will be supported for auto-vectorization. */
+  return false;
 }
 
-/* Helper functions for handling sew=64 on RV32 system. */
-static rtx
-gen_vlx2 (rtx avl, machine_mode Vmode, machine_mode VSImode)
+/* Expand const mask using RVV instructions. */
+bool
+rvv_expand_const_mask (rtx target, rtx src)
 {
-  if (rtx_equal_p (avl, gen_rtx_REG (Pmode, X0_REGNUM)))
+  rtx ele;
+  rtx zero = gen_rtx_REG (Pmode, X0_REGNUM);
+  machine_mode mode = GET_MODE (target);
+  if (const_vec_duplicate_p (src, &ele))
     {
-      return avl;
-    }
-  rtx i32vl = NULL_RTX;
-  if (CONST_SCALAR_INT_P (avl))
-    {
-      unsigned int vlen_max;
-      unsigned int vlen_min;
-      if (riscv_vector_chunks.is_constant ())
-        {
-          vlen_max = riscv_vector_chunks.to_constant () * 64;
-          vlen_min = vlen_max;
-        }
-      else
-        {
-          /* TODO: vlen_max will be supported as 65536 in the future. */ 
-          vlen_max = RVV_4096;
-          vlen_min = RVV_128;
-        }
-      unsigned int max_vlmax = (vlen_max / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
-      unsigned int min_vlmax = (vlen_min / GET_MODE_UNIT_BITSIZE (Vmode) * get_lmulx8 (Vmode)) / 8;
-      
-      unsigned HOST_WIDE_INT avl_int = INTVAL (avl);
-      if (avl_int <= min_vlmax)
-        {
-          i32vl = gen_int_mode (2 * avl_int, SImode);
-        }
-      else if (avl_int >= 2 * max_vlmax)
-        {
-          // Just set i32vl to VLMAX in this situation
-          i32vl = gen_reg_rtx (Pmode);
-          unsigned int vtype = classify_vtype_field (VSImode);
-          emit_insn (gen_vsetvl (Pmode, i32vl, gen_rtx_REG (Pmode, X0_REGNUM), GEN_INT (vtype)));
-        }
-      else
+      gcc_assert (CONST_SCALAR_INT_P (ele));
+      switch (INTVAL (ele))
         {
-          // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
-          // is related to the hardware implementation.
-          // So let the following code handle
+        case 0:
+          emit_insn (gen_vmclr_m (mode, target, zero,
+                                  rvv_gen_policy ()));
+          break;
+        case 1:
+          emit_insn (gen_vmset_m (mode, target, zero,
+                                  rvv_gen_policy ()));
+          break;
+        default:
+          gcc_unreachable ();
         }
+      return true;
     }
-  if (!i32vl)
-    {
-      // Using vsetvli instruction to get actually used length which related to
-      // the hardware implementation
-      rtx i64vl = gen_reg_rtx (Pmode);
-      unsigned int vtype = classify_vtype_field (Vmode);
-      emit_insn (gen_vsetvl (Pmode, i64vl, force_reg (Pmode, avl), GEN_INT (vtype)));
-      // scale 2 for 32-bit length
-      i32vl = gen_reg_rtx (Pmode);
-      emit_insn (gen_rtx_SET (i32vl, gen_rtx_ASHIFT (Pmode, i64vl, const1_rtx)));
-    }
-
-  return force_reg_for_over_uimm (i32vl);
+  
+  /* TODO: For intrinsic support, only const all-zeros and const all-ones
+     masks need to be handled here.  More cases will be supported for
+     auto-vectorization. */
+  return false;
 }
 
-/* Helper functions for handling sew=64 on RV32 system. */
-static void
-emit_int64_to_vector_32bit (machine_mode Vmode, machine_mode VSImode,
-                            machine_mode VMSImode, rtx vd, rtx s, rtx vl,
-                            rtx tail)
-{
-  if (CONST_SCALAR_INT_P (s))
-    {
-      s = force_reg (DImode, s);
-    }
-
-  rtx hi = gen_highpart (SImode, s);
-  rtx lo = gen_lowpart (SImode, s);
-
-  rtx zero = gen_rtx_REG (SImode, X0_REGNUM);
-
-  /* make a "0101..." mask vector */
-  rtx vm1 = gen_reg_rtx (VNx4SImode);
-  emit_insn (gen_vmv_v_x_internal (VNx4SImode, vm1, const0_rtx,
-                                   force_reg (SImode, GEN_INT (0x55555555)),
-                                   zero, rvv_gen_policy ()));
-  rtx vm2 = gen_reg_rtx (VMSImode);
-  emit_insn (gen_rtx_SET (vm2, gen_lowpart (VMSImode, vm1)));
+/* Return true if X is a const_vector whose elements are all the same
+   value and lie in the range between MINVAL and MAXVAL.  */
 
-  rtx vlx2 = gen_vlx2 (vl, Vmode, VSImode);
-  rtx v2 = gen_reg_rtx (VSImode);
-  emit_insn (gen_vmv_v_x_internal (VSImode, v2, const0_rtx, hi, vlx2,
-                                   rvv_gen_policy ()));
-
-  rtx vd_si = gen_reg_rtx (VSImode);
-  emit_insn (gen_vmerge_vxm_internal (VSImode, vd_si, vm2, const0_rtx, v2, lo,
-                                      vlx2, tail));
-
-  emit_insn (gen_rtx_SET (vd, gen_lowpart (Vmode, vd_si)));
+bool
+rvv_const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
+                                   HOST_WIDE_INT maxval)
+{
+  rtx elt;
+  return (const_vec_duplicate_p (x, &elt) && CONST_INT_P (elt) &&
+          IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
 /* Helper functions for handling sew=64 on RV32 system. */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8c78e726a19..fc27dc957dc 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1097,9 +1097,74 @@  riscv_const_insns (rtx x)
       }
 
     case CONST_DOUBLE:
-    case CONST_VECTOR:
       /* We can use x0 to load floating-point zero.  */
       return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+    case CONST_VECTOR:
+      {
+	machine_mode mode = GET_MODE (x);
+	/* For the mode which is not RVV mode, we use
+	   default configuration. */
+	if (!rvv_mode_p (mode))
+	  return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+	unsigned int factor = 0;
+	if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL)
+	  {
+	    /* In RVV, we can use vmclr.m/vmset.m to generate
+	       all 0s/1s bool vector. Otherwise we can only use
+	       load instructions. */
+	    if (x == CONST0_RTX (GET_MODE (x))
+	      || x == CONSTM1_RTX (GET_MODE (x)))
+	      return 1;
+	    else
+	      return 0;
+	  }
+	else if (FLOAT_MODE_P (GET_MODE (x)))
+	  {
+	    /* In RVV, a floating-point value must first be
+	       loaded into a floating-point register and
+	       then duplicated. */
+	    factor = 3;
+	  }
+	else
+	  {
+	    rtx elt;
+	    if (!const_vec_duplicate_p (x, &elt))
+	      {
+	        rtx base, step;
+	        if (const_vec_series_p (x, &base, &step))
+	          {
+	            /* For const vector: {0, 1, 2, ......},
+	               we can use a single instruction vid.v
+	               to generate the vector. */
+	            if (INTVAL (step) == 1
+	              && INTVAL (base) == 0)
+	              factor = 1;
+	            /* We need a vid + li + vmul.vx instruction. */
+	            else if (INTVAL (base) == 0)
+	              factor = 2 + riscv_integer_cost (INTVAL (step));
+	            /* We need a vid + (li + vadd.vx)/vadd.vi instruction. */
+	            else if (INTVAL (step) == 1)
+	              factor = IN_RANGE (INTVAL (base), -16, 15) ? 2
+	                  : 2 + riscv_integer_cost (INTVAL (base));
+	            /* We need a vid + (li + vadd.vx)/vadd.vi + li + vmul.vx instruction. */
+	            else
+	              factor = IN_RANGE (INTVAL (base), -16, 15) ? 4
+	                  : 4 + riscv_integer_cost (INTVAL (base));
+	          }
+	        else
+	          factor = 0;
+	      }
+	    else
+	      {
+	        /* Use vmv.v.i. */
+	        if (rvv_const_vec_all_same_in_range_p (x, -15, 16))
+	          factor = 1;
+	        /* Use li + vmv.v.x. */
+	        else
+	          factor = 1 + riscv_integer_cost (INTVAL (elt));
+	      }
+	  }
+      }
 
     case CONST:
       /* See if we can refer to X directly.  */
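The factor selection in the CONST_VECTOR case above can be read as this standalone sketch (not part of the patch; scalar_cost () is a hypothetical stand-in for riscv_integer_cost (), and the immediate ranges simply mirror the checks in the patch):

  /* Sketch: cost factor chosen for an integer CONST_VECTOR.  */
  static unsigned int
  const_vector_factor (long base, long step, int is_duplicate, long elt,
                       unsigned int (*scalar_cost) (long))
  {
    if (is_duplicate)
      /* vmv.v.i when the value fits the immediate, else li + vmv.v.x.  */
      return (elt >= -15 && elt <= 16) ? 1 : 1 + scalar_cost (elt);
    if (base == 0 && step == 1)
      return 1;                          /* A single vid.v.  */
    if (base == 0)
      return 2 + scalar_cost (step);     /* vid.v + li + vmul.vx.  */
    if (step == 1)                       /* vid.v + vadd.vi or li + vadd.vx.  */
      return (base >= -16 && base <= 15) ? 2 : 2 + scalar_cost (base);
    /* vid.v + vadd.vi/(li + vadd.vx) + li + vmul.vx.  */
    return (base >= -16 && base <= 15) ? 4 : 4 + scalar_cost (base);
  }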
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 9832d2adaa3..e01305ef3fc 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -140,6 +140,30 @@ 
   (VNx2HI "vnx2di") (VNx4HI "vnx2di") (VNx8HI "vnx2di") (VNx16HI "vnx2di") (VNx32HI "vnx2di") (VNx64HI "vnx2di")
   (VNx2SI "vnx2di") (VNx4SI "vnx2di") (VNx8SI "vnx2di") (VNx16SI "vnx2di") (VNx32SI "vnx2di")
   (VNx2DI "vnx2di") (VNx4DI "vnx4di") (VNx8DI "vnx8di") (VNx16DI "vnx16di")])
+
+;; Map a vector mode to SEW
+(define_mode_attr sew [
+  (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8")
+  (VNx32QI "8") (VNx64QI "8") (VNx128QI "8") (VNx2HI "16")
+  (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16")
+  (VNx64HI "16") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32")
+  (VNx16SI "32") (VNx32SI "32") (VNx2DI "64") (VNx4DI "64")
+  (VNx8DI "64") (VNx16DI "64") 
+  (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32")
+  (VNx32SF "32") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64")
+  (VNx16DF "64")])
+
+;; Map a vector mode to its LMUL.
+(define_mode_attr lmul [
+  (VNx2QI "1") (VNx4QI "1") (VNx8QI "1") (VNx16QI "1")
+  (VNx32QI "2") (VNx64QI "4") (VNx128QI "8") (VNx2HI "1")
+  (VNx4HI "1") (VNx8HI "1") (VNx16HI "2") (VNx32HI "4")
+  (VNx64HI "8") (VNx2SI "1") (VNx4SI "1") (VNx8SI "2")
+  (VNx16SI "4") (VNx32SI "8") (VNx2DI "1") (VNx4DI "2")
+  (VNx8DI "4") (VNx16DI "8")
+  (VNx2SF "1") (VNx4SF "1") (VNx8SF "2") (VNx16SF "4")
+  (VNx32SF "8") (VNx2DF "1") (VNx4DF "2") (VNx8DF "4")
+  (VNx16DF "8")])
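The two tables are consistent with each other; here is a sketch (not part of the patch) of the rule they encode, assuming the minimum VLEN of 128 bits that these fixed-nunits modes are sized for:

  /* Sketch: derive the lmul attribute from a mode's element count and
     its sew attribute, e.g. VNx32QI -> 32 * 8 / 128 = LMUL 2 and
     VNx16DI -> 16 * 64 / 128 = LMUL 8.  */
  static unsigned int
  mode_lmul (unsigned int nunits, unsigned int sew)
  {
    unsigned int bits = nunits * sew;    /* Payload of the whole mode.  */
    return bits <= 128 ? 1 : bits / 128;
  }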
   
 (define_int_iterator VMVOP [
   UNSPEC_VMV
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 4a9c6769812..1731d969372 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -62,6 +62,179 @@ 
       rvv_gen_policy ()));
   DONE;
 })
+
+;; =========================================================================
+;; == Vector spilling
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Moves Operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - Full vector load/store/move
+;; - Partial vector load/store/move
+;; - All vector misalign move
+;; -------------------------------------------------------------------------
+
+;; Move Pattern for all vector modes.
+(define_expand "mov<mode>"
+  [(set (match_operand:VFULL 0 "reg_or_mem_operand")
+        (match_operand:VFULL 1 "vector_move_operand"))]
+  "TARGET_VECTOR"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (GET_CODE (operands[1]) == CONST_VECTOR &&
+      rvv_expand_const_vector (operands[0], operands[1]))
+    DONE;
+})
+
+;; Full vector load/store/move.
+(define_insn "*mov<mode>"
+  [(set (match_operand:VFULL 0 "reg_or_mem_operand" "=vr,m,vr")
+        (match_operand:VFULL 1 "reg_or_mem_operand" "m,vr,vr"))]
+  "TARGET_VECTOR"
+  "@
+   vl<lmul>re<sew>.v\t%0,%1
+   vs<lmul>r.v\t%1,%0
+   vmv<lmul>r.v\t%0,%1"
+  [(set_attr "type" "vload,vstore,vcopy")
+   (set_attr "mode" "<MODE>")])
+
+(define_expand "mov<mode>"
+  [(parallel [(set (match_operand:VPARTIAL 0 "reg_or_mem_operand")
+                   (match_operand:VPARTIAL 1 "vector_move_operand"))
+              (clobber (scratch:SI))
+              (clobber (reg:SI VL_REGNUM))
+              (clobber (reg:SI VTYPE_REGNUM))])]
+  "TARGET_VECTOR"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (GET_CODE (operands[1]) == CONST_VECTOR &&
+      rvv_expand_const_vector (operands[0], operands[1]))
+    DONE;
+})
+
+;; Partial vector load/store/move.
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:VPARTIAL 0 "reg_or_mem_operand" "=vr,m,vr")
+        (match_operand:VPARTIAL 1 "reg_or_mem_operand" "m,vr,vr"))
+        (clobber (match_scratch:SI 2 "=&r,&r,X"))
+        (clobber (reg:SI VL_REGNUM))
+        (clobber (reg:SI VTYPE_REGNUM))]
+  "TARGET_VECTOR"
+  "@
+   vle<sew>.v\t%0,%1
+   vse<sew>.v\t%1,%0
+   #"
+  "&& (!reload_completed || (REG_P (operands[0])
+   && REG_P (operands[1])))"
+  [(const_int 0)]
+  {
+    /* Need to force register if mem <- !reg.  */
+    if (MEM_P (operands[0]) && !REG_P (operands[1]))
+      operands[1] = force_reg (<MODE>mode, operands[1]);
+      
+    if (MEM_P (operands[0]))
+      {
+        emit_insn (gen_vse (<MODE>mode, const0_rtx, XEXP (operands[0], 0),
+                            operands[1], gen_rtx_REG (Pmode, X0_REGNUM),
+                            rvv_gen_policy ()));
+        DONE;
+      }
+    if (MEM_P (operands[1]))
+      {
+        emit_insn (gen_vle (<MODE>mode, operands[0], const0_rtx, const0_rtx,
+                            XEXP (operands[1], 0), gen_rtx_REG (Pmode, X0_REGNUM),
+                            rvv_gen_policy ()));
+        DONE;
+      }
+
+    emit_insn (gen_rtx_SET (operands[0], operands[1]));
+    DONE;
+  }
+  [(set_attr "type" "vle,vse,vcopy")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*mov<mode>_reg"
+  [(set (match_operand:VPARTIAL 0 "register_operand" "=vr")
+        (match_operand:VPARTIAL 1 "register_operand" "vr"))]
+  "TARGET_VECTOR"
+  "vmv1r.v\t%0,%1"
+  [(set_attr "type" "vcopy")
+   (set_attr "mode" "<MODE>")])
+
+;; Move pattern for mask modes.
+(define_expand "mov<mode>"
+  [(parallel [(set (match_operand:VB 0 "reg_or_mem_operand")
+                   (match_operand:VB 1 "vector_move_operand"))
+	      (clobber (scratch:SI))
+        (clobber (reg:SI VL_REGNUM))
+        (clobber (reg:SI VTYPE_REGNUM))])]
+  "TARGET_VECTOR"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  
+  if (GET_CODE (operands[1]) == CONST_VECTOR 
+    && rvv_expand_const_mask (operands[0], operands[1]))
+    DONE;
+})
+
+;; mask load/store/move.
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr,m,vr")
+        (match_operand:VB 1 "reg_or_mem_operand" "m,vr,vr"))
+        (clobber (match_scratch:SI 2 "=&r,&r,X"))
+        (clobber (reg:SI VL_REGNUM))
+        (clobber (reg:SI VTYPE_REGNUM))]
+  "TARGET_VECTOR"
+  "@
+   vlm.v\t%0,%1
+   vsm.v\t%1,%0
+   #"
+  "&& (!reload_completed || (REG_P (operands[0])
+   && REG_P (operands[1])))"
+  [(const_int 0)]
+  {
+    /* Need to force register if mem <- !reg.  */
+    if (MEM_P (operands[0]) && !REG_P (operands[1]))
+      operands[1] = force_reg (<MODE>mode, operands[1]);
+      
+    if (MEM_P (operands[0]))
+      {
+        emit_insn (gen_vsm (<MODE>mode, XEXP (operands[0], 0), operands[1],
+                            gen_rtx_REG (Pmode, X0_REGNUM),
+                            rvv_gen_policy ()));
+        DONE;
+      }
+    if (MEM_P (operands[1]))
+      {
+        emit_insn (gen_vlm (<MODE>mode, operands[0], XEXP (operands[1], 0),
+                            gen_rtx_REG (Pmode, X0_REGNUM),
+                            rvv_gen_policy ()));
+        DONE;
+      }
+
+    emit_insn (gen_rtx_SET (operands[0], operands[1]));
+    DONE;
+  }
+  [(set_attr "type" "vle,vse,vcopy")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*mov<mode>_reg"
+  [(set (match_operand:VB 0 "register_operand" "=vr")
+        (match_operand:VB 1 "register_operand" "vr"))]
+  "TARGET_VECTOR"
+  "vmv1r.v\t%0,%1"
+  [(set_attr "type" "vcopy")
+   (set_attr "mode" "<MODE>")])   
    
 ;; ===============================================================================
 ;; == Intrinsics
@@ -370,4 +543,32 @@ 
   vmerge.vxm\t%0,%3,%4,%1
   vmerge.vim\t%0,%3,%4,%1"
  [(set_attr "type" "vmerge")
+  (set_attr "mode" "<MODE>")])
+
+;; vmclr.m vd -> vmxor.mm vd,vd,vd # Clear mask register
+(define_insn "@vmclr<mode>_m"
+  [(set (match_operand:VB 0 "register_operand"        "=vr")
+    (unspec:VB
+      [(vec_duplicate:VB (const_int 0))
+       (match_operand 1 "p_reg_or_const_csr_operand"  "rK")
+       (match_operand 2 "const_int_operand")
+       (reg:SI VL_REGNUM)
+       (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vmclr.m\t%0"
+ [(set_attr "type" "vmask")
+  (set_attr "mode" "<MODE>")])
+
+;; vmset.m vd -> vmxnor.mm vd,vd,vd # Set mask register
+(define_insn "@vmset<mode>_m"
+  [(set (match_operand:VB 0 "register_operand"        "=vr")
+    (unspec:VB
+      [(vec_duplicate:VB (const_int 1))
+       (match_operand 1 "p_reg_or_const_csr_operand"  "rK")
+       (match_operand 2 "const_int_operand")
+       (reg:SI VL_REGNUM)
+       (reg:SI VTYPE_REGNUM)] UNSPEC_RVV))]
+ "TARGET_VECTOR"
+ "vmset.m\t%0"
+ [(set_attr "type" "vmask")
   (set_attr "mode" "<MODE>")])
\ No newline at end of file