RISC-V: Basic VLS code gen for RISC-V

Message ID 20230530060621.31449-1-kito.cheng@sifive.com
State Dropped
Headers
Series RISC-V: Basic VLS code gen for RISC-V |

Commit Message

Kito Cheng May 30, 2023, 6:06 a.m. UTC
  GNU vector extensions are widely used around the world, and this patch
enables them with the RISC-V vector extensions; this can help people
leverage an existing code base with RVV, and also lets them write vector
programs in a familiar way.

The idea of the VLS code gen support is to emulate VLS operations with VLA
operations of a specific length.

The key design point is that we defer the mode conversion (from VLS to VLA
mode) until after register allocation, which comes with several advantages:
- VLS patterns are much friendlier to most optimization passes, like combine.
- The register allocator can spill/restore the exact size of a VLS type
  instead of the whole register.

This is compatible with VLA vectorization.

Only support move and binary part of operation patterns.

gcc/ChangeLog:

	* config/riscv/riscv-modes.def: Introduce VLS modes.
	* config/riscv/riscv-protos.h (riscv_vector::minimal_vls_mode): New.
	(riscv_vector::vls_insn_expander): New.
	(riscv_vector::vls_mode_p): New.
	* config/riscv/riscv-v.cc (riscv_vector::minimal_vls_mode): New.
	(riscv_vector::vls_mode_p): New.
	(riscv_vector::vls_insn_expander): New.
	(riscv_vector::update_vls_mode): New.
	* config/riscv/riscv.cc (riscv_v_ext_mode_p): New.
	(riscv_v_adjust_nunits): Handle VLS type.
	(riscv_hard_regno_nregs): Ditto.
	(riscv_hard_regno_mode_ok): Ditto.
	(riscv_regmode_natural_size): Ditto.
	* config/riscv/vector-iterators.md (VLS): New.
	(VM): Handle VLS type.
	(vel): Ditto.
	* config/riscv/vector.md: Include vector-vls.md.
	* config/riscv/vector-vls.md: New file.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/rvv.exp: Add vls folder.
	* gcc.target/riscv/rvv/vls/binop-template.h: New test.
	* gcc.target/riscv/rvv/vls/binop-v.c: New test.
	* gcc.target/riscv/rvv/vls/binop-zve32x.c: New test.
	* gcc.target/riscv/rvv/vls/binop-zve64x.c: New test.
	* gcc.target/riscv/rvv/vls/move-template.h: New test.
	* gcc.target/riscv/rvv/vls/move-v.c: New test.
	* gcc.target/riscv/rvv/vls/move-zve32x.c: New test.
	* gcc.target/riscv/rvv/vls/move-zve64x.c: New test.
	* gcc.target/riscv/rvv/vls/load-store-template.h: New test.
	* gcc.target/riscv/rvv/vls/load-store-v.c: New test.
	* gcc.target/riscv/rvv/vls/load-store-zve32x.c: New test.
	* gcc.target/riscv/rvv/vls/load-store-zve64x.c: New test.
	* gcc.target/riscv/rvv/vls/vls-types.h: New test.
---
 gcc/config/riscv/riscv-modes.def              |  3 +
 gcc/config/riscv/riscv-protos.h               |  4 ++
 gcc/config/riscv/riscv-v.cc                   | 67 +++++++++++++++++++
 gcc/config/riscv/riscv.cc                     | 27 +++++++-
 gcc/config/riscv/vector-iterators.md          |  6 ++
 gcc/config/riscv/vector-vls.md                | 64 ++++++++++++++++++
 gcc/config/riscv/vector.md                    |  2 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |  4 ++
 .../gcc.target/riscv/rvv/vls/binop-template.h | 18 +++++
 .../gcc.target/riscv/rvv/vls/binop-v.c        | 18 +++++
 .../gcc.target/riscv/rvv/vls/binop-zve32x.c   | 18 +++++
 .../gcc.target/riscv/rvv/vls/binop-zve64x.c   | 18 +++++
 .../riscv/rvv/vls/load-store-template.h       |  8 +++
 .../gcc.target/riscv/rvv/vls/load-store-v.c   | 17 +++++
 .../riscv/rvv/vls/load-store-zve32x.c         | 17 +++++
 .../riscv/rvv/vls/load-store-zve64x.c         | 17 +++++
 .../gcc.target/riscv/rvv/vls/move-template.h  | 13 ++++
 .../gcc.target/riscv/rvv/vls/move-v.c         | 10 +++
 .../gcc.target/riscv/rvv/vls/move-zve32x.c    | 10 +++
 .../gcc.target/riscv/rvv/vls/move-zve64x.c    | 10 +++
 .../gcc.target/riscv/rvv/vls/vls-types.h      | 42 ++++++++++++
 21 files changed, 391 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/vector-vls.md
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
  

Comments

juzhe.zhong@rivai.ai May 30, 2023, 6:32 a.m. UTC | #1
>> /* Return true if MODE is true VLS mode.  */
>> bool
>> vls_mode_p (machine_mode mode)
>> {
>>   switch (mode)
>>     {
>>     case E_V4SImode:
>>     case E_V2DImode:
>>     case E_V8HImode:
>>     case E_V16QImode:
>>       return true;
>>     default:
>>       return false;
>>     }
>> }
To be consistent, you should put these into riscv-vector-switch.def.
It can make the function easier to extend; change it like this:
change the name to riscv_v_ext_vls_mode_p 
bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT, ...)                                          \
  case MODE##mode:                                                             \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }
  return false;
}
Then in riscv-vector-switch.def
VLS_ENTRY (V4SI...
VLS_ENTRY (V2DI..
...
In the future, we extend more VLS modes in riscv-vector-switch.def

>>(define_insn_and_split "<optab><mode>3"
>>  [(set (match_operand:VLS 0 "register_operand" "=vr")
>>	(any_int_binop_no_shift:VLS
>>	  (match_operand:VLS 1 "register_operand" "vr")
>>	  (match_operand:VLS 2 "register_operand" "vr")))]
>>  "TARGET_VECTOR"
>>  "#"
>>  "reload_completed"
>>  [(const_int 0)]
>>+{
>>  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
>>  riscv_vector::vls_insn_expander (
>>    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
>>    operands, <MODE>mode, vla_mode);
>>  DONE;
>>})
This pattern can work for the current VLS modes so far since they are within 0~31; if we add more VLS modes such as V32QImode, V64QImode,
it can't work.  I am OK with this, but I should remind you early.

>> # VLS test
>>gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vls/*.\[cS\]]] \
>>	"" $CFLAGS
Add tests with -march=rv64gcv_zvl256b to see whether your testcase can generate LMUL = mf2 vsetvli, and with -march=rv64gcv_zvl2048b to make sure your testcase will not go into the VLS modes (2048 * 1 / 8 > 128).
For the VSETVL part, I didn't see you define the attributes sew/vlmul/...ratio for the VLS modes.  I wonder how these VLS modes emit a correct VSETVL?  For example, in vector.md:
(define_attr "sew" ""
  (cond [(eq_attr "mode" "VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI,\
        VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI,\
        VNx128QI,VNx128BI,VNx2x64QI,VNx2x32QI,VNx3x32QI,VNx4x32QI,\
        VNx2x16QI,VNx3x16QI,VNx4x16QI,VNx5x16QI,VNx6x16QI,VNx7x16QI,VNx8x16QI,\
        VNx2x8QI,VNx3x8QI,VNx4x8QI,VNx5x8QI,VNx6x8QI,VNx7x8QI,VNx8x8QI,\
        VNx2x4QI,VNx3x4QI,VNx4x4QI,VNx5x4QI,VNx6x4QI,VNx7x4QI,VNx8x4QI,\
        VNx2x2QI,VNx3x2QI,VNx4x2QI,VNx5x2QI,VNx6x2QI,VNx7x2QI,VNx8x2QI,\
        VNx2x1QI,VNx3x1QI,VNx4x1QI,VNx5x1QI,VNx6x1QI,VNx7x1QI,VNx8x1QI")
   (const_int 8)
   (eq_attr "mode" "VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI,VNx64HI,\
        VNx2x32HI,VNx2x16HI,VNx3x16HI,VNx4x16HI,\
        VNx2x8HI,VNx3x8HI,VNx4x8HI,VNx5x8HI,VNx6x8HI,VNx7x8HI,VNx8x8HI,\
        VNx2x4HI,VNx3x4HI,VNx4x4HI,VNx5x4HI,VNx6x4HI,VNx7x4HI,VNx8x4HI,\
        VNx2x2HI,VNx3x2HI,VNx4x2HI,VNx5x2HI,VNx6x2HI,VNx7x2HI,VNx8x2HI,\
        VNx2x1HI,VNx3x1HI,VNx4x1HI,VNx5x1HI,VNx6x1HI,VNx7x1HI,VNx8x1HI")
   (const_int 16)
   (eq_attr "mode" "VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI,VNx32SI,\
        VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF,VNx32SF,\
        VNx2x16SI,VNx2x8SI,VNx3x8SI,VNx4x8SI,\
        VNx2x4SI,VNx3x4SI,VNx4x4SI,VNx5x4SI,VNx6x4SI,VNx7x4SI,VNx8x4SI,\
        VNx2x2SI,VNx3x2SI,VNx4x2SI,VNx5x2SI,VNx6x2SI,VNx7x2SI,VNx8x2SI,\
        VNx2x1SI,VNx3x1SI,VNx4x1SI,VNx5x1SI,VNx6x1SI,VNx7x1SI,VNx8x1SI,\
        VNx2x16SF,VNx2x8SF,VNx3x8SF,VNx4x8SF,\
        VNx2x4SF,VNx3x4SF,VNx4x4SF,VNx5x4SF,VNx6x4SF,VNx7x4SF,VNx8x4SF,\
        VNx2x2SF,VNx3x2SF,VNx4x2SF,VNx5x2SF,VNx6x2SF,VNx7x2SF,VNx8x2SF,\
        VNx2x1SF,VNx3x1SF,VNx4x1SF,VNx5x1SF,VNx6x1SF,VNx7x1SF,VNx8x1SF")
   (const_int 32)
   (eq_attr "mode" "VNx1DI,VNx2DI,VNx4DI,VNx8DI,VNx16DI,\
        VNx1DF,VNx2DF,VNx4DF,VNx8DF,VNx16DF,\
        VNx2x8DI,VNx2x4DI,VNx3x4DI,VNx4x4DI,\
        VNx2x2DI,VNx3x2DI,VNx4x2DI,VNx5x2DI,VNx6x2DI,VNx7x2DI,VNx8x2DI,\
        VNx2x1DI,VNx3x1DI,VNx4x1DI,VNx5x1DI,VNx6x1DI,VNx7x1DI,VNx8x1DI,\
        VNx2x8DF,VNx2x4DF,VNx3x4DF,VNx4x4DF,\
        VNx2x2DF,VNx3x2DF,VNx4x2DF,VNx5x2DF,VNx6x2DF,VNx7x2DF,VNx8x2DF,\
        VNx2x1DF,VNx3x1DF,VNx4x1DF,VNx5x1DF,VNx6x1DF,VNx7x1DF,VNx8x1DF")
   (const_int 64)]
  (const_int INVALID_ATTRIBUTE)))




juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-05-30 14:06
To: gcc-patches; palmer; kito.cheng; juzhe.zhong; jeffreyalaw; rdapp.gcc; pan2.li
CC: Kito Cheng
Subject: [PATCH] RISC-V: Basic VLS code gen for RISC-V
GNU vector extensions are widely used around the world, and this patch
enables them with the RISC-V vector extensions; this can help people
leverage an existing code base with RVV, and also lets them write vector
programs in a familiar way.
 
The idea of the VLS code gen support is to emulate VLS operations with VLA
operations of a specific length.
 
The key design point is that we defer the mode conversion (from VLS to VLA
mode) until after register allocation, which comes with several advantages:
- VLS patterns are much friendlier to most optimization passes, like combine.
- The register allocator can spill/restore the exact size of a VLS type
  instead of the whole register.
 
This is compatible with VLA vectorization.
 
Only support move and binary part of operation patterns.
 
gcc/ChangeLog:
 
* config/riscv/riscv-modes.def: Introduce VLS modes.
* config/riscv/riscv-protos.h (riscv_vector::minimal_vls_mode): New.
(riscv_vector::vls_insn_expander): New.
(riscv_vector::vls_mode_p): New.
* config/riscv/riscv-v.cc (riscv_vector::minimal_vls_mode): New.
(riscv_vector::vls_mode_p): New.
(riscv_vector::vls_insn_expander): New.
(riscv_vector::update_vls_mode): New.
* config/riscv/riscv.cc (riscv_v_ext_mode_p): New.
(riscv_v_adjust_nunits): Handle VLS type.
(riscv_hard_regno_nregs): Ditto.
(riscv_hard_regno_mode_ok): Ditto.
(riscv_regmode_natural_size): Ditto.
* config/riscv/vector-iterators.md (VLS): New.
(VM): Handle VLS type.
(vel): Ditto.
* config/riscv/vector.md: Include vector-vls.md.
* config/riscv/vector-vls.md: New file.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/rvv.exp: Add vls folder.
* gcc.target/riscv/rvv/vls/binop-template.h: New test.
* gcc.target/riscv/rvv/vls/binop-v.c: New test.
* gcc.target/riscv/rvv/vls/binop-zve32x.c: New test.
* gcc.target/riscv/rvv/vls/binop-zve64x.c: New test.
* gcc.target/riscv/rvv/vls/move-template.h: New test.
* gcc.target/riscv/rvv/vls/move-v.c: New test.
* gcc.target/riscv/rvv/vls/move-zve32x.c: New test.
* gcc.target/riscv/rvv/vls/move-zve64x.c: New test.
* gcc.target/riscv/rvv/vls/load-store-template.h: New test.
* gcc.target/riscv/rvv/vls/load-store-v.c: New test.
* gcc.target/riscv/rvv/vls/load-store-zve32x.c: New test.
* gcc.target/riscv/rvv/vls/load-store-zve64x.c: New test.
* gcc.target/riscv/rvv/vls/vls-types.h: New test.
---
gcc/config/riscv/riscv-modes.def              |  3 +
gcc/config/riscv/riscv-protos.h               |  4 ++
gcc/config/riscv/riscv-v.cc                   | 67 +++++++++++++++++++
gcc/config/riscv/riscv.cc                     | 27 +++++++-
gcc/config/riscv/vector-iterators.md          |  6 ++
gcc/config/riscv/vector-vls.md                | 64 ++++++++++++++++++
gcc/config/riscv/vector.md                    |  2 +
gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |  4 ++
.../gcc.target/riscv/rvv/vls/binop-template.h | 18 +++++
.../gcc.target/riscv/rvv/vls/binop-v.c        | 18 +++++
.../gcc.target/riscv/rvv/vls/binop-zve32x.c   | 18 +++++
.../gcc.target/riscv/rvv/vls/binop-zve64x.c   | 18 +++++
.../riscv/rvv/vls/load-store-template.h       |  8 +++
.../gcc.target/riscv/rvv/vls/load-store-v.c   | 17 +++++
.../riscv/rvv/vls/load-store-zve32x.c         | 17 +++++
.../riscv/rvv/vls/load-store-zve64x.c         | 17 +++++
.../gcc.target/riscv/rvv/vls/move-template.h  | 13 ++++
.../gcc.target/riscv/rvv/vls/move-v.c         | 10 +++
.../gcc.target/riscv/rvv/vls/move-zve32x.c    | 10 +++
.../gcc.target/riscv/rvv/vls/move-zve64x.c    | 10 +++
.../gcc.target/riscv/rvv/vls/vls-types.h      | 42 ++++++++++++
21 files changed, 391 insertions(+), 2 deletions(-)
create mode 100644 gcc/config/riscv/vector-vls.md
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
 
diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index 19a4f9fb3db..f897e560667 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -318,6 +318,9 @@ RVV_TUPLE_PARTIAL_MODES (6)
RVV_TUPLE_PARTIAL_MODES (7)
RVV_TUPLE_PARTIAL_MODES (8)
+/* VLS modes.  */
+VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
+
/* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can
    be 65536 for a single vector register which means the vector mode in
    GCC can be maximum = 65536 * 8 bits (LMUL=8).
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0462f96c8d5..56f714bbfb5 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -261,6 +261,10 @@ enum frm_field_enum
opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
poly_uint64);
unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
+machine_mode minimal_vla_mode (machine_mode);
+void
+vls_insn_expander (unsigned, int, rtx *, machine_mode, machine_mode);
+bool vls_mode_p (machine_mode);
}
/* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a5715bb466c..f9fb8cd9be4 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1697,4 +1697,71 @@ expand_vcond (rtx *ops)
     gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
}
+/* Return the minimal containable VLA mode for MODE.  */
+
+machine_mode
+minimal_vla_mode (machine_mode mode)
+{
+  gcc_assert (GET_MODE_NUNITS (mode).is_constant ());
+  unsigned type_size = GET_MODE_NUNITS (mode).to_constant ();
+  poly_uint64 nunits = type_size * poly_uint64 (1, 1);
+  opt_machine_mode vla_mode = get_vector_mode (GET_MODE_INNER (mode), nunits);
+  return vla_mode.else_void ();
+}
+
+/* Return true if MODE is true VLS mode.  */
+
+bool
+vls_mode_p (machine_mode mode)
+{
+  switch (mode)
+    {
+    case E_V4SImode:
+    case E_V2DImode:
+    case E_V8HImode:
+    case E_V16QImode:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Convert all OPERANDS to VLA_MODE.  */
+
+static void
+update_vls_mode (machine_mode vla_mode, unsigned n_operands, rtx *operands)
+{
+  unsigned i;
+  for (i = 0; i < n_operands; ++i)
+    {
+      switch (GET_CODE (operands[i]))
+ {
+ case REG:
+   operands[i] = gen_rtx_REG (vla_mode, REGNO (operands[i]));
+   break;
+ case MEM:
+   operands[i] = change_address (operands[i], vla_mode, NULL_RTX);
+   break;
+ default:
+   gcc_unreachable ();
+ }
+    }
+}
+
+/* Expand VLS operation to VLA pattern.  */
+
+void
+vls_insn_expander (unsigned icode, int op_num, rtx *operands,
+    machine_mode vls_mode, machine_mode vla_mode)
+{
+  update_vls_mode (vla_mode, /* n_operands */ op_num, operands);
+
+  poly_uint16 nunit = GET_MODE_NUNITS (vls_mode);
+
+  gcc_assert (nunit.is_constant ());
+
+  riscv_vector::emit_nonvlmax_insn (icode, op_num, operands,
+     GEN_INT (nunit.to_constant ()));
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3954fc07a8b..2e14f2e0d53 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1034,7 +1034,8 @@ riscv_v_ext_tuple_mode_p (machine_mode mode)
static bool
riscv_v_ext_mode_p (machine_mode mode)
{
-  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode);
+  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
+ || riscv_vector::vls_mode_p (mode);
}
/* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
@@ -1043,7 +1044,7 @@ riscv_v_ext_mode_p (machine_mode mode)
poly_int64
riscv_v_adjust_nunits (machine_mode mode, int scale)
{
-  if (riscv_v_ext_mode_p (mode))
+  if (riscv_v_ext_mode_p (mode) && !riscv_vector::vls_mode_p (mode))
     return riscv_vector_chunks * scale;
   return scale;
}
@@ -6059,6 +6060,22 @@ riscv_register_move_cost (machine_mode mode,
static unsigned int
riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
+  if (riscv_vector::vls_mode_p (mode))
+    {
+      if (TARGET_MIN_VLEN)
+ {
+   unsigned min_byte_per_vector_register = TARGET_MIN_VLEN / 8;
+   unsigned mode_size = GET_MODE_SIZE (mode).to_constant ();
+
+   if (min_byte_per_vector_register >= mode_size)
+     return 1;
+
+   return mode_size / min_byte_per_vector_register;
+ }
+      else
+ return 1;
+    }
+
   if (riscv_v_ext_vector_mode_p (mode))
     {
       /* Handle fractional LMUL, it only occupy part of vector register but
@@ -6148,6 +6165,10 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
but for mask vector register, register numbers can be any number. */
       int lmul = 1;
       machine_mode rvv_mode = mode;
+
+      if (riscv_vector::vls_mode_p (mode))
+ rvv_mode = riscv_vector::minimal_vla_mode (mode);
+
       if (riscv_v_ext_tuple_mode_p (rvv_mode))
rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
       poly_int64 size = GET_MODE_SIZE (rvv_mode);
@@ -7223,6 +7244,8 @@ riscv_vector_alignment (const_tree type)
poly_uint64
riscv_regmode_natural_size (machine_mode mode)
{
+  if (riscv_vector::vls_mode_p (mode))
+    return GET_MODE_SIZE (mode);
   /* The natural size for RVV data modes is one RVV data vector,
      and similarly for predicates.  We can't independently modify
      anything smaller than that.  */
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 70fb5b80b1b..650f2651c7d 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -84,6 +84,10 @@ (define_c_enum "unspec" [
   UNSPEC_MODIFY_VL
])
+(define_mode_iterator VLS [
+  V2DI V4SI V8HI V16QI
+])
+
(define_mode_iterator V [
   (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
   (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
@@ -976,6 +980,7 @@ (define_mode_attr VM [
   (VNx2x4DF "VNx4BI") (VNx3x4DF "VNx4BI") (VNx4x4DF "VNx4BI")
   (VNx2x2DF "VNx2BI") (VNx3x2DF "VNx2BI") (VNx4x2DF "VNx2BI") (VNx5x2DF "VNx2BI") (VNx6x2DF "VNx2BI") (VNx7x2DF "VNx2BI") (VNx8x2DF "VNx2BI")
   (VNx2x1DF "VNx1BI") (VNx3x1DF "VNx1BI") (VNx4x1DF "VNx1BI") (VNx5x1DF "VNx1BI") (VNx6x1DF "VNx1BI") (VNx7x1DF "VNx1BI") (VNx8x1DF "VNx1BI")
+  (V2DI "V2BI") (V4SI "V4BI") (V8HI "V8BI") (V16QI "V16BI")
])
(define_mode_attr vm [
@@ -1003,6 +1008,7 @@ (define_mode_attr vel [
   (VNx1DI "di") (VNx2DI "di") (VNx4DI "di") (VNx8DI "di") (VNx16DI "di")
   (VNx1SF "sf") (VNx2SF "sf") (VNx4SF "sf") (VNx8SF "sf") (VNx16SF "sf") (VNx32SF "sf")
   (VNx1DF "df") (VNx2DF "df") (VNx4DF "df") (VNx8DF "df") (VNx16DF "df")
+  (V2DI "di") (V4SI "si") (V8HI "hi") (V16QI "qi")
])
(define_mode_attr VSUBEL [
diff --git a/gcc/config/riscv/vector-vls.md b/gcc/config/riscv/vector-vls.md
new file mode 100644
index 00000000000..af7e7a6c726
--- /dev/null
+++ b/gcc/config/riscv/vector-vls.md
@@ -0,0 +1,64 @@
+;; Machine description for vector length specific type operation with
+;; RISC-V 'V' Extension for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VLS 0 "nonimmediate_operand")
+ (match_operand:VLS 1 "vector_move_operand"))]
+  "TARGET_VECTOR"
+{
+  // TODO: Only allow register and memory now, we should allow legal
+  //       vector_const too.
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[1] = force_reg (GET_MODE(operands[1]), operands[1]);
+})
+
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:VLS 0 "nonimmediate_operand" "=vr,vr, m,vr")
+ (match_operand:VLS 1 "vector_move_operand"  " vr, m,vr,vi"))]
+  "TARGET_VECTOR &&
+   (register_operand (operands[0], <MODE>mode)
+    || register_operand (operands[1], <MODE>mode))"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
+  riscv_vector::vls_insn_expander (
+    code_for_pred_mov (vla_mode), riscv_vector::RVV_UNOP, operands,
+    <MODE>mode, vla_mode);
+  DONE;
+})
+
+(define_insn_and_split "<optab><mode>3"
+  [(set (match_operand:VLS 0 "register_operand" "=vr")
+ (any_int_binop_no_shift:VLS
+   (match_operand:VLS 1 "register_operand" "vr")
+   (match_operand:VLS 2 "register_operand" "vr")))]
+  "TARGET_VECTOR"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
+  riscv_vector::vls_insn_expander (
+    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
+    operands, <MODE>mode, vla_mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index cd696da5d89..e3fd0807b22 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -25,6 +25,7 @@
;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
;; - Auto-vectorization (autovec.md)
;; - Combine optimization (TBD)
+;; - VLS patterns (vector-vls.md)
(include "vector-iterators.md")
@@ -8407,3 +8408,4 @@ (define_split
)
(include "autovec.md")
+(include "vector-vls.md")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index bf03570b9cc..f6c56a63ada 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -77,5 +77,9 @@ foreach op $AUTOVEC_TEST_OPTS {
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
"-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS
+# VLS test
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vls/*.\[cS\]]] \
+ "" $CFLAGS
+
# All done.
dg-finish
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
new file mode 100644
index 00000000000..0114c124646
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
@@ -0,0 +1,18 @@
+#include "vls-types.h"
+#define __BINOP(NAME, OP, VLS_TYPE, SCALAR_TYPE) \
+void binop_##NAME##VLS_TYPE \
+  (VLS_TYPE *src1, VLS_TYPE *src2, VLS_TYPE *dst) \
+{ \
+    *dst = *src1 OP *src2; \
+}
+
+#define BINOP(VLS_TYPE, SCALAR_TYPE) \
+__BINOP (ADD, +, VLS_TYPE, SCALAR_TYPE) \
+__BINOP (MUL, *, VLS_TYPE, SCALAR_TYPE) \
+__BINOP (DIV, /, VLS_TYPE, SCALAR_TYPE) \
+__BINOP (MOD, %, VLS_TYPE, SCALAR_TYPE) \
+__BINOP (AND, &, VLS_TYPE, SCALAR_TYPE) \
+__BINOP (IOR, |, VLS_TYPE, SCALAR_TYPE) \
+__BINOP (XOR, ^, VLS_TYPE, SCALAR_TYPE)
+
+FOR_EACH_VLS_TYPE(BINOP)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
new file mode 100644
index 00000000000..78c1a19cfbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
+
+#include "binop-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
+/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
new file mode 100644
index 00000000000..bca56ba32a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
+
+#include "binop-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
+/* { dg-final { scan-assembler-times {vadd\.vv} 6} } */
+/* { dg-final { scan-assembler-times {vmul\.vv} 6} } */
+/* { dg-final { scan-assembler-times {vdiv\.vv} 3} } */
+/* { dg-final { scan-assembler-times {vdivu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {vrem\.vv} 3} } */
+/* { dg-final { scan-assembler-times {vremu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {vand\.vv} 6} } */
+/* { dg-final { scan-assembler-times {vor\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv} 6} } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
new file mode 100644
index 00000000000..45dcad12a93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
+
+#include "binop-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
+/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
new file mode 100644
index 00000000000..9ea0c7cb5dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
@@ -0,0 +1,8 @@
+#include "vls-types.h"
+#define LOAD_STORE(VLS_TYPE, SCALAR_TYPE) \
+void load_store_##VLS_TYPE (VLS_TYPE *src, VLS_TYPE *dst) \
+{ \
+    *dst = *src; \
+}
+
+FOR_EACH_VLS_TYPE(LOAD_STORE)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
new file mode 100644
index 00000000000..b8adcea70d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
+
+#include "load-store-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
+/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
new file mode 100644
index 00000000000..ef3426d00a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
+
+#include "load-store-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
+/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-not {vle64\.v} } } */
+/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
+/* { dg-final { scan-assembler-not {vse64\.v} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
new file mode 100644
index 00000000000..e03220d89f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
+
+#include "load-store-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
+/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
new file mode 100644
index 00000000000..e328a42204c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
@@ -0,0 +1,13 @@
+#include "vls-types.h"
+
+#define MOVE(VLS_TYPE, SCALAR_TYPE) \
+void move_##VLS_TYPE () \
+{ \
+    register VLS_TYPE src##VLS_TYPE __asm__ ("v0"); \
+    register VLS_TYPE dst##VLS_TYPE __asm__ ("v8"); \
+    __asm__ volatile ("#def" : "=vr"(src##VLS_TYPE)); \
+    dst##VLS_TYPE = src##VLS_TYPE; \
+    __asm__ volatile ("#use" : : "vr"(dst##VLS_TYPE)); \
+}
+
+FOR_EACH_VLS_TYPE(MOVE)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
new file mode 100644
index 00000000000..91c89df098e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
+
+#include "move-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
new file mode 100644
index 00000000000..175986edf15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
+
+#include "move-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.v} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
new file mode 100644
index 00000000000..2e574b1f3ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
+
+#include "move-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
new file mode 100644
index 00000000000..302823b583f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
@@ -0,0 +1,42 @@
+#ifndef __VLS_TYPE_H
+#define __VLS_TYPE_H
+#include <stdint-gcc.h>
+
+typedef  int8_t int8x16_t __attribute__ ((vector_size (16)));
+typedef int16_t int16x8_t __attribute__ ((vector_size (16)));
+typedef int32_t int32x4_t __attribute__ ((vector_size (16)));
+typedef int64_t int64x2_t __attribute__ ((vector_size (16)));
+
+typedef  uint8_t uint8x16_t __attribute__ ((vector_size (16)));
+typedef uint16_t uint16x8_t __attribute__ ((vector_size (16)));
+typedef uint32_t uint32x4_t __attribute__ ((vector_size (16)));
+typedef uint64_t uint64x2_t __attribute__ ((vector_size (16)));
+
+
+#if __riscv_v_elen == 32
+
+#define FOR_EACH_VLS_TYPE(FUNC) \
+    FUNC (int8x16_t, int8_t) \
+    FUNC (int16x8_t, int16_t) \
+    FUNC (int32x4_t, int32_t) \
+    FUNC (uint8x16_t, uint8_t) \
+    FUNC (uint16x8_t, uint16_t) \
+    FUNC (uint32x4_t, uint32_t)
+
+#elif __riscv_v_elen == 64
+
+#define FOR_EACH_VLS_TYPE(FUNC) \
+    FUNC (int8x16_t, int8_t) \
+    FUNC (int16x8_t, int16_t) \
+    FUNC (int32x4_t, int32_t) \
+    FUNC (int64x2_t, int64_t) \
+    FUNC (uint8x16_t, uint8_t) \
+    FUNC (uint16x8_t, uint16_t) \
+    FUNC (uint32x4_t, uint32_t) \
+    FUNC (uint64x2_t, uint64_t)
+
+#else
+#error "zve* or v extension is required."
+#endif
+
+#endif
-- 
2.40.1
  
Kito Cheng May 30, 2023, 6:51 a.m. UTC | #2
> >> /* Return true if MODE is true VLS mode.  */
> >> bool
> >> vls_mode_p (machine_mode mode)
> >> {
> >>   switch (mode)
> >>     {
> >>     case E_V4SImode:
> >>     case E_V2DImode:
> >>     case E_V8HImode:
> >>     case E_V16QImode:
> >>       return true;
> >>     default:
> >>       return false;
> >>     }
> >> }
>
> To be consistent, you should put these into riscv-vector-switching.def.
> It can make the function easier extend,change it like this:
> change name into riscv_v_ext_vls_mode_p
>
> bool
> riscv_v_ext_vls_mode_p (machine_mode mode)
> {
> #define VLS_ENTRY(MODE, REQUIREMENT, ...)                                          \
>   case MODE##mode:                                                             \
>     return REQUIREMENT;
>   switch (mode)
>     {
> #include "riscv-vector-switch.def"
>     default:
>       return false;
>     }
>   return false;
> }
>
> Then in riscv-vector-switch.def
> VLS_ENTRY (V4SI...
> VLS_ENTRY (V2DI..
> ...
> In the future, we extend more VLS modes in riscv-vector-switch.def

Good point, we should make this more consistent :)

> >>(define_insn_and_split "<optab><mode>3"
> >>  [(set (match_operand:VLS 0 "register_operand" "=vr")
> >> (any_int_binop_no_shift:VLS
> >>  (match_operand:VLS 1 "register_operand" "vr")
> >>  (match_operand:VLS 2 "register_operand" "vr")))]
> >>  "TARGET_VECTOR"
> >>  "#"
> >>  "reload_completed"
> >>  [(const_int 0)]
> >>+{
> >>  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
> >>  riscv_vector::vls_insn_expander (
> >>    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
> >>    operands, <MODE>mode, vla_mode);
> >>  DONE;
> >>})
>
> This pattern can work for current VLS modes so far since they are within 0~31, if we add more VLSmodes such as V32QImode, V64QImode,
> it can't work . I am ok with this, but I should remind you early.

Yeah, I know the problem, my thought is we will have another set of
VLS patterns for those NUNITS >= 32, and require one clobber with GPR.

> Add tests with -march=rv64gcv_zvl256b to see whether your testcase can generate LMUL = mf2 vsetvli
>
> and -march=rv64gcv_zvl2048 make sure your testcase will not go into the VLS modes (2048 * 1 / 8 > 128)

I guess I should make a loop to test those combinations instead of
separate files with different options.

>
>
> For VSETVL part, I didn't see you define attribute sew/vlmul ...ratio for VLS modes.
>
> I wonder how these VLS modes emit correct VSETVL?

That's the magic I made here, I split the pattern after RA, but before
vsetvli, and convert all operands to VLA mode and use VLA pattern, so
that we don't need to modify any line of vsetvli stuff.
  
juzhe.zhong@rivai.ai May 30, 2023, 6:59 a.m. UTC | #3
Ok.  LGTM as long as you change the patch as I suggested.

Thanks.


juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-05-30 14:51
To: juzhe.zhong@rivai.ai
CC: gcc-patches; palmer; kito.cheng; jeffreyalaw; Robin Dapp; pan2.li
Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
> >> /* Return true if MODE is true VLS mode.  */
> >> bool
> >> vls_mode_p (machine_mode mode)
> >> {
> >>   switch (mode)
> >>     {
> >>     case E_V4SImode:
> >>     case E_V2DImode:
> >>     case E_V8HImode:
> >>     case E_V16QImode:
> >>       return true;
> >>     default:
> >>       return false;
> >>     }
> >> }
>
> To be consistent, you should put these into riscv-vector-switching.def.
> It can make the function easier extend,change it like this:
> change name into riscv_v_ext_vls_mode_p
>
> bool
> riscv_v_ext_vls_mode_p (machine_mode mode)
> {
> #define VLS_ENTRY(MODE, REQUIREMENT, ...)                                          \
>   case MODE##mode:                                                             \
>     return REQUIREMENT;
>   switch (mode)
>     {
> #include "riscv-vector-switch.def"
>     default:
>       return false;
>     }
>   return false;
> }
>
> Then in riscv-vector-switch.def
> VLS_ENTRY (V4SI...
> VLS_ENTRY (V2DI..
> ...
> In the future, we extend more VLS modes in riscv-vector-switch.def
 
Good point, we should make this more consistent :)
 
> >>(define_insn_and_split "<optab><mode>3"
> >>  [(set (match_operand:VLS 0 "register_operand" "=vr")
> >> (any_int_binop_no_shift:VLS
> >>  (match_operand:VLS 1 "register_operand" "vr")
> >>  (match_operand:VLS 2 "register_operand" "vr")))]
> >>  "TARGET_VECTOR"
> >>  "#"
> >>  "reload_completed"
> >>  [(const_int 0)]
> >>+{
> >>  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
> >>  riscv_vector::vls_insn_expander (
> >>    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
> >>    operands, <MODE>mode, vla_mode);
> >>  DONE;
> >>})
>
> This pattern can work for current VLS modes so far since they are within 0~31, if we add more VLSmodes such as V32QImode, V64QImode,
> it can't work . I am ok with this, but I should remind you early.
 
Yeah, I Know the problem, my thought is we will have another set of
VLS patterns for those NUNITS >= 32, and require one clobber with GPR.
 
> Add tests with -march=rv64gcv_zvl256b to see whether your testcase can generate LMUL = mf2 vsetvli
>
> and -march=rv64gcv_zvl2048 make sure your testcase will not go into the VLS modes (2048 * 1 / 8 > 128)
 
I guess I should make a loop to test those combinations instead of
spearted file but with different options.
 
>
>
> For VSETVL part, I didn't see you define attribute sew/vlmul ...ratio for VLS modes.
>
> I wonder how these VLS modes emit correct VSETVL?
 
That's the magic I made here, I split the pattern after RA, but before
vsetvli, and convert all operands to VLA mode and use VLA pattern, so
that we don't need to modify any line of vsetvli stuff.
  
Richard Biener May 30, 2023, 7:13 a.m. UTC | #4
On Tue, May 30, 2023 at 8:07 AM Kito Cheng via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> GNU vector extensions is widly used around this world, and this patch
> enable that with RISC-V vector extensions, this can help people
> leverage existing code base with RVV, and also can write vector programs in a
> familiar way.
>
> The idea of VLS code gen support is emulate VLS operation by VLA operation with
> specific length.

In the patch you added fixed 16 bytes vector modes, correct?  I've
never looked at
how ARM deals with the GNU vector extensions but I suppose they get mapped
to NEON and not SVE so basically behave the same way here.

But I do wonder about the efficiency for RVV where there doesn't exist a
complementary fixed-length ISA.  Shouldn't vector lowering
(tree-vect-generic.cc)
be enhanced to support lowering fixed-length vectors to variable length ones
with (variable) fixed length instead?  From your patch I second-guess the RVV
specification requires 16 byte vectors to be available (or will your
patch split the
insns?) but ideally the user would be able to specify -mrvv-size=32 for an
implementation with 32 byte vectors and then vector lowering would make use
of vectors up to 32 bytes?

Also vector lowering will split smaller vectors not equal to the fixed size to
scalars unless you add all fixed length modes smaller than 16 bytes as well.

> Key design point is we defer the mode conversion (From VLS to VLA mode) after
> register allocation, it come with several advantages:
> - VLS pattern is much friendly for most optimization pass like combine.
> - Register allocator can spill/restore exact size of VLS type instead of
>   whole register.
>
> This is compatible with VLA vectorization.
>
> Only support move and binary part of operation patterns.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-modes.def: Introduce VLS modes.
>         * config/riscv/riscv-protos.h (riscv_vector::minimal_vls_mode): New.
>         (riscv_vector::vls_insn_expander): New.
>         (riscv_vector::vls_mode_p): New.
>         * config/riscv/riscv-v.cc (riscv_vector::minimal_vls_mode): New.
>         (riscv_vector::vls_mode_p): New.
>         (riscv_vector::vls_insn_expander): New.
>         (riscv_vector::update_vls_mode): New.
>         * config/riscv/riscv.cc (riscv_v_ext_mode_p): New.
>         (riscv_v_adjust_nunits): Handle VLS type.
>         (riscv_hard_regno_nregs): Ditto.
>         (riscv_hard_regno_mode_ok): Ditto.
>         (riscv_regmode_natural_size): Ditto.
>         * config/riscv/vector-iterators.md (VLS): New.
>         (VM): Handle VLS type.
>         (vel): Ditto.
>         * config/riscv/vector.md: Include vector-vls.md.
>         * config/riscv/vector-vls.md: New file.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/rvv.exp: Add vls folder.
>         * gcc.target/riscv/rvv/vls/binop-template.h: New test.
>         * gcc.target/riscv/rvv/vls/binop-v.c: New test.
>         * gcc.target/riscv/rvv/vls/binop-zve32x.c: New test.
>         * gcc.target/riscv/rvv/vls/binop-zve64x.c: New test.
>         * gcc.target/riscv/rvv/vls/move-template.h: New test.
>         * gcc.target/riscv/rvv/vls/move-v.c: New test.
>         * gcc.target/riscv/rvv/vls/move-zve32x.c: New test.
>         * gcc.target/riscv/rvv/vls/move-zve64x.c: New test.
>         * gcc.target/riscv/rvv/vls/load-store-template.h: New test.
>         * gcc.target/riscv/rvv/vls/load-store-v.c: New test.
>         * gcc.target/riscv/rvv/vls/load-store-zve32x.c: New test.
>         * gcc.target/riscv/rvv/vls/load-store-zve64x.c: New test.
>         * gcc.target/riscv/rvv/vls/vls-types.h: New test.
> ---
>  gcc/config/riscv/riscv-modes.def              |  3 +
>  gcc/config/riscv/riscv-protos.h               |  4 ++
>  gcc/config/riscv/riscv-v.cc                   | 67 +++++++++++++++++++
>  gcc/config/riscv/riscv.cc                     | 27 +++++++-
>  gcc/config/riscv/vector-iterators.md          |  6 ++
>  gcc/config/riscv/vector-vls.md                | 64 ++++++++++++++++++
>  gcc/config/riscv/vector.md                    |  2 +
>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |  4 ++
>  .../gcc.target/riscv/rvv/vls/binop-template.h | 18 +++++
>  .../gcc.target/riscv/rvv/vls/binop-v.c        | 18 +++++
>  .../gcc.target/riscv/rvv/vls/binop-zve32x.c   | 18 +++++
>  .../gcc.target/riscv/rvv/vls/binop-zve64x.c   | 18 +++++
>  .../riscv/rvv/vls/load-store-template.h       |  8 +++
>  .../gcc.target/riscv/rvv/vls/load-store-v.c   | 17 +++++
>  .../riscv/rvv/vls/load-store-zve32x.c         | 17 +++++
>  .../riscv/rvv/vls/load-store-zve64x.c         | 17 +++++
>  .../gcc.target/riscv/rvv/vls/move-template.h  | 13 ++++
>  .../gcc.target/riscv/rvv/vls/move-v.c         | 10 +++
>  .../gcc.target/riscv/rvv/vls/move-zve32x.c    | 10 +++
>  .../gcc.target/riscv/rvv/vls/move-zve64x.c    | 10 +++
>  .../gcc.target/riscv/rvv/vls/vls-types.h      | 42 ++++++++++++
>  21 files changed, 391 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/config/riscv/vector-vls.md
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
>
> diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
> index 19a4f9fb3db..f897e560667 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -318,6 +318,9 @@ RVV_TUPLE_PARTIAL_MODES (6)
>  RVV_TUPLE_PARTIAL_MODES (7)
>  RVV_TUPLE_PARTIAL_MODES (8)
>
> +/* VLS modes.  */
> +VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
> +
>  /* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can
>     be 65536 for a single vector register which means the vector mode in
>     GCC can be maximum = 65536 * 8 bits (LMUL=8).
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 0462f96c8d5..56f714bbfb5 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -261,6 +261,10 @@ enum frm_field_enum
>  opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
>                                          poly_uint64);
>  unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
> +machine_mode minimal_vla_mode (machine_mode);
> +void
> +vls_insn_expander (unsigned, int, rtx *, machine_mode, machine_mode);
> +bool vls_mode_p (machine_mode);
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index a5715bb466c..f9fb8cd9be4 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -1697,4 +1697,71 @@ expand_vcond (rtx *ops)
>      gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
>  }
>
> +/* Return the minimal containable VLA mode for MODE.  */
> +
> +machine_mode
> +minimal_vla_mode (machine_mode mode)
> +{
> +  gcc_assert (GET_MODE_NUNITS (mode).is_constant ());
> +  unsigned type_size = GET_MODE_NUNITS (mode).to_constant ();
> +  poly_uint64 nunits = type_size * poly_uint64 (1, 1);
> +  opt_machine_mode vla_mode = get_vector_mode (GET_MODE_INNER (mode), nunits);
> +  return vla_mode.else_void ();
> +}
> +
> +/* Return true if MODE is true VLS mode.  */
> +
> +bool
> +vls_mode_p (machine_mode mode)
> +{
> +  switch (mode)
> +    {
> +    case E_V4SImode:
> +    case E_V2DImode:
> +    case E_V8HImode:
> +    case E_V16QImode:
> +      return true;
> +    default:
> +      return false;
> +    }
> +}
> +
> +/* Convert all OPERANDS to VLA_MODE.  */
> +
> +static void
> +update_vls_mode (machine_mode vla_mode, unsigned n_operands, rtx *operands)
> +{
> +  unsigned i;
> +  for (i = 0; i < n_operands; ++i)
> +    {
> +      switch (GET_CODE (operands[i]))
> +       {
> +       case REG:
> +         operands[i] = gen_rtx_REG (vla_mode, REGNO (operands[i]));
> +         break;
> +       case MEM:
> +         operands[i] = change_address (operands[i], vla_mode, NULL_RTX);
> +         break;
> +       default:
> +         gcc_unreachable ();
> +       }
> +    }
> +}
> +
> +/* Expand VLS operation to VLA pattern.  */
> +
> +void
> +vls_insn_expander (unsigned icode, int op_num, rtx *operands,
> +                  machine_mode vls_mode, machine_mode vla_mode)
> +{
> +  update_vls_mode (vla_mode, /* n_operands */ op_num, operands);
> +
> +  poly_uint16 nunit = GET_MODE_NUNITS (vls_mode);
> +
> +  gcc_assert (nunit.is_constant ());
> +
> +  riscv_vector::emit_nonvlmax_insn (icode, op_num, operands,
> +                                   GEN_INT (nunit.to_constant ()));
> +}
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 3954fc07a8b..2e14f2e0d53 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -1034,7 +1034,8 @@ riscv_v_ext_tuple_mode_p (machine_mode mode)
>  static bool
>  riscv_v_ext_mode_p (machine_mode mode)
>  {
> -  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode);
> +  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
> +        || riscv_vector::vls_mode_p (mode);
>  }
>
>  /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
> @@ -1043,7 +1044,7 @@ riscv_v_ext_mode_p (machine_mode mode)
>  poly_int64
>  riscv_v_adjust_nunits (machine_mode mode, int scale)
>  {
> -  if (riscv_v_ext_mode_p (mode))
> +  if (riscv_v_ext_mode_p (mode) && !riscv_vector::vls_mode_p (mode))
>      return riscv_vector_chunks * scale;
>    return scale;
>  }
> @@ -6059,6 +6060,22 @@ riscv_register_move_cost (machine_mode mode,
>  static unsigned int
>  riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
>  {
> +  if (riscv_vector::vls_mode_p (mode))
> +    {
> +      if (TARGET_MIN_VLEN)
> +       {
> +         unsigned min_byte_per_vector_register = TARGET_MIN_VLEN / 8;
> +         unsigned mode_size = GET_MODE_SIZE (mode).to_constant ();
> +
> +         if (min_byte_per_vector_register >= mode_size)
> +           return 1;
> +
> +         return mode_size / min_byte_per_vector_register;
> +       }
> +      else
> +       return 1;
> +    }
> +
>    if (riscv_v_ext_vector_mode_p (mode))
>      {
>        /* Handle fractional LMUL, it only occupy part of vector register but
> @@ -6148,6 +6165,10 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
>          but for mask vector register, register numbers can be any number. */
>        int lmul = 1;
>        machine_mode rvv_mode = mode;
> +
> +      if (riscv_vector::vls_mode_p (mode))
> +       rvv_mode = riscv_vector::minimal_vla_mode (mode);
> +
>        if (riscv_v_ext_tuple_mode_p (rvv_mode))
>         rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
>        poly_int64 size = GET_MODE_SIZE (rvv_mode);
> @@ -7223,6 +7244,8 @@ riscv_vector_alignment (const_tree type)
>  poly_uint64
>  riscv_regmode_natural_size (machine_mode mode)
>  {
> +  if (riscv_vector::vls_mode_p (mode))
> +    return GET_MODE_SIZE (mode);
>    /* The natural size for RVV data modes is one RVV data vector,
>       and similarly for predicates.  We can't independently modify
>       anything smaller than that.  */
> diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
> index 70fb5b80b1b..650f2651c7d 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -84,6 +84,10 @@ (define_c_enum "unspec" [
>    UNSPEC_MODIFY_VL
>  ])
>
> +(define_mode_iterator VLS [
> +  V2DI V4SI V8HI V16QI
> +])
> +
>  (define_mode_iterator V [
>    (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
>    (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
> @@ -976,6 +980,7 @@ (define_mode_attr VM [
>    (VNx2x4DF "VNx4BI") (VNx3x4DF "VNx4BI") (VNx4x4DF "VNx4BI")
>    (VNx2x2DF "VNx2BI") (VNx3x2DF "VNx2BI") (VNx4x2DF "VNx2BI") (VNx5x2DF "VNx2BI") (VNx6x2DF "VNx2BI") (VNx7x2DF "VNx2BI") (VNx8x2DF "VNx2BI")
>    (VNx2x1DF "VNx1BI") (VNx3x1DF "VNx1BI") (VNx4x1DF "VNx1BI") (VNx5x1DF "VNx1BI") (VNx6x1DF "VNx1BI") (VNx7x1DF "VNx1BI") (VNx8x1DF "VNx1BI")
> +  (V2DI "V2BI") (V4SI "V4BI") (V8HI "V8BI") (V16QI "V16BI")
>  ])
>
>  (define_mode_attr vm [
> @@ -1003,6 +1008,7 @@ (define_mode_attr vel [
>    (VNx1DI "di") (VNx2DI "di") (VNx4DI "di") (VNx8DI "di") (VNx16DI "di")
>    (VNx1SF "sf") (VNx2SF "sf") (VNx4SF "sf") (VNx8SF "sf") (VNx16SF "sf") (VNx32SF "sf")
>    (VNx1DF "df") (VNx2DF "df") (VNx4DF "df") (VNx8DF "df") (VNx16DF "df")
> +  (V2DI "di") (V4SI "si") (V8HI "hi") (V16QI "qi")
>  ])
>
>  (define_mode_attr VSUBEL [
> diff --git a/gcc/config/riscv/vector-vls.md b/gcc/config/riscv/vector-vls.md
> new file mode 100644
> index 00000000000..af7e7a6c726
> --- /dev/null
> +++ b/gcc/config/riscv/vector-vls.md
> @@ -0,0 +1,64 @@
> +;; Machine description for vector length specific type operation with
> +;; RISC-V 'V' Extension for GNU compiler.
> +;; Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_expand "mov<mode>"
> +  [(set (match_operand:VLS 0 "nonimmediate_operand")
> +       (match_operand:VLS 1 "vector_move_operand"))]
> +  "TARGET_VECTOR"
> +{
> +  // TODO: Only allow register and memory now, we should allow legal
> +  //       vector_const too.
> +  if (MEM_P (operands[0]) && MEM_P (operands[1]))
> +    operands[1] = force_reg (GET_MODE(operands[1]), operands[1]);
> +})
> +
> +(define_insn_and_split "*mov<mode>"
> +  [(set (match_operand:VLS 0 "nonimmediate_operand" "=vr,vr, m,vr")
> +       (match_operand:VLS 1 "vector_move_operand"  " vr, m,vr,vi"))]
> +  "TARGET_VECTOR &&
> +   (register_operand (operands[0], <MODE>mode)
> +    || register_operand (operands[1], <MODE>mode))"
> +  "#"
> +  "reload_completed"
> +  [(const_int 0)]
> +{
> +  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
> +  riscv_vector::vls_insn_expander (
> +    code_for_pred_mov (vla_mode), riscv_vector::RVV_UNOP, operands,
> +    <MODE>mode, vla_mode);
> +  DONE;
> +})
> +
> +(define_insn_and_split "<optab><mode>3"
> +  [(set (match_operand:VLS 0 "register_operand" "=vr")
> +       (any_int_binop_no_shift:VLS
> +         (match_operand:VLS 1 "register_operand" "vr")
> +         (match_operand:VLS 2 "register_operand" "vr")))]
> +  "TARGET_VECTOR"
> +  "#"
> +  "reload_completed"
> +  [(const_int 0)]
> +{
> +  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
> +  riscv_vector::vls_insn_expander (
> +    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
> +    operands, <MODE>mode, vla_mode);
> +  DONE;
> +})
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index cd696da5d89..e3fd0807b22 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -25,6 +25,7 @@
>  ;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
>  ;; - Auto-vectorization (autovec.md)
>  ;; - Combine optimization (TBD)
> +;; - VLS patterns (vector-vls.md)
>
>  (include "vector-iterators.md")
>
> @@ -8407,3 +8408,4 @@ (define_split
>  )
>
>  (include "autovec.md")
> +(include "vector-vls.md")
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> index bf03570b9cc..f6c56a63ada 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> @@ -77,5 +77,9 @@ foreach op $AUTOVEC_TEST_OPTS {
>  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
>         "-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS
>
> +# VLS test
> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vls/*.\[cS\]]] \
> +       "" $CFLAGS
> +
>  # All done.
>  dg-finish
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
> new file mode 100644
> index 00000000000..0114c124646
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
> @@ -0,0 +1,18 @@
> +#include "vls-types.h"
> +#define __BINOP(NAME, OP, VLS_TYPE, SCALAR_TYPE)               \
> +void binop_##NAME##VLS_TYPE                                    \
> +  (VLS_TYPE *src1, VLS_TYPE *src2, VLS_TYPE *dst)              \
> +{                                                              \
> +    *dst = *src1 OP *src2;                                     \
> +}
> +
> +#define BINOP(VLS_TYPE, SCALAR_TYPE)                           \
> +__BINOP (ADD, +, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (MUL, *, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (DIV, /, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (MOD, %, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (AND, &, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (IOR, |, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (XOR, ^, VLS_TYPE, SCALAR_TYPE)
> +
> +FOR_EACH_VLS_TYPE(BINOP)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
> new file mode 100644
> index 00000000000..78c1a19cfbf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
> +
> +#include "binop-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
> +/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
> +/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
> new file mode 100644
> index 00000000000..bca56ba32a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
> +
> +#include "binop-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
> +/* { dg-final { scan-assembler-times {vadd\.vv} 6} } */
> +/* { dg-final { scan-assembler-times {vmul\.vv} 6} } */
> +/* { dg-final { scan-assembler-times {vdiv\.vv} 3} } */
> +/* { dg-final { scan-assembler-times {vdivu\.vv} 3 } } */
> +/* { dg-final { scan-assembler-times {vrem\.vv} 3} } */
> +/* { dg-final { scan-assembler-times {vremu\.vv} 3 } } */
> +/* { dg-final { scan-assembler-times {vand\.vv} 6} } */
> +/* { dg-final { scan-assembler-times {vor\.vv} 6 } } */
> +/* { dg-final { scan-assembler-times {vxor\.vv} 6} } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
> new file mode 100644
> index 00000000000..45dcad12a93
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
> +
> +#include "binop-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
> +/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
> +/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
> new file mode 100644
> index 00000000000..9ea0c7cb5dc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
> @@ -0,0 +1,8 @@
> +#include "vls-types.h"
> +#define LOAD_STORE(VLS_TYPE, SCALAR_TYPE)                      \
> +void load_store_##VLS_TYPE (VLS_TYPE *src, VLS_TYPE *dst)      \
> +{                                                              \
> +    *dst = *src;                                               \
> +}
> +
> +FOR_EACH_VLS_TYPE(LOAD_STORE)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
> new file mode 100644
> index 00000000000..b8adcea70d6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
> +
> +#include "load-store-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
> +/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
> new file mode 100644
> index 00000000000..ef3426d00a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
> +
> +#include "load-store-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
> +/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
> +/* { dg-final { scan-assembler-not {vle64\.v} } } */
> +/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
> +/* { dg-final { scan-assembler-not {vse64\.v} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
> new file mode 100644
> index 00000000000..e03220d89f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
> +
> +#include "load-store-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
> +/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
> new file mode 100644
> index 00000000000..e328a42204c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
> @@ -0,0 +1,13 @@
> +#include "vls-types.h"
> +
> +#define MOVE(VLS_TYPE, SCALAR_TYPE)                            \
> +void move_##VLS_TYPE ()                                                \
> +{                                                              \
> +    register VLS_TYPE src##VLS_TYPE __asm__ ("v0");            \
> +    register VLS_TYPE dst##VLS_TYPE __asm__ ("v8");            \
> +    __asm__ volatile ("#def" : "=vr"(src##VLS_TYPE));          \
> +    dst##VLS_TYPE = src##VLS_TYPE;                             \
> +    __asm__ volatile ("#use" : : "vr"(dst##VLS_TYPE));         \
> +}
> +
> +FOR_EACH_VLS_TYPE(MOVE)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
> new file mode 100644
> index 00000000000..91c89df098e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
> +
> +#include "move-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
> new file mode 100644
> index 00000000000..175986edf15
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
> +
> +#include "move-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.v} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
> new file mode 100644
> index 00000000000..2e574b1f3ee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
> +
> +#include "move-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
> new file mode 100644
> index 00000000000..302823b583f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
> @@ -0,0 +1,42 @@
> +#ifndef __VLS_TYPE_H
> +#define __VLS_TYPE_H
> +#include <stdint-gcc.h>
> +
> +typedef  int8_t int8x16_t __attribute__ ((vector_size (16)));
> +typedef int16_t int16x8_t __attribute__ ((vector_size (16)));
> +typedef int32_t int32x4_t __attribute__ ((vector_size (16)));
> +typedef int64_t int64x2_t __attribute__ ((vector_size (16)));
> +
> +typedef  uint8_t uint8x16_t __attribute__ ((vector_size (16)));
> +typedef uint16_t uint16x8_t __attribute__ ((vector_size (16)));
> +typedef uint32_t uint32x4_t __attribute__ ((vector_size (16)));
> +typedef uint64_t uint64x2_t __attribute__ ((vector_size (16)));
> +
> +
> +#if __riscv_v_elen == 32
> +
> +#define FOR_EACH_VLS_TYPE(FUNC) \
> +    FUNC (int8x16_t, int8_t) \
> +    FUNC (int16x8_t, int16_t) \
> +    FUNC (int32x4_t, int32_t) \
> +    FUNC (uint8x16_t, uint8_t) \
> +    FUNC (uint16x8_t, uint16_t) \
> +    FUNC (uint32x4_t, uint32_t)
> +
> +#elif __riscv_v_elen == 64
> +
> +#define FOR_EACH_VLS_TYPE(FUNC) \
> +    FUNC (int8x16_t, int8_t) \
> +    FUNC (int16x8_t, int16_t) \
> +    FUNC (int32x4_t, int32_t) \
> +    FUNC (int64x2_t, int64_t) \
> +    FUNC (uint8x16_t, uint8_t) \
> +    FUNC (uint16x8_t, uint16_t) \
> +    FUNC (uint32x4_t, uint32_t) \
> +    FUNC (uint64x2_t, uint64_t)
> +
> +#else
> +#error "zve* or v extension is required."
> +#endif
> +
> +#endif
> --
> 2.40.1
>
  
Robin Dapp May 30, 2023, 7:27 a.m. UTC | #5
Hi Kito,

> GNU vector extensions is widly used around this world, and this patch
> enable that with RISC-V vector extensions, this can help people
> leverage existing code base with RVV, and also can write vector programs in a
> familiar way.
> 
> The idea of VLS code gen support is emulate VLS operation by VLA operation with
> specific length.
> 
> Key design point is we defer the mode conversion (From VLS to VLA mode) after
> register allocation, it come with several advantages:
> - VLS pattern is much friendly for most optimization pass like combine.
> - Register allocator can spill/restore exact size of VLS type instead of
>   whole register.
> 
> This is compatible with VLA vectorization.
> 
> Only support move and binary part of operation patterns.

On a high-level:  Why do we need to do it this way and not any other way? :)
Some more comments/explanations would definitely help, i.e. prior art on
aarch64, what exactly is easier for combine and friends now (no undef and so
on) and, importantly, why is the conversion after register allocation always
safe?  Couldn't we "lower" the fixed-length vectors to VLA at some point and
how does everything relate to fixed-vlmax? Essentially this is a "separate"
backend similar to ARM NEON but we share most of the things and possibly grow
it in the future?

What would the alternative be?

That said, couldn't we reuse the existing binop tests?  If you don't like them,
change the existing ones as well and reuse them?

> +/* Return the minimal containable VLA mode for MODE.  */
> +
> +machine_mode
> +minimal_vla_mode (machine_mode mode)
> +{
> +  gcc_assert (GET_MODE_NUNITS (mode).is_constant ());
> +  unsigned type_size = GET_MODE_NUNITS (mode).to_constant ();

Couldn't you use .require () right away?  Same in some other hunks.

Regards
 Robin
  
juzhe.zhong@rivai.ai May 30, 2023, 7:40 a.m. UTC | #6
>> why is the conversion after register allocation always
>> safe?
I do worry about this issue too. 
I just notice :

+	case MEM:
+	  operands[i] = change_address (operands[i], vla_mode, NULL_RTX);

I am not sure whether it is safe.

>> Couldn't we "lower" the fixed-length vectors to VLA at some point and
>> how does everything relate to fixed-vlmax?

I can explain why we need this patch (I call it fixed-vlmin).
You can take a look at this example:
https://godbolt.org/z/3jYqoM84h 

This is how LLVM works.
In this example, you can see that GCC needs --param=riscv-autovec-preference=fixed-vlmax -march=rv64gcv (same as mrvv-vector-bits=128).
However, LLVM doesn't need to specify the vector-length.

The benefits:
1. We don't need to specify the actual vector length, so we can vectorize this example.
2. GCC's codegen can only run on a CPU with vector length = 128, whereas LLVM's can run on any RVV CPU with vector length >= 128.

Thanks.


juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-05-30 15:27
To: Kito Cheng; gcc-patches; palmer; kito.cheng; juzhe.zhong; jeffreyalaw; pan2.li
CC: rdapp.gcc
Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
Hi Kito,
 
> GNU vector extensions is widly used around this world, and this patch
> enable that with RISC-V vector extensions, this can help people
> leverage existing code base with RVV, and also can write vector programs in a
> familiar way.
> 
> The idea of VLS code gen support is emulate VLS operation by VLA operation with
> specific length.
> 
> Key design point is we defer the mode conversion (From VLS to VLA mode) after
> register allocation, it come with several advantages:
> - VLS pattern is much friendly for most optimization pass like combine.
> - Register allocator can spill/restore exact size of VLS type instead of
>   whole register.
> 
> This is compatible with VLA vectorization.
> 
> Only support move and binary part of operation patterns.
 
On a high-level:  Why do we need to do it this way and not any other way? :)
Some more comments/explanations would definitely help, i.e. prior art on
aarch64, what exactly is easier for combine and friends now (no undef and so
on) and, importantly, why is the conversion after register allocation always
safe?  Couldn't we "lower" the fixed-length vectors to VLA at some point and
how does everything relate to fixed-vlmax? Essentially this is a "separate"
backend similar to ARM NEON but we share most of the things and possibly grow
it in the future?
 
What would the alternative be?
 
That said, couldn't we reuse the existing binop tests?  If you don't like them
change the existing ones as well and reuse then?
 
> +/* Return the minimal containable VLA mode for MODE.  */
> +
> +machine_mode
> +minimal_vla_mode (machine_mode mode)
> +{
> +  gcc_assert (GET_MODE_NUNITS (mode).is_constant ());
> +  unsigned type_size = GET_MODE_NUNITS (mode).to_constant ();
 
Couldn't you use .require () right away?  Same in some other hunks.
 
Regards
Robin
  
juzhe.zhong@rivai.ai May 30, 2023, 7:45 a.m. UTC | #7
Hi, Richi.

>> but ideally the user would be able to specify -mrvv-size=32 for an
>> implementation with 32 byte vectors and then vector lowering would make use
>> of vectors up to 32 bytes?

Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
You can take a look this example:
https://godbolt.org/z/3jYqoM84h 

GCC needs to specify the mrvv size to enable GNU vectors, and the codegen can only run on a CPU with vector-length = 128 bits.
However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV vector-length >= 128 bits.

This is what this patch wants to do.

Thanks.


juzhe.zhong@rivai.ai
 
From: Richard Biener
Date: 2023-05-30 15:13
To: Kito Cheng
CC: gcc-patches; palmer; kito.cheng; juzhe.zhong; jeffreyalaw; rdapp.gcc; pan2.li
Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
On Tue, May 30, 2023 at 8:07 AM Kito Cheng via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> GNU vector extensions is widly used around this world, and this patch
> enable that with RISC-V vector extensions, this can help people
> leverage existing code base with RVV, and also can write vector programs in a
> familiar way.
>
> The idea of VLS code gen support is emulate VLS operation by VLA operation with
> specific length.
 
In the patch you added fixed 16 bytes vector modes, correct?  I've
never looked at
how ARM deals with the GNU vector extensions but I suppose they get mapped
to NEON and not SVE so basically behave the same way here.
 
But I do wonder about the efficiency for RVV where there doesn't exist a
complementary fixed-length ISA.  Shouldn't vector lowering
(tree-vect-generic.cc)
be enhanced to support lowering fixed-length vectors to variable length ones
with (variable) fixed length instead?  From your patch I second-guess the RVV
specification requires 16 byte vectors to be available (or will your
patch split the
insns?) but ideally the user would be able to specify -mrvv-size=32 for an
implementation with 32 byte vectors and then vector lowering would make use
of vectors up to 32 bytes?
 
Also vector lowering will split smaller vectors not equal to the fixed size to
scalars unless you add all fixed length modes smaller than 16 bytes as well.
 
> Key design point is we defer the mode conversion (From VLS to VLA mode) after
> register allocation, it come with several advantages:
> - VLS pattern is much friendly for most optimization pass like combine.
> - Register allocator can spill/restore exact size of VLS type instead of
>   whole register.
>
> This is compatible with VLA vectorization.
>
> Only support move and binary part of operation patterns.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-modes.def: Introduce VLS modes.
>         * config/riscv/riscv-protos.h (riscv_vector::minimal_vls_mode): New.
>         (riscv_vector::vls_insn_expander): New.
>         (riscv_vector::vls_mode_p): New.
>         * config/riscv/riscv-v.cc (riscv_vector::minimal_vls_mode): New.
>         (riscv_vector::vls_mode_p): New.
>         (riscv_vector::vls_insn_expander): New.
>         (riscv_vector::update_vls_mode): New.
>         * config/riscv/riscv.cc (riscv_v_ext_mode_p): New.
>         (riscv_v_adjust_nunits): Handle VLS type.
>         (riscv_hard_regno_nregs): Ditto.
>         (riscv_hard_regno_mode_ok): Ditto.
>         (riscv_regmode_natural_size): Ditto.
>         * config/riscv/vector-iterators.md (VLS): New.
>         (VM): Handle VLS type.
>         (vel): Ditto.
>         * config/riscv/vector.md: Include vector-vls.md.
>         * config/riscv/vector-vls.md: New file.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/rvv.exp: Add vls folder.
>         * gcc.target/riscv/rvv/vls/binop-template.h: New test.
>         * gcc.target/riscv/rvv/vls/binop-v.c: New test.
>         * gcc.target/riscv/rvv/vls/binop-zve32x.c: New test.
>         * gcc.target/riscv/rvv/vls/binop-zve64x.c: New test.
>         * gcc.target/riscv/rvv/vls/move-template.h: New test.
>         * gcc.target/riscv/rvv/vls/move-v.c: New test.
>         * gcc.target/riscv/rvv/vls/move-zve32x.c: New test.
>         * gcc.target/riscv/rvv/vls/move-zve64x.c: New test.
>         * gcc.target/riscv/rvv/vls/load-store-template.h: New test.
>         * gcc.target/riscv/rvv/vls/load-store-v.c: New test.
>         * gcc.target/riscv/rvv/vls/load-store-zve32x.c: New test.
>         * gcc.target/riscv/rvv/vls/load-store-zve64x.c: New test.
>         * gcc.target/riscv/rvv/vls/vls-types.h: New test.
> ---
>  gcc/config/riscv/riscv-modes.def              |  3 +
>  gcc/config/riscv/riscv-protos.h               |  4 ++
>  gcc/config/riscv/riscv-v.cc                   | 67 +++++++++++++++++++
>  gcc/config/riscv/riscv.cc                     | 27 +++++++-
>  gcc/config/riscv/vector-iterators.md          |  6 ++
>  gcc/config/riscv/vector-vls.md                | 64 ++++++++++++++++++
>  gcc/config/riscv/vector.md                    |  2 +
>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |  4 ++
>  .../gcc.target/riscv/rvv/vls/binop-template.h | 18 +++++
>  .../gcc.target/riscv/rvv/vls/binop-v.c        | 18 +++++
>  .../gcc.target/riscv/rvv/vls/binop-zve32x.c   | 18 +++++
>  .../gcc.target/riscv/rvv/vls/binop-zve64x.c   | 18 +++++
>  .../riscv/rvv/vls/load-store-template.h       |  8 +++
>  .../gcc.target/riscv/rvv/vls/load-store-v.c   | 17 +++++
>  .../riscv/rvv/vls/load-store-zve32x.c         | 17 +++++
>  .../riscv/rvv/vls/load-store-zve64x.c         | 17 +++++
>  .../gcc.target/riscv/rvv/vls/move-template.h  | 13 ++++
>  .../gcc.target/riscv/rvv/vls/move-v.c         | 10 +++
>  .../gcc.target/riscv/rvv/vls/move-zve32x.c    | 10 +++
>  .../gcc.target/riscv/rvv/vls/move-zve64x.c    | 10 +++
>  .../gcc.target/riscv/rvv/vls/vls-types.h      | 42 ++++++++++++
>  21 files changed, 391 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/config/riscv/vector-vls.md
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
>
> diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
> index 19a4f9fb3db..f897e560667 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -318,6 +318,9 @@ RVV_TUPLE_PARTIAL_MODES (6)
>  RVV_TUPLE_PARTIAL_MODES (7)
>  RVV_TUPLE_PARTIAL_MODES (8)
>
> +/* VLS modes.  */
> +VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
> +
>  /* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can
>     be 65536 for a single vector register which means the vector mode in
>     GCC can be maximum = 65536 * 8 bits (LMUL=8).
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 0462f96c8d5..56f714bbfb5 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -261,6 +261,10 @@ enum frm_field_enum
>  opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
>                                          poly_uint64);
>  unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
> +machine_mode minimal_vla_mode (machine_mode);
> +void
> +vls_insn_expander (unsigned, int, rtx *, machine_mode, machine_mode);
> +bool vls_mode_p (machine_mode);
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index a5715bb466c..f9fb8cd9be4 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -1697,4 +1697,71 @@ expand_vcond (rtx *ops)
>      gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
>  }
>
> +/* Return the minimal containable VLA mode for MODE.  */
> +
> +machine_mode
> +minimal_vla_mode (machine_mode mode)
> +{
> +  gcc_assert (GET_MODE_NUNITS (mode).is_constant ());
> +  unsigned type_size = GET_MODE_NUNITS (mode).to_constant ();
> +  poly_uint64 nunits = type_size * poly_uint64 (1, 1);
> +  opt_machine_mode vla_mode = get_vector_mode (GET_MODE_INNER (mode), nunits);
> +  return vla_mode.else_void ();
> +}
> +
> +/* Return true if MODE is true VLS mode.  */
> +
> +bool
> +vls_mode_p (machine_mode mode)
> +{
> +  switch (mode)
> +    {
> +    case E_V4SImode:
> +    case E_V2DImode:
> +    case E_V8HImode:
> +    case E_V16QImode:
> +      return true;
> +    default:
> +      return false;
> +    }
> +}
> +
> +/* Convert all OPERANDS to VLA_MODE.  */
> +
> +static void
> +update_vls_mode (machine_mode vla_mode, unsigned n_operands, rtx *operands)
> +{
> +  unsigned i;
> +  for (i = 0; i < n_operands; ++i)
> +    {
> +      switch (GET_CODE (operands[i]))
> +       {
> +       case REG:
> +         operands[i] = gen_rtx_REG (vla_mode, REGNO (operands[i]));
> +         break;
> +       case MEM:
> +         operands[i] = change_address (operands[i], vla_mode, NULL_RTX);
> +         break;
> +       default:
> +         gcc_unreachable ();
> +       }
> +    }
> +}
> +
> +/* Expand VLS operation to VLA pattern.  */
> +
> +void
> +vls_insn_expander (unsigned icode, int op_num, rtx *operands,
> +                  machine_mode vls_mode, machine_mode vla_mode)
> +{
> +  update_vls_mode (vla_mode, /* n_operands */ op_num, operands);
> +
> +  poly_uint16 nunit = GET_MODE_NUNITS (vls_mode);
> +
> +  gcc_assert (nunit.is_constant ());
> +
> +  riscv_vector::emit_nonvlmax_insn (icode, op_num, operands,
> +                                   GEN_INT (nunit.to_constant ()));
> +}
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 3954fc07a8b..2e14f2e0d53 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -1034,7 +1034,8 @@ riscv_v_ext_tuple_mode_p (machine_mode mode)
>  static bool
>  riscv_v_ext_mode_p (machine_mode mode)
>  {
> -  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode);
> +  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
> +        || riscv_vector::vls_mode_p (mode);
>  }
>
>  /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
> @@ -1043,7 +1044,7 @@ riscv_v_ext_mode_p (machine_mode mode)
>  poly_int64
>  riscv_v_adjust_nunits (machine_mode mode, int scale)
>  {
> -  if (riscv_v_ext_mode_p (mode))
> +  if (riscv_v_ext_mode_p (mode) && !riscv_vector::vls_mode_p (mode))
>      return riscv_vector_chunks * scale;
>    return scale;
>  }
> @@ -6059,6 +6060,22 @@ riscv_register_move_cost (machine_mode mode,
>  static unsigned int
>  riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
>  {
> +  if (riscv_vector::vls_mode_p (mode))
> +    {
> +      if (TARGET_MIN_VLEN)
> +       {
> +         unsigned min_byte_per_vector_register = TARGET_MIN_VLEN / 8;
> +         unsigned mode_size = GET_MODE_SIZE (mode).to_constant ();
> +
> +         if (min_byte_per_vector_register >= mode_size)
> +           return 1;
> +
> +         return mode_size / min_byte_per_vector_register;
> +       }
> +      else
> +       return 1;
> +    }
> +
>    if (riscv_v_ext_vector_mode_p (mode))
>      {
>        /* Handle fractional LMUL, it only occupy part of vector register but
> @@ -6148,6 +6165,10 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
>          but for mask vector register, register numbers can be any number. */
>        int lmul = 1;
>        machine_mode rvv_mode = mode;
> +
> +      if (riscv_vector::vls_mode_p (mode))
> +       rvv_mode = riscv_vector::minimal_vla_mode (mode);
> +
>        if (riscv_v_ext_tuple_mode_p (rvv_mode))
>         rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
>        poly_int64 size = GET_MODE_SIZE (rvv_mode);
> @@ -7223,6 +7244,8 @@ riscv_vector_alignment (const_tree type)
>  poly_uint64
>  riscv_regmode_natural_size (machine_mode mode)
>  {
> +  if (riscv_vector::vls_mode_p (mode))
> +    return GET_MODE_SIZE (mode);
>    /* The natural size for RVV data modes is one RVV data vector,
>       and similarly for predicates.  We can't independently modify
>       anything smaller than that.  */
> diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
> index 70fb5b80b1b..650f2651c7d 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -84,6 +84,10 @@ (define_c_enum "unspec" [
>    UNSPEC_MODIFY_VL
>  ])
>
> +(define_mode_iterator VLS [
> +  V2DI V4SI V8HI V16QI
> +])
> +
>  (define_mode_iterator V [
>    (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
>    (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
> @@ -976,6 +980,7 @@ (define_mode_attr VM [
>    (VNx2x4DF "VNx4BI") (VNx3x4DF "VNx4BI") (VNx4x4DF "VNx4BI")
>    (VNx2x2DF "VNx2BI") (VNx3x2DF "VNx2BI") (VNx4x2DF "VNx2BI") (VNx5x2DF "VNx2BI") (VNx6x2DF "VNx2BI") (VNx7x2DF "VNx2BI") (VNx8x2DF "VNx2BI")
>    (VNx2x1DF "VNx1BI") (VNx3x1DF "VNx1BI") (VNx4x1DF "VNx1BI") (VNx5x1DF "VNx1BI") (VNx6x1DF "VNx1BI") (VNx7x1DF "VNx1BI") (VNx8x1DF "VNx1BI")
> +  (V2DI "V2BI") (V4SI "V4BI") (V8HI "V8BI") (V16QI "V16BI")
>  ])
>
>  (define_mode_attr vm [
> @@ -1003,6 +1008,7 @@ (define_mode_attr vel [
>    (VNx1DI "di") (VNx2DI "di") (VNx4DI "di") (VNx8DI "di") (VNx16DI "di")
>    (VNx1SF "sf") (VNx2SF "sf") (VNx4SF "sf") (VNx8SF "sf") (VNx16SF "sf") (VNx32SF "sf")
>    (VNx1DF "df") (VNx2DF "df") (VNx4DF "df") (VNx8DF "df") (VNx16DF "df")
> +  (V2DI "di") (V4SI "si") (V8HI "hi") (V16QI "qi")
>  ])
>
>  (define_mode_attr VSUBEL [
> diff --git a/gcc/config/riscv/vector-vls.md b/gcc/config/riscv/vector-vls.md
> new file mode 100644
> index 00000000000..af7e7a6c726
> --- /dev/null
> +++ b/gcc/config/riscv/vector-vls.md
> @@ -0,0 +1,64 @@
> +;; Machine description for vector length specific type operation with
> +;; RISC-V 'V' Extension for GNU compiler.
> +;; Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_expand "mov<mode>"
> +  [(set (match_operand:VLS 0 "nonimmediate_operand")
> +       (match_operand:VLS 1 "vector_move_operand"))]
> +  "TARGET_VECTOR"
> +{
> +  // TODO: Only allow register and memory now, we should allow legal
> +  //       vector_const too.
> +  if (MEM_P (operands[0]) && MEM_P (operands[1]))
> +    operands[1] = force_reg (GET_MODE(operands[1]), operands[1]);
> +})
> +
> +(define_insn_and_split "*mov<mode>"
> +  [(set (match_operand:VLS 0 "nonimmediate_operand" "=vr,vr, m,vr")
> +       (match_operand:VLS 1 "vector_move_operand"  " vr, m,vr,vi"))]
> +  "TARGET_VECTOR &&
> +   (register_operand (operands[0], <MODE>mode)
> +    || register_operand (operands[1], <MODE>mode))"
> +  "#"
> +  "reload_completed"
> +  [(const_int 0)]
> +{
> +  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
> +  riscv_vector::vls_insn_expander (
> +    code_for_pred_mov (vla_mode), riscv_vector::RVV_UNOP, operands,
> +    <MODE>mode, vla_mode);
> +  DONE;
> +})
> +
> +(define_insn_and_split "<optab><mode>3"
> +  [(set (match_operand:VLS 0 "register_operand" "=vr")
> +       (any_int_binop_no_shift:VLS
> +         (match_operand:VLS 1 "register_operand" "vr")
> +         (match_operand:VLS 2 "register_operand" "vr")))]
> +  "TARGET_VECTOR"
> +  "#"
> +  "reload_completed"
> +  [(const_int 0)]
> +{
> +  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
> +  riscv_vector::vls_insn_expander (
> +    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
> +    operands, <MODE>mode, vla_mode);
> +  DONE;
> +})
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index cd696da5d89..e3fd0807b22 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -25,6 +25,7 @@
>  ;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
>  ;; - Auto-vectorization (autovec.md)
>  ;; - Combine optimization (TBD)
> +;; - VLS patterns (vector-vls.md)
>
>  (include "vector-iterators.md")
>
> @@ -8407,3 +8408,4 @@ (define_split
>  )
>
>  (include "autovec.md")
> +(include "vector-vls.md")
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> index bf03570b9cc..f6c56a63ada 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> @@ -77,5 +77,9 @@ foreach op $AUTOVEC_TEST_OPTS {
>  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
>         "-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS
>
> +# VLS test
> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vls/*.\[cS\]]] \
> +       "" $CFLAGS
> +
>  # All done.
>  dg-finish
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
> new file mode 100644
> index 00000000000..0114c124646
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
> @@ -0,0 +1,18 @@
> +#include "vls-types.h"
> +#define __BINOP(NAME, OP, VLS_TYPE, SCALAR_TYPE)               \
> +void binop_##NAME##VLS_TYPE                                    \
> +  (VLS_TYPE *src1, VLS_TYPE *src2, VLS_TYPE *dst)              \
> +{                                                              \
> +    *dst = *src1 OP *src2;                                     \
> +}
> +
> +#define BINOP(VLS_TYPE, SCALAR_TYPE)                           \
> +__BINOP (ADD, +, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (MUL, *, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (DIV, /, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (MOD, %, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (AND, &, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (IOR, |, VLS_TYPE, SCALAR_TYPE)                                \
> +__BINOP (XOR, ^, VLS_TYPE, SCALAR_TYPE)
> +
> +FOR_EACH_VLS_TYPE(BINOP)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
> new file mode 100644
> index 00000000000..78c1a19cfbf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
> +
> +#include "binop-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
> +/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
> +/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
> new file mode 100644
> index 00000000000..bca56ba32a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
> +
> +#include "binop-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
> +/* { dg-final { scan-assembler-times {vadd\.vv} 6} } */
> +/* { dg-final { scan-assembler-times {vmul\.vv} 6} } */
> +/* { dg-final { scan-assembler-times {vdiv\.vv} 3} } */
> +/* { dg-final { scan-assembler-times {vdivu\.vv} 3 } } */
> +/* { dg-final { scan-assembler-times {vrem\.vv} 3} } */
> +/* { dg-final { scan-assembler-times {vremu\.vv} 3 } } */
> +/* { dg-final { scan-assembler-times {vand\.vv} 6} } */
> +/* { dg-final { scan-assembler-times {vor\.vv} 6 } } */
> +/* { dg-final { scan-assembler-times {vxor\.vv} 6} } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
> new file mode 100644
> index 00000000000..45dcad12a93
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
> +
> +#include "binop-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
> +/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
> +/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
> +/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
> +/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
> +/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
> new file mode 100644
> index 00000000000..9ea0c7cb5dc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
> @@ -0,0 +1,8 @@
> +#include "vls-types.h"
> +#define LOAD_STORE(VLS_TYPE, SCALAR_TYPE)                      \
> +void load_store_##VLS_TYPE (VLS_TYPE *src, VLS_TYPE *dst)      \
> +{                                                              \
> +    *dst = *src;                                               \
> +}
> +
> +FOR_EACH_VLS_TYPE(LOAD_STORE)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
> new file mode 100644
> index 00000000000..b8adcea70d6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
> +
> +#include "load-store-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
> +/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
> new file mode 100644
> index 00000000000..ef3426d00a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
> +
> +#include "load-store-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
> +/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
> +/* { dg-final { scan-assembler-not {vle64\.v} } } */
> +/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
> +/* { dg-final { scan-assembler-not {vse64\.v} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
> new file mode 100644
> index 00000000000..e03220d89f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
> +
> +#include "load-store-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
> +/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
> +/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
> new file mode 100644
> index 00000000000..e328a42204c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
> @@ -0,0 +1,13 @@
> +#include "vls-types.h"
> +
> +#define MOVE(VLS_TYPE, SCALAR_TYPE)                            \
> +void move_##VLS_TYPE ()                                                \
> +{                                                              \
> +    register VLS_TYPE src##VLS_TYPE __asm__ ("v0");            \
> +    register VLS_TYPE dst##VLS_TYPE __asm__ ("v8");            \
> +    __asm__ volatile ("#def" : "=vr"(src##VLS_TYPE));          \
> +    dst##VLS_TYPE = src##VLS_TYPE;                             \
> +    __asm__ volatile ("#use" : : "vr"(dst##VLS_TYPE));         \
> +}
> +
> +FOR_EACH_VLS_TYPE(MOVE)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
> new file mode 100644
> index 00000000000..91c89df098e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
> +
> +#include "move-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
> new file mode 100644
> index 00000000000..175986edf15
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
> +
> +#include "move-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.v} 6 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
> new file mode 100644
> index 00000000000..2e574b1f3ee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
> +
> +#include "move-template.h"
> +
> +/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
> +/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
> new file mode 100644
> index 00000000000..302823b583f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
> @@ -0,0 +1,42 @@
> +#ifndef __VLS_TYPE_H
> +#define __VLS_TYPE_H
> +#include <stdint-gcc.h>
> +
> +typedef  int8_t int8x16_t __attribute__ ((vector_size (16)));
> +typedef int16_t int16x8_t __attribute__ ((vector_size (16)));
> +typedef int32_t int32x4_t __attribute__ ((vector_size (16)));
> +typedef int64_t int64x2_t __attribute__ ((vector_size (16)));
> +
> +typedef  uint8_t uint8x16_t __attribute__ ((vector_size (16)));
> +typedef uint16_t uint16x8_t __attribute__ ((vector_size (16)));
> +typedef uint32_t uint32x4_t __attribute__ ((vector_size (16)));
> +typedef uint64_t uint64x2_t __attribute__ ((vector_size (16)));
> +
> +
> +#if __riscv_v_elen == 32
> +
> +#define FOR_EACH_VLS_TYPE(FUNC) \
> +    FUNC (int8x16_t, int8_t) \
> +    FUNC (int16x8_t, int16_t) \
> +    FUNC (int32x4_t, int32_t) \
> +    FUNC (uint8x16_t, uint8_t) \
> +    FUNC (uint16x8_t, uint16_t) \
> +    FUNC (uint32x4_t, uint32_t)
> +
> +#elif __riscv_v_elen == 64
> +
> +#define FOR_EACH_VLS_TYPE(FUNC) \
> +    FUNC (int8x16_t, int8_t) \
> +    FUNC (int16x8_t, int16_t) \
> +    FUNC (int32x4_t, int32_t) \
> +    FUNC (int64x2_t, int64_t) \
> +    FUNC (uint8x16_t, uint8_t) \
> +    FUNC (uint16x8_t, uint16_t) \
> +    FUNC (uint32x4_t, uint32_t) \
> +    FUNC (uint64x2_t, uint64_t)
> +
> +#else
> +#error "zve* or v extension is required."
> +#endif
> +
> +#endif
> --
> 2.40.1
>
  
Robin Dapp May 30, 2023, 9:05 a.m. UTC | #8
>>> but ideally the user would be able to specify -mrvv-size=32 for an
>>> implementation with 32 byte vectors and then vector lowering would make use
>>> of vectors up to 32 bytes?
> 
> Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> You can take a look this example:
> https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h> 
> 
> GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> 
> This is what this patch want to do.
> 
> Thanks.
I think Richard's question was rather if it wasn't better to do it more
generically and lower vectors to what either the current cpu or what the
user specified rather than just 16-byte vectors (i.e. indeed a fixed
vlmin and not a fixed vlmin == fixed vlmax).

This patch assumes everything is fixed for optimization purposes and then
switches over to variable-length when nothing can be changed anymore.  That
is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
We would need to make sure that no pass after reload makes use of VLA
properties at all.

In general I don't have a good overview of which optimizations we gain by
such an approach or rather which ones are prevented by VLA altogether?
What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
what we would have for pure VLA?

Regards
 Robin
  
Kito Cheng May 30, 2023, 9:11 a.m. UTC | #9
(I am still stuck in meeting hell and will not be free until much later;
apologies for the short and incomplete reply — I will send a complete reply later)

One reason for adding VLS mode support is SLP: especially for SLP
candidates that are not inside a loop, using a VLS type can be better.
Of course, using a larger safe VLA type can be optimized too, but that
causes an issue we found for RISC-V in LLVM - it will spill/reload the
whole register instead of the exact size.

e.g.

int32x4_t a;
// def a
// spill a
foo ()
// reload a
// use a

If we use a VLA mode for a, it will spill and reload the whole
register in VLA mode.
Online demo here: https://godbolt.org/z/Y1fThbxE6

On Tue, May 30, 2023 at 5:05 PM Robin Dapp <rdapp.gcc@gmail.com> wrote:
>
> >>> but ideally the user would be able to specify -mrvv-size=32 for an
> >>> implementation with 32 byte vectors and then vector lowering would make use
> >>> of vectors up to 32 bytes?
> >
> > Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> > You can take a look this example:
> > https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h>
> >
> > GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> > However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> >
> > This is what this patch want to do.
> >
> > Thanks.
> I think Richard's question was rather if it wasn't better to do it more
> generically and lower vectors to what either the current cpu or what the
> user specified rather than just 16-byte vectors (i.e. indeed a fixed
> vlmin and not a fixed vlmin == fixed vlmax).
>
> This patch assumes everything is fixed for optimization purposes and then
> switches over to variable-length when nothing can be changed anymore.  That
> is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
> We would need to make sure that no pass after reload makes use of VLA
> properties at all.
>
> In general I don't have a good overview of which optimizations we gain by
> such an approach or rather which ones are prevented by VLA altogether?
> What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
> with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
> what we would have for pure VLA?
>
> Regards
>  Robin
  
juzhe.zhong@rivai.ai May 30, 2023, 9:16 a.m. UTC | #10
In the future, we will definitely mix VLA and VLS-vlmin together in codegen, and it will not cause any issues.
For VLS-vlmin, I prefer that it be used in length-style auto-vectorization (I am not sure yet, since my SELECT_VL patch is not
finished; I will check whether it can work while I am working on the SELECT_VL patch).

>> In general I don't have a good overview of which optimizations we gain by
>> such an approach or rather which ones are prevented by VLA altogether?
These patches VLS modes can help for SLP auto-vectorization.



juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-05-30 17:05
To: juzhe.zhong@rivai.ai; Richard Biener; Kito.cheng
CC: rdapp.gcc; gcc-patches; palmer; kito.cheng; jeffreyalaw; pan2.li
Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
>>> but ideally the user would be able to specify -mrvv-size=32 for an
>>> implementation with 32 byte vectors and then vector lowering would make use
>>> of vectors up to 32 bytes?
> 
> Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> You can take a look this example:
> https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h> 
> 
> GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> 
> This is what this patch want to do.
> 
> Thanks.
I think Richard's question was rather if it wasn't better to do it more
generically and lower vectors to what either the current cpu or what the
user specified rather than just 16-byte vectors (i.e. indeed a fixed
vlmin and not a fixed vlmin == fixed vlmax).
 
This patch assumes everything is fixed for optimization purposes and then
switches over to variable-length when nothing can be changed anymore.  That
is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
We would need to make sure that no pass after reload makes use of VLA
properties at all.
 
In general I don't have a good overview of which optimizations we gain by
such an approach or rather which ones are prevented by VLA altogether?
What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
what we would have for pure VLA?
 
Regards
Robin
  
Kito Cheng May 30, 2023, 9:16 a.m. UTC | #11
One more note: we found a real case in SPEC 2006 where SLP converts two
8-bit values into an int8x2_t, and the value is live across a function
call. It only needs to save/restore 16 bits, but it ends up
saving/restoring VLEN bits because the backend uses a VLA mode. You can
imagine that as VLEN grows larger, the performance penalty also
increases, which is the opposite of what we expect - larger VLEN should
mean better performance.

On Tue, May 30, 2023 at 5:11 PM Kito Cheng <kito.cheng@sifive.com> wrote:
>
> (I am still on the meeting hell, and will be released very later,
> apology for short and incomplete reply, and will reply complete later)
>
> One point for adding VLS mode support is because SLP, especially for
> those SLP candidate not in the loop, those case use VLS type can be
> better, of cause using larger safe VLA type can optimize too, but that
> will cause one issue we found in RISC-V in LLVM - it will spill/reload
> whole register instead of exact size.
>
> e.g.
>
> int32x4_t a;
> // def a
> // spill a
> foo ()
> // reload a
> // use a
>
> Consider we use a VLA mode for a, it will spill and reload with whole
> register VLA mode
> Online demo here: https://godbolt.org/z/Y1fThbxE6
>
> On Tue, May 30, 2023 at 5:05 PM Robin Dapp <rdapp.gcc@gmail.com> wrote:
> >
> > >>> but ideally the user would be able to specify -mrvv-size=32 for an
> > >>> implementation with 32 byte vectors and then vector lowering would make use
> > >>> of vectors up to 32 bytes?
> > >
> > > Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> > > You can take a look this example:
> > > https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h>
> > >
> > > GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> > > However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> > >
> > > This is what this patch want to do.
> > >
> > > Thanks.
> > I think Richard's question was rather if it wasn't better to do it more
> > generically and lower vectors to what either the current cpu or what the
> > user specified rather than just 16-byte vectors (i.e. indeed a fixed
> > vlmin and not a fixed vlmin == fixed vlmax).
> >
> > This patch assumes everything is fixed for optimization purposes and then
> > switches over to variable-length when nothing can be changed anymore.  That
> > is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
> > We would need to make sure that no pass after reload makes use of VLA
> > properties at all.
> >
> > In general I don't have a good overview of which optimizations we gain by
> > such an approach or rather which ones are prevented by VLA altogether?
> > What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
> > with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
> > what we would have for pure VLA?
> >
> > Regards
> >  Robin
  
Richard Biener May 30, 2023, 9:29 a.m. UTC | #12
On Tue, May 30, 2023 at 11:17 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> In the future, we will definitely mixing VLA and VLS-vlmin together in a codegen and it will not cause any issues.
> For VLS-vlmin, I prefer it is used in length style auto-vectorization (I am not sure since my SELECT_VL patch is not
> finished, I will check if can work when I am working in SELECT_VL patch).

For the future it would be then good to have the vectorizer
re-vectorize loops with
VLS vector uses to VLA style?  I think there's a PR with a draft patch
from a few
years ago attached (from me) somewhere.  Currently the vectorizer will give
up when seeing vector operations in a loop but ideally those should simply
be SLPed.

> >> In general I don't have a good overview of which optimizations we gain by
> >> such an approach or rather which ones are prevented by VLA altogether?
> These patches VLS modes can help for SLP auto-vectorization.
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-05-30 17:05
> To: juzhe.zhong@rivai.ai; Richard Biener; Kito.cheng
> CC: rdapp.gcc; gcc-patches; palmer; kito.cheng; jeffreyalaw; pan2.li
> Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
> >>> but ideally the user would be able to specify -mrvv-size=32 for an
> >>> implementation with 32 byte vectors and then vector lowering would make use
> >>> of vectors up to 32 bytes?
> >
> > Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> > You can take a look this example:
> > https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h>
> >
> > GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> > However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> >
> > This is what this patch want to do.
> >
> > Thanks.
> I think Richard's question was rather if it wasn't better to do it more
> generically and lower vectors to what either the current cpu or what the
> user specified rather than just 16-byte vectors (i.e. indeed a fixed
> vlmin and not a fixed vlmin == fixed vlmax).
>
> This patch assumes everything is fixed for optimization purposes and then
> switches over to variable-length when nothing can be changed anymore.  That
> is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
> We would need to make sure that no pass after reload makes use of VLA
> properties at all.
>
> In general I don't have a good overview of which optimizations we gain by
> such an approach or rather which ones are prevented by VLA altogether?
> What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
> with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
> what we would have for pure VLA?
>
> Regards
> Robin
>
  
juzhe.zhong@rivai.ai May 30, 2023, 9:37 a.m. UTC | #13
>> For the future it would be then good to have the vectorizer
>>re-vectorize loops with
>>VLS vector uses to VLA style?
 Not really, this patch just uses a trick to convert a VLS vector into VLA style, since
 that avoids defining the RVV patterns with VLS modes and avoids a lot of work.

 There is no benefit in converting VLS into VLS....
 And I don't even consider it safe.....

especially this code:
+	case MEM: 
+ operands[i] = change_address (operands[i], vla_mode, NULL_RTX); 

I feel it is unsafe code.

Actually, my original plan is to define new RVV patterns with new VLS modes (The patterns are same as VLA patterns, just modes are different).
Then emit codegen this VLS RVV patterns.




juzhe.zhong@rivai.ai
 
From: Richard Biener
Date: 2023-05-30 17:29
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; Kito.cheng; gcc-patches; palmer; kito.cheng; jeffreyalaw; pan2.li
Subject: Re: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
On Tue, May 30, 2023 at 11:17 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> In the future, we will definitely mixing VLA and VLS-vlmin together in a codegen and it will not cause any issues.
> For VLS-vlmin, I prefer it is used in length style auto-vectorization (I am not sure since my SELECT_VL patch is not
> finished, I will check if can work when I am working in SELECT_VL patch).
 
For the future it would be then good to have the vectorizer
re-vectorize loops with
VLS vector uses to VLA style?  I think there's a PR with a draft patch
from a few
years ago attached (from me) somewhere.  Currently the vectorizer will give
up when seeing vector operations in a loop but ideally those should simply
be SLPed.
 
> >> In general I don't have a good overview of which optimizations we gain by
> >> such an approach or rather which ones are prevented by VLA altogether?
> These patches VLS modes can help for SLP auto-vectorization.
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-05-30 17:05
> To: juzhe.zhong@rivai.ai; Richard Biener; Kito.cheng
> CC: rdapp.gcc; gcc-patches; palmer; kito.cheng; jeffreyalaw; pan2.li
> Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
> >>> but ideally the user would be able to specify -mrvv-size=32 for an
> >>> implementation with 32 byte vectors and then vector lowering would make use
> >>> of vectors up to 32 bytes?
> >
> > Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> > You can take a look this example:
> > https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h>
> >
> > GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> > However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> >
> > This is what this patch want to do.
> >
> > Thanks.
> I think Richard's question was rather if it wasn't better to do it more
> generically and lower vectors to what either the current cpu or what the
> user specified rather than just 16-byte vectors (i.e. indeed a fixed
> vlmin and not a fixed vlmin == fixed vlmax).
>
> This patch assumes everything is fixed for optimization purposes and then
> switches over to variable-length when nothing can be changed anymore.  That
> is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
> We would need to make sure that no pass after reload makes use of VLA
> properties at all.
>
> In general I don't have a good overview of which optimizations we gain by
> such an approach or rather which ones are prevented by VLA altogether?
> What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
> with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
> what we would have for pure VLA?
>
> Regards
> Robin
>
  
juzhe.zhong@rivai.ai May 30, 2023, 9:44 a.m. UTC | #14
I think I prefer handling VLS modes like this:
This is current VLA patterns:
(define_insn "@pred_<optab><mode>"
  [(set (match_operand:VI 0 "register_operand"           "=vd, vd, vr, vr, vd, vd, vr, vr, vd, vd, vr, vr")
  (if_then_else:VI
    (unspec:<VM>
      [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1, Wc1, vm, vm,Wc1,Wc1, vm, vm,Wc1,Wc1")
       (match_operand 5 "vector_length_operand"    " rK, rK, rK,  rK, rK, rK, rK, rK, rK, rK, rK, rK")
       (match_operand 6 "const_int_operand"        "  i,  i,  i,   i,  i,  i,  i,  i,  i,  i,  i,  i")
       (match_operand 7 "const_int_operand"        "  i,  i,  i,   i,  i,  i,  i,  i,  i,  i,  i,  i")
       (match_operand 8 "const_int_operand"        "  i,  i,  i,   i,  i,  i,  i,  i,  i,  i,  i,  i")
       (reg:SI VL_REGNUM)
       (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
    (any_int_binop:VI
      (match_operand:VI 3 "<binop_rhs1_predicate>" "<binop_rhs1_constraint>")
      (match_operand:VI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
    (match_operand:VI 2 "vector_merge_operand"     "vu,0,vu,0,vu,0,vu,0,vu,0,vu,0")))]
  "TARGET_VECTOR"
  "@
   v<insn>.vv\t%0,%3,%4%p1
   v<insn>.vv\t%0,%3,%4%p1
   v<insn>.vv\t%0,%3,%4%p1
   v<insn>.vv\t%0,%3,%4%p1
   v<binop_vi_variant_insn>\t%0,<binop_vi_variant_op>%p1
   v<binop_vi_variant_insn>\t%0,<binop_vi_variant_op>%p1
   v<binop_vi_variant_insn>\t%0,<binop_vi_variant_op>%p1
   v<binop_vi_variant_insn>\t%0,<binop_vi_variant_op>%p1
   v<binop_reverse_vi_variant_insn>\t%0,<binop_reverse_vi_variant_op>%p1
   v<binop_reverse_vi_variant_insn>\t%0,<binop_reverse_vi_variant_op>%p1
   v<binop_reverse_vi_variant_insn>\t%0,<binop_reverse_vi_variant_op>%p1
   v<binop_reverse_vi_variant_insn>\t%0,<binop_reverse_vi_variant_op>%p1"
  [(set_attr "type" "<int_binop_insn_type>")
   (set_attr "mode" "<MODE>")])

(define_mode_iterator VI [
  (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
  (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
  (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
  (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64")
  (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
])

You can see there is no VLS modes in "VI". Now to support VLS, I think we should extend "VI" iterator:
(define_mode_iterator VI [
  (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
  (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
  (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
  (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64")
  (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
V4SI V2DI V8HI V16QI
])

Then codegen directly to this VLS patterns without any conversion.
This is the safe way to deal with VLS patterns.

Thanks.


juzhe.zhong@rivai.ai
 
From: Richard Biener
Date: 2023-05-30 17:29
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; Kito.cheng; gcc-patches; palmer; kito.cheng; jeffreyalaw; pan2.li
Subject: Re: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
On Tue, May 30, 2023 at 11:17 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> In the future, we will definitely mixing VLA and VLS-vlmin together in a codegen and it will not cause any issues.
> For VLS-vlmin, I prefer it is used in length style auto-vectorization (I am not sure since my SELECT_VL patch is not
> finished, I will check if can work when I am working in SELECT_VL patch).
 
For the future it would be then good to have the vectorizer
re-vectorize loops with
VLS vector uses to VLA style?  I think there's a PR with a draft patch
from a few
years ago attached (from me) somewhere.  Currently the vectorizer will give
up when seeing vector operations in a loop but ideally those should simply
be SLPed.
 
> >> In general I don't have a good overview of which optimizations we gain by
> >> such an approach or rather which ones are prevented by VLA altogether?
> These patches VLS modes can help for SLP auto-vectorization.
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-05-30 17:05
> To: juzhe.zhong@rivai.ai; Richard Biener; Kito.cheng
> CC: rdapp.gcc; gcc-patches; palmer; kito.cheng; jeffreyalaw; pan2.li
> Subject: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
> >>> but ideally the user would be able to specify -mrvv-size=32 for an
> >>> implementation with 32 byte vectors and then vector lowering would make use
> >>> of vectors up to 32 bytes?
> >
> > Actually, we don't want to specify -mrvv-size = 32 to enable vectorization on GNU vectors.
> > You can take a look this example:
> > https://godbolt.org/z/3jYqoM84h <https://godbolt.org/z/3jYqoM84h>
> >
> > GCC need to specify the mrvv size to enable GNU vectors and the codegen only can run on CPU with vector-length = 128bit.
> > However, LLVM doesn't need to specify the vector length, and the codegen can run on any CPU with RVV  vector-length >= 128 bits.
> >
> > This is what this patch want to do.
> >
> > Thanks.
> I think Richard's question was rather if it wasn't better to do it more
> generically and lower vectors to what either the current cpu or what the
> user specified rather than just 16-byte vectors (i.e. indeed a fixed
> vlmin and not a fixed vlmin == fixed vlmax).
>
> This patch assumes everything is fixed for optimization purposes and then
> switches over to variable-length when nothing can be changed anymore.  That
> is, we would work on "vlmin"-sized chunks in a VLA fashion at runtime?
> We would need to make sure that no pass after reload makes use of VLA
> properties at all.
>
> In general I don't have a good overview of which optimizations we gain by
> such an approach or rather which ones are prevented by VLA altogether?
> What's the idea for the future?  Still use LEN_LOAD et al. (and masking)
> with "fixed vlmin"?  Wouldn't we select different IVs with this patch than
> what we would have for pure VLA?
>
> Regards
> Robin
>
  
Kito Cheng May 30, 2023, 3:45 p.m. UTC | #15
It's a long mail, but I think this should explain most of the high-level
concept of why I did this:

I guess I skipped too much story about the VLS-mode support; VLS-mode
support can be split into the middle-end and back-end.

# Middle-end
As Richard mentioned, those VLS types can be held by VLA-modes; for
example, int32x4_t can be held by VNx4SI mode, so IMO there are three
different options here: 1) use VLS type with VLS mode in middle-end,
2) use VLS type with VLA mode in middle-end 3) use VLA type with VLA
mode.

Option 2 might be weird and not natural to implement in GCC, so let me
ignore that.

Option 3 is a possible way, and actually, I did that on our downstream
compiler, and then...we found a fact that is not friendly to
optimization. To give a few practical examples: with a VLA type it is
hard to represent a vector constructor other than a step or
splat/duplicated value, so we need to push those values into memory
first - and then load them with len_load; as a result, constant
propagation and folding can't work well here, since it's hard to
evaluate that with an unknown vector length.

And it is also not friendly to pointer alias - because the length is
unknown, so GCC must be conservative on this, which will block some
optimization due to AA issues.

So IMO using the VLS type with VLS mode is the best way in the middle-end.

# Back-end
OK, it's back-end time; we have two options in the back-end to support
the VLS-type: support that with VLS mode or VLA mode.

What's the meaning of support with VLA mode? convert VLS-type stuff
into VLA mode pattern and give the right length information  - then
everything works.

But what is wrong with this path? Again, similar issues in the
back-end: propagation and folding with constant vectors will be
limited when we hold them in a VLA type - we can't hold a const_vector
other than a splat/duplicated value or a step value; it can't even be
held during the combine process. To give an example: if we have
a = {1, 2, 3, 4} and b = {4, 3, 2, 1}, this can easily be represented
in VLS-mode RTL, but is impossible to represent in VLA-mode RTL; we
can then fold a+b to {5, 5, 5, 5}, but VLA mode will run into a bunch
of problems optimizing that.

And also the stack issue mentioned before - unless we can teach RA to
track the length used for each register with VLA mode, I believe it
would be terrible for RA...

# Back to this patch

Ju-Zhe has suggested we could reuse VLA pattern for VLS mode, I
considered that before, however, I feel that might not be friendly
with combine pass, because our VLA patterns are kind of more complicated than
the plain VLS pattern, BUT I believe we will improve that in the near
future :P so I think that it should be reasonable just to use the same
pattern - then we could just add VLS mode to the mode iterator to
support that without magic mode changing, I can understand that really
seems very unsafe.
  
juzhe.zhong@rivai.ai May 30, 2023, 11:19 p.m. UTC | #16
Hi, Kito.

After consideration,  I think extending VLS modes into VLA pattern is not a wise choice now.
And I prefer everything to be perfect (Otherwise, I will rework the whole thing in the future and it's a waste of time). 
So I have suggestions as follows:

First, add a new avl_type here:
enum avl_type
{
  NONVLMAX,
  VLMAX,
+ VLS_AVL,
};

Second, define SEW && VLMUL && RATIO for VLS modes:
(define_attr "sew" ""
  (cond [(eq_attr "mode" "V16QI")
     (const_int 8)
     (eq_attr "mode" "V8HI")
     (const_int 16)
     (eq_attr "mode" "V4SI")
     (const_int 32)
     (eq_attr "mode" "V2DI")
     (const_int 64)]
    (const_int INVALID_ATTRIBUTE)))
(define_attr "vlmul" ""
  (cond [(eq_attr "mode" "V16QI")
       (symbol_ref "riscv_vector::get_vlmul(E_V16QImode)")
(eq_attr "mode" "V8HI")
       (symbol_ref "riscv_vector::get_vlmul(E_V8HImode)")
(eq_attr "mode" "V4SI")
       (symbol_ref "riscv_vector::get_vlmul(E_V4SImode)")
(eq_attr "mode" "V2DI")
       (symbol_ref "riscv_vector::get_vlmul(E_V2DImode)")
....


For "get_vlmul", we should be careful:
V16QI should have LMUL = 1 when TARGET_MIN_VLEN == 128,
but LMUL = 1/2 when TARGET_MIN_VLEN == 256...
etc....

Third, I think for VLS modes, you can define VLS pattern like this:

For GET_MODE_NUNITS (mode).to_constant () < 32:
+(define_insn "<optab><mode>3"
+  [(set (match_operand:VLS 0 "register_operand" "=vr")
+	(any_int_binop_no_shift:VLS
+	  (match_operand:VLS 1 "register_operand" "vr")
+	  (match_operand:VLS 2 "register_operand" "vr")))]
+  "TARGET_VECTOR"
+  "v<insn>.vv\t%0,%1,%2"

+   [(set_attr "type" "<int_binop_insn_type>")
+    (set_attr "mode" "<MODE>")
+    (set_attr "merge_op_idx" const_int INVALID_ATTRIBUTE)
+    (set_attr "vl_op_idx" const_int INVALID_ATTRIBUTE)
+    (set (attr "ta") (symbol_ref "riscv_vector::TAIL_ANY"))
+    (set (attr "ma") (symbol_ref "riscv_vector::MASK_ANY"))
+   (set (attr "avl_type") (symbol_ref "riscv_vector::VLS_AVL"))])

For GET_MODE_NUNITS (mode).to_constant () >= 32:

+(define_insn "<optab><mode>3"
+  [(set (match_operand:VLS 0 "register_operand" "=vr")
+	(any_int_binop_no_shift:VLS
+	  (match_operand:VLS 1 "register_operand" "vr")
+	  (match_operand:VLS 2 "register_operand" "vr")))
+    (clobber (match_operand:SI 2 ....))]
+  "TARGET_VECTOR"
+  "v<insn>.vv\t%0,%1,%2"

+   [(set_attr "type" "<int_binop_insn_type>")
+    (set_attr "mode" "<MODE>")
+    (set_attr "merge_op_idx" const_int INVALID_ATTRIBUTE)
+    (set_attr "vl_op_idx" const_int 2)
+    (set (attr "ta") (symbol_ref "riscv_vector::TAIL_ANY"))
+    (set (attr "ma") (symbol_ref "riscv_vector::MASK_ANY"))
+   (set (attr "avl_type") (symbol_ref "riscv_vector::VLS_AVL"))])

Then, with some minor tricks in the VSETVL pass (in the "parse_insn" function), I think it should work, and this is the truly optimal solution for
VLS-mode auto-vectorization.

Thanks.


juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-05-30 23:45
To: juzhe.zhong@rivai.ai
CC: Richard Biener; Robin Dapp; Kito.cheng; gcc-patches; palmer; jeffreyalaw; pan2.li
Subject: Re: Re: [PATCH] RISC-V: Basic VLS code gen for RISC-V
It's long mail but I think this should explain most high level concept
why I did this:
 
I guess I skipped too much story about the VLS-mode support; VLS-mode
support can be split into the middle-end and back-end.
 
# Middle-end
As Richard mentioned, those VLS types can be held by VLA-modes; for
example, int32x4_t can be held by VNx4SI mode, so IMO there are three
different options here: 1) use VLS type with VLS mode in middle-end,
2) use VLS type with VLA mode in middle-end 3) use VLA type with VLA
mode.
 
Option 2 might be weird and not natural to implement in GCC, so let me
ignore that.
 
Option 3 is a possible way, and actually, I did that on our downstream
compiler, and then...we found a fact that is not friendly to
optimization; give a few practical examples here VLA type is hard to
present a vector constructor other than a step or splat/duplicated
value, we need to push those value into memory first - and then load
by len_load, okay, so constant propagation and folding can't work well
here - since it's hard to evaluate that with unknown vector length.
 
And it is also not friendly to pointer alias - because the length is
unknown, so GCC must be conservative on this, which will block some
optimization due to AA issues.
 
So IMO the use the VLS-type with VLS mode is the best way in the middle-end.
 
# Back-end
OK, it's back-end time; we have two options in the back-end to support
the VLS-type: support that with VLS mode or VLA mode.
 
What's the meaning of support with VLA mode? convert VLS-type stuff
into VLA mode pattern and give the right length information  - then
everything works.
 
But what is wrong with this path? Again, similar issues in the
back-end: the propagation and folding with constant vector will be
limited when we hold in VLA type - we can't be held const_vector other
than splat/duplicated value or step value; it can't even be held
during the combine process, give an example here, we have a = {1, 2,
3, 4} and b = {4, 3, 2, 1}, this can be easily present at VLS mode
RTL, but impossible to represent in VLA mode RTL, and then we can
folding to a+b to {5, 5, 5, 5}, but VLA mode will get a bunch of
problems to optimize those stuff.
 
And also the stack issue mentioned before - unless we can teach RA to
track the length used for each register with VLA mode, I believe it
would be terrible for RA...
 
# Back to this patch
 
Ju-Zhe has suggested we could reuse VLA pattern for VLS mode, I
considered that before, however, I feel that might not be friendly
with combine pass, because our VLA patterns are kind of more complicated than
the plain VLS pattern, BUT I believe we will improve that in the near
future :P so I think that it should be reasonable just to use the same
pattern - then we could just add VLS mode to the mode iterator to
support that without magic mode changing, I can understand that really
seems very unsafe.
  
Jeff Law June 12, 2023, 11:34 p.m. UTC | #17
On 5/30/23 17:37, juzhe.zhong wrote:
> Oh. I forgot we need vl/vtype regnum dependency. It seems extending vla 
> pattern with vls mode is unavoidable. So.... I think we can 
> define_insn_and_split and split instructions after RA so that we can get 
> benefits from general rtl code patterns.
So you're suggesting to represent them in the relatively simple VLS form 
until some split pass (split1?) and then lower the VLS form into the VLA form?

Isn't that option #2 from Kito's proposal?    Essentially at split1 we 
convert the VLS into VLA and it stays VLA from that point through 
codegen.  I know Kito said it "might be weird and not natural", but in 
my mind I can easily see the conversion step as a lowering, which very much 
matches what we often are trying to do with define_insn_and_split.   The 
downside is we end up duplicating a ton of patterns.

I think one of Kito's proposals was to add an iterator to the existing 
sets so that the VLA patterns match.  Presumably we'd then key a 
lowering step based on the mode?

Where do we stand on this now?  Waiting on Kito to resubmit a VLS->VLA 
lowering via define_insn_and_split or do we think Kito's approach is viable?

Is this blocking any work?

jeff
  
Jeff Law June 13, 2023, 4:10 p.m. UTC | #18
On 6/12/23 17:39, juzhe.zhong wrote:
> I take this work which is very important for VLA SLP too.  I will 
> support VLS after I finish VLA SLP.
OK.  I think I'll mark Kito's patch as dropped and we'll wait for your 
implementation in this space.

jeff
  

Patch

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index 19a4f9fb3db..f897e560667 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -318,6 +318,9 @@  RVV_TUPLE_PARTIAL_MODES (6)
 RVV_TUPLE_PARTIAL_MODES (7)
 RVV_TUPLE_PARTIAL_MODES (8)
 
+/* VLS modes.  */
+VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
+
 /* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can
    be 65536 for a single vector register which means the vector mode in
    GCC can be maximum = 65536 * 8 bits (LMUL=8).
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0462f96c8d5..56f714bbfb5 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -261,6 +261,10 @@  enum frm_field_enum
 opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
 					 poly_uint64);
 unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
+machine_mode minimal_vla_mode (machine_mode);
+void
+vls_insn_expander (unsigned, int, rtx *, machine_mode, machine_mode);
+bool vls_mode_p (machine_mode);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a5715bb466c..f9fb8cd9be4 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1697,4 +1697,71 @@  expand_vcond (rtx *ops)
     gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
 }
 
+/* Return the minimal containable VLA mode for MODE.  */
+
+machine_mode
+minimal_vla_mode (machine_mode mode)
+{
+  gcc_assert (GET_MODE_NUNITS (mode).is_constant ());
+  unsigned type_size = GET_MODE_NUNITS (mode).to_constant ();
+  poly_uint64 nunits = type_size * poly_uint64 (1, 1);
+  opt_machine_mode vla_mode = get_vector_mode (GET_MODE_INNER (mode), nunits);
+  return vla_mode.else_void ();
+}
+
+/* Return true if MODE is true VLS mode.  */
+
+bool
+vls_mode_p (machine_mode mode)
+{
+  switch (mode)
+    {
+    case E_V4SImode:
+    case E_V2DImode:
+    case E_V8HImode:
+    case E_V16QImode:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Convert all OPERANDS to VLA_MODE.  */
+
+static void
+update_vls_mode (machine_mode vla_mode, unsigned n_operands, rtx *operands)
+{
+  unsigned i;
+  for (i = 0; i < n_operands; ++i)
+    {
+      switch (GET_CODE (operands[i]))
+	{
+	case REG:
+	  operands[i] = gen_rtx_REG (vla_mode, REGNO (operands[i]));
+	  break;
+	case MEM:
+	  operands[i] = change_address (operands[i], vla_mode, NULL_RTX);
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+}
+
+/* Expand VLS operation to VLA pattern.  */
+
+void
+vls_insn_expander (unsigned icode, int op_num, rtx *operands,
+		   machine_mode vls_mode, machine_mode vla_mode)
+{
+  update_vls_mode (vla_mode, /* n_operands */ op_num, operands);
+
+  poly_uint16 nunit = GET_MODE_NUNITS (vls_mode);
+
+  gcc_assert (nunit.is_constant ());
+
+  riscv_vector::emit_nonvlmax_insn (icode, op_num, operands,
+				    GEN_INT (nunit.to_constant ()));
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3954fc07a8b..2e14f2e0d53 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1034,7 +1034,8 @@  riscv_v_ext_tuple_mode_p (machine_mode mode)
 static bool
 riscv_v_ext_mode_p (machine_mode mode)
 {
-  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode);
+  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
+	 || riscv_vector::vls_mode_p (mode);
 }
 
 /* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
@@ -1043,7 +1044,7 @@  riscv_v_ext_mode_p (machine_mode mode)
 poly_int64
 riscv_v_adjust_nunits (machine_mode mode, int scale)
 {
-  if (riscv_v_ext_mode_p (mode))
+  if (riscv_v_ext_mode_p (mode) && !riscv_vector::vls_mode_p (mode))
     return riscv_vector_chunks * scale;
   return scale;
 }
@@ -6059,6 +6060,22 @@  riscv_register_move_cost (machine_mode mode,
 static unsigned int
 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
 {
+  if (riscv_vector::vls_mode_p (mode))
+    {
+      if (TARGET_MIN_VLEN)
+	{
+	  unsigned min_byte_per_vector_register = TARGET_MIN_VLEN / 8;
+	  unsigned mode_size = GET_MODE_SIZE (mode).to_constant ();
+
+	  if (min_byte_per_vector_register >= mode_size)
+	    return 1;
+
+	  return mode_size / min_byte_per_vector_register;
+	}
+      else
+	return 1;
+    }
+
   if (riscv_v_ext_vector_mode_p (mode))
     {
       /* Handle fractional LMUL, it only occupy part of vector register but
@@ -6148,6 +6165,10 @@  riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 	 but for mask vector register, register numbers can be any number. */
       int lmul = 1;
       machine_mode rvv_mode = mode;
+
+      if (riscv_vector::vls_mode_p (mode))
+	rvv_mode = riscv_vector::minimal_vla_mode (mode);
+
       if (riscv_v_ext_tuple_mode_p (rvv_mode))
 	rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
       poly_int64 size = GET_MODE_SIZE (rvv_mode);
@@ -7223,6 +7244,8 @@  riscv_vector_alignment (const_tree type)
 poly_uint64
 riscv_regmode_natural_size (machine_mode mode)
 {
+  if (riscv_vector::vls_mode_p (mode))
+    return GET_MODE_SIZE (mode);
   /* The natural size for RVV data modes is one RVV data vector,
      and similarly for predicates.  We can't independently modify
      anything smaller than that.  */
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 70fb5b80b1b..650f2651c7d 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -84,6 +84,10 @@  (define_c_enum "unspec" [
   UNSPEC_MODIFY_VL
 ])
 
+(define_mode_iterator VLS [
+  V2DI V4SI V8HI V16QI
+])
+
 (define_mode_iterator V [
   (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
   (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
@@ -976,6 +980,7 @@  (define_mode_attr VM [
   (VNx2x4DF "VNx4BI") (VNx3x4DF "VNx4BI") (VNx4x4DF "VNx4BI")
   (VNx2x2DF "VNx2BI") (VNx3x2DF "VNx2BI") (VNx4x2DF "VNx2BI") (VNx5x2DF "VNx2BI") (VNx6x2DF "VNx2BI") (VNx7x2DF "VNx2BI") (VNx8x2DF "VNx2BI")
   (VNx2x1DF "VNx1BI") (VNx3x1DF "VNx1BI") (VNx4x1DF "VNx1BI") (VNx5x1DF "VNx1BI") (VNx6x1DF "VNx1BI") (VNx7x1DF "VNx1BI") (VNx8x1DF "VNx1BI")
+  (V2DI "V2BI") (V4SI "V4BI") (V8HI "V8BI") (V16QI "V16BI")
 ])
 
 (define_mode_attr vm [
@@ -1003,6 +1008,7 @@  (define_mode_attr vel [
   (VNx1DI "di") (VNx2DI "di") (VNx4DI "di") (VNx8DI "di") (VNx16DI "di")
   (VNx1SF "sf") (VNx2SF "sf") (VNx4SF "sf") (VNx8SF "sf") (VNx16SF "sf") (VNx32SF "sf")
   (VNx1DF "df") (VNx2DF "df") (VNx4DF "df") (VNx8DF "df") (VNx16DF "df")
+  (V2DI "di") (V4SI "si") (V8HI "hi") (V16QI "qi")
 ])
 
 (define_mode_attr VSUBEL [
diff --git a/gcc/config/riscv/vector-vls.md b/gcc/config/riscv/vector-vls.md
new file mode 100644
index 00000000000..af7e7a6c726
--- /dev/null
+++ b/gcc/config/riscv/vector-vls.md
@@ -0,0 +1,64 @@ 
+;; Machine description for vector length specific type operation with
+;; RISC-V 'V' Extension for GNU compiler.
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VLS 0 "nonimmediate_operand")
+	(match_operand:VLS 1 "vector_move_operand"))]
+  "TARGET_VECTOR"
+{
+  // TODO: Only allow register and memory now, we should allow legal
+  //       vector_const too.
+  if (MEM_P (operands[0]) && MEM_P (operands[1]))
+    operands[1] = force_reg (GET_MODE(operands[1]), operands[1]);
+})
+
+(define_insn_and_split "*mov<mode>"
+  [(set (match_operand:VLS 0 "nonimmediate_operand" "=vr,vr, m,vr")
+	(match_operand:VLS 1 "vector_move_operand"  " vr, m,vr,vi"))]
+  "TARGET_VECTOR &&
+   (register_operand (operands[0], <MODE>mode)
+    || register_operand (operands[1], <MODE>mode))"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
+  riscv_vector::vls_insn_expander (
+    code_for_pred_mov (vla_mode), riscv_vector::RVV_UNOP, operands,
+    <MODE>mode, vla_mode);
+  DONE;
+})
+
+(define_insn_and_split "<optab><mode>3"
+  [(set (match_operand:VLS 0 "register_operand" "=vr")
+	(any_int_binop_no_shift:VLS
+	  (match_operand:VLS 1 "register_operand" "vr")
+	  (match_operand:VLS 2 "register_operand" "vr")))]
+  "TARGET_VECTOR"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  machine_mode vla_mode = riscv_vector::minimal_vla_mode (<MODE>mode);
+  riscv_vector::vls_insn_expander (
+    code_for_pred (<CODE>, vla_mode), riscv_vector::RVV_BINOP,
+    operands, <MODE>mode, vla_mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index cd696da5d89..e3fd0807b22 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -25,6 +25,7 @@ 
 ;; - Intrinsics (https://github.com/riscv/rvv-intrinsic-doc)
 ;; - Auto-vectorization (autovec.md)
 ;; - Combine optimization (TBD)
+;; - VLS patterns (vector-vls.md)
 
 (include "vector-iterators.md")
 
@@ -8407,3 +8408,4 @@  (define_split
 )
 
 (include "autovec.md")
+(include "vector-vls.md")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index bf03570b9cc..f6c56a63ada 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -77,5 +77,9 @@  foreach op $AUTOVEC_TEST_OPTS {
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
 	"-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS
 
+# VLS test
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vls/*.\[cS\]]] \
+	"" $CFLAGS
+
 # All done.
 dg-finish
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
new file mode 100644
index 00000000000..0114c124646
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-template.h
@@ -0,0 +1,18 @@ 
+#include "vls-types.h"
+#define __BINOP(NAME, OP, VLS_TYPE, SCALAR_TYPE)		\
+void binop_##NAME##VLS_TYPE					\
+  (VLS_TYPE *src1, VLS_TYPE *src2, VLS_TYPE *dst)		\
+{								\
+    *dst = *src1 OP *src2;					\
+}
+
+#define BINOP(VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (ADD, +, VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (MUL, *, VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (DIV, /, VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (MOD, %, VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (AND, &, VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (IOR, |, VLS_TYPE, SCALAR_TYPE)				\
+__BINOP (XOR, ^, VLS_TYPE, SCALAR_TYPE)
+
+FOR_EACH_VLS_TYPE(BINOP)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
new file mode 100644
index 00000000000..78c1a19cfbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-v.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
+
+#include "binop-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
+/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
new file mode 100644
index 00000000000..bca56ba32a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve32x.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
+
+#include "binop-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
+/* { dg-final { scan-assembler-times {vadd\.vv} 6} } */
+/* { dg-final { scan-assembler-times {vmul\.vv} 6} } */
+/* { dg-final { scan-assembler-times {vdiv\.vv} 3} } */
+/* { dg-final { scan-assembler-times {vdivu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {vrem\.vv} 3} } */
+/* { dg-final { scan-assembler-times {vremu\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {vand\.vv} 6} } */
+/* { dg-final { scan-assembler-times {vor\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv} 6} } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
new file mode 100644
index 00000000000..45dcad12a93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/binop-zve64x.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
+
+#include "binop-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
+/* { dg-final { scan-assembler-times {vadd\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vmul\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vdiv\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vdivu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vrem\.vv} 4} } */
+/* { dg-final { scan-assembler-times {vremu\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {vand\.vv} 8} } */
+/* { dg-final { scan-assembler-times {vor\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {vxor\.vv} 8} } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
new file mode 100644
index 00000000000..9ea0c7cb5dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-template.h
@@ -0,0 +1,8 @@ 
+#include "vls-types.h"
+#define LOAD_STORE(VLS_TYPE, SCALAR_TYPE)			\
+void load_store_##VLS_TYPE (VLS_TYPE *src, VLS_TYPE *dst)	\
+{								\
+    *dst = *src;						\
+}
+
+FOR_EACH_VLS_TYPE(LOAD_STORE)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
new file mode 100644
index 00000000000..b8adcea70d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-v.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
+
+#include "load-store-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
+/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
new file mode 100644
index 00000000000..ef3426d00a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve32x.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
+
+#include "load-store-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
+/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-not {vle64\.v} } } */
+/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
+/* { dg-final { scan-assembler-not {vse64\.v} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
new file mode 100644
index 00000000000..e03220d89f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/load-store-zve64x.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
+
+#include "load-store-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
+/* { dg-final { scan-assembler-times {vle8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vle64\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse8\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse16\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vse64\.v} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
new file mode 100644
index 00000000000..e328a42204c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-template.h
@@ -0,0 +1,13 @@ 
+#include "vls-types.h"
+
+#define MOVE(VLS_TYPE, SCALAR_TYPE)				\
+void move_##VLS_TYPE ()						\
+{								\
+    register VLS_TYPE src##VLS_TYPE __asm__ ("v0");		\
+    register VLS_TYPE dst##VLS_TYPE __asm__ ("v8");		\
+    __asm__ volatile ("#def" : "=vr"(src##VLS_TYPE));		\
+    dst##VLS_TYPE = src##VLS_TYPE;				\
+    __asm__ volatile ("#use" : : "vr"(dst##VLS_TYPE));		\
+}
+
+FOR_EACH_VLS_TYPE(MOVE)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
new file mode 100644
index 00000000000..91c89df098e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-v.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64" } */
+
+#include "move-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m1} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m1} } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
new file mode 100644
index 00000000000..175986edf15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve32x.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve32x -mabi=ilp32" } */
+
+#include "move-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m4} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m4} } } */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,2,e64,m4} } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.v} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
new file mode 100644
index 00000000000..2e574b1f3ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/move-zve64x.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gc_zve64x -mabi=ilp32" } */
+
+#include "move-template.h"
+
+/* { dg-final { scan-assembler {vsetivli\s+zero,16,e8,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e16,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,4,e32,m2} } } */
+/* { dg-final { scan-assembler {vsetivli\s+zero,2,e64,m2} } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.v} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
new file mode 100644
index 00000000000..302823b583f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vls/vls-types.h
@@ -0,0 +1,42 @@ 
+#ifndef VLS_TYPES_H
+#define VLS_TYPES_H
+#include <stdint-gcc.h>
+
+typedef  int8_t int8x16_t __attribute__ ((vector_size (16)));
+typedef int16_t int16x8_t __attribute__ ((vector_size (16)));
+typedef int32_t int32x4_t __attribute__ ((vector_size (16)));
+typedef int64_t int64x2_t __attribute__ ((vector_size (16)));
+
+typedef  uint8_t uint8x16_t __attribute__ ((vector_size (16)));
+typedef uint16_t uint16x8_t __attribute__ ((vector_size (16)));
+typedef uint32_t uint32x4_t __attribute__ ((vector_size (16)));
+typedef uint64_t uint64x2_t __attribute__ ((vector_size (16)));
+
+
+#if __riscv_v_elen == 32
+
+#define FOR_EACH_VLS_TYPE(FUNC) \
+    FUNC (int8x16_t, int8_t) \
+    FUNC (int16x8_t, int16_t) \
+    FUNC (int32x4_t, int32_t) \
+    FUNC (uint8x16_t, uint8_t) \
+    FUNC (uint16x8_t, uint16_t) \
+    FUNC (uint32x4_t, uint32_t)
+
+#elif __riscv_v_elen == 64
+
+#define FOR_EACH_VLS_TYPE(FUNC) \
+    FUNC (int8x16_t, int8_t) \
+    FUNC (int16x8_t, int16_t) \
+    FUNC (int32x4_t, int32_t) \
+    FUNC (int64x2_t, int64_t) \
+    FUNC (uint8x16_t, uint8_t) \
+    FUNC (uint16x8_t, uint16_t) \
+    FUNC (uint32x4_t, uint32_t) \
+    FUNC (uint64x2_t, uint64_t)
+
+#else
+#error "zve* or v extension is required."
+#endif
+
+#endif