s390: Add LEN_LOAD/LEN_STORE support.

Message ID 4a42f530-cd67-6bd8-3f3d-1e7a68bffea1@linux.ibm.com
State New
Headers
Series s390: Add LEN_LOAD/LEN_STORE support. |

Commit Message

Robin Dapp Feb. 2, 2023, 8:43 a.m. UTC
  Hi,

this patch adds LEN_LOAD/LEN_STORE support for z14 and newer.
It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
optabs.

It also includes various vll/vstl testcases adapted from Kewen Lin's patch
for Power.

Bootstrapped and regtested on z13-z16.

Is it OK?

Regards
 Robin

gcc/ChangeLog:

	* config/s390/predicates.md (vll_bias_operand): Add -1 bias.
	* config/s390/s390.cc (s390_option_override_internal): Make
	partial vector usage the default from z13 on.
	* config/s390/vector.md (len_load_v16qi): Add.
	(len_store_v16qi): Add.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/s390.exp: Add partial subdirectory.
	* gcc.target/s390/vector/vec-nopeel-2.c: Change test
	expectation.
	* lib/target-supports.exp: Add s390.
	* gcc.target/s390/vector/partial/s390-vec-length-1.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-2.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-3.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-7.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-1.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-2.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-3.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-7.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-small.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length.h: New test.
---
 gcc/config/s390/predicates.md                 |  8 ++++
 gcc/config/s390/s390.cc                       |  8 ++++
 gcc/config/s390/vector.md                     | 39 +++++++++++++++++++
 gcc/testsuite/gcc.target/s390/s390.exp        |  3 ++
 .../s390/vector/partial/s390-vec-length-1.h   | 18 +++++++++
 .../s390/vector/partial/s390-vec-length-2.h   | 18 +++++++++
 .../s390/vector/partial/s390-vec-length-3.h   | 31 +++++++++++++++
 .../s390/vector/partial/s390-vec-length-7.h   | 17 ++++++++
 .../vector/partial/s390-vec-length-epil-1.c   | 13 +++++++
 .../vector/partial/s390-vec-length-epil-2.c   | 13 +++++++
 .../vector/partial/s390-vec-length-epil-3.c   | 16 ++++++++
 .../vector/partial/s390-vec-length-epil-7.c   | 11 ++++++
 .../partial/s390-vec-length-epil-run-1.c      |  7 ++++
 .../partial/s390-vec-length-epil-run-2.c      |  7 ++++
 .../partial/s390-vec-length-epil-run-3.c      |  7 ++++
 .../partial/s390-vec-length-epil-run-7.c      |  7 ++++
 .../vector/partial/s390-vec-length-full-1.c   | 12 ++++++
 .../vector/partial/s390-vec-length-full-2.c   | 12 ++++++
 .../vector/partial/s390-vec-length-full-3.c   | 13 +++++++
 .../vector/partial/s390-vec-length-full-7.c   | 14 +++++++
 .../partial/s390-vec-length-full-run-1.c      |  7 ++++
 .../partial/s390-vec-length-full-run-2.c      |  7 ++++
 .../partial/s390-vec-length-full-run-3.c      |  7 ++++
 .../partial/s390-vec-length-full-run-7.c      |  7 ++++
 .../vector/partial/s390-vec-length-run-1.h    | 34 ++++++++++++++++
 .../vector/partial/s390-vec-length-run-2.h    | 36 +++++++++++++++++
 .../vector/partial/s390-vec-length-run-3.h    | 34 ++++++++++++++++
 .../vector/partial/s390-vec-length-run-7.h    | 16 ++++++++
 .../vector/partial/s390-vec-length-small.c    | 15 +++++++
 .../s390/vector/partial/s390-vec-length.h     | 14 +++++++
 .../gcc.target/s390/vector/vec-nopeel-2.c     |  2 +-
 gcc/testsuite/lib/target-supports.exp         |  3 +-
 32 files changed, 454 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-1.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-2.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-3.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-7.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-7.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-7.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-1.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-2.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-3.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-7.h
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-small.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length.h
  

Comments

Andreas Krebbel Feb. 13, 2023, 10:40 a.m. UTC | #1
On 2/2/23 09:43, Robin Dapp wrote:
> Hi,
> 
> this patch adds LEN_LOAD/LEN_STORE support for z14 and newer.
> It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
> optabs.
> 
> It also includes various vll/vstl testcases adapted from Kewen Lin's patch
> for Power.
> 
> Bootstrapped and regtested on z13-z16.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
> 	* config/s390/predicates.md (vll_bias_operand): Add -1 bias.
> 	* config/s390/s390.cc (s390_option_override_internal): Make
> 	partial vector usage the default from z13 on.
> 	* config/s390/vector.md (len_load_v16qi): Add.
> 	(len_store_v16qi): Add.

...

> +;
> +; Implement len_load/len_store optabs with vll/vstl.
> +(define_expand "len_load_v16qi"
> +  [(match_operand:V16QI 0 "register_operand")
> +   (match_operand:V16QI 1 "memory_operand")
> +   (match_operand:QI 2 "register_operand")
> +   (match_operand:QI 3 "vll_bias_operand")
> +  ]
> +  "TARGET_VX && TARGET_64BIT"
> +{
> +  rtx src1 = XEXP (operands[1], 0);
> +  rtx src = gen_reg_rtx (Pmode);
> +  emit_move_insn (src, src1);
> +  rtx mem = gen_rtx_MEM (BLKmode, src);

Do you really need a copy of the address register? Couldn't you just do a
src = adjust_address (operands[1], BLKmode, 0);

> +
> +  rtx len = gen_lowpart (SImode, operands[2]);
> +  emit_insn (gen_vllv16qi (operands[0], len, mem));

You create a paradoxical subreg of the QImode input but vll actually uses the whole 32 bit value.
Couldn't we end up with uninitialized bytes being used as part of the length then? Do we need a
zero-extend here?

Bye,

Andreas
  
Robin Dapp Feb. 27, 2023, 10:13 a.m. UTC | #2
> Do you really need a copy of the address register? Couldn't you just do a
> src = adjust_address (operands[1], BLKmode, 0);
> You create a paradoxical subreg of the QImode input but vll actually
> uses the whole 32 bit value. Couldn't we end up with uninitialized
> bytes being used as part of the length then? Do we need a zero-extend
> here?

v2 attached with these problems addressed.

Testsuite and bootstrap as before.

Regards
 Robin
  
Andreas Krebbel Feb. 27, 2023, 12:28 p.m. UTC | #3
On 2/27/23 11:13, Robin Dapp wrote:
>> Do you really need a copy of the address register? Couldn't you just do a
>> src = adjust_address (operands[1], BLKmode, 0);
>> You create a paradoxical subreg of the QImode input but vll actually
>> uses the whole 32 bit value. Couldn't we end up with uninitialized
>> bytes being used as part of the length then? Do we need a zero-extend
>> here?
> 
> v2 attached with these problems addressed.
> 
> Testsuite and bootstrap as before.

Ok. Thanks!

Andreas
  

Patch

diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 430cf6edfd6c..ebed3f0fed40 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -606,3 +606,11 @@  (define_predicate "subreg_register_operand"
        (and (match_code "reg")
 	    (match_test "reload_completed || reload_in_progress")
 	    (match_test "register_operand (op, GET_MODE (op))"))))
+
+; Bias value for LEN_LOAD and LEN_STORE.  The bias will be added to the
+; length (in bytes for s390) to be loaded.  vll/vstl expect the lowest byte
+; to load while LEN_LOAD/LEN_STORE use the actual length in bytes.  This implies
+; that we cannot load a length of 0.
+(define_predicate "vll_bias_operand"
+  (and (match_code "const_int")
+       (match_test "op == CONSTM1_RTX (QImode)")))
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index f5c75395cf38..c64a74c04022 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -15782,6 +15782,14 @@  s390_option_override_internal (struct gcc_options *opts,
 
   /* Use the alternative scheduling-pressure algorithm by default.  */
   SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
+
+  /* Allow simple vector masking using vll/vstl for epilogues.  */
+  if (TARGET_Z13)
+    SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 1);
+  else
+    SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 0);
+
+  /* Do not vectorize loops with a low trip count for now.  */
   SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
 
   /* Set the default alignment.  */
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index dde74afc869e..3278e4bf9b11 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -2947,6 +2947,45 @@  (define_insn_and_split "*bswap<mode>"
   ""
   [(set_attr "op_type"      "*,VRX,VRX")])
 
+;
+; Implement len_load/len_store optabs with vll/vstl.
+(define_expand "len_load_v16qi"
+  [(match_operand:V16QI 0 "register_operand")
+   (match_operand:V16QI 1 "memory_operand")
+   (match_operand:QI 2 "register_operand")
+   (match_operand:QI 3 "vll_bias_operand")
+  ]
+  "TARGET_VX && TARGET_64BIT"
+{
+  rtx src1 = XEXP (operands[1], 0);
+  rtx src = gen_reg_rtx (Pmode);
+  emit_move_insn (src, src1);
+  rtx mem = gen_rtx_MEM (BLKmode, src);
+
+  rtx len = gen_lowpart (SImode, operands[2]);
+  emit_insn (gen_vllv16qi (operands[0], len, mem));
+  DONE;
+})
+
+(define_expand "len_store_v16qi"
+  [(match_operand:V16QI 0 "memory_operand")
+   (match_operand:V16QI 1 "register_operand")
+   (match_operand:QI 2 "register_operand")
+   (match_operand:QI 3 "vll_bias_operand")
+  ]
+  "TARGET_VX && TARGET_64BIT"
+{
+  rtx dst1 = XEXP (operands[0], 0);
+  rtx dst = gen_reg_rtx (Pmode);
+  emit_move_insn (dst, dst1);
+  rtx mem = gen_rtx_MEM (BLKmode, dst);
+
+  rtx len = gen_lowpart (SImode, operands[2]);
+  emit_insn (gen_vstlv16qi (operands[1], len, mem));
+  DONE;
+});;
+
+
 ; reduc_smin
 ; reduc_smax
 ; reduc_umin
diff --git a/gcc/testsuite/gcc.target/s390/s390.exp b/gcc/testsuite/gcc.target/s390/s390.exp
index cae7ea6b2121..440247b0ca4a 100644
--- a/gcc/testsuite/gcc.target/s390/s390.exp
+++ b/gcc/testsuite/gcc.target/s390/s390.exp
@@ -236,6 +236,9 @@  dg-runtest [lsort [prune [glob -nocomplain $srcdir/$subdir/*.{c,S}] \
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.{c,S}]] \
 	"" $DEFAULT_CFLAGS
 
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/partial/*.{c,S}]] \
+	"" $DEFAULT_CFLAGS
+
 gfortran-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.F90]] \
 	"" $DEFAULT_FFLAGS
 
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-1.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-1.h
new file mode 100644
index 000000000000..5a27c32a6d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-1.h
@@ -0,0 +1,18 @@ 
+#include "s390-vec-length.h"
+
+/* Test the case loop iteration is known.  */
+
+#define N 127
+
+#define test(TYPE)                                                             \
+  extern TYPE a_##TYPE[N];                                                     \
+  extern TYPE b_##TYPE[N];                                                     \
+  extern TYPE c_##TYPE[N];                                                     \
+  void __attribute__ ((noinline, noclone)) test##TYPE ()                       \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    for (i = 0; i < N; i++)                                                    \
+      c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i];                                 \
+  }
+
+TEST_ALL (test)
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-2.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-2.h
new file mode 100644
index 000000000000..a9a188c4d16f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-2.h
@@ -0,0 +1,18 @@ 
+#include "s390-vec-length.h"
+
+/* Test the case loop iteration is unknown.  */
+
+#define N 255
+
+#define test(TYPE)                                                             \
+  extern TYPE a_##TYPE[N];                                                     \
+  extern TYPE b_##TYPE[N];                                                     \
+  extern TYPE c_##TYPE[N];                                                     \
+  void __attribute__ ((noinline, noclone)) test##TYPE (unsigned int n)         \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    for (i = 0; i < n; i++)                                                    \
+      c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i];                                 \
+  }
+
+TEST_ALL (test)
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-3.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-3.h
new file mode 100644
index 000000000000..30d7c383d727
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-3.h
@@ -0,0 +1,31 @@ 
+#include "s390-vec-length.h"
+
+/* Test the case loop iteration less than VF.  */
+
+/* For char.  */
+#define N_uint8_t 15
+#define N_int8_t 15
+/* For short.  */
+#define N_uint16_t 6
+#define N_int16_t 6
+/* For int/float.  */
+#define N_uint32_t 3
+#define N_int32_t 3
+#define N_float 3
+/* For long/double.  */
+#define N_uint64_t 1
+#define N_int64_t 1
+#define N_double 1
+
+#define test(TYPE)                                                             \
+  extern TYPE a_##TYPE[N_##TYPE];                                              \
+  extern TYPE b_##TYPE[N_##TYPE];                                              \
+  extern TYPE c_##TYPE[N_##TYPE];                                              \
+  void __attribute__ ((noinline, noclone)) test##TYPE ()                       \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    for (i = 0; i < N_##TYPE; i++)                                             \
+      c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i];                                 \
+  }
+
+TEST_ALL (test)
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-7.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-7.h
new file mode 100644
index 000000000000..e441bf6ed08b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-7.h
@@ -0,0 +1,17 @@ 
+#include "s390-vec-length.h"
+
+#define N 64
+#define START 1
+#define END 59
+
+#define test(TYPE)                                                             \
+  TYPE x_##TYPE[N] __attribute__((aligned(16)));                                \
+  void __attribute__((noinline, noclone)) test_npeel_##TYPE() {                \
+    TYPE v = 0;                                                                \
+    for (unsigned int i = START; i < END; i++) {                               \
+      x_##TYPE[i] = v;                                                         \
+      v += 1;                                                                  \
+    }                                                                          \
+  }
+
+TEST_ALL (test)
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-1.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-1.c
new file mode 100644
index 000000000000..6caf09ed085c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-1.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */
+
+/* Test that we only vectorize the epilogue with vector load/store with length,
+   the main body still uses normal vector load/store.  */
+
+#include "s390-vec-length-1.h"
+
+/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
+/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-2.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-2.c
new file mode 100644
index 000000000000..5ec95f5e334a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-2.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */
+
+/* Test that we only vectorize the epilogue with vector load/store with length,
+   the main body still uses normal vector load/store.  */
+
+#include "s390-vec-length-2.h"
+
+/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
+/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-3.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-3.c
new file mode 100644
index 000000000000..ce96929e3402
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-3.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */
+
+/* Test that we only vectorize the epilogue with vector load/store with length,
+   the main body still uses normal vector load/store.  */
+
+#include "s390-vec-length-3.h"
+
+/* { dg-final { scan-assembler-not   {\mvl\M} } } */
+/* { dg-final { scan-assembler-not   {\mvst\M} } } */
+/* 64bit types get completely unrolled, so only check the others.  */
+/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
+/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-7.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-7.c
new file mode 100644
index 000000000000..d7ef9b02856d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-7.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -ffast-math" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */
+
+/* Test that we only vectorize the epilogue with vector load/store with length,
+   the main body still uses normal vector load/store.  */
+
+#include "s390-vec-length-7.h"
+
+/* { dg-final { scan-assembler-times {\mvstl\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
new file mode 100644
index 000000000000..3adbfdf47cad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
+
+#include "s390-vec-length-run-1.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c
new file mode 100644
index 000000000000..ba7fa1039e96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
+
+#include "s390-vec-length-run-2.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
new file mode 100644
index 000000000000..9595175aed4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
+
+#include "s390-vec-length-run-3.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c
new file mode 100644
index 000000000000..a9434bf484d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */
+
+#include "s390-vec-length-run-7.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-1.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-1.c
new file mode 100644
index 000000000000..df732525e083
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+/* Test that the loop body uses vector load/store with length,
+   there should not be any epilogues.  */
+
+#include "s390-vec-length-1.h"
+
+/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
+/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-2.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-2.c
new file mode 100644
index 000000000000..592df1d3f789
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+/* Test that the loop body uses vector load/store with length,
+   there should not be any epilogues.  */
+
+#include "s390-vec-length-2.h"
+
+/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
+/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-3.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-3.c
new file mode 100644
index 000000000000..d08e3ab360e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-3.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2 --param=min-vect-loop-bound=0" } */
+
+/* Test that the loop body uses vector load/store with length,
+   there should not be any epilogues.  */
+
+#include "s390-vec-length-3.h"
+
+/* 64bit types get completely unrolled, so only check the others.  */
+/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
+/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-7.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-7.c
new file mode 100644
index 000000000000..b5e35465e5c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-7.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+/* Test that the loop body uses vector load/store with length,
+   there should not be any epilogues.  */
+
+#include "s390-vec-length-7.h"
+
+/* Each type should have one vstl but we do not currently vectorize the
+   float and double variants and the [u]int64_t ones which do not require
+   partial vectors.  */
+/* { dg-final { scan-assembler-times {\mvstl\M} 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c
new file mode 100644
index 000000000000..eb1f4f701e6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+#include "s390-vec-length-run-1.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c
new file mode 100644
index 000000000000..a9ea15285038
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+#include "s390-vec-length-run-2.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c
new file mode 100644
index 000000000000..320a84d4705f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+#include "s390-vec-length-run-3.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c
new file mode 100644
index 000000000000..a3fd59a2c3df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c
@@ -0,0 +1,7 @@ 
+/* { dg-do run { target { lp64 && s390_vx } } } */
+/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */
+
+#include "s390-vec-length-run-7.h"
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-1.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-1.h
new file mode 100644
index 000000000000..ba9e113dc640
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-1.h
@@ -0,0 +1,34 @@ 
+#include "s390-vec-length-1.h"
+
+#define decl(TYPE)                                                             \
+  TYPE a_##TYPE[N];                                                            \
+  TYPE b_##TYPE[N];                                                            \
+  TYPE c_##TYPE[N];
+
+#define run(TYPE)                                                              \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    for (i = 0; i < N; i++)                                                    \
+      {                                                                        \
+	a_##TYPE[i] = i * 2 + 1;                                               \
+	b_##TYPE[i] = i % 2 - 2;                                               \
+      }                                                                        \
+    test##TYPE ();                                                             \
+    for (i = 0; i < N; i++)                                                    \
+      {                                                                        \
+	TYPE a1 = i * 2 + 1;                                                   \
+	TYPE b1 = i % 2 - 2;                                                   \
+	TYPE exp_c = a1 + b1;                                                  \
+	if (c_##TYPE[i] != exp_c)                                              \
+	  __builtin_abort ();                                                  \
+      }                                                                        \
+  }
+
+TEST_ALL (decl)
+
+int
+main (void)
+{
+  TEST_ALL (run)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-2.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-2.h
new file mode 100644
index 000000000000..87582cefe7de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-2.h
@@ -0,0 +1,36 @@ 
+#include "s390-vec-length-2.h"
+
+#define decl(TYPE)                                                             \
+  TYPE a_##TYPE[N];                                                            \
+  TYPE b_##TYPE[N];                                                            \
+  TYPE c_##TYPE[N];
+
+#define N1 195
+
+#define run(TYPE)                                                              \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    for (i = 0; i < N; i++)                                                    \
+      {                                                                        \
+	a_##TYPE[i] = i * 2 + 1;                                               \
+	b_##TYPE[i] = i % 2 - 2;                                               \
+      }                                                                        \
+    test##TYPE (N1);                                                           \
+    for (i = 0; i < N1; i++)                                                   \
+      {                                                                        \
+	TYPE a1 = i * 2 + 1;                                                   \
+	TYPE b1 = i % 2 - 2;                                                   \
+	TYPE exp_c = a1 + b1;                                                  \
+	if (c_##TYPE[i] != exp_c)                                              \
+	  __builtin_abort ();                                                  \
+      }                                                                        \
+  }
+
+TEST_ALL (decl)
+
+int
+main (void)
+{
+  TEST_ALL (run)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-3.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-3.h
new file mode 100644
index 000000000000..eef57f41dbb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-3.h
@@ -0,0 +1,34 @@ 
+#include "s390-vec-length-3.h"
+
+#define decl(TYPE)                                                             \
+  TYPE a_##TYPE[N_##TYPE];                                                     \
+  TYPE b_##TYPE[N_##TYPE];                                                     \
+  TYPE c_##TYPE[N_##TYPE];
+
+#define run(TYPE)                                                              \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    for (i = 0; i < N_##TYPE; i++)                                             \
+      {                                                                        \
+	a_##TYPE[i] = i * 2 + 1;                                               \
+	b_##TYPE[i] = i % 2 - 2;                                               \
+      }                                                                        \
+    test##TYPE ();                                                             \
+    for (i = 0; i < N_##TYPE; i++)                                             \
+      {                                                                        \
+	TYPE a1 = i * 2 + 1;                                                   \
+	TYPE b1 = i % 2 - 2;                                                   \
+	TYPE exp_c = a1 + b1;                                                  \
+	if (c_##TYPE[i] != exp_c)                                              \
+	  __builtin_abort ();                                                  \
+      }                                                                        \
+  }
+
+TEST_ALL (decl)
+
+int
+main (void)
+{
+  TEST_ALL (run)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-7.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-7.h
new file mode 100644
index 000000000000..39af9a64714d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-run-7.h
@@ -0,0 +1,16 @@ 
+#include "s390-vec-length-7.h"
+
+#define run(TYPE)                                                              \
+  {                                                                            \
+    unsigned int i = 0;                                                        \
+    test_npeel_##TYPE();                                                       \
+    for (int i = 0; i < N; ++i) {                                              \
+      if (x_##TYPE[i] != (i < START || i >= END ? 0 : (i - START)))            \
+        __builtin_abort();                                                     \
+    }                                                                          \
+  }
+
+int main() {
+  TEST_ALL(run)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-small.c b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-small.c
new file mode 100644
index 000000000000..f1d5c867f636
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-small.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { lp64 && s390_vx } } } */
+/* { dg-options "-mzarch -march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops" } */
+
+/* { dg-additional-options "--param=min-vect-loop-bound=0 --param=vect-partial-vector-usage=2" } */
+
+#define SZ 333
+
+void foo (char *restrict a, char *restrict b, char *restrict c, int n)
+{
+#pragma gcc unroll 0
+    for (int i = 0; i < 17; i++)
+          c[i] = a[i] + b[i];
+/* { dg-final { scan-assembler-times "lhi\t%r\[0-9\]*,0\n" 1 } } */
+}
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length.h b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length.h
new file mode 100644
index 000000000000..83418b0b6418
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length.h
@@ -0,0 +1,14 @@ 
+#include <stdint.h>
+
+#define TEST_ALL(T)                                                            \
+  T (int8_t)                                                                   \
+  T (uint8_t)                                                                  \
+  T (int16_t)                                                                  \
+  T (uint16_t)                                                                 \
+  T (int32_t)                                                                  \
+  T (uint32_t)                                                                 \
+  T (int64_t)                                                                  \
+  T (uint64_t)                                                                 \
+  T (float)                                                                    \
+  T (double)
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-2.c b/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-2.c
index 9b67793e6f4f..03bf5ee60f10 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-2.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-2.c
@@ -9,4 +9,4 @@  void foo(int *restrict a, int *restrict b, unsigned int n)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" } } */
+/* { dg-final { scan-tree-dump "Vectorizing an unaligned access" "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 2a058c67c534..164a786cb713 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7740,7 +7740,8 @@  proc check_effective_target_vect_fully_masked { } {
 # @code{len_store} optabs.
 
 proc check_effective_target_vect_len_load_store { } {
-    return [check_effective_target_has_arch_pwr9]
+    return [expr { [check_effective_target_has_arch_pwr9]
+      || [check_effective_target_s390_vx] }]
 }
 
 # Return the value of parameter vect-partial-vector-usage specified for