[08/27] S390: Optimize strnlen and wcsnlen.

Message ID 1435319512-22245-9-git-send-email-stli@linux.vnet.ibm.com
State Superseded
Headers

Commit Message

Stefan Liebler June 26, 2015, 11:51 a.m. UTC
  This patch provides optimized versions of strnlen and wcsnlen with the z13
vector instructions.

ChangeLog:

	* sysdeps/s390/multiarch/strnlen-c.c: New File.
	* sysdeps/s390/multiarch/strnlen-vx.S: Likewise.
	* sysdeps/s390/multiarch/strnlen.c: Likewise.
	* sysdeps/s390/multiarch/wcsnlen-c.c: Likewise.
	* sysdeps/s390/multiarch/wcsnlen-vx.S: Likewise.
	* sysdeps/s390/multiarch/wcsnlen.c: Likewise.
	* sysdeps/s390/multiarch/Makefile (sysdep_routines): Add strnlen and
	wcsnlen functions.
	* sysdeps/s390/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add ifunc test for strnlen, wcsnlen.
	* wcsmbs/wcsnlen.c: Use WCSNLEN if defined.
	* string/test-strnlen.c: Add wcsnlen support.
	* wcsmbs/test-wcsnlen.c: New File.
	* wcsmbs/Makefile (strop-tests): Add wcsnlen.
	* benchtests/bench-strnlen.c: Add wcsnlen support.
	* benchtests/bench-wcsnlen.c: New File.
	* benchtests/Makefile (wcsmbs-bench): Add wcsnlen.
---
 benchtests/Makefile                      |   2 +-
 benchtests/bench-strnlen.c               |  71 +++++++++++-------
 benchtests/bench-wcsnlen.c               |  20 ++++++
 string/test-strnlen.c                    |  87 +++++++++++++---------
 sysdeps/s390/multiarch/Makefile          |   6 +-
 sysdeps/s390/multiarch/ifunc-impl-list.c |   3 +
 sysdeps/s390/multiarch/strnlen-c.c       |  30 ++++++++
 sysdeps/s390/multiarch/strnlen-vx.S      | 102 ++++++++++++++++++++++++++
 sysdeps/s390/multiarch/strnlen.c         |  29 ++++++++
 sysdeps/s390/multiarch/wcsnlen-c.c       |  25 +++++++
 sysdeps/s390/multiarch/wcsnlen-vx.S      | 119 +++++++++++++++++++++++++++++++
 sysdeps/s390/multiarch/wcsnlen.c         |  28 ++++++++
 wcsmbs/Makefile                          |   2 +-
 wcsmbs/test-wcsnlen.c                    |  20 ++++++
 wcsmbs/wcsnlen.c                         |   7 +-
 15 files changed, 488 insertions(+), 63 deletions(-)
 create mode 100644 benchtests/bench-wcsnlen.c
 create mode 100644 sysdeps/s390/multiarch/strnlen-c.c
 create mode 100644 sysdeps/s390/multiarch/strnlen-vx.S
 create mode 100644 sysdeps/s390/multiarch/strnlen.c
 create mode 100644 sysdeps/s390/multiarch/wcsnlen-c.c
 create mode 100644 sysdeps/s390/multiarch/wcsnlen-vx.S
 create mode 100644 sysdeps/s390/multiarch/wcsnlen.c
 create mode 100644 wcsmbs/test-wcsnlen.c
  

Comments

Ondrej Bilka June 26, 2015, 1:12 p.m. UTC | #1
On Fri, Jun 26, 2015 at 01:51:33PM +0200, Stefan Liebler wrote:
> This patch provides optimized versions of strnlen and wcsnlen with the z13
> vector instructions.
snip

> +	/* Find zero in 16 byte aligned loop.  */
> +.Lloop1:
> +	vl	%v16,0(%r5,%r2) /* Load s.  */
> +	aghi	%r5,16
> +	vfenezbs %v16,%v16,%v16	/* Find element not equal with zero search.  */
> +	je	.Lfound	/* Jump away if zero was found.  */
> +	clgrjhe	%r5,%r3,.Lfound /* current_len >= maxlen -> end.  */
> +	vl	%v16,0(%r5,%r2)
> +	aghi	%r5,16
> +	vfenezbs %v16,%v16,%v16
> +	je	.Lfound
> +	clgrjhe	%r5,%r3,.Lfound
> +	vl	%v16,0(%r5,%r2)
> +	aghi	%r5,16
> +	vfenezbs %v16,%v16,%v16
> +	je	.Lfound
> +	clgrjhe	%r5,%r3,.Lfound
> +	vl	%v16,0(%r5,%r2)
> +	aghi	%r5,16
> +	vfenezbs %v16,%v16,%v16
> +	je	.Lfound
> +	clgrjl	%r5,%r3,.Lloop1 /* maxlen not reached -> loop.  */

While I couldn't find any program that calls strnlen, and one could argue
for optimizing this for size, this also looks a bit suboptimal.

Here you could expand the prologue with four checks to make the loop 64-byte
aligned. Then you could check maxlen only once per 64 bytes.
  

Patch

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 5d4afab..295738e 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -36,7 +36,7 @@  string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
 		strcoll
-wcsmbs-bench := wcslen
+wcsmbs-bench := wcslen wcsnlen
 string-bench-all := $(string-bench) ${wcsmbs-bench}
 
 # We have to generate locales
diff --git a/benchtests/bench-strnlen.c b/benchtests/bench-strnlen.c
index 35266e5..cbdce75 100644
--- a/benchtests/bench-strnlen.c
+++ b/benchtests/bench-strnlen.c
@@ -17,17 +17,36 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 #define TEST_MAIN
-#define TEST_NAME "strnlen"
+#ifndef WIDE
+# define TEST_NAME "strnlen"
+#else
+# define TEST_NAME "wcsnlen"
+#endif /* WIDE */
 #include "bench-string.h"
 
-typedef size_t (*proto_t) (const char *, size_t);
-size_t simple_strnlen (const char *, size_t);
-
-IMPL (simple_strnlen, 0)
-IMPL (strnlen, 1)
+#ifndef WIDE
+# define STRNLEN strnlen
+# define CHAR char
+# define BIG_CHAR CHAR_MAX
+# define MIDDLE_CHAR 127
+# define SIMPLE_STRNLEN simple_strnlen
+#else
+# include <wchar.h>
+# define STRNLEN wcsnlen
+# define CHAR wchar_t
+# define BIG_CHAR WCHAR_MAX
+# define MIDDLE_CHAR 1121
+# define SIMPLE_STRNLEN simple_wcsnlen
+#endif /* WIDE */
+
+typedef size_t (*proto_t) (const CHAR *, size_t);
+size_t SIMPLE_STRNLEN (const CHAR *, size_t);
+
+IMPL (SIMPLE_STRNLEN, 0)
+IMPL (STRNLEN, 1)
 
 size_t
-simple_strnlen (const char *s, size_t maxlen)
+SIMPLE_STRNLEN (const CHAR *s, size_t maxlen)
 {
   size_t i;
 
@@ -36,7 +55,7 @@  simple_strnlen (const char *s, size_t maxlen)
 }
 
 static void
-do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
+do_one_test (impl_t *impl, const CHAR *s, size_t maxlen, size_t exp_len)
 {
   size_t len = CALL (impl, s, maxlen), i, iters = INNER_LOOP_ITERS;
   timing_t start, stop, cur;
@@ -66,18 +85,20 @@  do_test (size_t align, size_t len, size_t maxlen, int max_char)
 {
   size_t i;
 
-  align &= 7;
-  if (align + len >= page_size)
+  align &= 63;
+  if ((align + len) * sizeof (CHAR) >= page_size)
     return;
 
+  CHAR *buf = (CHAR *) (buf1);
+
   for (i = 0; i < len; ++i)
-    buf1[align + i] = 1 + 7 * i % max_char;
-  buf1[align + len] = 0;
+    buf[align + i] = 1 + 7 * i % max_char;
+  buf[align + len] = 0;
 
   printf ("Length %4zd, alignment %2zd:", len, align);
 
   FOR_EACH_IMPL (impl, 0)
-    do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen));
+    do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen));
 
   putchar ('\n');
 }
@@ -96,34 +117,34 @@  test_main (void)
 
   for (i = 1; i < 8; ++i)
     {
-      do_test (0, i, i - 1, 127);
-      do_test (0, i, i, 127);
-      do_test (0, i, i + 1, 127);
+      do_test (0, i, i - 1, MIDDLE_CHAR);
+      do_test (0, i, i, MIDDLE_CHAR);
+      do_test (0, i, i + 1, MIDDLE_CHAR);
     }
 
   for (i = 1; i < 8; ++i)
     {
-      do_test (i, i, i - 1, 127);
-      do_test (i, i, i, 127);
-      do_test (i, i, i + 1, 127);
+      do_test (i, i, i - 1, MIDDLE_CHAR);
+      do_test (i, i, i, MIDDLE_CHAR);
+      do_test (i, i, i + 1, MIDDLE_CHAR);
     }
 
   for (i = 2; i <= 10; ++i)
     {
-      do_test (0, 1 << i, 5000, 127);
-      do_test (1, 1 << i, 5000, 127);
+      do_test (0, 1 << i, 5000, MIDDLE_CHAR);
+      do_test (1, 1 << i, 5000, MIDDLE_CHAR);
     }
 
   for (i = 1; i < 8; ++i)
-    do_test (0, i, 5000, 255);
+    do_test (0, i, 5000, BIG_CHAR);
 
   for (i = 1; i < 8; ++i)
-    do_test (i, i, 5000, 255);
+    do_test (i, i, 5000, BIG_CHAR);
 
   for (i = 2; i <= 10; ++i)
     {
-      do_test (0, 1 << i, 5000, 255);
-      do_test (1, 1 << i, 5000, 255);
+      do_test (0, 1 << i, 5000, BIG_CHAR);
+      do_test (1, 1 << i, 5000, BIG_CHAR);
     }
 
   return ret;
diff --git a/benchtests/bench-wcsnlen.c b/benchtests/bench-wcsnlen.c
new file mode 100644
index 0000000..2b5a51c
--- /dev/null
+++ b/benchtests/bench-wcsnlen.c
@@ -0,0 +1,20 @@ 
+/* Measure wcsnlen functions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define WIDE 1
+#include "bench-strnlen.c"
diff --git a/string/test-strnlen.c b/string/test-strnlen.c
index 96797e1..8d35cac 100644
--- a/string/test-strnlen.c
+++ b/string/test-strnlen.c
@@ -1,4 +1,4 @@ 
-/* Test and measure strlen functions.
+/* Test strnlen functions.
    Copyright (C) 1999-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Jakub Jelinek <jakub@redhat.com>, 1999.
@@ -18,17 +18,36 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 #define TEST_MAIN
-#define TEST_NAME "strnlen"
+#ifndef WIDE
+# define TEST_NAME "strnlen"
+#else
+# define TEST_NAME "wcsnlen"
+#endif /* !WIDE */
 #include "test-string.h"
 
-typedef size_t (*proto_t) (const char *, size_t);
-size_t simple_strnlen (const char *, size_t);
-
-IMPL (simple_strnlen, 0)
-IMPL (strnlen, 1)
+#ifndef WIDE
+# define STRNLEN strnlen
+# define CHAR char
+# define BIG_CHAR CHAR_MAX
+# define MIDDLE_CHAR 127
+# define SIMPLE_STRNLEN simple_strnlen
+#else
+# include <wchar.h>
+# define STRNLEN wcsnlen
+# define CHAR wchar_t
+# define BIG_CHAR WCHAR_MAX
+# define MIDDLE_CHAR 1121
+# define SIMPLE_STRNLEN simple_wcsnlen
+#endif /* !WIDE */
+
+typedef size_t (*proto_t) (const CHAR *, size_t);
+size_t SIMPLE_STRNLEN (const CHAR *, size_t);
+
+IMPL (SIMPLE_STRNLEN, 0)
+IMPL (STRNLEN, 1)
 
 size_t
-simple_strnlen (const char *s, size_t maxlen)
+SIMPLE_STRNLEN (const CHAR *s, size_t maxlen)
 {
   size_t i;
 
@@ -37,7 +56,7 @@  simple_strnlen (const char *s, size_t maxlen)
 }
 
 static void
-do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
+do_one_test (impl_t *impl, const CHAR *s, size_t maxlen, size_t exp_len)
 {
   size_t len = CALL (impl, s, maxlen);
   if (len != exp_len)
@@ -54,23 +73,25 @@  do_test (size_t align, size_t len, size_t maxlen, int max_char)
 {
   size_t i;
 
-  align &= 7;
-  if (align + len >= page_size)
+  align &= 63;
+  if ((align + len) * sizeof (CHAR) >= page_size)
     return;
 
+  CHAR *buf = (CHAR *) (buf1);
+
   for (i = 0; i < len; ++i)
-    buf1[align + i] = 1 + 7 * i % max_char;
-  buf1[align + len] = 0;
+    buf[align + i] = 1 + 11111 * i % max_char;
+  buf[align + len] = 0;
 
   FOR_EACH_IMPL (impl, 0)
-    do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen));
+    do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen));
 }
 
 static void
 do_random_tests (void)
 {
   size_t i, j, n, align, len;
-  unsigned char *p = buf1 + page_size - 512;
+  CHAR *p = (CHAR *) (buf1 + page_size - 512 * sizeof (CHAR));
 
   for (n = 0; n < ITERATIONS; n++)
     {
@@ -97,25 +118,25 @@  do_random_tests (void)
       FOR_EACH_IMPL (impl, 1)
 	{
 	  if (len > 0
-	      && CALL (impl, (char *) (p + align), len - 1) != len - 1)
+	      && CALL (impl, (CHAR *) (p + align), len - 1) != len - 1)
 	    {
 	      error (0, 0, "Iteration %zd (limited) - wrong result in function %s (%zd) %zd != %zd, p %p",
 		     n, impl->name, align,
-		     CALL (impl, (char *) (p + align), len - 1), len - 1, p);
+		     CALL (impl, (CHAR *) (p + align), len - 1), len - 1, p);
 	      ret = 1;
 	    }
-	  if (CALL (impl, (char *) (p + align), len) != len)
+	  if (CALL (impl, (CHAR *) (p + align), len) != len)
 	    {
 	      error (0, 0, "Iteration %zd (exact) - wrong result in function %s (%zd) %zd != %zd, p %p",
 		     n, impl->name, align,
-		     CALL (impl, (char *) (p + align), len), len, p);
+		     CALL (impl, (CHAR *) (p + align), len), len, p);
 	      ret = 1;
 	    }
-	  if (CALL (impl, (char *) (p + align), len + 1) != len)
+	  if (CALL (impl, (CHAR *) (p + align), len + 1) != len)
 	    {
 	      error (0, 0, "Iteration %zd (long) - wrong result in function %s (%zd) %zd != %zd, p %p",
 		     n, impl->name, align,
-		     CALL (impl, (char *) (p + align), len + 1), len, p);
+		     CALL (impl, (CHAR *) (p + align), len + 1), len, p);
 	      ret = 1;
 	    }
 	}
@@ -136,34 +157,34 @@  test_main (void)
 
   for (i = 1; i < 8; ++i)
     {
-      do_test (0, i, i - 1, 127);
-      do_test (0, i, i, 127);
-      do_test (0, i, i + 1, 127);
+      do_test (0, i, i - 1, MIDDLE_CHAR);
+      do_test (0, i, i, MIDDLE_CHAR);
+      do_test (0, i, i + 1, MIDDLE_CHAR);
     }
 
   for (i = 1; i < 8; ++i)
     {
-      do_test (i, i, i - 1, 127);
-      do_test (i, i, i, 127);
-      do_test (i, i, i + 1, 127);
+      do_test (i, i, i - 1, MIDDLE_CHAR);
+      do_test (i, i, i, MIDDLE_CHAR);
+      do_test (i, i, i + 1, MIDDLE_CHAR);
     }
 
   for (i = 2; i <= 10; ++i)
     {
-      do_test (0, 1 << i, 5000, 127);
-      do_test (1, 1 << i, 5000, 127);
+      do_test (0, 1 << i, 5000, MIDDLE_CHAR);
+      do_test (1, 1 << i, 5000, MIDDLE_CHAR);
     }
 
   for (i = 1; i < 8; ++i)
-    do_test (0, i, 5000, 255);
+    do_test (0, i, 5000, BIG_CHAR);
 
   for (i = 1; i < 8; ++i)
-    do_test (i, i, 5000, 255);
+    do_test (i, i, 5000, BIG_CHAR);
 
   for (i = 2; i <= 10; ++i)
     {
-      do_test (0, 1 << i, 5000, 255);
-      do_test (1, 1 << i, 5000, 255);
+      do_test (0, 1 << i, 5000, BIG_CHAR);
+      do_test (1, 1 << i, 5000, BIG_CHAR);
     }
 
   do_random_tests ();
diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile
index 3a98098..3397f24 100644
--- a/sysdeps/s390/multiarch/Makefile
+++ b/sysdeps/s390/multiarch/Makefile
@@ -1,7 +1,9 @@ 
 ifeq ($(subdir),string)
-sysdep_routines += strlen strlen-vx strlen-c
+sysdep_routines += strlen strlen-vx strlen-c \
+		   strnlen strnlen-vx strnlen-c
 endif
 
 ifeq ($(subdir),wcsmbs)
-sysdep_routines += wcslen wcslen-vx wcslen-c
+sysdep_routines += wcslen wcslen-vx wcslen-c \
+		   wcsnlen wcsnlen-vx wcsnlen-c
 endif
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
index e9639ef..bc17c59 100644
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
@@ -82,6 +82,9 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_VX_IMPL (strlen);
   IFUNC_VX_IMPL (wcslen);
 
+  IFUNC_VX_IMPL (strnlen);
+  IFUNC_VX_IMPL (wcsnlen);
+
 #endif /* HAVE_S390_VX_ASM_SUPPORT */
 
   return i;
diff --git a/sysdeps/s390/multiarch/strnlen-c.c b/sysdeps/s390/multiarch/strnlen-c.c
new file mode 100644
index 0000000..72e82ae
--- /dev/null
+++ b/sysdeps/s390/multiarch/strnlen-c.c
@@ -0,0 +1,30 @@ 
+/* Default strnlen implementation for S/390.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
+# define STRNLEN  __strnlen_c
+# ifdef SHARED
+#  undef libc_hidden_def
+#  define libc_hidden_def(name)					\
+  __hidden_ver1 (__strnlen_c, __GI_strnlen, __strnlen_c);	\
+  strong_alias (__strnlen_c, __strnlen_c_1);			\
+  __hidden_ver1 (__strnlen_c_1, __GI___strnlen, __strnlen_c_1);
+# endif /* SHARED */
+
+# include <string/strnlen.c>
+#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */
diff --git a/sysdeps/s390/multiarch/strnlen-vx.S b/sysdeps/s390/multiarch/strnlen-vx.S
new file mode 100644
index 0000000..32c7704
--- /dev/null
+++ b/sysdeps/s390/multiarch/strnlen-vx.S
@@ -0,0 +1,102 @@ 
+/* Vector optimized 32/64 bit S/390 version of strnlen.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
+
+# include "sysdep.h"
+# include "asm-syntax.h"
+
+	.text
+
+/* size_t strnlen (const char *s, size_t maxlen)
+   Returns the number of characters in s, but at most maxlen.
+
+   Register usage:
+   -r1=tmp
+   -r2=address of string
+   -r3=maxlen (number of characters to be read)
+   -r4=tmp
+   -r5=current_len and return_value
+   -v16=part of s
+*/
+ENTRY(__strnlen_vx)
+	.machine "z13"
+	.machinemode "zarch_nohighgprs"
+
+# if !defined __s390x__
+	llgfr	%r3,%r3
+# endif /* !defined __s390x__ */
+
+	lghi	%r5,0		/* current_len = 0.  */
+	clgije	%r3,0,.Lend	/* Return if maxlen == 0.  */
+
+	/* Align s to 16 byte.  */
+	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
+				       %r4 = bits 60-63 'and' 15.  */
+	je	.Lloop1	/* If s is aligned, loop aligned.  */
+	lghi	%r1,15
+	slr	%r1,%r4		/* Compute highest index to load (15-x).  */
+	vll	%v16,%r1,0(%r2) /* Load up to 16 byte boundary. (vll needs
+				   highest index, remaining bytes are 0.)  */
+	ahi	%r1,1		/* Work with loaded byte count.  */
+	vfenezb	%v16,%v16,%v16	/* Find element not equal with zero search.  */
+	clgr	%r1,%r3
+	locgrh	%r1,%r3		/* loaded_byte_count
+				   = min (loaded_byte_count, maxlen)  */
+	vlgvb	%r5,%v16,7	/* Load zero index or 16 if not found.  */
+	clrjl	%r5,%r1,.Lend	/* Found zero within loaded bytes -> return.  */
+	lgr	%r5,%r1		/* No zero within loaded bytes,
+				   process further bytes aligned.  */
+	clgrje	%r1,%r3,.Lend	/* current_len == maxlen -> end.  */
+	/* No zero found and maxlen > loaded_byte_count.  */
+
+	/* Find zero in 16 byte aligned loop.  */
+.Lloop1:
+	vl	%v16,0(%r5,%r2) /* Load s.  */
+	aghi	%r5,16
+	vfenezbs %v16,%v16,%v16	/* Find element not equal with zero search.  */
+	je	.Lfound	/* Jump away if zero was found.  */
+	clgrjhe	%r5,%r3,.Lfound /* current_len >= maxlen -> end.  */
+	vl	%v16,0(%r5,%r2)
+	aghi	%r5,16
+	vfenezbs %v16,%v16,%v16
+	je	.Lfound
+	clgrjhe	%r5,%r3,.Lfound
+	vl	%v16,0(%r5,%r2)
+	aghi	%r5,16
+	vfenezbs %v16,%v16,%v16
+	je	.Lfound
+	clgrjhe	%r5,%r3,.Lfound
+	vl	%v16,0(%r5,%r2)
+	aghi	%r5,16
+	vfenezbs %v16,%v16,%v16
+	je	.Lfound
+	clgrjl	%r5,%r3,.Lloop1 /* maxlen not reached -> loop.  */
+
+.Lfound:
+	vlgvb	%r4,%v16,7	/* Load byte index of zero or 16 if no zero.  */
+	slgfi	%r5,16		/* current_len -=16.  */
+	algr	%r5,%r4
+
+	clgr	%r5,%r3
+	locgrh	%r5,%r3		/* Return min (current_len, maxlen).  */
+.Lend:
+	lgr	%r2,%r5
+	br	%r14
+END(__strnlen_vx)
+#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */
diff --git a/sysdeps/s390/multiarch/strnlen.c b/sysdeps/s390/multiarch/strnlen.c
new file mode 100644
index 0000000..51659de
--- /dev/null
+++ b/sysdeps/s390/multiarch/strnlen.c
@@ -0,0 +1,29 @@ 
+/* Multiple versions of strnlen.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
+# include <string.h>
+# include <ifunc-resolve.h>
+
+s390_vx_libc_ifunc (__strnlen)
+weak_alias (__strnlen, strnlen)
+libc_hidden_def (strnlen)
+
+#else
+# include <string/strnlen.c>
+#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */
diff --git a/sysdeps/s390/multiarch/wcsnlen-c.c b/sysdeps/s390/multiarch/wcsnlen-c.c
new file mode 100644
index 0000000..1ead4b1
--- /dev/null
+++ b/sysdeps/s390/multiarch/wcsnlen-c.c
@@ -0,0 +1,25 @@ 
+/* Default wcsnlen implementation for S/390.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
+# define WCSNLEN  __wcsnlen_c
+
+# include <wchar.h>
+extern __typeof (__wcsnlen) __wcsnlen_c;
+# include <wcsmbs/wcsnlen.c>
+#endif
diff --git a/sysdeps/s390/multiarch/wcsnlen-vx.S b/sysdeps/s390/multiarch/wcsnlen-vx.S
new file mode 100644
index 0000000..79d51a1
--- /dev/null
+++ b/sysdeps/s390/multiarch/wcsnlen-vx.S
@@ -0,0 +1,119 @@ 
+/* Vector optimized 32/64 bit S/390 version of wcsnlen.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
+
+# include "sysdep.h"
+# include "asm-syntax.h"
+
+	.text
+
+/* size_t wcsnlen (const wchar_t *s, size_t maxlen)
+   Returns the number of characters in s, but at most maxlen.
+
+   Register usage:
+   -r1=tmp
+   -r2=address of string
+   -r3=maxlen (number of characters to be read)
+   -r4=tmp
+   -r5=current_len and return_value
+   -v16=part of s
+*/
+ENTRY(__wcsnlen_vx)
+
+	.machine "z13"
+	.machinemode "zarch_nohighgprs"
+
+# if !defined __s390x__
+	llgfr	%r3,%r3
+# endif /* !defined __s390x__ */
+
+	lghi	%r5,0		/* current_len = 0.  */
+	clgije	%r3,0,.Lend	/* Return if maxlen == 0.  */
+
+	/* Check range of maxlen and convert to byte-count.  */
+# ifdef __s390x__
+	tmhh	%r3,49152	/* Test bit 0 or 1 of maxlen.  */
+	lghi	%r4,-4		/* Max byte-count is 18446744073709551612.  */
+# else
+	tmlh	%r3,49152	/* Test bit 0 or 1 of maxlen.  */
+	llilf	%r4,4294967292	/* Max byte-count is 4294967292.  */
+# endif /* !__s390x__ */
+	sllg	%r3,%r3,2	/* Convert character-count to byte-count.  */
+	locgrne	%r3,%r4		/* Use max byte-count, if bit 0/1 was one.  */
+
+	/* Align s to 16 byte.  */
+	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
+				       %r4 = bits 60-63 'and' 15.  */
+	je	.Lloop1		/* If s is aligned, loop aligned.  */
+	tmll	%r2,3		/* Test if s is 4-byte aligned?  */
+	jne	.Lfallback	/* And use common-code variant if not.  */
+	lghi	%r1,15
+	slr	%r1,%r4		/* Compute highest index to load (15-x).  */
+	vll	%v16,%r1,0(%r2) /* Load up to 16 byte boundary. (vll needs
+				   highest index, remaining bytes are 0.)  */
+	ahi	%r1,1		/* Work with loaded byte count.  */
+	vfenezf	%v16,%v16,%v16	/* Find element not equal with zero search.  */
+	clgr	%r1,%r3
+	locgrh	%r1,%r3		/* loaded_byte_count
+				   = min (loaded_byte_count, maxlen).  */
+	vlgvb	%r5,%v16,7	/* Load zero index or 16 if not found.  */
+	clrjl	%r5,%r1,.Lend	/* Found zero within loaded bytes -> return.  */
+	lgr	%r5,%r1		/* No zero within loaded bytes,
+				   process further bytes aligned.  */
+	clgrje	%r1,%r3,.Lend	/* If current_len == maxlen -> end.  */
+	/* No zero found and maxlen > loaded_byte_count.  */
+
+	/* Find zero in 16byte aligned loop.  */
+.Lloop1:
+	vl	%v16,0(%r5,%r2)	/* Load s.  */
+	aghi	%r5,16
+	vfenezfs %v16,%v16,%v16	/* Find element not equal with zero search.  */
+	je	.Lfound		/* Jump away if zero was found.  */
+	clgrjhe	%r5,%r3,.Lfound /* If current_len >= maxlen -> end.  */
+	vl	%v16,0(%r5,%r2)
+	aghi	%r5,16
+	vfenezfs %v16,%v16,%v16
+	je	.Lfound
+	clgrjhe	%r5,%r3,.Lfound
+	vl	%v16,0(%r5,%r2)
+	aghi	%r5,16
+	vfenezfs %v16,%v16,%v16
+	je	.Lfound
+	clgrjhe	%r5,%r3,.Lfound
+	vl	%v16,0(%r5,%r2)
+	aghi	%r5,16
+	vfenezfs %v16,%v16,%v16
+	je	.Lfound
+	clgrjl	%r5,%r3,.Lloop1	/* maxlen not reached -> loop.  */
+
+.Lfound:
+	vlgvb	%r4,%v16,7	/* Load byte index of zero or 16 if no zero.  */
+	slgfi	%r5,16		/* current_len -=16  */
+	algr	%r5,%r4
+
+	clgr	%r5,%r3
+	locgrh	%r5,%r3		/* Return min (current_len, maxlen).  */
+.Lend:
+	srlg	%r2,%r5,2	/* Convert byte-count to character-count.  */
+	br	%r14
+.Lfallback:
+	srlg	%r3,%r3,2	/* Convert byte-count to character-count.  */
+	jg	__wcsnlen_c
+END(__wcsnlen_vx)
+#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */
diff --git a/sysdeps/s390/multiarch/wcsnlen.c b/sysdeps/s390/multiarch/wcsnlen.c
new file mode 100644
index 0000000..bf2fd63
--- /dev/null
+++ b/sysdeps/s390/multiarch/wcsnlen.c
@@ -0,0 +1,28 @@ 
+/* Multiple versions of wcsnlen.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
+# include <wchar.h>
+# include <ifunc-resolve.h>
+
+s390_vx_libc_ifunc (__wcsnlen)
+weak_alias (__wcsnlen, wcsnlen)
+
+#else
+# include <wcsmbs/wcsnlen.c>
+#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index 44a4494..3858d74 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -42,7 +42,7 @@  routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
 	    isoc99_swscanf isoc99_vswscanf \
 	    mbrtoc16 c16rtomb
 
-strop-tests :=  wcscmp wcsncmp wmemcmp wcslen wcschr wcsrchr wcscpy
+strop-tests :=  wcscmp wcsncmp wmemcmp wcslen wcschr wcsrchr wcscpy wcsnlen
 tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
 	 tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \
 	 tst-c16c32-1 wcsatcliff $(addprefix test-,$(strop-tests))
diff --git a/wcsmbs/test-wcsnlen.c b/wcsmbs/test-wcsnlen.c
new file mode 100644
index 0000000..262ab30
--- /dev/null
+++ b/wcsmbs/test-wcsnlen.c
@@ -0,0 +1,20 @@ 
+/* Test wcsnlen function.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define WIDE 1
+#include "../string/test-strnlen.c"
diff --git a/wcsmbs/wcsnlen.c b/wcsmbs/wcsnlen.c
index 2bee705..e928ab6 100644
--- a/wcsmbs/wcsnlen.c
+++ b/wcsmbs/wcsnlen.c
@@ -18,8 +18,11 @@ 
 
 #include <wchar.h>
 
+#ifdef WCSNLEN
+# define __wcsnlen WCSNLEN
+#endif
 
-/* Copy SRC to DEST.  */
+/* Return the length of string S, but at most MAXLEN.  */
 size_t
 __wcsnlen (s, maxlen)
      const wchar_t *s;
@@ -44,4 +47,6 @@  __wcsnlen (s, maxlen)
 
   return len;
 }
+#ifndef WCSNLEN
 weak_alias (__wcsnlen, wcsnlen)
+#endif