From patchwork Fri Jun 26 11:51:46 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stefan Liebler X-Patchwork-Id: 7379 Received: (qmail 27026 invoked by alias); 26 Jun 2015 11:53:02 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 24477 invoked by uid 89); 26 Jun 2015 11:52:39 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=AWL, BAYES_00, KAM_ASCII_DIVIDERS, KAM_LAZY_DOMAIN_SECURITY, RP_MATCHES_RCVD autolearn=no version=3.3.2 X-HELO: e06smtp13.uk.ibm.com X-MailFrom: stli@linux.vnet.ibm.com X-RcptTo: libc-alpha@sourceware.org From: Stefan Liebler To: libc-alpha@sourceware.org Cc: Stefan Liebler Subject: [PATCH 21/27] S390: Optimize strpbrk and wcspbrk. Date: Fri, 26 Jun 2015 13:51:46 +0200 Message-Id: <1435319512-22245-22-git-send-email-stli@linux.vnet.ibm.com> In-Reply-To: <1435319512-22245-1-git-send-email-stli@linux.vnet.ibm.com> References: <1435319512-22245-1-git-send-email-stli@linux.vnet.ibm.com> X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 15062611-0013-0000-0000-0000047EF5F4 This patch provides optimized versions of strpbrk and wcspbrk with the z13 vector instructions. ChangeLog: * sysdeps/s390/multiarch/strpbrk-c.c: New File. * sysdeps/s390/multiarch/strpbrk-vx.S: Likewise. * sysdeps/s390/multiarch/strpbrk.c: Likewise. * sysdeps/s390/multiarch/wcspbrk-c.c: Likewise. * sysdeps/s390/multiarch/wcspbrk-vx.S: Likewise. * sysdeps/s390/multiarch/wcspbrk.c: Likewise. * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add strpbrk and wcspbrk functions. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add ifunc test for strpbrk, wcspbrk. 
* wcsmbs/wcspbrk.c: Use WCSPBRK if defined. * string/test-strpbrk.c: Add wcspbrk support. * wcsmbs/test-wcspbrk.c: New File. * wcsmbs/Makefile (strop-tests): Add wcspbrk. * benchtests/bench-strpbrk.c: Add wcspbrk support. * benchtests/bench-wcspbrk.c: New File. * benchtests/Makefile (wcsmbs-bench): Add wcspbrk. --- benchtests/Makefile | 2 +- benchtests/bench-strpbrk.c | 100 ++++++---- benchtests/bench-wcspbrk.c | 20 ++ string/test-strpbrk.c | 130 ++++++++----- sysdeps/s390/multiarch/Makefile | 6 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 3 + sysdeps/s390/multiarch/strpbrk-c.c | 28 +++ sysdeps/s390/multiarch/strpbrk-vx.S | 300 +++++++++++++++++++++++++++++ sysdeps/s390/multiarch/strpbrk.c | 27 +++ sysdeps/s390/multiarch/wcspbrk-c.c | 31 +++ sysdeps/s390/multiarch/wcspbrk-vx.S | 316 +++++++++++++++++++++++++++++++ sysdeps/s390/multiarch/wcspbrk.c | 27 +++ wcsmbs/Makefile | 2 +- wcsmbs/test-wcspbrk.c | 20 ++ wcsmbs/wcspbrk.c | 3 + 15 files changed, 927 insertions(+), 88 deletions(-) create mode 100644 benchtests/bench-wcspbrk.c create mode 100644 sysdeps/s390/multiarch/strpbrk-c.c create mode 100644 sysdeps/s390/multiarch/strpbrk-vx.S create mode 100644 sysdeps/s390/multiarch/strpbrk.c create mode 100644 sysdeps/s390/multiarch/wcspbrk-c.c create mode 100644 sysdeps/s390/multiarch/wcspbrk-vx.S create mode 100644 sysdeps/s390/multiarch/wcspbrk.c create mode 100644 wcsmbs/test-wcspbrk.c diff --git a/benchtests/Makefile b/benchtests/Makefile index f00196b..f882f98 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -37,7 +37,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ strcoll wcsmbs-bench := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \ - wcscmp wcsncmp wcschr wcschrnul wcsrchr wcsspn + wcscmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk string-bench-all := $(string-bench) ${wcsmbs-bench} # We have to generate locales diff --git 
a/benchtests/bench-strpbrk.c b/benchtests/bench-strpbrk.c index a3bc8d6..eb09a13 100644 --- a/benchtests/bench-strpbrk.c +++ b/benchtests/bench-strpbrk.c @@ -16,50 +16,80 @@ License along with the GNU C Library; if not, see . */ +#ifndef WIDE +# define CHAR char +# define STRLEN strlen +# define STRCHR strchr +# define BIG_CHAR CHAR_MAX +# define SMALL_CHAR 127 +#else +# include +# define CHAR wchar_t +# define STRLEN wcslen +# define STRCHR wcschr +# define BIG_CHAR WCHAR_MAX +# define SMALL_CHAR 1273 +#endif /* WIDE */ + #ifndef STRPBRK_RESULT # define STRPBRK_RESULT(s, pos) ((s)[(pos)] ? (s) + (pos) : NULL) -# define RES_TYPE char * +# define RES_TYPE CHAR * # define TEST_MAIN -# define TEST_NAME "strpbrk" +# ifndef WIDE +# define TEST_NAME "strpbrk" +# else +# define TEST_NAME "wcspbrk" +# endif /* WIDE */ # include "bench-string.h" -typedef char *(*proto_t) (const char *, const char *); -char *simple_strpbrk (const char *, const char *); -char *stupid_strpbrk (const char *, const char *); - -IMPL (stupid_strpbrk, 0) -IMPL (simple_strpbrk, 0) -IMPL (strpbrk, 1) - -char * -simple_strpbrk (const char *s, const char *rej) +# ifndef WIDE +# define STRPBRK strpbrk +# define SIMPLE_STRPBRK simple_strpbrk +# define STUPID_STRPBRK stupid_strpbrk +# else +# include +# define STRPBRK wcspbrk +# define SIMPLE_STRPBRK simple_wcspbrk +# define STUPID_STRPBRK stupid_wcspbrk +# endif /* WIDE */ + +typedef CHAR *(*proto_t) (const CHAR *, const CHAR *); +CHAR *SIMPLE_STRPBRK (const CHAR *, const CHAR *); +CHAR *STUPID_STRPBRK (const CHAR *, const CHAR *); + +IMPL (STUPID_STRPBRK, 0) +IMPL (SIMPLE_STRPBRK, 0) +IMPL (STRPBRK, 1) + +CHAR * +SIMPLE_STRPBRK (const CHAR *s, const CHAR *rej) { - const char *r; - char c; + const CHAR *r; + CHAR c; while ((c = *s++) != '\0') for (r = rej; *r != '\0'; ++r) if (*r == c) - return (char *) s - 1; + return (CHAR *) s - 1; return NULL; } -char * -stupid_strpbrk (const char *s, const char *rej) +CHAR * +STUPID_STRPBRK (const CHAR *s, const 
CHAR *rej) { - size_t ns = strlen (s), nrej = strlen (rej); + size_t ns = STRLEN (s), nrej = STRLEN (rej); size_t i, j; for (i = 0; i < ns; ++i) for (j = 0; j < nrej; ++j) if (s[i] == rej[j]) - return (char *) s + i; + return (CHAR *) s + i; return NULL; } -#endif +#endif /* !STRPBRK_RESULT */ static void -do_one_test (impl_t *impl, const char *s, const char *rej, RES_TYPE exp_res) +do_one_test (impl_t *impl, const CHAR *s, const CHAR *rej, RES_TYPE exp_res) { RES_TYPE res = CALL (impl, s, rej); size_t i, iters = INNER_LOOP_ITERS; @@ -91,35 +121,35 @@ do_test (size_t align, size_t pos, size_t len) size_t i; int c; RES_TYPE result; - char *rej, *s; + CHAR *rej, *s; align &= 7; - if (align + pos + 10 >= page_size || len > 240) + if ((align + pos + 10) * sizeof (CHAR) >= page_size || len > 240) return; - rej = (char *) (buf2 + (random () & 255)); - s = (char *) (buf1 + align); + rej = (CHAR *) (buf2) + (random () & 255); + s = (CHAR *) (buf1) + align; for (i = 0; i < len; ++i) { - rej[i] = random () & 255; + rej[i] = random () & BIG_CHAR; if (!rej[i]) - rej[i] = random () & 255; + rej[i] = random () & BIG_CHAR; if (!rej[i]) - rej[i] = 1 + (random () & 127); + rej[i] = 1 + (random () & SMALL_CHAR); } rej[len] = '\0'; - for (c = 1; c <= 255; ++c) - if (strchr (rej, c) == NULL) + for (c = 1; c <= BIG_CHAR; ++c) + if (STRCHR (rej, c) == NULL) break; for (i = 0; i < pos; ++i) { - s[i] = random () & 255; - if (strchr (rej, s[i])) + s[i] = random () & BIG_CHAR; + if (STRCHR (rej, s[i])) { - s[i] = random () & 255; - if (strchr (rej, s[i])) + s[i] = random () & BIG_CHAR; + if (STRCHR (rej, s[i])) s[i] = c; } } @@ -127,7 +157,7 @@ do_test (size_t align, size_t pos, size_t len) if (s[pos]) { for (i = pos + 1; i < pos + 10; ++i) - s[i] = random () & 255; + s[i] = random () & BIG_CHAR; s[i] = '\0'; } result = STRPBRK_RESULT (s, pos); diff --git a/benchtests/bench-wcspbrk.c b/benchtests/bench-wcspbrk.c new file mode 100644 index 0000000..3d9f00f --- /dev/null +++ 
b/benchtests/bench-wcspbrk.c @@ -0,0 +1,20 @@ +/* Measure wcspbrk functions. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "bench-strpbrk.c" diff --git a/string/test-strpbrk.c b/string/test-strpbrk.c index b4ac389..a4dffe7 100644 --- a/string/test-strpbrk.c +++ b/string/test-strpbrk.c @@ -17,50 +17,82 @@ License along with the GNU C Library; if not, see . */ +#ifndef WIDE +# define CHAR char +# define UCHAR unsigned char +# define STRLEN strlen +# define STRCHR strchr +# define BIG_CHAR CHAR_MAX +# define SMALL_CHAR 127 +#else +# include +# define CHAR wchar_t +# define UCHAR wchar_t +# define STRLEN wcslen +# define STRCHR wcschr +# define BIG_CHAR WCHAR_MAX +# define SMALL_CHAR 1273 +#endif /* WIDE */ + #ifndef STRPBRK_RESULT # define STRPBRK_RESULT(s, pos) ((s)[(pos)] ? 
(s) + (pos) : NULL) -# define RES_TYPE char * +# define RES_TYPE CHAR * # define TEST_MAIN -# define TEST_NAME "strpbrk" +# ifndef WIDE +# define TEST_NAME "strpbrk" +# else +# define TEST_NAME "wcspbrk" +# endif /* WIDE */ # include "test-string.h" -typedef char *(*proto_t) (const char *, const char *); -char *simple_strpbrk (const char *, const char *); -char *stupid_strpbrk (const char *, const char *); +# ifndef WIDE +# define STRPBRK strpbrk +# define SIMPLE_STRPBRK simple_strpbrk +# define STUPID_STRPBRK stupid_strpbrk +# else +# include +# define STRPBRK wcspbrk +# define SIMPLE_STRPBRK simple_wcspbrk +# define STUPID_STRPBRK stupid_wcspbrk +# endif /* WIDE */ + +typedef CHAR *(*proto_t) (const CHAR *, const CHAR *); +CHAR *SIMPLE_STRPBRK (const CHAR *, const CHAR *); +CHAR *STUPID_STRPBRK (const CHAR *, const CHAR *); -IMPL (stupid_strpbrk, 0) -IMPL (simple_strpbrk, 0) -IMPL (strpbrk, 1) +IMPL (STUPID_STRPBRK, 0) +IMPL (SIMPLE_STRPBRK, 0) +IMPL (STRPBRK, 1) -char * -simple_strpbrk (const char *s, const char *rej) +CHAR * +SIMPLE_STRPBRK (const CHAR *s, const CHAR *rej) { - const char *r; - char c; + const CHAR *r; + CHAR c; while ((c = *s++) != '\0') for (r = rej; *r != '\0'; ++r) if (*r == c) - return (char *) s - 1; + return (CHAR *) s - 1; return NULL; } -char * -stupid_strpbrk (const char *s, const char *rej) +CHAR * +STUPID_STRPBRK (const CHAR *s, const CHAR *rej) { - size_t ns = strlen (s), nrej = strlen (rej); + size_t ns = STRLEN (s), nrej = STRLEN (rej); size_t i, j; for (i = 0; i < ns; ++i) for (j = 0; j < nrej; ++j) if (s[i] == rej[j]) - return (char *) s + i; + return (CHAR *) s + i; return NULL; } -#endif +#endif /* !STRPBRK_RESULT */ static void -do_one_test (impl_t *impl, const char *s, const char *rej, RES_TYPE exp_res) +do_one_test (impl_t *impl, const CHAR *s, const CHAR *rej, RES_TYPE exp_res) { RES_TYPE res = CALL (impl, s, rej); if (res != exp_res) @@ -78,35 +110,35 @@ do_test (size_t align, size_t pos, size_t len) size_t i; int c; 
RES_TYPE result; - char *rej, *s; + CHAR *rej, *s; align &= 7; - if (align + pos + 10 >= page_size || len > 240) + if ((align + pos + 10) * sizeof (CHAR) >= page_size || len > 240) return; - rej = (char *) (buf2 + (random () & 255)); - s = (char *) (buf1 + align); + rej = (CHAR *) (buf2) + (random () & 255); + s = (CHAR *) (buf1) + align; for (i = 0; i < len; ++i) { - rej[i] = random () & 255; + rej[i] = random () & BIG_CHAR; if (!rej[i]) - rej[i] = random () & 255; + rej[i] = random () & BIG_CHAR; if (!rej[i]) - rej[i] = 1 + (random () & 127); + rej[i] = 1 + (random () & SMALL_CHAR); } rej[len] = '\0'; - for (c = 1; c <= 255; ++c) - if (strchr (rej, c) == NULL) + for (c = 1; c <= BIG_CHAR; ++c) + if (STRCHR (rej, c) == NULL) break; for (i = 0; i < pos; ++i) { - s[i] = random () & 255; - if (strchr (rej, s[i])) + s[i] = random () & BIG_CHAR; + if (STRCHR (rej, s[i])) { - s[i] = random () & 255; - if (strchr (rej, s[i])) + s[i] = random () & BIG_CHAR; + if (STRCHR (rej, s[i])) s[i] = c; } } @@ -114,7 +146,7 @@ do_test (size_t align, size_t pos, size_t len) if (s[pos]) { for (i = pos + 1; i < pos + 10; ++i) - s[i] = random () & 255; + s[i] = random () & BIG_CHAR; s[i] = '\0'; } result = STRPBRK_RESULT (s, pos); @@ -129,8 +161,8 @@ do_random_tests (void) size_t i, j, n, align, pos, len, rlen; RES_TYPE result; int c; - unsigned char *p = buf1 + page_size - 512; - unsigned char *rej; + UCHAR *p = (UCHAR *) (buf1 + page_size) - 512; + UCHAR *rej; for (n = 0; n < ITERATIONS; n++) { @@ -147,18 +179,18 @@ do_random_tests (void) rlen = random () & 63; else rlen = random () & 15; - rej = buf2 + page_size - rlen - 1 - (random () & 7); + rej = (UCHAR *) (buf2 + page_size) - rlen - 1 - (random () & 7); for (i = 0; i < rlen; ++i) { - rej[i] = random () & 255; + rej[i] = random () & BIG_CHAR; if (!rej[i]) - rej[i] = random () & 255; + rej[i] = random () & BIG_CHAR; if (!rej[i]) - rej[i] = 1 + (random () & 127); + rej[i] = 1 + (random () & SMALL_CHAR); } rej[i] = '\0'; - for (c = 
1; c <= 255; ++c) - if (strchr ((char *) rej, c) == NULL) + for (c = 1; c <= BIG_CHAR; ++c) + if (STRCHR ((CHAR *) rej, c) == NULL) break; j = (pos > len ? pos : len) + align + 64; if (j > 512) @@ -171,27 +203,27 @@ do_random_tests (void) else if (i == pos + align) p[i] = rej[random () % (rlen + 1)]; else if (i < align || i > pos + align) - p[i] = random () & 255; + p[i] = random () & BIG_CHAR; else { - p[i] = random () & 255; - if (strchr ((char *) rej, p[i])) + p[i] = random () & BIG_CHAR; + if (STRCHR ((CHAR *) rej, p[i])) { - p[i] = random () & 255; - if (strchr ((char *) rej, p[i])) + p[i] = random () & BIG_CHAR; + if (STRCHR ((CHAR *) rej, p[i])) p[i] = c; } } } - result = STRPBRK_RESULT ((char *) (p + align), pos < len ? pos : len); + result = STRPBRK_RESULT ((CHAR *) (p + align), pos < len ? pos : len); FOR_EACH_IMPL (impl, 1) - if (CALL (impl, (char *) (p + align), (char *) rej) != result) + if (CALL (impl, (CHAR *) (p + align), (CHAR *) rej) != result) { error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %p, %zd, %zd, %zd) %p != %p", n, impl->name, align, rej, rlen, pos, len, - (void *) CALL (impl, (char *) (p + align), (char *) rej), + (void *) CALL (impl, (CHAR *) (p + align), (CHAR *) rej), (void *) result); ret = 1; } diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index 9403169..5765a8c 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -12,7 +12,8 @@ sysdep_routines += strlen strlen-vx strlen-c \ strchr strchr-vx strchr-c \ strchrnul strchrnul-vx strchrnul-c \ strrchr strrchr-vx strrchr-c \ - strspn strspn-vx strspn-c + strspn strspn-vx strspn-c \ + strpbrk strpbrk-vx strpbrk-c endif ifeq ($(subdir),wcsmbs) @@ -29,5 +30,6 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \ wcschr wcschr-vx wcschr-c \ wcschrnul wcschrnul-vx wcschrnul-c \ wcsrchr wcsrchr-vx wcsrchr-c \ - wcsspn wcsspn-vx wcsspn-c + wcsspn wcsspn-vx wcsspn-c \ + wcspbrk wcspbrk-vx wcspbrk-c endif diff --git 
a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index cbedf64..b39a5c5 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -121,6 +121,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_VX_IMPL (strspn); IFUNC_VX_IMPL (wcsspn); + IFUNC_VX_IMPL (strpbrk); + IFUNC_VX_IMPL (wcspbrk); + #endif /* HAVE_S390_VX_ASM_SUPPORT */ return i; diff --git a/sysdeps/s390/multiarch/strpbrk-c.c b/sysdeps/s390/multiarch/strpbrk-c.c new file mode 100644 index 0000000..3923ae8 --- /dev/null +++ b/sysdeps/s390/multiarch/strpbrk-c.c @@ -0,0 +1,28 @@ +/* Default strpbrk implementation for S/390. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRPBRK __strpbrk_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strpbrk_c, __GI_strpbrk, __strpbrk_c); +# endif /* SHARED */ + +# include +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strpbrk-vx.S b/sysdeps/s390/multiarch/strpbrk-vx.S new file mode 100644 index 0000000..8a97744 --- /dev/null +++ b/sysdeps/s390/multiarch/strpbrk-vx.S @@ -0,0 +1,300 @@ +/* Vector optimized 32/64 bit S/390 version of strpbrk. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strpbrk (const char *s, const char * accept) + The strpbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__strpbrk_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lghi %r1,0 /* Zero out current len. */ + vlgvb %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lays + on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg */ + + +.Lcheck_onbb: + /* Accept lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. 
*/ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lfast_loop1 /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, return with found + element index (=equal count). */ + clrjl %r4,%r0,.Lfast_loop_found2 + lgr %r1,%r0 /* Current len = loaded byte count. */ + j .Lfast_loop1 + + /* Process s in 16byte aligned loop. */ +.Lfast_loop2: + aghi %r1,16 +.Lfast_loop1: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + aghi %r1,16 + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + aghi %r1,16 + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + aghi %r1,16 + vfaezbs %v18,%v16,%v17,0 + jo .Lfast_loop2 /* Loop if no element was equal to accept + and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char.
*/ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. */ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. 
*/ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any accepted characters + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> all of accept was processed; + check results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed.
*/ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvb %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + if first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ + +.Lslow_end: + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 + br %r14 +END(__strpbrk_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strpbrk.c b/sysdeps/s390/multiarch/strpbrk.c new file mode 100644 index 0000000..96dad5d --- /dev/null +++ b/sysdeps/s390/multiarch/strpbrk.c @@ -0,0 +1,27 @@ +/* Multiple versions of strpbrk.
+ Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include +# include + +s390_vx_libc_ifunc2 (__strpbrk, strpbrk) + +#else +# include +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/multiarch/wcspbrk-c.c b/sysdeps/s390/multiarch/wcspbrk-c.c new file mode 100644 index 0000000..634497b --- /dev/null +++ b/sysdeps/s390/multiarch/wcspbrk-c.c @@ -0,0 +1,31 @@ +/* Default wcspbrk implementation for S/390. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSPBRK __wcspbrk_c + +# include +extern __typeof (wcspbrk) __wcspbrk_c; +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcspbrk_c, __GI_wcspbrk, __wcspbrk_c); +# endif /* SHARED */ + +# include +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcspbrk-vx.S b/sysdeps/s390/multiarch/wcspbrk-vx.S new file mode 100644 index 0000000..d92b909 --- /dev/null +++ b/sysdeps/s390/multiarch/wcspbrk-vx.S @@ -0,0 +1,316 @@ +/* Vector optimized 32/64 bit S/390 version of wcspbrk. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept) + The wcspbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__wcspbrk_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r0,0(%r3),6 /* Get loaded byte count of accept. */ + jo .Lcheck_onbb /* Special case if accept lies + on block-boundary. */ + +.Lcheck_notonbb: + lghi %r1,0 /* Zero out current len. */ + vlgvf %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg. */ + + +.Lcheck_onbb: + /* Accept lies on block-boundary. */ + nill %r0,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. 
*/ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lfast_loop1 /* If s is aligned, loop aligned. */ + tmll %r2,3 /* Test if s is 4-byte aligned. */ + jne .Lfallback /* And use common-code variant if not. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, evaluate the found + element (accept-char or end of s). */ + clrjl %r4,%r0,.Lfast_loop_found2 + lgr %r1,%r0 /* Current len = loaded byte count. */ + j .Lfast_loop1 + + /* Process s in 16byte aligned loop. */ +.Lfast_loop2: + aghi %r1,16 +.Lfast_loop1: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + aghi %r1,16 + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + aghi %r1,16 + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + aghi %r1,16 + vfaezfs %v18,%v16,%v17,0 + jo .Lfast_loop2 /* Loop if no element was equal to accept + and none was zero. */ + + /* Found equal or zero element. 
*/ +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + srlg %r5,%r4,2 /* Convert byte-index to character-index. */ + vlgvf %r0,%v16,0(%r5) /* Get found element. */ + clije %r0,0,.Lfast_end_null /* Found element is zero (end of s) + -> return null. */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. */ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers r6, r8, r9 in v30, v31. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Accept in v17 contains no zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + tmll %r2,3 /* Test if s is 4-byte aligned. */ + jne .Lfallback /* And use common-code variant if not. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary; vll needs + highest index, remaining bytes are 0. */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. 
*/ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeef %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null. */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvf %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to an accepted character + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found. */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> end of accept reached; check + results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. 
*/ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvf %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + if first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + srlg %r5,%r4,2 /* Convert byte-index to character-index. */ + vlgvf %r0,%v16,0(%r5) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. 
*/ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* Pointer to first equal char = s + current len. */ + +.Lslow_end: + /* Restore registers r6, r8, r9. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 /* Set return value (pointer or null). */ + br %r14 +.Lfallback: + jg __wcspbrk_c /* Use common-code variant. */ +END(__wcspbrk_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcspbrk.c b/sysdeps/s390/multiarch/wcspbrk.c new file mode 100644 index 0000000..442532f --- /dev/null +++ b/sysdeps/s390/multiarch/wcspbrk.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcspbrk. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcspbrk, wcspbrk) + +#else +# include <wcsmbs/wcspbrk.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index b51e7b6..03edaec 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -43,7 +43,7 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \ mbrtoc16 c16rtomb strop-tests := wcscmp wcsncmp wmemcmp wcslen wcschr wcsrchr wcscpy wcsnlen \ - wcpcpy wcsncpy wcpncpy wcscat wcsncat wcschrnul wcsspn + wcpcpy wcsncpy wcpncpy wcscat wcsncat wcschrnul wcsspn wcspbrk tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \ tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \ tst-c16c32-1 wcsatcliff $(addprefix test-,$(strop-tests)) diff --git a/wcsmbs/test-wcspbrk.c b/wcsmbs/test-wcspbrk.c new file mode 100644 index 0000000..98e44e5 --- /dev/null +++ b/wcsmbs/test-wcspbrk.c @@ -0,0 +1,20 @@ +/* Test wcspbrk functions. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define WIDE 1 +#include "../string/test-strpbrk.c" diff --git a/wcsmbs/wcspbrk.c b/wcsmbs/wcspbrk.c index 17e821c..9bb43d6 100644 --- a/wcsmbs/wcspbrk.c +++ b/wcsmbs/wcspbrk.c @@ -18,6 +18,9 @@ #include <wchar.h> + +#ifdef WCSPBRK +# define wcspbrk WCSPBRK +#endif /* Find the first occurrence in WCS of any wide-character in ACCEPT. */ wchar_t *