[2/7] RISC-V: Add Zbb optimized memchr as ifunc

Message ID 20240422074403.2399529-3-christoph.muellner@vrull.eu
State Superseded
Delegated to: Carlos O'Donell
Headers
Series Add ifunc support for existing Zbb optimizations |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed

Commit Message

Christoph Müllner April 22, 2024, 7:43 a.m. UTC
  When building with Zbb enabled, memchr benefits from using orc.b in
find_zero_all().  This patch changes the build system such, that a
non-Zbb version as well as a Zbb version of this routine is built.
Further, a ifunc resolver is provided that selects the right routine
based on the outcome of extension probing via hwprobe().

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
 sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
 sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
 .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
 .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
 .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
 5 files changed, 142 insertions(+), 5 deletions(-)
 create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
 create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
 create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
  

Comments

Adhemerval Zanella Netto April 24, 2024, 12:53 p.m. UTC | #1
On 22/04/24 04:43, Christoph Müllner wrote:
> When building with Zbb enabled, memchr benefits from using orc.b in
> find_zero_all().  This patch changes the build system such, that a
> non-Zbb version as well as a Zbb version of this routine is built.
> Further, a ifunc resolver is provided that selects the right routine
> based on the outcome of extension probing via hwprobe().
> 
> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> ---
>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
>  5 files changed, 142 insertions(+), 5 deletions(-)
>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> 
> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
> new file mode 100644
> index 0000000000..a96c36398b
> --- /dev/null
> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
> @@ -0,0 +1,26 @@
> +/* Re-include the default memchr implementation.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <string.h>
> +
> +#if IS_IN(libc)
> +# define MEMCHR __memchr_generic
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(x)
> +#endif
> +#include <string/memchr.c>
> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
> new file mode 100644
> index 0000000000..bead0335ae
> --- /dev/null
> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
> @@ -0,0 +1,30 @@
> +/* Re-include the default memchr implementation for Zbb.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <string.h>
> +
> +#if IS_IN(libc)
> +# define MEMCHR __memchr_zbb
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(x)
> +#endif
> +/* Convince preprocessor to have Zbb instructions.  */
> +#ifndef __riscv_zbb
> +# define __riscv_zbb
> +#endif

Is there a way to specific the compiler to enable a extension, like aarch64
-march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
instead of messing with compiler defined pre-processor.

> +#include <string/memchr.c>
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> index fcef5659d4..5586d11c89 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> @@ -1,5 +1,8 @@
>  ifeq ($(subdir),string)
>  sysdep_routines += \
> +  memchr \
> +  memchr-generic \
> +  memchr-zbb \
>    memcpy \
>    memcpy-generic \
>    memcpy_noalignment \
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> index 9f806d7a9e..7321144a32 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> @@ -20,19 +20,40 @@
>  #include <string.h>
>  #include <sys/hwprobe.h>
>  
> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
> +
>  size_t
>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>  			size_t max)
>  {
>    size_t i = max;
> +  struct riscv_hwprobe pairs[] = {
> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
> +  };
>  
> +  bool has_zbb = false;
>    bool fast_unaligned = false;
>  
> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
> -          == RISCV_HWPROBE_MISALIGNED_FAST)
> -    fast_unaligned = true;
> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
> +    {
> +      struct riscv_hwprobe *pair;
> +
> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
> +      pair = &pairs[0];
> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
> +        has_zbb = true;
> +
> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
> +      pair = &pairs[1];
> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
> +	   == RISCV_HWPROBE_MISALIGNED_FAST)
> +        fast_unaligned = true;
> +    }
> +
> +  IFUNC_IMPL (i, name, memchr,
> +	      IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
> +	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
>  
>    IFUNC_IMPL (i, name, memcpy,
>  	      IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> new file mode 100644
> index 0000000000..bc076cbf24
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> @@ -0,0 +1,57 @@
> +/* Multiple versions of memchr.
> +   All versions must be listed in ifunc-impl-list.c.
> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#if IS_IN (libc)
> +/* Redefine memchr so that the compiler won't complain about the type
> +   mismatch with the IFUNC selector in strong_alias, below.  */
> +# undef memchr
> +# define memchr __redirect_memchr
> +# include <stdint.h>
> +# include <string.h>
> +# include <ifunc-init.h>
> +# include <riscv-ifunc.h>
> +# include <sys/hwprobe.h>
> +
> +extern __typeof (__redirect_memchr) __libc_memchr;
> +
> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
> +
> +static inline __typeof (__redirect_memchr) *
> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> +{
> +  unsigned long long int v;
> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
> +      && (v & RISCV_HWPROBE_EXT_ZBB))
> +    return __memchr_zbb;
> +
> +  return __memchr_generic;
> +}
> +
> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
> +
> +# undef memchr
> +strong_alias (__libc_memchr, memchr);
> +# ifdef SHARED
> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
> +# endif
> +#else
> +# include <string/memchr.c>
> +#endif
  
Christoph Müllner April 24, 2024, 1:16 p.m. UTC | #2
On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 22/04/24 04:43, Christoph Müllner wrote:
> > When building with Zbb enabled, memchr benefits from using orc.b in
> > find_zero_all().  This patch changes the build system such, that a
> > non-Zbb version as well as a Zbb version of this routine is built.
> > Further, a ifunc resolver is provided that selects the right routine
> > based on the outcome of extension probing via hwprobe().
> >
> > Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> > ---
> >  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
> >  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
> >  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
> >  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
> >  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
> >  5 files changed, 142 insertions(+), 5 deletions(-)
> >  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
> >  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
> >  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >
> > diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
> > new file mode 100644
> > index 0000000000..a96c36398b
> > --- /dev/null
> > +++ b/sysdeps/riscv/multiarch/memchr-generic.c
> > @@ -0,0 +1,26 @@
> > +/* Re-include the default memchr implementation.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <string.h>
> > +
> > +#if IS_IN(libc)
> > +# define MEMCHR __memchr_generic
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(x)
> > +#endif
> > +#include <string/memchr.c>
> > diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
> > new file mode 100644
> > index 0000000000..bead0335ae
> > --- /dev/null
> > +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
> > @@ -0,0 +1,30 @@
> > +/* Re-include the default memchr implementation for Zbb.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <string.h>
> > +
> > +#if IS_IN(libc)
> > +# define MEMCHR __memchr_zbb
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(x)
> > +#endif
> > +/* Convince preprocessor to have Zbb instructions.  */
> > +#ifndef __riscv_zbb
> > +# define __riscv_zbb
> > +#endif
>
> Is there a way to specific the compiler to enable a extension, like aarch64
> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
> instead of messing with compiler defined pre-processor.

The tools expect a list of all extensions as parameter to the -march= option.
But there is no way to append extensions to an existing march string
on the command line.

And if we would add this feature today, it would take many years until we could
use it here, because we want to remain compatible with old tools.
Or we enable the optimization only when being built with new tools, but that
adds even more complexity and build/test configurations.

What we have is:
* Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
* Command line (since forever): -march=BASE_EXTENSIONLIST
* GAS (since Nov 21): .option arch, +EXTENSION (in combination with
option push/pop)
* GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))

I was not sure about using __riscv_zbb as well, but I considered it safe within
ifdef tests that ensure the macro won't be set twice.
If that's a concern, I could change to use something like this:
#define __riscv_force_zbb
#include <impl.c>
#undef __riscv_force_zbb
... and change string-fza.h like this:
#if defined(__riscv_zbb) || defined(__riscv_force_zbb)
// orc.b
#endif

BR
Christoph

> > +#include <string/memchr.c>
> > diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > index fcef5659d4..5586d11c89 100644
> > --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > @@ -1,5 +1,8 @@
> >  ifeq ($(subdir),string)
> >  sysdep_routines += \
> > +  memchr \
> > +  memchr-generic \
> > +  memchr-zbb \
> >    memcpy \
> >    memcpy-generic \
> >    memcpy_noalignment \
> > diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > index 9f806d7a9e..7321144a32 100644
> > --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > @@ -20,19 +20,40 @@
> >  #include <string.h>
> >  #include <sys/hwprobe.h>
> >
> > +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
> > +
> >  size_t
> >  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                       size_t max)
> >  {
> >    size_t i = max;
> > +  struct riscv_hwprobe pairs[] = {
> > +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
> > +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
> > +  };
> >
> > +  bool has_zbb = false;
> >    bool fast_unaligned = false;
> >
> > -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> > -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> > -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
> > -          == RISCV_HWPROBE_MISALIGNED_FAST)
> > -    fast_unaligned = true;
> > +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
> > +    {
> > +      struct riscv_hwprobe *pair;
> > +
> > +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
> > +      pair = &pairs[0];
> > +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
> > +        has_zbb = true;
> > +
> > +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
> > +      pair = &pairs[1];
> > +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
> > +        == RISCV_HWPROBE_MISALIGNED_FAST)
> > +        fast_unaligned = true;
> > +    }
> > +
> > +  IFUNC_IMPL (i, name, memchr,
> > +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
> > +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
> >
> >    IFUNC_IMPL (i, name, memcpy,
> >             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> > diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > new file mode 100644
> > index 0000000000..bc076cbf24
> > --- /dev/null
> > +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > @@ -0,0 +1,57 @@
> > +/* Multiple versions of memchr.
> > +   All versions must be listed in ifunc-impl-list.c.
> > +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#if IS_IN (libc)
> > +/* Redefine memchr so that the compiler won't complain about the type
> > +   mismatch with the IFUNC selector in strong_alias, below.  */
> > +# undef memchr
> > +# define memchr __redirect_memchr
> > +# include <stdint.h>
> > +# include <string.h>
> > +# include <ifunc-init.h>
> > +# include <riscv-ifunc.h>
> > +# include <sys/hwprobe.h>
> > +
> > +extern __typeof (__redirect_memchr) __libc_memchr;
> > +
> > +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
> > +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
> > +
> > +static inline __typeof (__redirect_memchr) *
> > +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> > +{
> > +  unsigned long long int v;
> > +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
> > +      && (v & RISCV_HWPROBE_EXT_ZBB))
> > +    return __memchr_zbb;
> > +
> > +  return __memchr_generic;
> > +}
> > +
> > +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
> > +
> > +# undef memchr
> > +strong_alias (__libc_memchr, memchr);
> > +# ifdef SHARED
> > +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
> > +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
> > +# endif
> > +#else
> > +# include <string/memchr.c>
> > +#endif
  
Adhemerval Zanella Netto April 24, 2024, 1:36 p.m. UTC | #3
On 24/04/24 10:16, Christoph Müllner wrote:
> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 22/04/24 04:43, Christoph Müllner wrote:
>>> When building with Zbb enabled, memchr benefits from using orc.b in
>>> find_zero_all().  This patch changes the build system such, that a
>>> non-Zbb version as well as a Zbb version of this routine is built.
>>> Further, a ifunc resolver is provided that selects the right routine
>>> based on the outcome of extension probing via hwprobe().
>>>
>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
>>> ---
>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
>>>  5 files changed, 142 insertions(+), 5 deletions(-)
>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>
>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
>>> new file mode 100644
>>> index 0000000000..a96c36398b
>>> --- /dev/null
>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
>>> @@ -0,0 +1,26 @@
>>> +/* Re-include the default memchr implementation.
>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <string.h>
>>> +
>>> +#if IS_IN(libc)
>>> +# define MEMCHR __memchr_generic
>>> +# undef libc_hidden_builtin_def
>>> +# define libc_hidden_builtin_def(x)
>>> +#endif
>>> +#include <string/memchr.c>
>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
>>> new file mode 100644
>>> index 0000000000..bead0335ae
>>> --- /dev/null
>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
>>> @@ -0,0 +1,30 @@
>>> +/* Re-include the default memchr implementation for Zbb.
>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include <string.h>
>>> +
>>> +#if IS_IN(libc)
>>> +# define MEMCHR __memchr_zbb
>>> +# undef libc_hidden_builtin_def
>>> +# define libc_hidden_builtin_def(x)
>>> +#endif
>>> +/* Convince preprocessor to have Zbb instructions.  */
>>> +#ifndef __riscv_zbb
>>> +# define __riscv_zbb
>>> +#endif
>>
>> Is there a way to specific the compiler to enable a extension, like aarch64
>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
>> instead of messing with compiler defined pre-processor.
> 
> The tools expect a list of all extensions as parameter to the -march= option.
> But there is no way to append extensions to an existing march string
> on the command line.
> 
> And if we would add this feature today, it would take many years until we could
> use it here, because we want to remain compatible with old tools.
> Or we enable the optimization only when being built with new tools, but that
> adds even more complexity and build/test configurations.
> 
> What we have is:
> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
> * Command line (since forever): -march=BASE_EXTENSIONLIST
> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
> option push/pop)
> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
> 
> I was not sure about using __riscv_zbb as well, but I considered it safe within
> ifdef tests that ensure the macro won't be set twice.
> If that's a concern, I could change to use something like this:
> #define __riscv_force_zbb
> #include <impl.c>
> #undef __riscv_force_zbb
> ... and change string-fza.h like this:
> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
> // orc.b
> #endif
> 
> BR
> Christoph

Another options would to parse the current march and add the extension if required,
something like:

abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
if [[ ! "$abi" =~ "_zbb" ]]
then
  abi="$abi"_zbb
fi

I don't have a strong preference, it is just that by not using the compiler flag
we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
a possible better code generation from compiler.

> 
>>> +#include <string/memchr.c>
>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>> index fcef5659d4..5586d11c89 100644
>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>> @@ -1,5 +1,8 @@
>>>  ifeq ($(subdir),string)
>>>  sysdep_routines += \
>>> +  memchr \
>>> +  memchr-generic \
>>> +  memchr-zbb \
>>>    memcpy \
>>>    memcpy-generic \
>>>    memcpy_noalignment \
>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>> index 9f806d7a9e..7321144a32 100644
>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>> @@ -20,19 +20,40 @@
>>>  #include <string.h>
>>>  #include <sys/hwprobe.h>
>>>
>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
>>> +
>>>  size_t
>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>>                       size_t max)
>>>  {
>>>    size_t i = max;
>>> +  struct riscv_hwprobe pairs[] = {
>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
>>> +  };
>>>
>>> +  bool has_zbb = false;
>>>    bool fast_unaligned = false;
>>>
>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
>>> -    fast_unaligned = true;
>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
>>> +    {
>>> +      struct riscv_hwprobe *pair;
>>> +
>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
>>> +      pair = &pairs[0];
>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
>>> +        has_zbb = true;
>>> +
>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
>>> +      pair = &pairs[1];
>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
>>> +        fast_unaligned = true;
>>> +    }
>>> +
>>> +  IFUNC_IMPL (i, name, memchr,
>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
>>>
>>>    IFUNC_IMPL (i, name, memcpy,
>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>> new file mode 100644
>>> index 0000000000..bc076cbf24
>>> --- /dev/null
>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>> @@ -0,0 +1,57 @@
>>> +/* Multiple versions of memchr.
>>> +   All versions must be listed in ifunc-impl-list.c.
>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#if IS_IN (libc)
>>> +/* Redefine memchr so that the compiler won't complain about the type
>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
>>> +# undef memchr
>>> +# define memchr __redirect_memchr
>>> +# include <stdint.h>
>>> +# include <string.h>
>>> +# include <ifunc-init.h>
>>> +# include <riscv-ifunc.h>
>>> +# include <sys/hwprobe.h>
>>> +
>>> +extern __typeof (__redirect_memchr) __libc_memchr;
>>> +
>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
>>> +
>>> +static inline __typeof (__redirect_memchr) *
>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>>> +{
>>> +  unsigned long long int v;
>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
>>> +    return __memchr_zbb;
>>> +
>>> +  return __memchr_generic;
>>> +}
>>> +
>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
>>> +
>>> +# undef memchr
>>> +strong_alias (__libc_memchr, memchr);
>>> +# ifdef SHARED
>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
>>> +# endif
>>> +#else
>>> +# include <string/memchr.c>
>>> +#endif
  
Christoph Müllner April 26, 2024, 11:40 a.m. UTC | #4
On Wed, Apr 24, 2024 at 3:36 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 24/04/24 10:16, Christoph Müllner wrote:
> > On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
> > <adhemerval.zanella@linaro.org> wrote:
> >>
> >>
> >>
> >> On 22/04/24 04:43, Christoph Müllner wrote:
> >>> When building with Zbb enabled, memchr benefits from using orc.b in
> >>> find_zero_all().  This patch changes the build system such, that a
> >>> non-Zbb version as well as a Zbb version of this routine is built.
> >>> Further, a ifunc resolver is provided that selects the right routine
> >>> based on the outcome of extension probing via hwprobe().
> >>>
> >>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> >>> ---
> >>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
> >>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
> >>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
> >>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
> >>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
> >>>  5 files changed, 142 insertions(+), 5 deletions(-)
> >>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
> >>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
> >>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >>>
> >>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
> >>> new file mode 100644
> >>> index 0000000000..a96c36398b
> >>> --- /dev/null
> >>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
> >>> @@ -0,0 +1,26 @@
> >>> +/* Re-include the default memchr implementation.
> >>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> >>> +   This file is part of the GNU C Library.
> >>> +
> >>> +   The GNU C Library is free software; you can redistribute it and/or
> >>> +   modify it under the terms of the GNU Lesser General Public
> >>> +   License as published by the Free Software Foundation; either
> >>> +   version 2.1 of the License, or (at your option) any later version.
> >>> +
> >>> +   The GNU C Library is distributed in the hope that it will be useful,
> >>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>> +   Lesser General Public License for more details.
> >>> +
> >>> +   You should have received a copy of the GNU Lesser General Public
> >>> +   License along with the GNU C Library; if not, see
> >>> +   <https://www.gnu.org/licenses/>.  */
> >>> +
> >>> +#include <string.h>
> >>> +
> >>> +#if IS_IN(libc)
> >>> +# define MEMCHR __memchr_generic
> >>> +# undef libc_hidden_builtin_def
> >>> +# define libc_hidden_builtin_def(x)
> >>> +#endif
> >>> +#include <string/memchr.c>
> >>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
> >>> new file mode 100644
> >>> index 0000000000..bead0335ae
> >>> --- /dev/null
> >>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
> >>> @@ -0,0 +1,30 @@
> >>> +/* Re-include the default memchr implementation for Zbb.
> >>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> >>> +   This file is part of the GNU C Library.
> >>> +
> >>> +   The GNU C Library is free software; you can redistribute it and/or
> >>> +   modify it under the terms of the GNU Lesser General Public
> >>> +   License as published by the Free Software Foundation; either
> >>> +   version 2.1 of the License, or (at your option) any later version.
> >>> +
> >>> +   The GNU C Library is distributed in the hope that it will be useful,
> >>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>> +   Lesser General Public License for more details.
> >>> +
> >>> +   You should have received a copy of the GNU Lesser General Public
> >>> +   License along with the GNU C Library; if not, see
> >>> +   <https://www.gnu.org/licenses/>.  */
> >>> +
> >>> +#include <string.h>
> >>> +
> >>> +#if IS_IN(libc)
> >>> +# define MEMCHR __memchr_zbb
> >>> +# undef libc_hidden_builtin_def
> >>> +# define libc_hidden_builtin_def(x)
> >>> +#endif
> >>> +/* Convince preprocessor to have Zbb instructions.  */
> >>> +#ifndef __riscv_zbb
> >>> +# define __riscv_zbb
> >>> +#endif
> >>
> >> Is there a way to specific the compiler to enable a extension, like aarch64
> >> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
> >> instead of messing with compiler defined pre-processor.
> >
> > The tools expect a list of all extensions as parameter to the -march= option.
> > But there is no way to append extensions to an existing march string
> > on the command line.
> >
> > And if we would add this feature today, it would take many years until we could
> > use it here, because we want to remain compatible with old tools.
> > Or we enable the optimization only when being built with new tools, but that
> > adds even more complexity and build/test configurations.
> >
> > What we have is:
> > * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
> > * Command line (since forever): -march=BASE_EXTENSIONLIST
> > * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
> > option push/pop)
> > * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
> >
> > I was not sure about using __riscv_zbb as well, but I considered it safe within
> > ifdef tests that ensure the macro won't be set twice.
> > If that's a concern, I could change to use something like this:
> > #define __riscv_force_zbb
> > #include <impl.c>
> > #undef __riscv_force_zbb
> > ... and change string-fza.h like this:
> > #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
> > // orc.b
> > #endif
> >
> > BR
> > Christoph
>
> Another options would to parse the current march and add the extension if required,
> something like:
>
> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
> if [[ ! "$abi" =~ "_zbb" ]]
> then
>   abi="$abi"_zbb
> fi

I tried to work something out, but this attempt also won't work reliably.
Until recently (Jan 5, 2024) GCC required that the extensions on the command
line were provided in canonical order. I.e., "_zbb" would probably have to be
inserted somewhere in the middle. Sorting the tokens of the march string seems
to be an overkill.

> I don't have a strong preference, it is just that by not using the compiler flag
> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
> a possible better code generation from compiler.

I agree that builtins are better.
At the same time, the solution in the patch is no worse than what we
already have
in glibc right now. Further, this builtin was added to GCC on Jan 15, 2024.
So, it can only be used if the build tools are recent enough.

>
> >
> >>> +#include <string/memchr.c>
> >>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> >>> index fcef5659d4..5586d11c89 100644
> >>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> >>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> >>> @@ -1,5 +1,8 @@
> >>>  ifeq ($(subdir),string)
> >>>  sysdep_routines += \
> >>> +  memchr \
> >>> +  memchr-generic \
> >>> +  memchr-zbb \
> >>>    memcpy \
> >>>    memcpy-generic \
> >>>    memcpy_noalignment \
> >>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> >>> index 9f806d7a9e..7321144a32 100644
> >>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> >>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> >>> @@ -20,19 +20,40 @@
> >>>  #include <string.h>
> >>>  #include <sys/hwprobe.h>
> >>>
> >>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
> >>> +
> >>>  size_t
> >>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >>>                       size_t max)
> >>>  {
> >>>    size_t i = max;
> >>> +  struct riscv_hwprobe pairs[] = {
> >>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
> >>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
> >>> +  };
> >>>
> >>> +  bool has_zbb = false;
> >>>    bool fast_unaligned = false;
> >>>
> >>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> >>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> >>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
> >>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
> >>> -    fast_unaligned = true;
> >>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
> >>> +    {
> >>> +      struct riscv_hwprobe *pair;
> >>> +
> >>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
> >>> +      pair = &pairs[0];
> >>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
> >>> +        has_zbb = true;
> >>> +
> >>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
> >>> +      pair = &pairs[1];
> >>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
> >>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
> >>> +        fast_unaligned = true;
> >>> +    }
> >>> +
> >>> +  IFUNC_IMPL (i, name, memchr,
> >>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
> >>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
> >>>
> >>>    IFUNC_IMPL (i, name, memcpy,
> >>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> >>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >>> new file mode 100644
> >>> index 0000000000..bc076cbf24
> >>> --- /dev/null
> >>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >>> @@ -0,0 +1,57 @@
> >>> +/* Multiple versions of memchr.
> >>> +   All versions must be listed in ifunc-impl-list.c.
> >>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
> >>> +   This file is part of the GNU C Library.
> >>> +
> >>> +   The GNU C Library is free software; you can redistribute it and/or
> >>> +   modify it under the terms of the GNU Lesser General Public
> >>> +   License as published by the Free Software Foundation; either
> >>> +   version 2.1 of the License, or (at your option) any later version.
> >>> +
> >>> +   The GNU C Library is distributed in the hope that it will be useful,
> >>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>> +   Lesser General Public License for more details.
> >>> +
> >>> +   You should have received a copy of the GNU Lesser General Public
> >>> +   License along with the GNU C Library; if not, see
> >>> +   <https://www.gnu.org/licenses/>.  */
> >>> +
> >>> +#if IS_IN (libc)
> >>> +/* Redefine memchr so that the compiler won't complain about the type
> >>> +   mismatch with the IFUNC selector in strong_alias, below.  */
> >>> +# undef memchr
> >>> +# define memchr __redirect_memchr
> >>> +# include <stdint.h>
> >>> +# include <string.h>
> >>> +# include <ifunc-init.h>
> >>> +# include <riscv-ifunc.h>
> >>> +# include <sys/hwprobe.h>
> >>> +
> >>> +extern __typeof (__redirect_memchr) __libc_memchr;
> >>> +
> >>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
> >>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
> >>> +
> >>> +static inline __typeof (__redirect_memchr) *
> >>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> >>> +{
> >>> +  unsigned long long int v;
> >>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
> >>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
> >>> +    return __memchr_zbb;
> >>> +
> >>> +  return __memchr_generic;
> >>> +}
> >>> +
> >>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
> >>> +
> >>> +# undef memchr
> >>> +strong_alias (__libc_memchr, memchr);
> >>> +# ifdef SHARED
> >>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
> >>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
> >>> +# endif
> >>> +#else
> >>> +# include <string/memchr.c>
> >>> +#endif
  
Palmer Dabbelt April 30, 2024, 3:13 p.m. UTC | #5
On Wed, 24 Apr 2024 06:36:43 PDT (-0700), adhemerval.zanella@linaro.org wrote:
>
>
> On 24/04/24 10:16, Christoph Müllner wrote:
>> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
>> <adhemerval.zanella@linaro.org> wrote:
>>>
>>>
>>>
>>> On 22/04/24 04:43, Christoph Müllner wrote:
>>>> When building with Zbb enabled, memchr benefits from using orc.b in
>>>> find_zero_all().  This patch changes the build system such, that a
>>>> non-Zbb version as well as a Zbb version of this routine is built.
>>>> Further, a ifunc resolver is provided that selects the right routine
>>>> based on the outcome of extension probing via hwprobe().
>>>>
>>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
>>>> ---
>>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
>>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
>>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
>>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
>>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
>>>>  5 files changed, 142 insertions(+), 5 deletions(-)
>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
>>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>
>>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
>>>> new file mode 100644
>>>> index 0000000000..a96c36398b
>>>> --- /dev/null
>>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
>>>> @@ -0,0 +1,26 @@
>>>> +/* Re-include the default memchr implementation.
>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#include <string.h>
>>>> +
>>>> +#if IS_IN(libc)
>>>> +# define MEMCHR __memchr_generic
>>>> +# undef libc_hidden_builtin_def
>>>> +# define libc_hidden_builtin_def(x)
>>>> +#endif
>>>> +#include <string/memchr.c>
>>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
>>>> new file mode 100644
>>>> index 0000000000..bead0335ae
>>>> --- /dev/null
>>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
>>>> @@ -0,0 +1,30 @@
>>>> +/* Re-include the default memchr implementation for Zbb.
>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#include <string.h>
>>>> +
>>>> +#if IS_IN(libc)
>>>> +# define MEMCHR __memchr_zbb
>>>> +# undef libc_hidden_builtin_def
>>>> +# define libc_hidden_builtin_def(x)
>>>> +#endif
>>>> +/* Convince preprocessor to have Zbb instructions.  */
>>>> +#ifndef __riscv_zbb
>>>> +# define __riscv_zbb
>>>> +#endif
>>>
>>> Is there a way to specific the compiler to enable a extension, like aarch64
>>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
>>> instead of messing with compiler defined pre-processor.
>>
>> The tools expect a list of all extensions as parameter to the -march= option.
>> But there is no way to append extensions to an existing march string
>> on the command line.
>>
>> And if we would add this feature today, it would take many years until we could
>> use it here, because we want to remain compatible with old tools.
>> Or we enable the optimization only when being built with new tools, but that
>> adds even more complexity and build/test configurations.
>>
>> What we have is:
>> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
>> * Command line (since forever): -march=BASE_EXTENSIONLIST
>> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
>> option push/pop)
>> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
>>
>> I was not sure about using __riscv_zbb as well, but I considered it safe within
>> ifdef tests that ensure the macro won't be set twice.
>> If that's a concern, I could change to use something like this:
>> #define __riscv_force_zbb
>> #include <impl.c>
>> #undef __riscv_force_zbb
>> ... and change string-fza.h like this:
>> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
>> // orc.b
>> #endif
>>
>> BR
>> Christoph
>
> Another options would to parse the current march and add the extension if required,
> something like:
>
> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
> if [[ ! "$abi" =~ "_zbb" ]]
> then
>   abi="$abi"_zbb
> fi

That alone likely won't do it, there's a bunch of ordering rules in the 
ISA string handling so we might get tripped up on them.  We've got a 
fairly relaxed version of the rules in GCC to try and match the various 
older rules, though, so it might be possible to make something similar 
work.

We should probably just add some sort of -march=+zbb type argument.  
IIRC Kito was going to do it at some point, not sure if he got around to 
it?

> I don't have a strong preference, it is just that by not using the compiler flag
> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
> a possible better code generation from compiler.

I think we'd likely get slightly better codgen from telling the compiler 
about the bitmanip extensions.  Maybe we want something like

    diff --git a/string/memchr.c b/string/memchr.c
    index 08b5c41667..1b62dce8d8 100644
    --- a/string/memchr.c
    +++ b/string/memchr.c
    @@ -29,15 +29,19 @@
     # define __memchr MEMCHR
     #endif
    
    +#ifndef __MEMCHR_CODEGEN_ATTRIBUTE
    +#define __MEMCHR_CODEGEN_ATTRIBUTE
    +#endif
    +
     static __always_inline const char *
    -sadd (uintptr_t x, uintptr_t y)
    +sadd (uintptr_t x, uintptr_t y) __MEMCHR_CODEGEN_ATTRIBUTE
     {
       return (const char *)(y > UINTPTR_MAX - x ? UINTPTR_MAX : x + y);
     }
    
     /* Search no more than N bytes of S for C.  */
     void *
    -__memchr (void const *s, int c_in, size_t n)
    +__memchr (void const *s, int c_in, size_t n) __MEMCHR_CODEGEN_ATTRIBUTE
     {
       if (__glibc_unlikely (n == 0))
         return NULL;

in the generic versions, so we can add a

    #define __MEMCHR_CODEGEN_ATTRIBUTE __attribuet__((target("+zbb")))

(or whatever the syntax is) to the Zbb-flavored versions of these 
routines?

It might also be worth just jumping to the fast-misaligned versions for 
these routines, too --the slow-misaligned stuff is there for 
compatibility with old stuff (though memchr aligns the pointer, so it 
doesn't matter so much here).

>>>> +#include <string/memchr.c>
>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>> index fcef5659d4..5586d11c89 100644
>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>> @@ -1,5 +1,8 @@
>>>>  ifeq ($(subdir),string)
>>>>  sysdep_routines += \
>>>> +  memchr \
>>>> +  memchr-generic \
>>>> +  memchr-zbb \
>>>>    memcpy \
>>>>    memcpy-generic \
>>>>    memcpy_noalignment \
>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>> index 9f806d7a9e..7321144a32 100644
>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>> @@ -20,19 +20,40 @@
>>>>  #include <string.h>
>>>>  #include <sys/hwprobe.h>
>>>>
>>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
>>>> +
>>>>  size_t
>>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>>>                       size_t max)
>>>>  {
>>>>    size_t i = max;
>>>> +  struct riscv_hwprobe pairs[] = {
>>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
>>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
>>>> +  };
>>>>
>>>> +  bool has_zbb = false;
>>>>    bool fast_unaligned = false;
>>>>
>>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
>>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
>>>> -    fast_unaligned = true;
>>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
>>>> +    {
>>>> +      struct riscv_hwprobe *pair;
>>>> +
>>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
>>>> +      pair = &pairs[0];
>>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
>>>> +        has_zbb = true;
>>>> +
>>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
>>>> +      pair = &pairs[1];
>>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
>>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
>>>> +        fast_unaligned = true;
>>>> +    }
>>>> +
>>>> +  IFUNC_IMPL (i, name, memchr,
>>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
>>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
>>>>
>>>>    IFUNC_IMPL (i, name, memcpy,
>>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>> new file mode 100644
>>>> index 0000000000..bc076cbf24
>>>> --- /dev/null
>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>> @@ -0,0 +1,57 @@
>>>> +/* Multiple versions of memchr.
>>>> +   All versions must be listed in ifunc-impl-list.c.
>>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
>>>> +   This file is part of the GNU C Library.
>>>> +
>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>> +   modify it under the terms of the GNU Lesser General Public
>>>> +   License as published by the Free Software Foundation; either
>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>> +
>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> +   Lesser General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU Lesser General Public
>>>> +   License along with the GNU C Library; if not, see
>>>> +   <https://www.gnu.org/licenses/>.  */
>>>> +
>>>> +#if IS_IN (libc)
>>>> +/* Redefine memchr so that the compiler won't complain about the type
>>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
>>>> +# undef memchr
>>>> +# define memchr __redirect_memchr
>>>> +# include <stdint.h>
>>>> +# include <string.h>
>>>> +# include <ifunc-init.h>
>>>> +# include <riscv-ifunc.h>
>>>> +# include <sys/hwprobe.h>
>>>> +
>>>> +extern __typeof (__redirect_memchr) __libc_memchr;
>>>> +
>>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
>>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
>>>> +
>>>> +static inline __typeof (__redirect_memchr) *
>>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>>>> +{
>>>> +  unsigned long long int v;
>>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
>>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
>>>> +    return __memchr_zbb;
>>>> +
>>>> +  return __memchr_generic;
>>>> +}
>>>> +
>>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
>>>> +
>>>> +# undef memchr
>>>> +strong_alias (__libc_memchr, memchr);
>>>> +# ifdef SHARED
>>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
>>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
>>>> +# endif
>>>> +#else
>>>> +# include <string/memchr.c>
>>>> +#endif
  
Adhemerval Zanella Netto April 30, 2024, 5:45 p.m. UTC | #6
On 30/04/24 12:13, Palmer Dabbelt wrote:
> On Wed, 24 Apr 2024 06:36:43 PDT (-0700), adhemerval.zanella@linaro.org wrote:
>>
>>
>> On 24/04/24 10:16, Christoph Müllner wrote:
>>> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
>>> <adhemerval.zanella@linaro.org> wrote:
>>>>
>>>>
>>>>
>>>> On 22/04/24 04:43, Christoph Müllner wrote:
>>>>> When building with Zbb enabled, memchr benefits from using orc.b in
>>>>> find_zero_all().  This patch changes the build system such, that a
>>>>> non-Zbb version as well as a Zbb version of this routine is built.
>>>>> Further, a ifunc resolver is provided that selects the right routine
>>>>> based on the outcome of extension probing via hwprobe().
>>>>>
>>>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
>>>>> ---
>>>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
>>>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
>>>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
>>>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
>>>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
>>>>>  5 files changed, 142 insertions(+), 5 deletions(-)
>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
>>>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>>
>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
>>>>> new file mode 100644
>>>>> index 0000000000..a96c36398b
>>>>> --- /dev/null
>>>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
>>>>> @@ -0,0 +1,26 @@
>>>>> +/* Re-include the default memchr implementation.
>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>>>> +   This file is part of the GNU C Library.
>>>>> +
>>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>>> +   modify it under the terms of the GNU Lesser General Public
>>>>> +   License as published by the Free Software Foundation; either
>>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>>> +
>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>> +   Lesser General Public License for more details.
>>>>> +
>>>>> +   You should have received a copy of the GNU Lesser General Public
>>>>> +   License along with the GNU C Library; if not, see
>>>>> +   <https://www.gnu.org/licenses/>.  */
>>>>> +
>>>>> +#include <string.h>
>>>>> +
>>>>> +#if IS_IN(libc)
>>>>> +# define MEMCHR __memchr_generic
>>>>> +# undef libc_hidden_builtin_def
>>>>> +# define libc_hidden_builtin_def(x)
>>>>> +#endif
>>>>> +#include <string/memchr.c>
>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
>>>>> new file mode 100644
>>>>> index 0000000000..bead0335ae
>>>>> --- /dev/null
>>>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
>>>>> @@ -0,0 +1,30 @@
>>>>> +/* Re-include the default memchr implementation for Zbb.
>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>>>> +   This file is part of the GNU C Library.
>>>>> +
>>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>>> +   modify it under the terms of the GNU Lesser General Public
>>>>> +   License as published by the Free Software Foundation; either
>>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>>> +
>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>> +   Lesser General Public License for more details.
>>>>> +
>>>>> +   You should have received a copy of the GNU Lesser General Public
>>>>> +   License along with the GNU C Library; if not, see
>>>>> +   <https://www.gnu.org/licenses/>.  */
>>>>> +
>>>>> +#include <string.h>
>>>>> +
>>>>> +#if IS_IN(libc)
>>>>> +# define MEMCHR __memchr_zbb
>>>>> +# undef libc_hidden_builtin_def
>>>>> +# define libc_hidden_builtin_def(x)
>>>>> +#endif
>>>>> +/* Convince preprocessor to have Zbb instructions.  */
>>>>> +#ifndef __riscv_zbb
>>>>> +# define __riscv_zbb
>>>>> +#endif
>>>>
>>>> Is there a way to specific the compiler to enable a extension, like aarch64
>>>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
>>>> instead of messing with compiler defined pre-processor.
>>>
>>> The tools expect a list of all extensions as parameter to the -march= option.
>>> But there is no way to append extensions to an existing march string
>>> on the command line.
>>>
>>> And if we would add this feature today, it would take many years until we could
>>> use it here, because we want to remain compatible with old tools.
>>> Or we enable the optimization only when being built with new tools, but that
>>> adds even more complexity and build/test configurations.
>>>
>>> What we have is:
>>> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
>>> * Command line (since forever): -march=BASE_EXTENSIONLIST
>>> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
>>> option push/pop)
>>> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
>>>
>>> I was not sure about using __riscv_zbb as well, but I considered it safe within
>>> ifdef tests that ensure the macro won't be set twice.
>>> If that's a concern, I could change to use something like this:
>>> #define __riscv_force_zbb
>>> #include <impl.c>
>>> #undef __riscv_force_zbb
>>> ... and change string-fza.h like this:
>>> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
>>> // orc.b
>>> #endif
>>>
>>> BR
>>> Christoph
>>
>> Another options would to parse the current march and add the extension if required,
>> something like:
>>
>> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
>> if [[ ! "$abi" =~ "_zbb" ]]
>> then
>>   abi="$abi"_zbb
>> fi
> 
> That alone likely won't do it, there's a bunch of ordering rules in the ISA string handling so we might get tripped up on them.  We've got a fairly relaxed version of the rules in GCC to try and match the various older rules, though, so it might be possible to make something similar work.
> 
> We should probably just add some sort of -march=+zbb type argument.  IIRC Kito was going to do it at some point, not sure if he got around to it?

I am just pointing this out because I think the way RISCV extension selection
is currently implemented makes it awkward to provide ifunc implementation in
a agnostic way (specially now that RISCV has dozens of extensions) without
knowing the current target compiler is generating.

Some other ABI allows to either specify a ISA/chip reference (like powerpc
with -mcpu=powerX) or a ABI extension directly (like aarch64 with -march=+xxx).

> 
>> I don't have a strong preference, it is just that by not using the compiler flag
>> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
>> a possible better code generation from compiler.
> 
> I think we'd likely get slightly better codgen from telling the compiler about the bitmanip extensions.  Maybe we want something like
> 
>    diff --git a/string/memchr.c b/string/memchr.c
>    index 08b5c41667..1b62dce8d8 100644
>    --- a/string/memchr.c
>    +++ b/string/memchr.c
>    @@ -29,15 +29,19 @@
>     # define __memchr MEMCHR
>     #endif
>       +#ifndef __MEMCHR_CODEGEN_ATTRIBUTE
>    +#define __MEMCHR_CODEGEN_ATTRIBUTE
>    +#endif
>    +
>     static __always_inline const char *
>    -sadd (uintptr_t x, uintptr_t y)
>    +sadd (uintptr_t x, uintptr_t y) __MEMCHR_CODEGEN_ATTRIBUTE
>     {
>       return (const char *)(y > UINTPTR_MAX - x ? UINTPTR_MAX : x + y);
>     }
>        /* Search no more than N bytes of S for C.  */
>     void *
>    -__memchr (void const *s, int c_in, size_t n)
>    +__memchr (void const *s, int c_in, size_t n) __MEMCHR_CODEGEN_ATTRIBUTE
>     {
>       if (__glibc_unlikely (n == 0))
>         return NULL;
> 
> in the generic versions, so we can add a
> 
>    #define __MEMCHR_CODEGEN_ATTRIBUTE __attribuet__((target("+zbb")))
> 
> (or whatever the syntax is) to the Zbb-flavored versions of these routines?

Yeah, this might work and it is clear than messing with compiler-defined
macros.

> 
> It might also be worth just jumping to the fast-misaligned versions for these routines, too --the slow-misaligned stuff is there for compatibility with old stuff (though memchr aligns the pointer, so it doesn't matter so much here).

I was hopping that ABIs that would like to provide unaligned variants
for mem* routines to improve the generic code, but it seems that for some
it is easier to just add an assembly routine (as loongarch did).

For memchr, I think it should be easy to provide a unaligned version.
Something like (completely untested):

/* Search no more than N bytes of S for C.  */
void *
__memchr (void const *s, int c_in, size_t n)
{
  if (__glibc_unlikely (n == 0))
    return NULL;

#ifdef USE_MEMCHR_UNALIGNED
  /* Read the first word, but munge it so that bytes before the array
     will not match goal.  */
  const op_t *word_ptr = PTR_ALIGN_DOWN (s, sizeof (op_t));
  uintptr_t s_int = (uintptr_t) s;

  op_t word = *word_ptr;
  op_t repeated_c = repeat_bytes (c_in);
  /* Compute the address of the last byte taking in consideration possible
     overflow.  */
  const char *lbyte = sadd (s_int, n - 1);
  /* And also the address of the word containing the last byte. */
  const op_t *lword = (const op_t *) PTR_ALIGN_DOWN (lbyte, sizeof (op_t));

  find_t mask = shift_find (find_eq_all (word, repeated_c), s_int);
  if (mask != 0)
    {
      char *ret = (char *) s + index_first (mask);
      return (ret <= lbyte) ? ret : NULL;
    }
  if (word_ptr == lword)
    return NULL;
#endif

  word = *++word_ptr;
  while (word_ptr != lword)
    {
      if (has_eq (word, repeated_c))
        return (char *) word_ptr + index_first_eq (word, repeated_c);
      word = *++word_ptr;
    }

  if (has_eq (word, repeated_c))
    {
      /* We found a match, but it might be in a byte past the end of the
         array.  */
      char *ret = (char *) word_ptr + index_first_eq (word, repeated_c);
      if (ret <= lbyte)
        return ret;
    }
  return NULL;
}

> 
>>>>> +#include <string/memchr.c>
>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>>> index fcef5659d4..5586d11c89 100644
>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>>> @@ -1,5 +1,8 @@
>>>>>  ifeq ($(subdir),string)
>>>>>  sysdep_routines += \
>>>>> +  memchr \
>>>>> +  memchr-generic \
>>>>> +  memchr-zbb \
>>>>>    memcpy \
>>>>>    memcpy-generic \
>>>>>    memcpy_noalignment \
>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>>> index 9f806d7a9e..7321144a32 100644
>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>>> @@ -20,19 +20,40 @@
>>>>>  #include <string.h>
>>>>>  #include <sys/hwprobe.h>
>>>>>
>>>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
>>>>> +
>>>>>  size_t
>>>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>>>>                       size_t max)
>>>>>  {
>>>>>    size_t i = max;
>>>>> +  struct riscv_hwprobe pairs[] = {
>>>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
>>>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
>>>>> +  };
>>>>>
>>>>> +  bool has_zbb = false;
>>>>>    bool fast_unaligned = false;
>>>>>
>>>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>>>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>>>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
>>>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
>>>>> -    fast_unaligned = true;
>>>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
>>>>> +    {
>>>>> +      struct riscv_hwprobe *pair;
>>>>> +
>>>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
>>>>> +      pair = &pairs[0];
>>>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
>>>>> +        has_zbb = true;
>>>>> +
>>>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
>>>>> +      pair = &pairs[1];
>>>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
>>>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
>>>>> +        fast_unaligned = true;
>>>>> +    }
>>>>> +
>>>>> +  IFUNC_IMPL (i, name, memchr,
>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
>>>>>
>>>>>    IFUNC_IMPL (i, name, memcpy,
>>>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>> new file mode 100644
>>>>> index 0000000000..bc076cbf24
>>>>> --- /dev/null
>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>> @@ -0,0 +1,57 @@
>>>>> +/* Multiple versions of memchr.
>>>>> +   All versions must be listed in ifunc-impl-list.c.
>>>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
>>>>> +   This file is part of the GNU C Library.
>>>>> +
>>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>>> +   modify it under the terms of the GNU Lesser General Public
>>>>> +   License as published by the Free Software Foundation; either
>>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>>> +
>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>> +   Lesser General Public License for more details.
>>>>> +
>>>>> +   You should have received a copy of the GNU Lesser General Public
>>>>> +   License along with the GNU C Library; if not, see
>>>>> +   <https://www.gnu.org/licenses/>.  */
>>>>> +
>>>>> +#if IS_IN (libc)
>>>>> +/* Redefine memchr so that the compiler won't complain about the type
>>>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
>>>>> +# undef memchr
>>>>> +# define memchr __redirect_memchr
>>>>> +# include <stdint.h>
>>>>> +# include <string.h>
>>>>> +# include <ifunc-init.h>
>>>>> +# include <riscv-ifunc.h>
>>>>> +# include <sys/hwprobe.h>
>>>>> +
>>>>> +extern __typeof (__redirect_memchr) __libc_memchr;
>>>>> +
>>>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
>>>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
>>>>> +
>>>>> +static inline __typeof (__redirect_memchr) *
>>>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>>>>> +{
>>>>> +  unsigned long long int v;
>>>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
>>>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
>>>>> +    return __memchr_zbb;
>>>>> +
>>>>> +  return __memchr_generic;
>>>>> +}
>>>>> +
>>>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
>>>>> +
>>>>> +# undef memchr
>>>>> +strong_alias (__libc_memchr, memchr);
>>>>> +# ifdef SHARED
>>>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
>>>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
>>>>> +# endif
>>>>> +#else
>>>>> +# include <string/memchr.c>
>>>>> +#endif
  
Palmer Dabbelt April 30, 2024, 5:54 p.m. UTC | #7
On Tue, 30 Apr 2024 10:45:12 PDT (-0700), adhemerval.zanella@linaro.org wrote:
>
>
> On 30/04/24 12:13, Palmer Dabbelt wrote:
>> On Wed, 24 Apr 2024 06:36:43 PDT (-0700), adhemerval.zanella@linaro.org wrote:
>>>
>>>
>>> On 24/04/24 10:16, Christoph Müllner wrote:
>>>> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 22/04/24 04:43, Christoph Müllner wrote:
>>>>>> When building with Zbb enabled, memchr benefits from using orc.b in
>>>>>> find_zero_all().  This patch changes the build system such, that a
>>>>>> non-Zbb version as well as a Zbb version of this routine is built.
>>>>>> Further, a ifunc resolver is provided that selects the right routine
>>>>>> based on the outcome of extension probing via hwprobe().
>>>>>>
>>>>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
>>>>>> ---
>>>>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
>>>>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
>>>>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
>>>>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
>>>>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
>>>>>>  5 files changed, 142 insertions(+), 5 deletions(-)
>>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
>>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
>>>>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>>>
>>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
>>>>>> new file mode 100644
>>>>>> index 0000000000..a96c36398b
>>>>>> --- /dev/null
>>>>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
>>>>>> @@ -0,0 +1,26 @@
>>>>>> +/* Re-include the default memchr implementation.
>>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>>>>> +   This file is part of the GNU C Library.
>>>>>> +
>>>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>>>> +   modify it under the terms of the GNU Lesser General Public
>>>>>> +   License as published by the Free Software Foundation; either
>>>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>>>> +
>>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>>> +   Lesser General Public License for more details.
>>>>>> +
>>>>>> +   You should have received a copy of the GNU Lesser General Public
>>>>>> +   License along with the GNU C Library; if not, see
>>>>>> +   <https://www.gnu.org/licenses/>.  */
>>>>>> +
>>>>>> +#include <string.h>
>>>>>> +
>>>>>> +#if IS_IN(libc)
>>>>>> +# define MEMCHR __memchr_generic
>>>>>> +# undef libc_hidden_builtin_def
>>>>>> +# define libc_hidden_builtin_def(x)
>>>>>> +#endif
>>>>>> +#include <string/memchr.c>
>>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
>>>>>> new file mode 100644
>>>>>> index 0000000000..bead0335ae
>>>>>> --- /dev/null
>>>>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
>>>>>> @@ -0,0 +1,30 @@
>>>>>> +/* Re-include the default memchr implementation for Zbb.
>>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
>>>>>> +   This file is part of the GNU C Library.
>>>>>> +
>>>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>>>> +   modify it under the terms of the GNU Lesser General Public
>>>>>> +   License as published by the Free Software Foundation; either
>>>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>>>> +
>>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>>> +   Lesser General Public License for more details.
>>>>>> +
>>>>>> +   You should have received a copy of the GNU Lesser General Public
>>>>>> +   License along with the GNU C Library; if not, see
>>>>>> +   <https://www.gnu.org/licenses/>.  */
>>>>>> +
>>>>>> +#include <string.h>
>>>>>> +
>>>>>> +#if IS_IN(libc)
>>>>>> +# define MEMCHR __memchr_zbb
>>>>>> +# undef libc_hidden_builtin_def
>>>>>> +# define libc_hidden_builtin_def(x)
>>>>>> +#endif
>>>>>> +/* Convince preprocessor to have Zbb instructions.  */
>>>>>> +#ifndef __riscv_zbb
>>>>>> +# define __riscv_zbb
>>>>>> +#endif
>>>>>
>>>>> Is there a way to specific the compiler to enable a extension, like aarch64
>>>>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
>>>>> instead of messing with compiler defined pre-processor.
>>>>
>>>> The tools expect a list of all extensions as parameter to the -march= option.
>>>> But there is no way to append extensions to an existing march string
>>>> on the command line.
>>>>
>>>> And if we would add this feature today, it would take many years until we could
>>>> use it here, because we want to remain compatible with old tools.
>>>> Or we enable the optimization only when being built with new tools, but that
>>>> adds even more complexity and build/test configurations.
>>>>
>>>> What we have is:
>>>> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
>>>> * Command line (since forever): -march=BASE_EXTENSIONLIST
>>>> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
>>>> option push/pop)
>>>> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
>>>>
>>>> I was not sure about using __riscv_zbb as well, but I considered it safe within
>>>> ifdef tests that ensure the macro won't be set twice.
>>>> If that's a concern, I could change to use something like this:
>>>> #define __riscv_force_zbb
>>>> #include <impl.c>
>>>> #undef __riscv_force_zbb
>>>> ... and change string-fza.h like this:
>>>> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
>>>> // orc.b
>>>> #endif
>>>>
>>>> BR
>>>> Christoph
>>>
>>> Another options would to parse the current march and add the extension if required,
>>> something like:
>>>
>>> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
>>> if [[ ! "$abi" =~ "_zbb" ]]
>>> then
>>>   abi="$abi"_zbb
>>> fi
>>
>> That alone likely won't do it, there's a bunch of ordering rules in the ISA string handling so we might get tripped up on them.  We've got a fairly relaxed version of the rules in GCC to try and match the various older rules, though, so it might be possible to make something similar work.
>>
>> We should probably just add some sort of -march=+zbb type argument.  IIRC Kito was going to do it at some point, not sure if he got around to it?
>
> I am just pointing this out because I think the way RISCV extension selection
> is currently implemented makes it awkward to provide ifunc implementation in
> a agnostic way (specially now that RISCV has dozens of extensions) without
> knowing the current target compiler is generating.
>
> Some other ABI allows to either specify a ISA/chip reference (like powerpc
> with -mcpu=powerX) or a ABI extension directly (like aarch64 with -march=+xxx).

We have -mcpu, but for RISC-V that's even more fragmented than -march 
(-mcpu essentially just rolls together -march and -mtune, and only 
allows a curated list).  The `-march=+`-type stuff has been a pretty 
common request, I think someone just needst oget around to actually 
implementing it.

>>> I don't have a strong preference, it is just that by not using the compiler flag
>>> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
>>> a possible better code generation from compiler.
>>
>> I think we'd likely get slightly better codgen from telling the compiler about the bitmanip extensions.  Maybe we want something like
>>
>>    diff --git a/string/memchr.c b/string/memchr.c
>>    index 08b5c41667..1b62dce8d8 100644
>>    --- a/string/memchr.c
>>    +++ b/string/memchr.c
>>    @@ -29,15 +29,19 @@
>>     # define __memchr MEMCHR
>>     #endif
>>       +#ifndef __MEMCHR_CODEGEN_ATTRIBUTE
>>    +#define __MEMCHR_CODEGEN_ATTRIBUTE
>>    +#endif
>>    +
>>     static __always_inline const char *
>>    -sadd (uintptr_t x, uintptr_t y)
>>    +sadd (uintptr_t x, uintptr_t y) __MEMCHR_CODEGEN_ATTRIBUTE
>>     {
>>       return (const char *)(y > UINTPTR_MAX - x ? UINTPTR_MAX : x + y);
>>     }
>>        /* Search no more than N bytes of S for C.  */
>>     void *
>>    -__memchr (void const *s, int c_in, size_t n)
>>    +__memchr (void const *s, int c_in, size_t n) __MEMCHR_CODEGEN_ATTRIBUTE
>>     {
>>       if (__glibc_unlikely (n == 0))
>>         return NULL;
>>
>> in the generic versions, so we can add a
>>
>>    #define __MEMCHR_CODEGEN_ATTRIBUTE __attribuet__((target("+zbb")))
>>
>> (or whatever the syntax is) to the Zbb-flavored versions of these routines?
>
> Yeah, this might work and it is clear than messing with compiler-defined
> macros.
>
>>
>> It might also be worth just jumping to the fast-misaligned versions for these routines, too --the slow-misaligned stuff is there for compatibility with old stuff (though memchr aligns the pointer, so it doesn't matter so much here).
>
> I was hopping that ABIs that would like to provide unaligned variants
> for mem* routines to improve the generic code, but it seems that for some
> it is easier to just add an assembly routine (as loongarch did).
>
> For memchr, I think it should be easy to provide a unaligned version.
> Something like (completely untested):
>
> /* Search no more than N bytes of S for C.  */
> void *
> __memchr (void const *s, int c_in, size_t n)
> {
>   if (__glibc_unlikely (n == 0))
>     return NULL;
>
> #ifdef USE_MEMCHR_UNALIGNED
>   /* Read the first word, but munge it so that bytes before the array
>      will not match goal.  */
>   const op_t *word_ptr = PTR_ALIGN_DOWN (s, sizeof (op_t));
>   uintptr_t s_int = (uintptr_t) s;
>
>   op_t word = *word_ptr;
>   op_t repeated_c = repeat_bytes (c_in);
>   /* Compute the address of the last byte taking in consideration possible
>      overflow.  */
>   const char *lbyte = sadd (s_int, n - 1);
>   /* And also the address of the word containing the last byte. */
>   const op_t *lword = (const op_t *) PTR_ALIGN_DOWN (lbyte, sizeof (op_t));
>
>   find_t mask = shift_find (find_eq_all (word, repeated_c), s_int);
>   if (mask != 0)
>     {
>       char *ret = (char *) s + index_first (mask);
>       return (ret <= lbyte) ? ret : NULL;
>     }
>   if (word_ptr == lword)
>     return NULL;
> #endif
>
>   word = *++word_ptr;
>   while (word_ptr != lword)
>     {
>       if (has_eq (word, repeated_c))
>         return (char *) word_ptr + index_first_eq (word, repeated_c);
>       word = *++word_ptr;
>     }
>
>   if (has_eq (word, repeated_c))
>     {
>       /* We found a match, but it might be in a byte past the end of the
>          array.  */
>       char *ret = (char *) word_ptr + index_first_eq (word, repeated_c);
>       if (ret <= lbyte)
>         return ret;
>     }
>   return NULL;
> }
>
>>
>>>>>> +#include <string/memchr.c>
>>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>>>> index fcef5659d4..5586d11c89 100644
>>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>>>>> @@ -1,5 +1,8 @@
>>>>>>  ifeq ($(subdir),string)
>>>>>>  sysdep_routines += \
>>>>>> +  memchr \
>>>>>> +  memchr-generic \
>>>>>> +  memchr-zbb \
>>>>>>    memcpy \
>>>>>>    memcpy-generic \
>>>>>>    memcpy_noalignment \
>>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>>>> index 9f806d7a9e..7321144a32 100644
>>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>>>>> @@ -20,19 +20,40 @@
>>>>>>  #include <string.h>
>>>>>>  #include <sys/hwprobe.h>
>>>>>>
>>>>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
>>>>>> +
>>>>>>  size_t
>>>>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>>>>>                       size_t max)
>>>>>>  {
>>>>>>    size_t i = max;
>>>>>> +  struct riscv_hwprobe pairs[] = {
>>>>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
>>>>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
>>>>>> +  };
>>>>>>
>>>>>> +  bool has_zbb = false;
>>>>>>    bool fast_unaligned = false;
>>>>>>
>>>>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>>>>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>>>>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
>>>>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
>>>>>> -    fast_unaligned = true;
>>>>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
>>>>>> +    {
>>>>>> +      struct riscv_hwprobe *pair;
>>>>>> +
>>>>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
>>>>>> +      pair = &pairs[0];
>>>>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
>>>>>> +        has_zbb = true;
>>>>>> +
>>>>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
>>>>>> +      pair = &pairs[1];
>>>>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
>>>>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
>>>>>> +        fast_unaligned = true;
>>>>>> +    }
>>>>>> +
>>>>>> +  IFUNC_IMPL (i, name, memchr,
>>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
>>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
>>>>>>
>>>>>>    IFUNC_IMPL (i, name, memcpy,
>>>>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>>> new file mode 100644
>>>>>> index 0000000000..bc076cbf24
>>>>>> --- /dev/null
>>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
>>>>>> @@ -0,0 +1,57 @@
>>>>>> +/* Multiple versions of memchr.
>>>>>> +   All versions must be listed in ifunc-impl-list.c.
>>>>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
>>>>>> +   This file is part of the GNU C Library.
>>>>>> +
>>>>>> +   The GNU C Library is free software; you can redistribute it and/or
>>>>>> +   modify it under the terms of the GNU Lesser General Public
>>>>>> +   License as published by the Free Software Foundation; either
>>>>>> +   version 2.1 of the License, or (at your option) any later version.
>>>>>> +
>>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>>> +   Lesser General Public License for more details.
>>>>>> +
>>>>>> +   You should have received a copy of the GNU Lesser General Public
>>>>>> +   License along with the GNU C Library; if not, see
>>>>>> +   <https://www.gnu.org/licenses/>.  */
>>>>>> +
>>>>>> +#if IS_IN (libc)
>>>>>> +/* Redefine memchr so that the compiler won't complain about the type
>>>>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
>>>>>> +# undef memchr
>>>>>> +# define memchr __redirect_memchr
>>>>>> +# include <stdint.h>
>>>>>> +# include <string.h>
>>>>>> +# include <ifunc-init.h>
>>>>>> +# include <riscv-ifunc.h>
>>>>>> +# include <sys/hwprobe.h>
>>>>>> +
>>>>>> +extern __typeof (__redirect_memchr) __libc_memchr;
>>>>>> +
>>>>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
>>>>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
>>>>>> +
>>>>>> +static inline __typeof (__redirect_memchr) *
>>>>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>>>>>> +{
>>>>>> +  unsigned long long int v;
>>>>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
>>>>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
>>>>>> +    return __memchr_zbb;
>>>>>> +
>>>>>> +  return __memchr_generic;
>>>>>> +}
>>>>>> +
>>>>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
>>>>>> +
>>>>>> +# undef memchr
>>>>>> +strong_alias (__libc_memchr, memchr);
>>>>>> +# ifdef SHARED
>>>>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
>>>>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
>>>>>> +# endif
>>>>>> +#else
>>>>>> +# include <string/memchr.c>
>>>>>> +#endif
  
Vineet Gupta April 30, 2024, 6:44 p.m. UTC | #8
On 4/30/24 10:54, Palmer Dabbelt wrote:
>>> That alone likely won't do it, there's a bunch of ordering rules in the ISA string handling so we might get tripped up on them.  We've got a fairly relaxed version of the rules in GCC to try and match the various older rules, though, so it might be possible to make something similar work.
>>>
>>> We should probably just add some sort of -march=+zbb type argument.  IIRC Kito was going to do it at some point, not sure if he got around to it?
>> I am just pointing this out because I think the way RISCV extension selection
>> is currently implemented makes it awkward to provide ifunc implementation in
>> a agnostic way (specially now that RISCV has dozens of extensions) without
>> knowing the current target compiler is generating.
>>
>> Some other ABI allows to either specify a ISA/chip reference (like powerpc
>> with -mcpu=powerX) or a ABI extension directly (like aarch64 with -march=+xxx).
> We have -mcpu, but for RISC-V that's even more fragmented than -march 
> (-mcpu essentially just rolls together -march and -mtune, and only 
> allows a curated list).  The `-march=+`-type stuff has been a pretty 
> common request, I think someone just needst oget around to actually 
> implementing it.

But the lack of this infra as of today, means downstream projects such
as glibc either need to wait for that to get merged and rely on a
specific gcc version or need to implement both ways of doing this.
Unfortunately that means as of today we have to use the non perfect
solutions.

-Vineet
  
Christoph Müllner May 6, 2024, 1:20 p.m. UTC | #9
On Tue, Apr 30, 2024 at 7:54 PM Palmer Dabbelt <palmer@dabbelt.com> wrote:
>
> On Tue, 30 Apr 2024 10:45:12 PDT (-0700), adhemerval.zanella@linaro.org wrote:
> >
> >
> > On 30/04/24 12:13, Palmer Dabbelt wrote:
> >> On Wed, 24 Apr 2024 06:36:43 PDT (-0700), adhemerval.zanella@linaro.org wrote:
> >>>
> >>>
> >>> On 24/04/24 10:16, Christoph Müllner wrote:
> >>>> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
> >>>> <adhemerval.zanella@linaro.org> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 22/04/24 04:43, Christoph Müllner wrote:
> >>>>>> When building with Zbb enabled, memchr benefits from using orc.b in
> >>>>>> find_zero_all().  This patch changes the build system such, that a
> >>>>>> non-Zbb version as well as a Zbb version of this routine is built.
> >>>>>> Further, a ifunc resolver is provided that selects the right routine
> >>>>>> based on the outcome of extension probing via hwprobe().
> >>>>>>
> >>>>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> >>>>>> ---
> >>>>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
> >>>>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
> >>>>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
> >>>>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
> >>>>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
> >>>>>>  5 files changed, 142 insertions(+), 5 deletions(-)
> >>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
> >>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
> >>>>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >>>>>>
> >>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
> >>>>>> new file mode 100644
> >>>>>> index 0000000000..a96c36398b
> >>>>>> --- /dev/null
> >>>>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
> >>>>>> @@ -0,0 +1,26 @@
> >>>>>> +/* Re-include the default memchr implementation.
> >>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> >>>>>> +   This file is part of the GNU C Library.
> >>>>>> +
> >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> >>>>>> +   modify it under the terms of the GNU Lesser General Public
> >>>>>> +   License as published by the Free Software Foundation; either
> >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> >>>>>> +
> >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>>>>> +   Lesser General Public License for more details.
> >>>>>> +
> >>>>>> +   You should have received a copy of the GNU Lesser General Public
> >>>>>> +   License along with the GNU C Library; if not, see
> >>>>>> +   <https://www.gnu.org/licenses/>.  */
> >>>>>> +
> >>>>>> +#include <string.h>
> >>>>>> +
> >>>>>> +#if IS_IN(libc)
> >>>>>> +# define MEMCHR __memchr_generic
> >>>>>> +# undef libc_hidden_builtin_def
> >>>>>> +# define libc_hidden_builtin_def(x)
> >>>>>> +#endif
> >>>>>> +#include <string/memchr.c>
> >>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
> >>>>>> new file mode 100644
> >>>>>> index 0000000000..bead0335ae
> >>>>>> --- /dev/null
> >>>>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
> >>>>>> @@ -0,0 +1,30 @@
> >>>>>> +/* Re-include the default memchr implementation for Zbb.
> >>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> >>>>>> +   This file is part of the GNU C Library.
> >>>>>> +
> >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> >>>>>> +   modify it under the terms of the GNU Lesser General Public
> >>>>>> +   License as published by the Free Software Foundation; either
> >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> >>>>>> +
> >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>>>>> +   Lesser General Public License for more details.
> >>>>>> +
> >>>>>> +   You should have received a copy of the GNU Lesser General Public
> >>>>>> +   License along with the GNU C Library; if not, see
> >>>>>> +   <https://www.gnu.org/licenses/>.  */
> >>>>>> +
> >>>>>> +#include <string.h>
> >>>>>> +
> >>>>>> +#if IS_IN(libc)
> >>>>>> +# define MEMCHR __memchr_zbb
> >>>>>> +# undef libc_hidden_builtin_def
> >>>>>> +# define libc_hidden_builtin_def(x)
> >>>>>> +#endif
> >>>>>> +/* Convince preprocessor to have Zbb instructions.  */
> >>>>>> +#ifndef __riscv_zbb
> >>>>>> +# define __riscv_zbb
> >>>>>> +#endif
> >>>>>
> >>>>> Is there a way to specific the compiler to enable a extension, like aarch64
> >>>>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
> >>>>> instead of messing with compiler defined pre-processor.
> >>>>
> >>>> The tools expect a list of all extensions as parameter to the -march= option.
> >>>> But there is no way to append extensions to an existing march string
> >>>> on the command line.
> >>>>
> >>>> And if we would add this feature today, it would take many years until we could
> >>>> use it here, because we want to remain compatible with old tools.
> >>>> Or we enable the optimization only when being built with new tools, but that
> >>>> adds even more complexity and build/test configurations.
> >>>>
> >>>> What we have is:
> >>>> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
> >>>> * Command line (since forever): -march=BASE_EXTENSIONLIST
> >>>> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
> >>>> option push/pop)
> >>>> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
> >>>>
> >>>> I was not sure about using __riscv_zbb as well, but I considered it safe within
> >>>> ifdef tests that ensure the macro won't be set twice.
> >>>> If that's a concern, I could change to use something like this:
> >>>> #define __riscv_force_zbb
> >>>> #include <impl.c>
> >>>> #undef __riscv_force_zbb
> >>>> ... and change string-fza.h like this:
> >>>> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
> >>>> // orc.b
> >>>> #endif
> >>>>
> >>>> BR
> >>>> Christoph
> >>>
> >>> Another options would to parse the current march and add the extension if required,
> >>> something like:
> >>>
> >>> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
> >>> if [[ ! "$abi" =~ "_zbb" ]]
> >>> then
> >>>   abi="$abi"_zbb
> >>> fi
> >>
> >> That alone likely won't do it, there's a bunch of ordering rules in the ISA string handling so we might get tripped up on them.  We've got a fairly relaxed version of the rules in GCC to try and match the various older rules, though, so it might be possible to make something similar work.
> >>
> >> We should probably just add some sort of -march=+zbb type argument.  IIRC Kito was going to do it at some point, not sure if he got around to it?
> >
> > I am just pointing this out because I think the way RISCV extension selection
> > is currently implemented makes it awkward to provide ifunc implementation in
> > a agnostic way (specially now that RISCV has dozens of extensions) without
> > knowing the current target compiler is generating.
> >
> > Some other ABI allows to either specify a ISA/chip reference (like powerpc
> > with -mcpu=powerX) or a ABI extension directly (like aarch64 with -march=+xxx).
>
> We have -mcpu, but for RISC-V that's even more fragmented than -march
> (-mcpu essentially just rolls together -march and -mtune, and only
> allows a curated list).  The `-march=+`-type stuff has been a pretty
> common request, I think someone just needst oget around to actually
> implementing it.

I agree that `-march=+` would be the best solution.
As we don't have it, I've created a PR for the riscv-toolchain-conventions:
  https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/47
This will have to depend on GCC 15+ (assuming there will be consensus
in the next 10 months about it).

Thanks for the proposal with the function-attributes.
I'm working on a patchset for that and will send it out after some testing.
This will have to depend on GCC 14+.

> >>> I don't have a strong preference, it is just that by not using the compiler flag
> >>> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
> >>> a possible better code generation from compiler.
> >>
> >> I think we'd likely get slightly better codgen from telling the compiler about the bitmanip extensions.  Maybe we want something like
> >>
> >>    diff --git a/string/memchr.c b/string/memchr.c
> >>    index 08b5c41667..1b62dce8d8 100644
> >>    --- a/string/memchr.c
> >>    +++ b/string/memchr.c
> >>    @@ -29,15 +29,19 @@
> >>     # define __memchr MEMCHR
> >>     #endif
> >>       +#ifndef __MEMCHR_CODEGEN_ATTRIBUTE
> >>    +#define __MEMCHR_CODEGEN_ATTRIBUTE
> >>    +#endif
> >>    +
> >>     static __always_inline const char *
> >>    -sadd (uintptr_t x, uintptr_t y)
> >>    +sadd (uintptr_t x, uintptr_t y) __MEMCHR_CODEGEN_ATTRIBUTE
> >>     {
> >>       return (const char *)(y > UINTPTR_MAX - x ? UINTPTR_MAX : x + y);
> >>     }
> >>        /* Search no more than N bytes of S for C.  */
> >>     void *
> >>    -__memchr (void const *s, int c_in, size_t n)
> >>    +__memchr (void const *s, int c_in, size_t n) __MEMCHR_CODEGEN_ATTRIBUTE
> >>     {
> >>       if (__glibc_unlikely (n == 0))
> >>         return NULL;
> >>
> >> in the generic versions, so we can add a
> >>
> >>    #define __MEMCHR_CODEGEN_ATTRIBUTE __attribuet__((target("+zbb")))
> >>
> >> (or whatever the syntax is) to the Zbb-flavored versions of these routines?
> >
> > Yeah, this might work and it is clear than messing with compiler-defined
> > macros.
> >
> >>
> >> It might also be worth just jumping to the fast-misaligned versions for these routines, too --the slow-misaligned stuff is there for compatibility with old stuff (though memchr aligns the pointer, so it doesn't matter so much here).
> >
> > I was hopping that ABIs that would like to provide unaligned variants
> > for mem* routines to improve the generic code, but it seems that for some
> > it is easier to just add an assembly routine (as loongarch did).
> >
> > For memchr, I think it should be easy to provide a unaligned version.
> > Something like (completely untested):
> >
> > /* Search no more than N bytes of S for C.  */
> > void *
> > __memchr (void const *s, int c_in, size_t n)
> > {
> >   if (__glibc_unlikely (n == 0))
> >     return NULL;
> >
> > #ifdef USE_MEMCHR_UNALIGNED
> >   /* Read the first word, but munge it so that bytes before the array
> >      will not match goal.  */
> >   const op_t *word_ptr = PTR_ALIGN_DOWN (s, sizeof (op_t));
> >   uintptr_t s_int = (uintptr_t) s;
> >
> >   op_t word = *word_ptr;
> >   op_t repeated_c = repeat_bytes (c_in);
> >   /* Compute the address of the last byte taking in consideration possible
> >      overflow.  */
> >   const char *lbyte = sadd (s_int, n - 1);
> >   /* And also the address of the word containing the last byte. */
> >   const op_t *lword = (const op_t *) PTR_ALIGN_DOWN (lbyte, sizeof (op_t));
> >
> >   find_t mask = shift_find (find_eq_all (word, repeated_c), s_int);
> >   if (mask != 0)
> >     {
> >       char *ret = (char *) s + index_first (mask);
> >       return (ret <= lbyte) ? ret : NULL;
> >     }
> >   if (word_ptr == lword)
> >     return NULL;
> > #endif
> >
> >   word = *++word_ptr;
> >   while (word_ptr != lword)
> >     {
> >       if (has_eq (word, repeated_c))
> >         return (char *) word_ptr + index_first_eq (word, repeated_c);
> >       word = *++word_ptr;
> >     }
> >
> >   if (has_eq (word, repeated_c))
> >     {
> >       /* We found a match, but it might be in a byte past the end of the
> >          array.  */
> >       char *ret = (char *) word_ptr + index_first_eq (word, repeated_c);
> >       if (ret <= lbyte)
> >         return ret;
> >     }
> >   return NULL;
> > }
> >
> >>
> >>>>>> +#include <string/memchr.c>
> >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> >>>>>> index fcef5659d4..5586d11c89 100644
> >>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> >>>>>> @@ -1,5 +1,8 @@
> >>>>>>  ifeq ($(subdir),string)
> >>>>>>  sysdep_routines += \
> >>>>>> +  memchr \
> >>>>>> +  memchr-generic \
> >>>>>> +  memchr-zbb \
> >>>>>>    memcpy \
> >>>>>>    memcpy-generic \
> >>>>>>    memcpy_noalignment \
> >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> >>>>>> index 9f806d7a9e..7321144a32 100644
> >>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> >>>>>> @@ -20,19 +20,40 @@
> >>>>>>  #include <string.h>
> >>>>>>  #include <sys/hwprobe.h>
> >>>>>>
> >>>>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
> >>>>>> +
> >>>>>>  size_t
> >>>>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >>>>>>                       size_t max)
> >>>>>>  {
> >>>>>>    size_t i = max;
> >>>>>> +  struct riscv_hwprobe pairs[] = {
> >>>>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
> >>>>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
> >>>>>> +  };
> >>>>>>
> >>>>>> +  bool has_zbb = false;
> >>>>>>    bool fast_unaligned = false;
> >>>>>>
> >>>>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> >>>>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> >>>>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
> >>>>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
> >>>>>> -    fast_unaligned = true;
> >>>>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
> >>>>>> +    {
> >>>>>> +      struct riscv_hwprobe *pair;
> >>>>>> +
> >>>>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
> >>>>>> +      pair = &pairs[0];
> >>>>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
> >>>>>> +        has_zbb = true;
> >>>>>> +
> >>>>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
> >>>>>> +      pair = &pairs[1];
> >>>>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
> >>>>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
> >>>>>> +        fast_unaligned = true;
> >>>>>> +    }
> >>>>>> +
> >>>>>> +  IFUNC_IMPL (i, name, memchr,
> >>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
> >>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
> >>>>>>
> >>>>>>    IFUNC_IMPL (i, name, memcpy,
> >>>>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >>>>>> new file mode 100644
> >>>>>> index 0000000000..bc076cbf24
> >>>>>> --- /dev/null
> >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> >>>>>> @@ -0,0 +1,57 @@
> >>>>>> +/* Multiple versions of memchr.
> >>>>>> +   All versions must be listed in ifunc-impl-list.c.
> >>>>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
> >>>>>> +   This file is part of the GNU C Library.
> >>>>>> +
> >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> >>>>>> +   modify it under the terms of the GNU Lesser General Public
> >>>>>> +   License as published by the Free Software Foundation; either
> >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> >>>>>> +
> >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>>>>> +   Lesser General Public License for more details.
> >>>>>> +
> >>>>>> +   You should have received a copy of the GNU Lesser General Public
> >>>>>> +   License along with the GNU C Library; if not, see
> >>>>>> +   <https://www.gnu.org/licenses/>.  */
> >>>>>> +
> >>>>>> +#if IS_IN (libc)
> >>>>>> +/* Redefine memchr so that the compiler won't complain about the type
> >>>>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
> >>>>>> +# undef memchr
> >>>>>> +# define memchr __redirect_memchr
> >>>>>> +# include <stdint.h>
> >>>>>> +# include <string.h>
> >>>>>> +# include <ifunc-init.h>
> >>>>>> +# include <riscv-ifunc.h>
> >>>>>> +# include <sys/hwprobe.h>
> >>>>>> +
> >>>>>> +extern __typeof (__redirect_memchr) __libc_memchr;
> >>>>>> +
> >>>>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
> >>>>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
> >>>>>> +
> >>>>>> +static inline __typeof (__redirect_memchr) *
> >>>>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> >>>>>> +{
> >>>>>> +  unsigned long long int v;
> >>>>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
> >>>>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
> >>>>>> +    return __memchr_zbb;
> >>>>>> +
> >>>>>> +  return __memchr_generic;
> >>>>>> +}
> >>>>>> +
> >>>>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
> >>>>>> +
> >>>>>> +# undef memchr
> >>>>>> +strong_alias (__libc_memchr, memchr);
> >>>>>> +# ifdef SHARED
> >>>>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
> >>>>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
> >>>>>> +# endif
> >>>>>> +#else
> >>>>>> +# include <string/memchr.c>
> >>>>>> +#endif
  
Kito Cheng May 6, 2024, 1:32 p.m. UTC | #10
`target` attribute is provided by GCC 14, and `-march=+ext` will at
least require GCC 15 (if we add),
also we relaxed the canonical order requirement for `-march=`, that
means we can relatively easy to manipulate the ISA
string by just concat that? so I don't really think we need  `-march=+ext`.

On Mon, May 6, 2024 at 9:20 PM Christoph Müllner
<christoph.muellner@vrull.eu> wrote:
>
> On Tue, Apr 30, 2024 at 7:54 PM Palmer Dabbelt <palmer@dabbelt.com> wrote:
> >
> > On Tue, 30 Apr 2024 10:45:12 PDT (-0700), adhemerval.zanella@linaro.org wrote:
> > >
> > >
> > > On 30/04/24 12:13, Palmer Dabbelt wrote:
> > >> On Wed, 24 Apr 2024 06:36:43 PDT (-0700), adhemerval.zanella@linaro.org wrote:
> > >>>
> > >>>
> > >>> On 24/04/24 10:16, Christoph Müllner wrote:
> > >>>> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
> > >>>> <adhemerval.zanella@linaro.org> wrote:
> > >>>>>
> > >>>>>
> > >>>>>
> > >>>>> On 22/04/24 04:43, Christoph Müllner wrote:
> > >>>>>> When building with Zbb enabled, memchr benefits from using orc.b in
> > >>>>>> find_zero_all().  This patch changes the build system such, that a
> > >>>>>> non-Zbb version as well as a Zbb version of this routine is built.
> > >>>>>> Further, a ifunc resolver is provided that selects the right routine
> > >>>>>> based on the outcome of extension probing via hwprobe().
> > >>>>>>
> > >>>>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> > >>>>>> ---
> > >>>>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
> > >>>>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
> > >>>>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
> > >>>>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
> > >>>>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
> > >>>>>>  5 files changed, 142 insertions(+), 5 deletions(-)
> > >>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
> > >>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
> > >>>>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > >>>>>>
> > >>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
> > >>>>>> new file mode 100644
> > >>>>>> index 0000000000..a96c36398b
> > >>>>>> --- /dev/null
> > >>>>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
> > >>>>>> @@ -0,0 +1,26 @@
> > >>>>>> +/* Re-include the default memchr implementation.
> > >>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> > >>>>>> +   This file is part of the GNU C Library.
> > >>>>>> +
> > >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> > >>>>>> +   modify it under the terms of the GNU Lesser General Public
> > >>>>>> +   License as published by the Free Software Foundation; either
> > >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> > >>>>>> +
> > >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> > >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > >>>>>> +   Lesser General Public License for more details.
> > >>>>>> +
> > >>>>>> +   You should have received a copy of the GNU Lesser General Public
> > >>>>>> +   License along with the GNU C Library; if not, see
> > >>>>>> +   <https://www.gnu.org/licenses/>.  */
> > >>>>>> +
> > >>>>>> +#include <string.h>
> > >>>>>> +
> > >>>>>> +#if IS_IN(libc)
> > >>>>>> +# define MEMCHR __memchr_generic
> > >>>>>> +# undef libc_hidden_builtin_def
> > >>>>>> +# define libc_hidden_builtin_def(x)
> > >>>>>> +#endif
> > >>>>>> +#include <string/memchr.c>
> > >>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
> > >>>>>> new file mode 100644
> > >>>>>> index 0000000000..bead0335ae
> > >>>>>> --- /dev/null
> > >>>>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
> > >>>>>> @@ -0,0 +1,30 @@
> > >>>>>> +/* Re-include the default memchr implementation for Zbb.
> > >>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> > >>>>>> +   This file is part of the GNU C Library.
> > >>>>>> +
> > >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> > >>>>>> +   modify it under the terms of the GNU Lesser General Public
> > >>>>>> +   License as published by the Free Software Foundation; either
> > >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> > >>>>>> +
> > >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> > >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > >>>>>> +   Lesser General Public License for more details.
> > >>>>>> +
> > >>>>>> +   You should have received a copy of the GNU Lesser General Public
> > >>>>>> +   License along with the GNU C Library; if not, see
> > >>>>>> +   <https://www.gnu.org/licenses/>.  */
> > >>>>>> +
> > >>>>>> +#include <string.h>
> > >>>>>> +
> > >>>>>> +#if IS_IN(libc)
> > >>>>>> +# define MEMCHR __memchr_zbb
> > >>>>>> +# undef libc_hidden_builtin_def
> > >>>>>> +# define libc_hidden_builtin_def(x)
> > >>>>>> +#endif
> > >>>>>> +/* Convince preprocessor to have Zbb instructions.  */
> > >>>>>> +#ifndef __riscv_zbb
> > >>>>>> +# define __riscv_zbb
> > >>>>>> +#endif
> > >>>>>
> > >>>>> Is there a way to specific the compiler to enable a extension, like aarch64
> > >>>>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
> > >>>>> instead of messing with compiler defined pre-processor.
> > >>>>
> > >>>> The tools expect a list of all extensions as parameter to the -march= option.
> > >>>> But there is no way to append extensions to an existing march string
> > >>>> on the command line.
> > >>>>
> > >>>> And if we would add this feature today, it would take many years until we could
> > >>>> use it here, because we want to remain compatible with old tools.
> > >>>> Or we enable the optimization only when being built with new tools, but that
> > >>>> adds even more complexity and build/test configurations.
> > >>>>
> > >>>> What we have is:
> > >>>> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
> > >>>> * Command line (since forever): -march=BASE_EXTENSIONLIST
> > >>>> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
> > >>>> option push/pop)
> > >>>> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
> > >>>>
> > >>>> I was not sure about using __riscv_zbb as well, but I considered it safe within
> > >>>> ifdef tests that ensure the macro won't be set twice.
> > >>>> If that's a concern, I could change to use something like this:
> > >>>> #define __riscv_force_zbb
> > >>>> #include <impl.c>
> > >>>> #undef __riscv_force_zbb
> > >>>> ... and change string-fza.h like this:
> > >>>> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
> > >>>> // orc.b
> > >>>> #endif
> > >>>>
> > >>>> BR
> > >>>> Christoph
> > >>>
> > >>> Another options would to parse the current march and add the extension if required,
> > >>> something like:
> > >>>
> > >>> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
> > >>> if [[ ! "$abi" =~ "_zbb" ]]
> > >>> then
> > >>>   abi="$abi"_zbb
> > >>> fi
> > >>
> > >> That alone likely won't do it, there's a bunch of ordering rules in the ISA string handling so we might get tripped up on them.  We've got a fairly relaxed version of the rules in GCC to try and match the various older rules, though, so it might be possible to make something similar work.
> > >>
> > >> We should probably just add some sort of -march=+zbb type argument.  IIRC Kito was going to do it at some point, not sure if he got around to it?
> > >
> > > I am just pointing this out because I think the way RISCV extension selection
> > > is currently implemented makes it awkward to provide ifunc implementation in
> > > a agnostic way (specially now that RISCV has dozens of extensions) without
> > > knowing the current target compiler is generating.
> > >
> > > Some other ABI allows to either specify a ISA/chip reference (like powerpc
> > > with -mcpu=powerX) or a ABI extension directly (like aarch64 with -march=+xxx).
> >
> > We have -mcpu, but for RISC-V that's even more fragmented than -march
> > (-mcpu essentially just rolls together -march and -mtune, and only
> > allows a curated list).  The `-march=+`-type stuff has been a pretty
> > common request, I think someone just needst oget around to actually
> > implementing it.
>
> I agree that `-march=+` would be the best solution.
> As we don't have it, I've created a PR for the riscv-toolchain-conventions:
>   https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/47
> This will have to depend on GCC 15+ (assuming there will be consensus
> in the next 10 months about it).
>
> Thanks for the proposal with the function-attributes.
> I'm working on a patchset for that and will send it out after some testing.
> This will have to depend on GCC 14+.
>
> > >>> I don't have a strong preference, it is just that by not using the compiler flag
> > >>> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
> > >>> a possible better code generation from compiler.
> > >>
> > >> I think we'd likely get slightly better codgen from telling the compiler about the bitmanip extensions.  Maybe we want something like
> > >>
> > >>    diff --git a/string/memchr.c b/string/memchr.c
> > >>    index 08b5c41667..1b62dce8d8 100644
> > >>    --- a/string/memchr.c
> > >>    +++ b/string/memchr.c
> > >>    @@ -29,15 +29,19 @@
> > >>     # define __memchr MEMCHR
> > >>     #endif
> > >>       +#ifndef __MEMCHR_CODEGEN_ATTRIBUTE
> > >>    +#define __MEMCHR_CODEGEN_ATTRIBUTE
> > >>    +#endif
> > >>    +
> > >>     static __always_inline const char *
> > >>    -sadd (uintptr_t x, uintptr_t y)
> > >>    +sadd (uintptr_t x, uintptr_t y) __MEMCHR_CODEGEN_ATTRIBUTE
> > >>     {
> > >>       return (const char *)(y > UINTPTR_MAX - x ? UINTPTR_MAX : x + y);
> > >>     }
> > >>        /* Search no more than N bytes of S for C.  */
> > >>     void *
> > >>    -__memchr (void const *s, int c_in, size_t n)
> > >>    +__memchr (void const *s, int c_in, size_t n) __MEMCHR_CODEGEN_ATTRIBUTE
> > >>     {
> > >>       if (__glibc_unlikely (n == 0))
> > >>         return NULL;
> > >>
> > >> in the generic versions, so we can add a
> > >>
> > >>    #define __MEMCHR_CODEGEN_ATTRIBUTE __attribuet__((target("+zbb")))
> > >>
> > >> (or whatever the syntax is) to the Zbb-flavored versions of these routines?
> > >
> > > Yeah, this might work and it is clear than messing with compiler-defined
> > > macros.
> > >
> > >>
> > >> It might also be worth just jumping to the fast-misaligned versions for these routines, too --the slow-misaligned stuff is there for compatibility with old stuff (though memchr aligns the pointer, so it doesn't matter so much here).
> > >
> > > I was hopping that ABIs that would like to provide unaligned variants
> > > for mem* routines to improve the generic code, but it seems that for some
> > > it is easier to just add an assembly routine (as loongarch did).
> > >
> > > For memchr, I think it should be easy to provide a unaligned version.
> > > Something like (completely untested):
> > >
> > > /* Search no more than N bytes of S for C.  */
> > > void *
> > > __memchr (void const *s, int c_in, size_t n)
> > > {
> > >   if (__glibc_unlikely (n == 0))
> > >     return NULL;
> > >
> > > #ifdef USE_MEMCHR_UNALIGNED
> > >   /* Read the first word, but munge it so that bytes before the array
> > >      will not match goal.  */
> > >   const op_t *word_ptr = PTR_ALIGN_DOWN (s, sizeof (op_t));
> > >   uintptr_t s_int = (uintptr_t) s;
> > >
> > >   op_t word = *word_ptr;
> > >   op_t repeated_c = repeat_bytes (c_in);
> > >   /* Compute the address of the last byte taking in consideration possible
> > >      overflow.  */
> > >   const char *lbyte = sadd (s_int, n - 1);
> > >   /* And also the address of the word containing the last byte. */
> > >   const op_t *lword = (const op_t *) PTR_ALIGN_DOWN (lbyte, sizeof (op_t));
> > >
> > >   find_t mask = shift_find (find_eq_all (word, repeated_c), s_int);
> > >   if (mask != 0)
> > >     {
> > >       char *ret = (char *) s + index_first (mask);
> > >       return (ret <= lbyte) ? ret : NULL;
> > >     }
> > >   if (word_ptr == lword)
> > >     return NULL;
> > > #endif
> > >
> > >   word = *++word_ptr;
> > >   while (word_ptr != lword)
> > >     {
> > >       if (has_eq (word, repeated_c))
> > >         return (char *) word_ptr + index_first_eq (word, repeated_c);
> > >       word = *++word_ptr;
> > >     }
> > >
> > >   if (has_eq (word, repeated_c))
> > >     {
> > >       /* We found a match, but it might be in a byte past the end of the
> > >          array.  */
> > >       char *ret = (char *) word_ptr + index_first_eq (word, repeated_c);
> > >       if (ret <= lbyte)
> > >         return ret;
> > >     }
> > >   return NULL;
> > > }
> > >
> > >>
> > >>>>>> +#include <string/memchr.c>
> > >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > >>>>>> index fcef5659d4..5586d11c89 100644
> > >>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > >>>>>> @@ -1,5 +1,8 @@
> > >>>>>>  ifeq ($(subdir),string)
> > >>>>>>  sysdep_routines += \
> > >>>>>> +  memchr \
> > >>>>>> +  memchr-generic \
> > >>>>>> +  memchr-zbb \
> > >>>>>>    memcpy \
> > >>>>>>    memcpy-generic \
> > >>>>>>    memcpy_noalignment \
> > >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > >>>>>> index 9f806d7a9e..7321144a32 100644
> > >>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > >>>>>> @@ -20,19 +20,40 @@
> > >>>>>>  #include <string.h>
> > >>>>>>  #include <sys/hwprobe.h>
> > >>>>>>
> > >>>>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
> > >>>>>> +
> > >>>>>>  size_t
> > >>>>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > >>>>>>                       size_t max)
> > >>>>>>  {
> > >>>>>>    size_t i = max;
> > >>>>>> +  struct riscv_hwprobe pairs[] = {
> > >>>>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
> > >>>>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
> > >>>>>> +  };
> > >>>>>>
> > >>>>>> +  bool has_zbb = false;
> > >>>>>>    bool fast_unaligned = false;
> > >>>>>>
> > >>>>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> > >>>>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> > >>>>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
> > >>>>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
> > >>>>>> -    fast_unaligned = true;
> > >>>>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
> > >>>>>> +    {
> > >>>>>> +      struct riscv_hwprobe *pair;
> > >>>>>> +
> > >>>>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
> > >>>>>> +      pair = &pairs[0];
> > >>>>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
> > >>>>>> +        has_zbb = true;
> > >>>>>> +
> > >>>>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
> > >>>>>> +      pair = &pairs[1];
> > >>>>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
> > >>>>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
> > >>>>>> +        fast_unaligned = true;
> > >>>>>> +    }
> > >>>>>> +
> > >>>>>> +  IFUNC_IMPL (i, name, memchr,
> > >>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
> > >>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
> > >>>>>>
> > >>>>>>    IFUNC_IMPL (i, name, memcpy,
> > >>>>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> > >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > >>>>>> new file mode 100644
> > >>>>>> index 0000000000..bc076cbf24
> > >>>>>> --- /dev/null
> > >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > >>>>>> @@ -0,0 +1,57 @@
> > >>>>>> +/* Multiple versions of memchr.
> > >>>>>> +   All versions must be listed in ifunc-impl-list.c.
> > >>>>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
> > >>>>>> +   This file is part of the GNU C Library.
> > >>>>>> +
> > >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> > >>>>>> +   modify it under the terms of the GNU Lesser General Public
> > >>>>>> +   License as published by the Free Software Foundation; either
> > >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> > >>>>>> +
> > >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> > >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > >>>>>> +   Lesser General Public License for more details.
> > >>>>>> +
> > >>>>>> +   You should have received a copy of the GNU Lesser General Public
> > >>>>>> +   License along with the GNU C Library; if not, see
> > >>>>>> +   <https://www.gnu.org/licenses/>.  */
> > >>>>>> +
> > >>>>>> +#if IS_IN (libc)
> > >>>>>> +/* Redefine memchr so that the compiler won't complain about the type
> > >>>>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
> > >>>>>> +# undef memchr
> > >>>>>> +# define memchr __redirect_memchr
> > >>>>>> +# include <stdint.h>
> > >>>>>> +# include <string.h>
> > >>>>>> +# include <ifunc-init.h>
> > >>>>>> +# include <riscv-ifunc.h>
> > >>>>>> +# include <sys/hwprobe.h>
> > >>>>>> +
> > >>>>>> +extern __typeof (__redirect_memchr) __libc_memchr;
> > >>>>>> +
> > >>>>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
> > >>>>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
> > >>>>>> +
> > >>>>>> +static inline __typeof (__redirect_memchr) *
> > >>>>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> > >>>>>> +{
> > >>>>>> +  unsigned long long int v;
> > >>>>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
> > >>>>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
> > >>>>>> +    return __memchr_zbb;
> > >>>>>> +
> > >>>>>> +  return __memchr_generic;
> > >>>>>> +}
> > >>>>>> +
> > >>>>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
> > >>>>>> +
> > >>>>>> +# undef memchr
> > >>>>>> +strong_alias (__libc_memchr, memchr);
> > >>>>>> +# ifdef SHARED
> > >>>>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
> > >>>>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
> > >>>>>> +# endif
> > >>>>>> +#else
> > >>>>>> +# include <string/memchr.c>
> > >>>>>> +#endif
  
Christoph Müllner May 6, 2024, 1:46 p.m. UTC | #11
On Mon, May 6, 2024 at 3:32 PM Kito Cheng <kito.cheng@sifive.com> wrote:
>
> `target` attribute is provided by GCC 14, and `-march=+ext` will at
> least require GCC 15 (if we add),
> also we relaxed the canonical order requirement for `-march=`, that
> means we can relatively easy to manipulate the ISA
> string by just concat that? so I don't really think we need  `-march=+ext`.

Then we require the Makefile author to:
1) parse if the current compiler flags include march (repeat for all
variables that may be used)
2a) if so, then amend to that
2b) if not, parse if there is a configured default ISA (via "gcc -v")
2ba) if so, then create -march accordingly
2bb) if not, then <fail to amend>

Bonus problem:
If there are multiple variables that might set -march=... this gets
even trickier because
the order how these variables are integrated in the compiler
invocation command is unknown.

Using "-march=+zfoo" seems simpler and cleaner.

>
> On Mon, May 6, 2024 at 9:20 PM Christoph Müllner
> <christoph.muellner@vrull.eu> wrote:
> >
> > On Tue, Apr 30, 2024 at 7:54 PM Palmer Dabbelt <palmer@dabbelt.com> wrote:
> > >
> > > On Tue, 30 Apr 2024 10:45:12 PDT (-0700), adhemerval.zanella@linaro.org wrote:
> > > >
> > > >
> > > > On 30/04/24 12:13, Palmer Dabbelt wrote:
> > > >> On Wed, 24 Apr 2024 06:36:43 PDT (-0700), adhemerval.zanella@linaro.org wrote:
> > > >>>
> > > >>>
> > > >>> On 24/04/24 10:16, Christoph Müllner wrote:
> > > >>>> On Wed, Apr 24, 2024 at 2:53 PM Adhemerval Zanella Netto
> > > >>>> <adhemerval.zanella@linaro.org> wrote:
> > > >>>>>
> > > >>>>>
> > > >>>>>
> > > >>>>> On 22/04/24 04:43, Christoph Müllner wrote:
> > > >>>>>> When building with Zbb enabled, memchr benefits from using orc.b in
> > > >>>>>> find_zero_all().  This patch changes the build system such, that a
> > > >>>>>> non-Zbb version as well as a Zbb version of this routine is built.
> > > >>>>>> Further, a ifunc resolver is provided that selects the right routine
> > > >>>>>> based on the outcome of extension probing via hwprobe().
> > > >>>>>>
> > > >>>>>> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> > > >>>>>> ---
> > > >>>>>>  sysdeps/riscv/multiarch/memchr-generic.c      | 26 +++++++++
> > > >>>>>>  sysdeps/riscv/multiarch/memchr-zbb.c          | 30 ++++++++++
> > > >>>>>>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  3 +
> > > >>>>>>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 31 ++++++++--
> > > >>>>>>  .../unix/sysv/linux/riscv/multiarch/memchr.c  | 57 +++++++++++++++++++
> > > >>>>>>  5 files changed, 142 insertions(+), 5 deletions(-)
> > > >>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
> > > >>>>>>  create mode 100644 sysdeps/riscv/multiarch/memchr-zbb.c
> > > >>>>>>  create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > > >>>>>>
> > > >>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
> > > >>>>>> new file mode 100644
> > > >>>>>> index 0000000000..a96c36398b
> > > >>>>>> --- /dev/null
> > > >>>>>> +++ b/sysdeps/riscv/multiarch/memchr-generic.c
> > > >>>>>> @@ -0,0 +1,26 @@
> > > >>>>>> +/* Re-include the default memchr implementation.
> > > >>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> > > >>>>>> +   This file is part of the GNU C Library.
> > > >>>>>> +
> > > >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> > > >>>>>> +   modify it under the terms of the GNU Lesser General Public
> > > >>>>>> +   License as published by the Free Software Foundation; either
> > > >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> > > >>>>>> +
> > > >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> > > >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > >>>>>> +   Lesser General Public License for more details.
> > > >>>>>> +
> > > >>>>>> +   You should have received a copy of the GNU Lesser General Public
> > > >>>>>> +   License along with the GNU C Library; if not, see
> > > >>>>>> +   <https://www.gnu.org/licenses/>.  */
> > > >>>>>> +
> > > >>>>>> +#include <string.h>
> > > >>>>>> +
> > > >>>>>> +#if IS_IN(libc)
> > > >>>>>> +# define MEMCHR __memchr_generic
> > > >>>>>> +# undef libc_hidden_builtin_def
> > > >>>>>> +# define libc_hidden_builtin_def(x)
> > > >>>>>> +#endif
> > > >>>>>> +#include <string/memchr.c>
> > > >>>>>> diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
> > > >>>>>> new file mode 100644
> > > >>>>>> index 0000000000..bead0335ae
> > > >>>>>> --- /dev/null
> > > >>>>>> +++ b/sysdeps/riscv/multiarch/memchr-zbb.c
> > > >>>>>> @@ -0,0 +1,30 @@
> > > >>>>>> +/* Re-include the default memchr implementation for Zbb.
> > > >>>>>> +   Copyright (C) 2024 Free Software Foundation, Inc.
> > > >>>>>> +   This file is part of the GNU C Library.
> > > >>>>>> +
> > > >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> > > >>>>>> +   modify it under the terms of the GNU Lesser General Public
> > > >>>>>> +   License as published by the Free Software Foundation; either
> > > >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> > > >>>>>> +
> > > >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> > > >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > >>>>>> +   Lesser General Public License for more details.
> > > >>>>>> +
> > > >>>>>> +   You should have received a copy of the GNU Lesser General Public
> > > >>>>>> +   License along with the GNU C Library; if not, see
> > > >>>>>> +   <https://www.gnu.org/licenses/>.  */
> > > >>>>>> +
> > > >>>>>> +#include <string.h>
> > > >>>>>> +
> > > >>>>>> +#if IS_IN(libc)
> > > >>>>>> +# define MEMCHR __memchr_zbb
> > > >>>>>> +# undef libc_hidden_builtin_def
> > > >>>>>> +# define libc_hidden_builtin_def(x)
> > > >>>>>> +#endif
> > > >>>>>> +/* Convince preprocessor to have Zbb instructions.  */
> > > >>>>>> +#ifndef __riscv_zbb
> > > >>>>>> +# define __riscv_zbb
> > > >>>>>> +#endif
> > > >>>>>
> > > >>>>> Is there a way to specific the compiler to enable a extension, like aarch64
> > > >>>>> -march=arch{+[no]feature}? I think ideally this should be enabled as CFLAGS
> > > >>>>> instead of messing with compiler defined pre-processor.
> > > >>>>
> > > >>>> The tools expect a list of all extensions as parameter to the -march= option.
> > > >>>> But there is no way to append extensions to an existing march string
> > > >>>> on the command line.
> > > >>>>
> > > >>>> And if we would add this feature today, it would take many years until we could
> > > >>>> use it here, because we want to remain compatible with old tools.
> > > >>>> Or we enable the optimization only when being built with new tools, but that
> > > >>>> adds even more complexity and build/test configurations.
> > > >>>>
> > > >>>> What we have is:
> > > >>>> * Preprocessor (since forever): Extension test macros (__riscv_EXTENSION)
> > > >>>> * Command line (since forever): -march=BASE_EXTENSIONLIST
> > > >>>> * GAS (since Nov 21): .option arch, +EXTENSION (in combination with
> > > >>>> option push/pop)
> > > >>>> * GCC (since Nov 23): __attribute__((target("arch=+EXTENSION")))
> > > >>>>
> > > >>>> I was not sure about using __riscv_zbb as well, but I considered it safe within
> > > >>>> ifdef tests that ensure the macro won't be set twice.
> > > >>>> If that's a concern, I could change to use something like this:
> > > >>>> #define __riscv_force_zbb
> > > >>>> #include <impl.c>
> > > >>>> #undef __riscv_force_zbb
> > > >>>> ... and change string-fza.h like this:
> > > >>>> #if defined(__riscv_zbb) || defined(__riscv_force_zbb)
> > > >>>> // orc.b
> > > >>>> #endif
> > > >>>>
> > > >>>> BR
> > > >>>> Christoph
> > > >>>
> > > >>> Another options would to parse the current march and add the extension if required,
> > > >>> something like:
> > > >>>
> > > >>> abi=$(riscv64-linux-gnu-gcc -Q --help=target | grep march | cut -d '=' -f2 | xargs)
> > > >>> if [[ ! "$abi" =~ "_zbb" ]]
> > > >>> then
> > > >>>   abi="$abi"_zbb
> > > >>> fi
> > > >>
> > > >> That alone likely won't do it, there's a bunch of ordering rules in the ISA string handling so we might get tripped up on them.  We've got a fairly relaxed version of the rules in GCC to try and match the various older rules, though, so it might be possible to make something similar work.
> > > >>
> > > >> We should probably just add some sort of -march=+zbb type argument.  IIRC Kito was going to do it at some point, not sure if he got around to it?
> > > >
> > > > I am just pointing this out because I think the way RISCV extension selection
> > > > is currently implemented makes it awkward to provide ifunc implementation in
> > > > a agnostic way (specially now that RISCV has dozens of extensions) without
> > > > knowing the current target compiler is generating.
> > > >
> > > > Some other ABI allows to either specify a ISA/chip reference (like powerpc
> > > > with -mcpu=powerX) or a ABI extension directly (like aarch64 with -march=+xxx).
> > >
> > > We have -mcpu, but for RISC-V that's even more fragmented than -march
> > > (-mcpu essentially just rolls together -march and -mtune, and only
> > > allows a curated list).  The `-march=+`-type stuff has been a pretty
> > > common request, I think someone just needst oget around to actually
> > > implementing it.
> >
> > I agree that `-march=+` would be the best solution.
> > As we don't have it, I've created a PR for the riscv-toolchain-conventions:
> >   https://github.com/riscv-non-isa/riscv-toolchain-conventions/pull/47
> > This will have to depend on GCC 15+ (assuming there will be consensus
> > in the next 10 months about it).
> >
> > Thanks for the proposal with the function-attributes.
> > I'm working on a patchset for that and will send it out after some testing.
> > This will have to depend on GCC 14+.
> >
> > > >>> I don't have a strong preference, it is just that by not using the compiler flag
> > > >>> we won't be able to either use the builtin (__builtin_riscv_orc_b_32) and/or get
> > > >>> a possible better code generation from compiler.
> > > >>
> > > >> I think we'd likely get slightly better codgen from telling the compiler about the bitmanip extensions.  Maybe we want something like
> > > >>
> > > >>    diff --git a/string/memchr.c b/string/memchr.c
> > > >>    index 08b5c41667..1b62dce8d8 100644
> > > >>    --- a/string/memchr.c
> > > >>    +++ b/string/memchr.c
> > > >>    @@ -29,15 +29,19 @@
> > > >>     # define __memchr MEMCHR
> > > >>     #endif
> > > >>       +#ifndef __MEMCHR_CODEGEN_ATTRIBUTE
> > > >>    +#define __MEMCHR_CODEGEN_ATTRIBUTE
> > > >>    +#endif
> > > >>    +
> > > >>     static __always_inline const char *
> > > >>    -sadd (uintptr_t x, uintptr_t y)
> > > >>    +sadd (uintptr_t x, uintptr_t y) __MEMCHR_CODEGEN_ATTRIBUTE
> > > >>     {
> > > >>       return (const char *)(y > UINTPTR_MAX - x ? UINTPTR_MAX : x + y);
> > > >>     }
> > > >>        /* Search no more than N bytes of S for C.  */
> > > >>     void *
> > > >>    -__memchr (void const *s, int c_in, size_t n)
> > > >>    +__memchr (void const *s, int c_in, size_t n) __MEMCHR_CODEGEN_ATTRIBUTE
> > > >>     {
> > > >>       if (__glibc_unlikely (n == 0))
> > > >>         return NULL;
> > > >>
> > > >> in the generic versions, so we can add a
> > > >>
> > > >>    #define __MEMCHR_CODEGEN_ATTRIBUTE __attribuet__((target("+zbb")))
> > > >>
> > > >> (or whatever the syntax is) to the Zbb-flavored versions of these routines?
> > > >
> > > > Yeah, this might work and it is clear than messing with compiler-defined
> > > > macros.
> > > >
> > > >>
> > > >> It might also be worth just jumping to the fast-misaligned versions for these routines, too --the slow-misaligned stuff is there for compatibility with old stuff (though memchr aligns the pointer, so it doesn't matter so much here).
> > > >
> > > > I was hopping that ABIs that would like to provide unaligned variants
> > > > for mem* routines to improve the generic code, but it seems that for some
> > > > it is easier to just add an assembly routine (as loongarch did).
> > > >
> > > > For memchr, I think it should be easy to provide a unaligned version.
> > > > Something like (completely untested):
> > > >
> > > > /* Search no more than N bytes of S for C.  */
> > > > void *
> > > > __memchr (void const *s, int c_in, size_t n)
> > > > {
> > > >   if (__glibc_unlikely (n == 0))
> > > >     return NULL;
> > > >
> > > > #ifdef USE_MEMCHR_UNALIGNED
> > > >   /* Read the first word, but munge it so that bytes before the array
> > > >      will not match goal.  */
> > > >   const op_t *word_ptr = PTR_ALIGN_DOWN (s, sizeof (op_t));
> > > >   uintptr_t s_int = (uintptr_t) s;
> > > >
> > > >   op_t word = *word_ptr;
> > > >   op_t repeated_c = repeat_bytes (c_in);
> > > >   /* Compute the address of the last byte taking in consideration possible
> > > >      overflow.  */
> > > >   const char *lbyte = sadd (s_int, n - 1);
> > > >   /* And also the address of the word containing the last byte. */
> > > >   const op_t *lword = (const op_t *) PTR_ALIGN_DOWN (lbyte, sizeof (op_t));
> > > >
> > > >   find_t mask = shift_find (find_eq_all (word, repeated_c), s_int);
> > > >   if (mask != 0)
> > > >     {
> > > >       char *ret = (char *) s + index_first (mask);
> > > >       return (ret <= lbyte) ? ret : NULL;
> > > >     }
> > > >   if (word_ptr == lword)
> > > >     return NULL;
> > > > #endif
> > > >
> > > >   word = *++word_ptr;
> > > >   while (word_ptr != lword)
> > > >     {
> > > >       if (has_eq (word, repeated_c))
> > > >         return (char *) word_ptr + index_first_eq (word, repeated_c);
> > > >       word = *++word_ptr;
> > > >     }
> > > >
> > > >   if (has_eq (word, repeated_c))
> > > >     {
> > > >       /* We found a match, but it might be in a byte past the end of the
> > > >          array.  */
> > > >       char *ret = (char *) word_ptr + index_first_eq (word, repeated_c);
> > > >       if (ret <= lbyte)
> > > >         return ret;
> > > >     }
> > > >   return NULL;
> > > > }
> > > >
> > > >>
> > > >>>>>> +#include <string/memchr.c>
> > > >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > > >>>>>> index fcef5659d4..5586d11c89 100644
> > > >>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > > >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> > > >>>>>> @@ -1,5 +1,8 @@
> > > >>>>>>  ifeq ($(subdir),string)
> > > >>>>>>  sysdep_routines += \
> > > >>>>>> +  memchr \
> > > >>>>>> +  memchr-generic \
> > > >>>>>> +  memchr-zbb \
> > > >>>>>>    memcpy \
> > > >>>>>>    memcpy-generic \
> > > >>>>>>    memcpy_noalignment \
> > > >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > > >>>>>> index 9f806d7a9e..7321144a32 100644
> > > >>>>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > > >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> > > >>>>>> @@ -20,19 +20,40 @@
> > > >>>>>>  #include <string.h>
> > > >>>>>>  #include <sys/hwprobe.h>
> > > >>>>>>
> > > >>>>>> +#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
> > > >>>>>> +
> > > >>>>>>  size_t
> > > >>>>>>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > > >>>>>>                       size_t max)
> > > >>>>>>  {
> > > >>>>>>    size_t i = max;
> > > >>>>>> +  struct riscv_hwprobe pairs[] = {
> > > >>>>>> +    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
> > > >>>>>> +    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
> > > >>>>>> +  };
> > > >>>>>>
> > > >>>>>> +  bool has_zbb = false;
> > > >>>>>>    bool fast_unaligned = false;
> > > >>>>>>
> > > >>>>>> -  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> > > >>>>>> -  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> > > >>>>>> -      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
> > > >>>>>> -          == RISCV_HWPROBE_MISALIGNED_FAST)
> > > >>>>>> -    fast_unaligned = true;
> > > >>>>>> +  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
> > > >>>>>> +    {
> > > >>>>>> +      struct riscv_hwprobe *pair;
> > > >>>>>> +
> > > >>>>>> +      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
> > > >>>>>> +      pair = &pairs[0];
> > > >>>>>> +      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
> > > >>>>>> +        has_zbb = true;
> > > >>>>>> +
> > > >>>>>> +      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
> > > >>>>>> +      pair = &pairs[1];
> > > >>>>>> +      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
> > > >>>>>> +        == RISCV_HWPROBE_MISALIGNED_FAST)
> > > >>>>>> +        fast_unaligned = true;
> > > >>>>>> +    }
> > > >>>>>> +
> > > >>>>>> +  IFUNC_IMPL (i, name, memchr,
> > > >>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
> > > >>>>>> +           IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
> > > >>>>>>
> > > >>>>>>    IFUNC_IMPL (i, name, memcpy,
> > > >>>>>>             IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> > > >>>>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > > >>>>>> new file mode 100644
> > > >>>>>> index 0000000000..bc076cbf24
> > > >>>>>> --- /dev/null
> > > >>>>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
> > > >>>>>> @@ -0,0 +1,57 @@
> > > >>>>>> +/* Multiple versions of memchr.
> > > >>>>>> +   All versions must be listed in ifunc-impl-list.c.
> > > >>>>>> +   Copyright (C) 2017-2024 Free Software Foundation, Inc.
> > > >>>>>> +   This file is part of the GNU C Library.
> > > >>>>>> +
> > > >>>>>> +   The GNU C Library is free software; you can redistribute it and/or
> > > >>>>>> +   modify it under the terms of the GNU Lesser General Public
> > > >>>>>> +   License as published by the Free Software Foundation; either
> > > >>>>>> +   version 2.1 of the License, or (at your option) any later version.
> > > >>>>>> +
> > > >>>>>> +   The GNU C Library is distributed in the hope that it will be useful,
> > > >>>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > >>>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > >>>>>> +   Lesser General Public License for more details.
> > > >>>>>> +
> > > >>>>>> +   You should have received a copy of the GNU Lesser General Public
> > > >>>>>> +   License along with the GNU C Library; if not, see
> > > >>>>>> +   <https://www.gnu.org/licenses/>.  */
> > > >>>>>> +
> > > >>>>>> +#if IS_IN (libc)
> > > >>>>>> +/* Redefine memchr so that the compiler won't complain about the type
> > > >>>>>> +   mismatch with the IFUNC selector in strong_alias, below.  */
> > > >>>>>> +# undef memchr
> > > >>>>>> +# define memchr __redirect_memchr
> > > >>>>>> +# include <stdint.h>
> > > >>>>>> +# include <string.h>
> > > >>>>>> +# include <ifunc-init.h>
> > > >>>>>> +# include <riscv-ifunc.h>
> > > >>>>>> +# include <sys/hwprobe.h>
> > > >>>>>> +
> > > >>>>>> +extern __typeof (__redirect_memchr) __libc_memchr;
> > > >>>>>> +
> > > >>>>>> +extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
> > > >>>>>> +extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
> > > >>>>>> +
> > > >>>>>> +static inline __typeof (__redirect_memchr) *
> > > >>>>>> +select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> > > >>>>>> +{
> > > >>>>>> +  unsigned long long int v;
> > > >>>>>> +  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
> > > >>>>>> +      && (v & RISCV_HWPROBE_EXT_ZBB))
> > > >>>>>> +    return __memchr_zbb;
> > > >>>>>> +
> > > >>>>>> +  return __memchr_generic;
> > > >>>>>> +}
> > > >>>>>> +
> > > >>>>>> +riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
> > > >>>>>> +
> > > >>>>>> +# undef memchr
> > > >>>>>> +strong_alias (__libc_memchr, memchr);
> > > >>>>>> +# ifdef SHARED
> > > >>>>>> +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
> > > >>>>>> +  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
> > > >>>>>> +# endif
> > > >>>>>> +#else
> > > >>>>>> +# include <string/memchr.c>
> > > >>>>>> +#endif
  
Kito Cheng May 6, 2024, 1:58 p.m. UTC | #12
> > `target` attribute is provided by GCC 14, and `-march=+ext` will at
> > least require GCC 15 (if we add),
> > also we relaxed the canonical order requirement for `-march=`, that
> > means we can relatively easy to manipulate the ISA
> > string by just concat that? so I don't really think we need  `-march=+ext`.
>
> Then we require the Makefile author to:
> 1) parse if the current compiler flags include march (repeat for all
> variables that may be used)
> 2a) if so, then amend to that
> 2b) if not, parse if there is a configured default ISA (via "gcc -v")
> 2ba) if so, then create -march accordingly
> 2bb) if not, then <fail to amend>
>
> Bonus problem:
> If there are multiple variables that might set -march=... this gets
> even trickier because
> the order how these variables are integrated in the compiler
> invocation command is unknown.
>
> Using "-march=+zfoo" seems simpler and cleaner.

I agree it will require the compiler to parse the gcc -v to obtain the
default -march, it might be a burden,
but AArch64 also does not allow -march=+ext only it requires
-march=arch{+[no]feature}*,
so I am wondering why do we need that to resolve/improve ifunc
implementation but not `target` attribute and `.option arch, +ext`?
  

Patch

diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
new file mode 100644
index 0000000000..a96c36398b
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memchr-generic.c
@@ -0,0 +1,26 @@ 
+/* Re-include the default memchr implementation.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#if IS_IN(libc)
+# define MEMCHR __memchr_generic
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(x)
+#endif
+#include <string/memchr.c>
diff --git a/sysdeps/riscv/multiarch/memchr-zbb.c b/sysdeps/riscv/multiarch/memchr-zbb.c
new file mode 100644
index 0000000000..bead0335ae
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memchr-zbb.c
@@ -0,0 +1,30 @@ 
+/* Re-include the default memchr implementation for Zbb.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#if IS_IN(libc)
+# define MEMCHR __memchr_zbb
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(x)
+#endif
+/* Convince preprocessor to have Zbb instructions.  */
+#ifndef __riscv_zbb
+# define __riscv_zbb
+#endif
+#include <string/memchr.c>
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
index fcef5659d4..5586d11c89 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
@@ -1,5 +1,8 @@ 
 ifeq ($(subdir),string)
 sysdep_routines += \
+  memchr \
+  memchr-generic \
+  memchr-zbb \
   memcpy \
   memcpy-generic \
   memcpy_noalignment \
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
index 9f806d7a9e..7321144a32 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
@@ -20,19 +20,40 @@ 
 #include <string.h>
 #include <sys/hwprobe.h>
 
+#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
+
 size_t
 __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			size_t max)
 {
   size_t i = max;
+  struct riscv_hwprobe pairs[] = {
+    { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
+    { .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
+  };
 
+  bool has_zbb = false;
   bool fast_unaligned = false;
 
-  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
-  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
-      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
-          == RISCV_HWPROBE_MISALIGNED_FAST)
-    fast_unaligned = true;
+  if (__riscv_hwprobe (pairs, ARRAY_SIZE (pairs), 0, NULL, 0) == 0)
+    {
+      struct riscv_hwprobe *pair;
+
+      /* RISCV_HWPROBE_KEY_IMA_EXT_0  */
+      pair = &pairs[0];
+      if (pair->value & RISCV_HWPROBE_EXT_ZBB)
+        has_zbb = true;
+
+      /* RISCV_HWPROBE_KEY_CPUPERF_0  */
+      pair = &pairs[1];
+      if ((pair->value & RISCV_HWPROBE_MISALIGNED_MASK)
+	   == RISCV_HWPROBE_MISALIGNED_FAST)
+        fast_unaligned = true;
+    }
+
+  IFUNC_IMPL (i, name, memchr,
+	      IFUNC_IMPL_ADD (array, i, memchr, has_zbb, __memchr_zbb)
+	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
 
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
new file mode 100644
index 0000000000..bc076cbf24
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
@@ -0,0 +1,57 @@ 
+/* Multiple versions of memchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+/* Redefine memchr so that the compiler won't complain about the type
+   mismatch with the IFUNC selector in strong_alias, below.  */
+# undef memchr
+# define memchr __redirect_memchr
+# include <stdint.h>
+# include <string.h>
+# include <ifunc-init.h>
+# include <riscv-ifunc.h>
+# include <sys/hwprobe.h>
+
+extern __typeof (__redirect_memchr) __libc_memchr;
+
+extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
+extern __typeof (__redirect_memchr) __memchr_zbb attribute_hidden;
+
+static inline __typeof (__redirect_memchr) *
+select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
+{
+  unsigned long long int v;
+  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &v) == 0
+      && (v & RISCV_HWPROBE_EXT_ZBB))
+    return __memchr_zbb;
+
+  return __memchr_generic;
+}
+
+riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
+
+# undef memchr
+strong_alias (__libc_memchr, memchr);
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
+# endif
+#else
+# include <string/memchr.c>
+#endif