[1/3] libcpp: configure: check for AVX2 instead of SSE4

Message ID 20240806161850.18839-1-amonakov@ispras.ru
State New
Headers
Series libcpp: improve x86 vectorized helpers

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed

Commit Message

Alexander Monakov Aug. 6, 2024, 4:18 p.m. UTC
Upcoming patches first lower the ISA level that Binutils is required to
support from SSE4.2 to SSSE3, then raise it to AVX2. Instead of fiddling
with detection, just bump our configure check to AVX2 immediately: if by
some accident somebody builds GCC without AVX2 support in the assembler,
they will get the SSE2 vectorized lexer, which is not too slow.

libcpp/ChangeLog:

	* config.in: Regenerate.
	* configure: Regenerate.
	* configure.ac: Check for AVX2 instead of SSE4.2.
	* lex.cc: Adjust for changed config macro.
---
 libcpp/config.in    | 6 +++---
 libcpp/configure    | 4 ++--
 libcpp/configure.ac | 6 +++---
 libcpp/lex.cc       | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)
  

Comments

Richard Biener Aug. 7, 2024, 8:23 a.m. UTC | #1
On Tue, Aug 6, 2024 at 6:19 PM Alexander Monakov <amonakov@ispras.ru> wrote:
>
> Upcoming patches first drop Binutils ISA support from SSE4.2 to SSSE3,
> then bump it to AVX2. Instead of fiddling with detection, just bump
> our configure check to AVX2 immediately: if by some accident somebody
> builds GCC without AVX2 support in the assembler, they will get SSE2
> vectorized lexer, which is not too slow.

OK.

> libcpp/ChangeLog:
>
>         * config.in: Regenerate.
>         * configure: Regenerate.
>         * configure.ac: Check for AVX2 instead of SSE4.2.
>         * lex.cc: Adjust for changed config macro.
> ---
>  libcpp/config.in    | 6 +++---
>  libcpp/configure    | 4 ++--
>  libcpp/configure.ac | 6 +++---
>  libcpp/lex.cc       | 2 +-
>  4 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/libcpp/config.in b/libcpp/config.in
> index 253ef03a3d..a0ca9e4df4 100644
> --- a/libcpp/config.in
> +++ b/libcpp/config.in
> @@ -35,6 +35,9 @@
>     */
>  #undef HAVE_ALLOCA_H
>
> +/* Define to 1 if you can assemble AVX2 insns. */
> +#undef HAVE_AVX2
> +
>  /* Define to 1 if you have the Mac OS X function
>     CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
>  #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
> @@ -210,9 +213,6 @@
>  /* Define to 1 if you have the `putc_unlocked' function. */
>  #undef HAVE_PUTC_UNLOCKED
>
> -/* Define to 1 if you can assemble SSE4 insns. */
> -#undef HAVE_SSE4
> -
>  /* Define to 1 if you have the <stddef.h> header file. */
>  #undef HAVE_STDDEF_H
>
> diff --git a/libcpp/configure b/libcpp/configure
> index 32d6aaa306..74af097620 100755
> --- a/libcpp/configure
> +++ b/libcpp/configure
> @@ -9140,14 +9140,14 @@ case $target in
>  int
>  main ()
>  {
> -asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))
> +asm ("vpshufb %ymm0, %ymm1, %ymm2")
>    ;
>    return 0;
>  }
>  _ACEOF
>  if ac_fn_c_try_compile "$LINENO"; then :
>
> -$as_echo "#define HAVE_SSE4 1" >>confdefs.h
> +$as_echo "#define HAVE_AVX2 1" >>confdefs.h
>
>  fi
>  rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
> diff --git a/libcpp/configure.ac b/libcpp/configure.ac
> index b883fec776..cfefb63552 100644
> --- a/libcpp/configure.ac
> +++ b/libcpp/configure.ac
> @@ -197,9 +197,9 @@ fi
>
>  case $target in
>    i?86-* | x86_64-*)
> -    AC_TRY_COMPILE([], [asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))],
> -      [AC_DEFINE([HAVE_SSE4], [1],
> -                [Define to 1 if you can assemble SSE4 insns.])])
> +    AC_TRY_COMPILE([], [asm ("vpshufb %ymm0, %ymm1, %ymm2")],
> +      [AC_DEFINE([HAVE_AVX2], [1],
> +                [Define to 1 if you can assemble AVX2 insns.])])
>  esac
>
>  # Enable --enable-host-shared.
> diff --git a/libcpp/lex.cc b/libcpp/lex.cc
> index 1591dcdf15..fa9c03614c 100644
> --- a/libcpp/lex.cc
> +++ b/libcpp/lex.cc
> @@ -344,7 +344,7 @@ search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
>    return (const uchar *)p + found;
>  }
>
> -#ifdef HAVE_SSE4
> +#ifdef HAVE_AVX2
>  /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
>
>  static const uchar *
> --
> 2.44.0
>
  

Patch

diff --git a/libcpp/config.in b/libcpp/config.in
index 253ef03a3d..a0ca9e4df4 100644
--- a/libcpp/config.in
+++ b/libcpp/config.in
@@ -35,6 +35,9 @@ 
    */
 #undef HAVE_ALLOCA_H
 
+/* Define to 1 if you can assemble AVX2 insns. */
+#undef HAVE_AVX2
+
 /* Define to 1 if you have the Mac OS X function
    CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
 #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
@@ -210,9 +213,6 @@ 
 /* Define to 1 if you have the `putc_unlocked' function. */
 #undef HAVE_PUTC_UNLOCKED
 
-/* Define to 1 if you can assemble SSE4 insns. */
-#undef HAVE_SSE4
-
 /* Define to 1 if you have the <stddef.h> header file. */
 #undef HAVE_STDDEF_H
 
diff --git a/libcpp/configure b/libcpp/configure
index 32d6aaa306..74af097620 100755
--- a/libcpp/configure
+++ b/libcpp/configure
@@ -9140,14 +9140,14 @@  case $target in
 int
 main ()
 {
-asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))
+asm ("vpshufb %ymm0, %ymm1, %ymm2")
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
 
-$as_echo "#define HAVE_SSE4 1" >>confdefs.h
+$as_echo "#define HAVE_AVX2 1" >>confdefs.h
 
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
diff --git a/libcpp/configure.ac b/libcpp/configure.ac
index b883fec776..cfefb63552 100644
--- a/libcpp/configure.ac
+++ b/libcpp/configure.ac
@@ -197,9 +197,9 @@  fi
 
 case $target in
   i?86-* | x86_64-*)
-    AC_TRY_COMPILE([], [asm ("pcmpestri %0, %%xmm0, %%xmm1" : : "i"(0))],
-      [AC_DEFINE([HAVE_SSE4], [1],
-		 [Define to 1 if you can assemble SSE4 insns.])])
+    AC_TRY_COMPILE([], [asm ("vpshufb %ymm0, %ymm1, %ymm2")],
+      [AC_DEFINE([HAVE_AVX2], [1],
+		 [Define to 1 if you can assemble AVX2 insns.])])
 esac
 
 # Enable --enable-host-shared.
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index 1591dcdf15..fa9c03614c 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -344,7 +344,7 @@  search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
   return (const uchar *)p + found;
 }
 
-#ifdef HAVE_SSE4
+#ifdef HAVE_AVX2
 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
 
 static const uchar *