sysdeps/memmem-avx2.c: add memmem-avx2.c

Message ID 20231211173320.702246-1-tirtajames45@gmail.com
State Superseded
Headers
Series sysdeps/memmem-avx2.c: add memmem-avx2.c |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed

Commit Message

James Tirta Halim Dec. 11, 2023, 5:33 p.m. UTC
  ---
 sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
  

Comments

Sunil Pandey Dec. 11, 2023, 5:44 p.m. UTC | #1
Can you please provide some context/comment on this patch?

--Sunil

On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim <tirtajames45@gmail.com>
wrote:

> ---
>  sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
>  1 file changed, 55 insertions(+)
>  create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>
> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c
> b/sysdeps/x86_64/multiarch/memmem-avx2.c
> new file mode 100644
> index 0000000000..b0cced73aa
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
> @@ -0,0 +1,55 @@
> +#include <immintrin.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <libc-pointer-arith.h>
> +
> +void *
> +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t
> ne_len)
> +{
> +  if (ne_len == 1)
> +    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
> +  if (__glibc_unlikely (ne_len == 0))
> +    return (void *) hs;
> +  if (__glibc_unlikely (hs_len == ne_len))
> +    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
> +  if (__glibc_unlikely (hs_len < ne_len))
> +    return NULL;
> +  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
> +  const unsigned char *h = (const unsigned char *) hs;
> +  const unsigned char *n = (const unsigned char *) ne;
> +  const unsigned char *const end = h + hs_len - ne_len;
> +  const int c1 = *(n + 1);
> +  n += 2, ne_len -= 2;
> +  __m256i hv;
> +  uint32_t i, m;
> +  if (!PTR_IS_ALIGNED (h)) {
> +    hv = _mm256_loadu_si256 ((const __m256i *) h);
> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    for (; m; m = _blsr_u32 (m)) {
> +      i = _tzcnt_u32 (m);
> +      if (__glibc_unlikely (h + i > end))
> +        return NULL;
> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> +        return (char *) h + i;
> +    }
> +    h += sizeof (__m256i);
> +    if (__glibc_unlikely (h > end))
> +      return NULL;
> +    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
> +  }
> +  for (;;) {
> +    hv = _mm256_load_si256 ((const __m256i *) h);
> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> +    for (; m; m = _blsr_u32 (m)) {
> +      i = _tzcnt_u32 (m);
> +      if (__glibc_unlikely (h + i > end))
> +        return NULL;
> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> +        return (char *) h + i;
> +    }
> +    h += sizeof (__m256i);
> +    if (__glibc_unlikely (h > end))
> +      return NULL;
> +  }
> +  return NULL;
> +}
> --
> 2.43.0
>
>
  
Noah Goldstein Dec. 11, 2023, 6:11 p.m. UTC | #2
On Mon, Dec 11, 2023 at 11:45 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
> Can you please provide some context/comment on this patch?
>
Likewise performance data.
> --Sunil
>
> On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim <tirtajames45@gmail.com> wrote:
>>
>> ---
>>  sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
>>  1 file changed, 55 insertions(+)
>>  create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>>
>> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
>> new file mode 100644
>> index 0000000000..b0cced73aa
>> --- /dev/null
>> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
>> @@ -0,0 +1,55 @@
>> +#include <immintrin.h>
>> +#include <string.h>
>> +#include <inttypes.h>
>> +#include <libc-pointer-arith.h>
>> +
>> +void *
>> +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
>> +{
>> +  if (ne_len == 1)
>> +    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
>> +  if (__glibc_unlikely (ne_len == 0))
>> +    return (void *) hs;
>> +  if (__glibc_unlikely (hs_len == ne_len))
>> +    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
>> +  if (__glibc_unlikely (hs_len < ne_len))
>> +    return NULL;
>> +  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
>> +  const unsigned char *h = (const unsigned char *) hs;
>> +  const unsigned char *n = (const unsigned char *) ne;
>> +  const unsigned char *const end = h + hs_len - ne_len;
>> +  const int c1 = *(n + 1);
>> +  n += 2, ne_len -= 2;
>> +  __m256i hv;
>> +  uint32_t i, m;
>> +  if (!PTR_IS_ALIGNED (h)) {
>> +    hv = _mm256_loadu_si256 ((const __m256i *) h);
>> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
>> +    for (; m; m = _blsr_u32 (m)) {
>> +      i = _tzcnt_u32 (m);
>> +      if (__glibc_unlikely (h + i > end))
>> +        return NULL;
>> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
>> +        return (char *) h + i;
>> +    }
>> +    h += sizeof (__m256i);
>> +    if (__glibc_unlikely (h > end))
>> +      return NULL;
>> +    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
>> +  }
>> +  for (;;) {
>> +    hv = _mm256_load_si256 ((const __m256i *) h);
>> +    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
>> +    for (; m; m = _blsr_u32 (m)) {
>> +      i = _tzcnt_u32 (m);
>> +      if (__glibc_unlikely (h + i > end))
>> +        return NULL;
>> +      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
>> +        return (char *) h + i;
>> +    }
>> +    h += sizeof (__m256i);
>> +    if (__glibc_unlikely (h > end))
>> +      return NULL;
>> +  }
>> +  return NULL;
>> +}
>> --
>> 2.43.0
>>
  

Patch

diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
new file mode 100644
index 0000000000..b0cced73aa
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
@@ -0,0 +1,55 @@ 
+#include <immintrin.h>
+#include <string.h>
+#include <inttypes.h>
+#include <libc-pointer-arith.h>
+
+void *
+__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
+{
+  if (ne_len == 1)
+    return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
+  if (__glibc_unlikely (ne_len == 0))
+    return (void *) hs;
+  if (__glibc_unlikely (hs_len == ne_len))
+    return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
+  if (__glibc_unlikely (hs_len < ne_len))
+    return NULL;
+  const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
+  const unsigned char *h = (const unsigned char *) hs;
+  const unsigned char *n = (const unsigned char *) ne;
+  const unsigned char *const end = h + hs_len - ne_len;
+  const int c1 = *(n + 1);
+  n += 2, ne_len -= 2;
+  __m256i hv;
+  uint32_t i, m;
+  if (!PTR_IS_ALIGNED (h)) {
+    hv = _mm256_loadu_si256 ((const __m256i *) h);
+    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+    for (; m; m = _blsr_u32 (m)) {
+      i = _tzcnt_u32 (m);
+      if (__glibc_unlikely (h + i > end))
+        return NULL;
+      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
+        return (char *) h + i;
+    }
+    h += sizeof (__m256i);
+    if (__glibc_unlikely (h > end))
+      return NULL;
+    h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
+  }
+  for (;;) {
+    hv = _mm256_load_si256 ((const __m256i *) h);
+    m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+    for (; m; m = _blsr_u32 (m)) {
+      i = _tzcnt_u32 (m);
+      if (__glibc_unlikely (h + i > end))
+        return NULL;
+      if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
+        return (char *) h + i;
+    }
+    h += sizeof (__m256i);
+    if (__glibc_unlikely (h > end))
+      return NULL;
+  }
+  return NULL;
+}