sysdeps/memmem-avx2.c: add memmem-avx2.c
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
Commit Message
---
sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
Comments
Can you please provide some context/comment on this patch?
--Sunil
On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim <tirtajames45@gmail.com>
wrote:
> ---
> sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
> 1 file changed, 55 insertions(+)
> create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>
> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c
> b/sysdeps/x86_64/multiarch/memmem-avx2.c
> new file mode 100644
> index 0000000000..b0cced73aa
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
> @@ -0,0 +1,55 @@
> +#include <immintrin.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <libc-pointer-arith.h>
> +
> +void *
> +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t
> ne_len)
> +{
> + if (ne_len == 1)
> + return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
> + if (__glibc_unlikely (ne_len == 0))
> + return (void *) hs;
> + if (__glibc_unlikely (hs_len == ne_len))
> + return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
> + if (__glibc_unlikely (hs_len < ne_len))
> + return NULL;
> + const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
> + const unsigned char *h = (const unsigned char *) hs;
> + const unsigned char *n = (const unsigned char *) ne;
> + const unsigned char *const end = h + hs_len - ne_len;
> + const int c1 = *(n + 1);
> + n += 2, ne_len -= 2;
> + __m256i hv;
> + uint32_t i, m;
> + if (!PTR_IS_ALIGNED (h)) {
> + hv = _mm256_loadu_si256 ((const __m256i *) h);
> + m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> + for (; m; m = _blsr_u32 (m)) {
> + i = _tzcnt_u32 (m);
> + if (__glibc_unlikely (h + i > end))
> + return NULL;
> + if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> + return (char *) h + i;
> + }
> + h += sizeof (__m256i);
> + if (__glibc_unlikely (h > end))
> + return NULL;
> + h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
> + }
> + for (;;) {
> + hv = _mm256_load_si256 ((const __m256i *) h);
> + m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
> + for (; m; m = _blsr_u32 (m)) {
> + i = _tzcnt_u32 (m);
> + if (__glibc_unlikely (h + i > end))
> + return NULL;
> + if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
> + return (char *) h + i;
> + }
> + h += sizeof (__m256i);
> + if (__glibc_unlikely (h > end))
> + return NULL;
> + }
> + return NULL;
> +}
> --
> 2.43.0
>
>
On Mon, Dec 11, 2023 at 11:45 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
> Can you please provide some context/comment on this patch?
>
Likewise performance data.
> --Sunil
>
> On Mon, Dec 11, 2023 at 9:37 AM James Tirta Halim <tirtajames45@gmail.com> wrote:
>>
>> ---
>> sysdeps/x86_64/multiarch/memmem-avx2.c | 55 ++++++++++++++++++++++++++
>> 1 file changed, 55 insertions(+)
>> create mode 100644 sysdeps/x86_64/multiarch/memmem-avx2.c
>>
>> diff --git a/sysdeps/x86_64/multiarch/memmem-avx2.c b/sysdeps/x86_64/multiarch/memmem-avx2.c
>> new file mode 100644
>> index 0000000000..b0cced73aa
>> --- /dev/null
>> +++ b/sysdeps/x86_64/multiarch/memmem-avx2.c
>> @@ -0,0 +1,55 @@
>> +#include <immintrin.h>
>> +#include <string.h>
>> +#include <inttypes.h>
>> +#include <libc-pointer-arith.h>
>> +
>> +void *
>> +__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
>> +{
>> + if (ne_len == 1)
>> + return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
>> + if (__glibc_unlikely (ne_len == 0))
>> + return (void *) hs;
>> + if (__glibc_unlikely (hs_len == ne_len))
>> + return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
>> + if (__glibc_unlikely (hs_len < ne_len))
>> + return NULL;
>> + const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
>> + const unsigned char *h = (const unsigned char *) hs;
>> + const unsigned char *n = (const unsigned char *) ne;
>> + const unsigned char *const end = h + hs_len - ne_len;
>> + const int c1 = *(n + 1);
>> + n += 2, ne_len -= 2;
>> + __m256i hv;
>> + uint32_t i, m;
>> + if (!PTR_IS_ALIGNED (h)) {
>> + hv = _mm256_loadu_si256 ((const __m256i *) h);
>> + m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
>> + for (; m; m = _blsr_u32 (m)) {
>> + i = _tzcnt_u32 (m);
>> + if (__glibc_unlikely (h + i > end))
>> + return NULL;
>> + if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
>> + return (char *) h + i;
>> + }
>> + h += sizeof (__m256i);
>> + if (__glibc_unlikely (h > end))
>> + return NULL;
>> + h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
>> + }
>> + for (;;) {
>> + hv = _mm256_load_si256 ((const __m256i *) h);
>> + m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
>> + for (; m; m = _blsr_u32 (m)) {
>> + i = _tzcnt_u32 (m);
>> + if (__glibc_unlikely (h + i > end))
>> + return NULL;
>> + if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
>> + return (char *) h + i;
>> + }
>> + h += sizeof (__m256i);
>> + if (__glibc_unlikely (h > end))
>> + return NULL;
>> + }
>> + return NULL;
>> +}
>> --
>> 2.43.0
>>
new file mode 100644
@@ -0,0 +1,55 @@
+#include <immintrin.h>
+#include <string.h>
+#include <inttypes.h>
+#include <libc-pointer-arith.h>
+
+void *
+__memmem_avx2 (const void *hs, size_t hs_len, const void *ne, size_t ne_len)
+{
+ if (ne_len == 1)
+ return (void *) memchr (hs, *(unsigned char *) ne, hs_len);
+ if (__glibc_unlikely (ne_len == 0))
+ return (void *) hs;
+ if (__glibc_unlikely (hs_len == ne_len))
+ return !memcmp (hs, ne, ne_len) ? (void *) hs : NULL;
+ if (__glibc_unlikely (hs_len < ne_len))
+ return NULL;
+ const __m256i nv = _mm256_set1_epi8 (*(char *) ne);
+ const unsigned char *h = (const unsigned char *) hs;
+ const unsigned char *n = (const unsigned char *) ne;
+ const unsigned char *const end = h + hs_len - ne_len;
+ const int c1 = *(n + 1);
+ n += 2, ne_len -= 2;
+ __m256i hv;
+ uint32_t i, m;
+ if (!PTR_IS_ALIGNED (h)) {
+ hv = _mm256_loadu_si256 ((const __m256i *) h);
+ m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+ for (; m; m = _blsr_u32 (m)) {
+ i = _tzcnt_u32 (m);
+ if (__glibc_unlikely (h + i > end))
+ return NULL;
+ if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
+ return (char *) h + i;
+ }
+ h += sizeof (__m256i);
+ if (__glibc_unlikely (h > end))
+ return NULL;
+ h = (const unsigned char *) PTR_ALIGN_UP (h, sizeof (__m256i));
+ }
+ for (;;) {
+ hv = _mm256_load_si256 ((const __m256i *) h);
+ m = (uint32_t) _mm256_movemask_epi8 (_mm256_cmpeq_epi8 (hv, nv));
+ for (; m; m = _blsr_u32 (m)) {
+ i = _tzcnt_u32 (m);
+ if (__glibc_unlikely (h + i > end))
+ return NULL;
+ if (*(h + i + 1) == c1 && !memcmp (h + i + 2, n, ne_len))
+ return (char *) h + i;
+ }
+ h += sizeof (__m256i);
+ if (__glibc_unlikely (h > end))
+ return NULL;
+ }
+ return NULL;
+}