[v3,2/2] Optimize bsearch() implementation for performance

Message ID 20240903063056.2724742-3-visitorckw@gmail.com
State Superseded
Headers
Series Optimization and benchmarking of bsearch() |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

Kuan-Wei Chiu Sept. 3, 2024, 6:30 a.m. UTC
  Optimize the bsearch() function to improve binary search performance.
Although the code size grew by 8 bytes, the new implementation achieves
a 15% reduction in execution time on my x86 machine, according to
benchmark results.

code size:
* old:
   text    data     bss     dec     hex filename
    250       0       0     250      fa ./stdlib/bsearch.o
* new:
   text    data     bss     dec     hex filename
    258       0       0     258     102 ./stdlib/bsearch.o

benchmark:
* old:
{
 "timing_type": "hp_timing",
 "functions": {
  "bsearch": {
   "bench-variant": "default",
   "results": [121.887]
  }
 }
}
* new:
{
 "timing_type": "hp_timing",
 "functions": {
  "bsearch": {
   "bench-variant": "default",
   "results": [103.046]
  }
 }
}

Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
Although the benchmark results on my x86 machine are promising, I would
appreciate assistance in verifying that no regressions occur on other
architectures.

 bits/stdlib-bsearch.h | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)
  

Comments

Noah Goldstein Sept. 3, 2024, 5:37 p.m. UTC | #1
On Mon, Sep 2, 2024 at 11:31 PM Kuan-Wei Chiu <visitorckw@gmail.com> wrote:
>
> Optimize the bsearch() function to improve binary search performance.
> Although the code size grew by 8 bytes, the new implementation achieves
> a 15% reduction in execution time on my x86 machine, according to
> benchmark results.
>
> code size:
> * old:
>    text    data     bss     dec     hex filename
>     250       0       0     250      fa ./stdlib/bsearch.o
> * new:
>    text    data     bss     dec     hex filename
>     258       0       0     258     102 ./stdlib/bsearch.o
>
> benchmark:
> * old:
> {
>  "timing_type": "hp_timing",
>  "functions": {
>   "bsearch": {
>    "bench-variant": "default",
>    "results": [121.887]
>   }
>  }
> }
> * new:
> {
>  "timing_type": "hp_timing",
>  "functions": {
>   "bsearch": {
>    "bench-variant": "default",
>    "results": [103.046]
>   }
>  }
> }
>
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
> Although the benchmark results on my x86 machine are promising, I would
> appreciate assistance in verifying that no regressions occur on other
> architectures.
>
>  bits/stdlib-bsearch.h | 20 +++++++++-----------
>  1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/bits/stdlib-bsearch.h b/bits/stdlib-bsearch.h
> index 540d718952..57f060b504 100644
> --- a/bits/stdlib-bsearch.h
> +++ b/bits/stdlib-bsearch.h
> @@ -20,22 +20,14 @@ __extern_inline void *
>  bsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
>          __compar_fn_t __compar)
>  {
> -  size_t __l, __u, __idx;
>    const void *__p;
>    int __comparison;
>
> -  __l = 0;
> -  __u = __nmemb;
> -  while (__l < __u)
> +  while (__nmemb)
>      {
> -      __idx = (__l + __u) / 2;
> -      __p = (const void *) (((const char *) __base) + (__idx * __size));
> +      __p = (const void *) (((const char *) __base) + ((__nmemb >> 1) * __size));
>        __comparison = (*__compar) (__key, __p);
> -      if (__comparison < 0)
> -       __u = __idx;
> -      else if (__comparison > 0)
> -       __l = __idx + 1;
> -      else
> +      if (__comparison == 0)
>         {
>  #if __GNUC_PREREQ(4, 6)
>  # pragma GCC diagnostic push
> @@ -46,6 +38,12 @@ bsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
>  # pragma GCC diagnostic pop
>  #endif
>         }
> +      if (__comparison > 0)
> +       {
> +         __base = ((const char *) __p) + __size;
> +         --__nmemb;
> +       }
> +      __nmemb >>= 1;
>      }
>
>    return NULL;
> --
> 2.34.1
>

LGTM
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
  

Patch

diff --git a/bits/stdlib-bsearch.h b/bits/stdlib-bsearch.h
index 540d718952..57f060b504 100644
--- a/bits/stdlib-bsearch.h
+++ b/bits/stdlib-bsearch.h
@@ -20,22 +20,14 @@  __extern_inline void *
 bsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
 	 __compar_fn_t __compar)
 {
-  size_t __l, __u, __idx;
   const void *__p;
   int __comparison;
 
-  __l = 0;
-  __u = __nmemb;
-  while (__l < __u)
+  while (__nmemb)
     {
-      __idx = (__l + __u) / 2;
-      __p = (const void *) (((const char *) __base) + (__idx * __size));
+      __p = (const void *) (((const char *) __base) + ((__nmemb >> 1) * __size));
       __comparison = (*__compar) (__key, __p);
-      if (__comparison < 0)
-	__u = __idx;
-      else if (__comparison > 0)
-	__l = __idx + 1;
-      else
+      if (__comparison == 0)
 	{
 #if __GNUC_PREREQ(4, 6)
 # pragma GCC diagnostic push
@@ -46,6 +38,12 @@  bsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size,
 # pragma GCC diagnostic pop
 #endif
 	}
+      if (__comparison > 0)
+	{
+	  __base = ((const char *) __p) + __size;
+	  --__nmemb;
+	}
+      __nmemb >>= 1;
     }
 
   return NULL;