Replace hash function from bcache with fast_hash

Message ID 20191203010207.63155-1-cbiesinger@google.com
State New, archived
Headers

Commit Message

Terekhov, Mikhail via Gdb-patches Dec. 3, 2019, 1:02 a.m. UTC
  This function is not just slower than xxhash, it is slower than
even libiberty's iterative_hash, so there does not seem to be
a reason for it to exist.

------------------------------------------------------------
Benchmark                     Time           CPU Iterations
------------------------------------------------------------
BM_xxh3                      11 ns         11 ns   66127192
BM_xxh32                     19 ns         19 ns   36792609
BM_xxh64                     16 ns         16 ns   42941328
BM_city32                    26 ns         26 ns   27028370
BM_city64                    17 ns         17 ns   40472793
BM_iterative_hash            77 ns         77 ns    9088854
BM_bcache_hash              125 ns        125 ns    5599232

gdb/ChangeLog:

2019-12-02  Christian Biesinger  <cbiesinger@google.com>

	* bcache.c (hash): Remove.
	(hash_continue): Remove.
	* bcache.h (hash): Remove.
	(hash_continue): Remove.
	(struct bcache) <ctor>: Update.
	* psymtab.c (psymbol_hash): Update.
	* stabsread.c (hashname): Update.
	* utils.h (fast_hash): Add an argument for a start value,
	defaulting to zero.

Change-Id: I107f013eda5fdd3293326b5a206be43155dae0f8
---
 gdb/bcache.c    | 25 -------------------------
 gdb/bcache.h    | 11 +++++------
 gdb/psymtab.c   | 11 +++++------
 gdb/stabsread.c |  2 +-
 gdb/utils.h     | 11 ++++++-----
 5 files changed, 17 insertions(+), 43 deletions(-)
  

Comments

Simon Marchi Dec. 3, 2019, 8:36 p.m. UTC | #1
On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote:
> This function is not just slower than xxhash, it is slower than
> even libiberty's iterative_hash, so there does not seem to be
> a reason for it to exist.
> 
> ------------------------------------------------------------
> Benchmark                     Time           CPU Iterations
> ------------------------------------------------------------
> BM_xxh3                      11 ns         11 ns   66127192
> BM_xxh32                     19 ns         19 ns   36792609
> BM_xxh64                     16 ns         16 ns   42941328
> BM_city32                    26 ns         26 ns   27028370
> BM_city64                    17 ns         17 ns   40472793
> BM_iterative_hash            77 ns         77 ns    9088854
> BM_bcache_hash              125 ns        125 ns    5599232
> 
> gdb/ChangeLog:
> 
> 2019-12-02  Christian Biesinger  <cbiesinger@google.com>
> 
> 	* bcache.c (hash): Remove.
> 	(hash_continue): Remove.
> 	* bcache.h (hash): Remove.
> 	(hash_continue): Remove.
> 	(struct bcache) <ctor>: Update.
> 	* psymtab.c (psymbol_hash): Update.
> 	* stabsread.c (hashname): Update.
> 	* utils.h (fast_hash): Add an argument for a start value,
> 	defaulting to zero.

LGTM, with the nits below fixed.

> diff --git a/gdb/bcache.h b/gdb/bcache.h
> index 15dcc63440..96f6d6813f 100644
> --- a/gdb/bcache.h
> +++ b/gdb/bcache.h
> @@ -138,13 +138,12 @@
>  
>  struct bstring;
>  
> -/* The hash functions */
> -extern unsigned long hash (const void *addr, int length);
> -extern unsigned long hash_continue (const void *addr, int length,
> -                                    unsigned long h);
> -
>  struct bcache
>  {
> +  static unsigned long default_hash (const void *ptr, int length) {

Brace on the next line.

> +    return fast_hash (ptr, length, 0);
> +  }

Can this method be private, just like `compare` is?

> +
>    /* Allocate a bcache.  HASH_FN and COMPARE_FN can be used to pass in
>       custom hash, and compare functions to be used by this bcache.  If
>       HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is

This line of documentation should be updated, probably hash -> fast_hash.

> diff --git a/gdb/utils.h b/gdb/utils.h
> index 79c8a6fc8d..68376dac83 100644
> --- a/gdb/utils.h
> +++ b/gdb/utils.h
> @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
>  			  const gdb_byte *source, ULONGEST source_offset,
>  			  ULONGEST nbits, int bits_big_endian);
>  
> -/* A fast hashing function.  This can be used to hash strings in a fast way
> +/* A fast hashing function.  This can be used to hash data in a fast way
>     when the length is known.  If no fast hashing library is available, falls
> -   back to iterative_hash from libiberty.  */
> +   back to iterative_hash from libiberty.  START_VALUE can be set to
> +   continue hashing from a previous value.  */
>  
>  static inline unsigned int
> -fast_hash (const char* str, size_t len)
> +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)

- const void* ptr
+ const void *ptr

Simon
  
Terekhov, Mikhail via Gdb-patches Dec. 3, 2019, 9:25 p.m. UTC | #2
On Tue, Dec 3, 2019 at 2:36 PM Simon Marchi <simark@simark.ca> wrote:
>
> On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote:
> > This function is not just slower than xxhash, it is slower than
> > even libiberty's iterative_hash, so there does not seem to be
> > a reason for it to exist.
> >
> > ------------------------------------------------------------
> > Benchmark                     Time           CPU Iterations
> > ------------------------------------------------------------
> > BM_xxh3                      11 ns         11 ns   66127192
> > BM_xxh32                     19 ns         19 ns   36792609
> > BM_xxh64                     16 ns         16 ns   42941328
> > BM_city32                    26 ns         26 ns   27028370
> > BM_city64                    17 ns         17 ns   40472793
> > BM_iterative_hash            77 ns         77 ns    9088854
> > BM_bcache_hash              125 ns        125 ns    5599232
> >
> > gdb/ChangeLog:
> >
> > 2019-12-02  Christian Biesinger  <cbiesinger@google.com>
> >
> >       * bcache.c (hash): Remove.
> >       (hash_continue): Remove.
> >       * bcache.h (hash): Remove.
> >       (hash_continue): Remove.
> >       (struct bcache) <ctor>: Update.
> >       * psymtab.c (psymbol_hash): Update.
> >       * stabsread.c (hashname): Update.
> >       * utils.h (fast_hash): Add an argument for a start value,
> >       defaulting to zero.
>
> LGTM, with the nits below fixed.
>
> > diff --git a/gdb/bcache.h b/gdb/bcache.h
> > index 15dcc63440..96f6d6813f 100644
> > --- a/gdb/bcache.h
> > +++ b/gdb/bcache.h
> > @@ -138,13 +138,12 @@
> >
> >  struct bstring;
> >
> > -/* The hash functions */
> > -extern unsigned long hash (const void *addr, int length);
> > -extern unsigned long hash_continue (const void *addr, int length,
> > -                                    unsigned long h);
> > -
> >  struct bcache
> >  {
> > +  static unsigned long default_hash (const void *ptr, int length) {
>
> Brace on the next line.

Done.

> > +    return fast_hash (ptr, length, 0);
> > +  }
>
> Can this method be private, just like `compare` is?

Done.

> > +
> >    /* Allocate a bcache.  HASH_FN and COMPARE_FN can be used to pass in
> >       custom hash, and compare functions to be used by this bcache.  If
> >       HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is
>
> This line of documentation should be updated, probably hash -> fast_hash.

Done.

> > diff --git a/gdb/utils.h b/gdb/utils.h
> > index 79c8a6fc8d..68376dac83 100644
> > --- a/gdb/utils.h
> > +++ b/gdb/utils.h
> > @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
> >                         const gdb_byte *source, ULONGEST source_offset,
> >                         ULONGEST nbits, int bits_big_endian);
> >
> > -/* A fast hashing function.  This can be used to hash strings in a fast way
> > +/* A fast hashing function.  This can be used to hash data in a fast way
> >     when the length is known.  If no fast hashing library is available, falls
> > -   back to iterative_hash from libiberty.  */
> > +   back to iterative_hash from libiberty.  START_VALUE can be set to
> > +   continue hashing from a previous value.  */
> >
> >  static inline unsigned int
> > -fast_hash (const char* str, size_t len)
> > +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)
>
> - const void* ptr
> + const void *ptr

Done, thanks.

Will push now with those fixes.

Christian
  

Patch

diff --git a/gdb/bcache.c b/gdb/bcache.c
index 3f0a63be22..497efe96cb 100644
--- a/gdb/bcache.c
+++ b/gdb/bcache.c
@@ -51,31 +51,6 @@  struct bstring
   d;
 };
 
-/* The old hash function was stolen from SDBM. This is what DB 3.0
-   uses now, and is better than the old one.  */
-
-unsigned long
-hash(const void *addr, int length)
-{
-  return hash_continue (addr, length, 0);
-}
-
-/* Continue the calculation of the hash H at the given address.  */
-
-unsigned long
-hash_continue (const void *addr, int length, unsigned long h)
-{
-  const unsigned char *k, *e;
-
-  k = (const unsigned char *)addr;
-  e = k+length;
-  for (; k< e;++k)
-    {
-      h *=16777619;
-      h ^= *k;
-    }
-  return (h);
-}
 
 /* Growing the bcache's hash table.  */
 
diff --git a/gdb/bcache.h b/gdb/bcache.h
index 15dcc63440..96f6d6813f 100644
--- a/gdb/bcache.h
+++ b/gdb/bcache.h
@@ -138,13 +138,12 @@ 
 
 struct bstring;
 
-/* The hash functions */
-extern unsigned long hash (const void *addr, int length);
-extern unsigned long hash_continue (const void *addr, int length,
-                                    unsigned long h);
-
 struct bcache
 {
+  static unsigned long default_hash (const void *ptr, int length) {
+    return fast_hash (ptr, length, 0);
+  }
+
   /* Allocate a bcache.  HASH_FN and COMPARE_FN can be used to pass in
      custom hash, and compare functions to be used by this bcache.  If
      HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is
@@ -154,7 +153,7 @@  struct bcache
 					    int length) = nullptr,
 		   int (*compare_fn)(const void *, const void *,
 				     int length) = nullptr)
-    : m_hash_function (hash_fn == nullptr ? hash : hash_fn),
+    : m_hash_function (hash_fn == nullptr ? default_hash : hash_fn),
       m_compare_function (compare_fn == nullptr ? compare : compare_fn)
   {
   }
diff --git a/gdb/psymtab.c b/gdb/psymtab.c
index 7074a32956..2cbc6d4f65 100644
--- a/gdb/psymtab.c
+++ b/gdb/psymtab.c
@@ -1530,14 +1530,13 @@  psymbol_hash (const void *addr, int length)
   unsigned int domain = psymbol->domain;
   unsigned int theclass = psymbol->aclass;
 
-  h = hash_continue (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h);
-  h = hash_continue (&lang, sizeof (unsigned int), h);
-  h = hash_continue (&domain, sizeof (unsigned int), h);
-  h = hash_continue (&theclass, sizeof (unsigned int), h);
+  h = fast_hash (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h);
+  h = fast_hash (&lang, sizeof (unsigned int), h);
+  h = fast_hash (&domain, sizeof (unsigned int), h);
+  h = fast_hash (&theclass, sizeof (unsigned int), h);
   /* Note that psymbol names are interned via symbol_set_names, so
      there's no need to hash the contents of the name here.  */
-  h = hash_continue (&psymbol->ginfo.name,
-		     sizeof (psymbol->ginfo.name), h);
+  h = fast_hash (&psymbol->ginfo.name, sizeof (psymbol->ginfo.name), h);
 
   return h;
 }
diff --git a/gdb/stabsread.c b/gdb/stabsread.c
index 979df0266c..91a73dd10d 100644
--- a/gdb/stabsread.c
+++ b/gdb/stabsread.c
@@ -4778,7 +4778,7 @@  find_name_end (const char *name)
 int
 hashname (const char *name)
 {
-  return hash (name, strlen (name)) % HASHSIZE;
+  return fast_hash (name, strlen (name)) % HASHSIZE;
 }
 
 /* Initializer for this module.  */
diff --git a/gdb/utils.h b/gdb/utils.h
index 79c8a6fc8d..68376dac83 100644
--- a/gdb/utils.h
+++ b/gdb/utils.h
@@ -571,17 +571,18 @@  extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
 			  const gdb_byte *source, ULONGEST source_offset,
 			  ULONGEST nbits, int bits_big_endian);
 
-/* A fast hashing function.  This can be used to hash strings in a fast way
+/* A fast hashing function.  This can be used to hash data in a fast way
    when the length is known.  If no fast hashing library is available, falls
-   back to iterative_hash from libiberty.  */
+   back to iterative_hash from libiberty.  START_VALUE can be set to
+   continue hashing from a previous value.  */
 
 static inline unsigned int
-fast_hash (const char* str, size_t len)
+fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)
 {
 #ifdef HAVE_LIBXXHASH
-  return XXH64 (str, len, 0);
+  return XXH64 (ptr, len, start_value);
 #else
-  return iterative_hash (str, len, 0);
+  return iterative_hash (ptr, len, start_value);
 #endif
 }