Replace hash function from bcache with fast_hash
Commit Message
This function is not just slower than xxhash, it is slower than
even libiberty's iterative_hash, so there does not seem to be
a reason for it to exist.
------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------
BM_xxh3 11 ns 11 ns 66127192
BM_xxh32 19 ns 19 ns 36792609
BM_xxh64 16 ns 16 ns 42941328
BM_city32 26 ns 26 ns 27028370
BM_city64 17 ns 17 ns 40472793
BM_iterative_hash 77 ns 77 ns 9088854
BM_bcache_hash 125 ns 125 ns 5599232
gdb/ChangeLog:
2019-12-02 Christian Biesinger <cbiesinger@google.com>
* bcache.c (hash): Remove.
(hash_continue): Remove.
* bcache.h (hash): Remove.
(hash_continue): Remove.
(struct bcache) <ctor>: Update.
* psymtab.c (psymbol_hash): Update.
* stabsread.c (hashname): Update.
* utils.h (fast_hash): Add an argument for a start value,
defaulting to zero.
Change-Id: I107f013eda5fdd3293326b5a206be43155dae0f8
---
gdb/bcache.c | 25 -------------------------
gdb/bcache.h | 11 +++++------
gdb/psymtab.c | 11 +++++------
gdb/stabsread.c | 2 +-
gdb/utils.h | 11 ++++++-----
5 files changed, 17 insertions(+), 43 deletions(-)
Comments
On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote:
> This function is not just slower than xxhash, it is slower than
> even libiberty's iterative_hash, so there does not seem to be
> a reason for it to exist.
>
> ------------------------------------------------------------
> Benchmark Time CPU Iterations
> ------------------------------------------------------------
> BM_xxh3 11 ns 11 ns 66127192
> BM_xxh32 19 ns 19 ns 36792609
> BM_xxh64 16 ns 16 ns 42941328
> BM_city32 26 ns 26 ns 27028370
> BM_city64 17 ns 17 ns 40472793
> BM_iterative_hash 77 ns 77 ns 9088854
> BM_bcache_hash 125 ns 125 ns 5599232
>
> gdb/ChangeLog:
>
> 2019-12-02 Christian Biesinger <cbiesinger@google.com>
>
> * bcache.c (hash): Remove.
> (hash_continue): Remove.
> * bcache.h (hash): Remove.
> (hash_continue): Remove.
> (struct bcache) <ctor>: Update.
> * psymtab.c (psymbol_hash): Update.
> * stabsread.c (hashname): Update.
> * utils.h (fast_hash): Add an argument for a start value,
> defaulting to zero.
LGTM, with the nits below fixed.
> diff --git a/gdb/bcache.h b/gdb/bcache.h
> index 15dcc63440..96f6d6813f 100644
> --- a/gdb/bcache.h
> +++ b/gdb/bcache.h
> @@ -138,13 +138,12 @@
>
> struct bstring;
>
> -/* The hash functions */
> -extern unsigned long hash (const void *addr, int length);
> -extern unsigned long hash_continue (const void *addr, int length,
> - unsigned long h);
> -
> struct bcache
> {
> + static unsigned long default_hash (const void *ptr, int length) {
Brace on the next line.
> + return fast_hash (ptr, length, 0);
> + }
Can this method be private, just like `compare` is?
> +
> /* Allocate a bcache. HASH_FN and COMPARE_FN can be used to pass in
> custom hash, and compare functions to be used by this bcache. If
> HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is
This line of documentation should be updated, probably hash -> fast_hash.
> diff --git a/gdb/utils.h b/gdb/utils.h
> index 79c8a6fc8d..68376dac83 100644
> --- a/gdb/utils.h
> +++ b/gdb/utils.h
> @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
> const gdb_byte *source, ULONGEST source_offset,
> ULONGEST nbits, int bits_big_endian);
>
> -/* A fast hashing function. This can be used to hash strings in a fast way
> +/* A fast hashing function. This can be used to hash data in a fast way
> when the length is known. If no fast hashing library is available, falls
> - back to iterative_hash from libiberty. */
> + back to iterative_hash from libiberty. START_VALUE can be set to
> + continue hashing from a previous value. */
>
> static inline unsigned int
> -fast_hash (const char* str, size_t len)
> +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)
- const void* ptr
+ const void *ptr
Simon
On Tue, Dec 3, 2019 at 2:36 PM Simon Marchi <simark@simark.ca> wrote:
>
> On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote:
> > This function is not just slower than xxhash, it is slower than
> > even libiberty's iterative_hash, so there does not seem to be
> > a reason for it to exist.
> >
> > ------------------------------------------------------------
> > Benchmark Time CPU Iterations
> > ------------------------------------------------------------
> > BM_xxh3 11 ns 11 ns 66127192
> > BM_xxh32 19 ns 19 ns 36792609
> > BM_xxh64 16 ns 16 ns 42941328
> > BM_city32 26 ns 26 ns 27028370
> > BM_city64 17 ns 17 ns 40472793
> > BM_iterative_hash 77 ns 77 ns 9088854
> > BM_bcache_hash 125 ns 125 ns 5599232
> >
> > gdb/ChangeLog:
> >
> > 2019-12-02 Christian Biesinger <cbiesinger@google.com>
> >
> > * bcache.c (hash): Remove.
> > (hash_continue): Remove.
> > * bcache.h (hash): Remove.
> > (hash_continue): Remove.
> > (struct bcache) <ctor>: Update.
> > * psymtab.c (psymbol_hash): Update.
> > * stabsread.c (hashname): Update.
> > * utils.h (fast_hash): Add an argument for a start value,
> > defaulting to zero.
>
> LGTM, with the nits below fixed.
>
> > diff --git a/gdb/bcache.h b/gdb/bcache.h
> > index 15dcc63440..96f6d6813f 100644
> > --- a/gdb/bcache.h
> > +++ b/gdb/bcache.h
> > @@ -138,13 +138,12 @@
> >
> > struct bstring;
> >
> > -/* The hash functions */
> > -extern unsigned long hash (const void *addr, int length);
> > -extern unsigned long hash_continue (const void *addr, int length,
> > - unsigned long h);
> > -
> > struct bcache
> > {
> > + static unsigned long default_hash (const void *ptr, int length) {
>
> Brace on the next line.
Done.
> > + return fast_hash (ptr, length, 0);
> > + }
>
> Can this method be private, just like `compare` is?
Done.
> > +
> > /* Allocate a bcache. HASH_FN and COMPARE_FN can be used to pass in
> > custom hash, and compare functions to be used by this bcache. If
> > HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is
>
> This line of documentation should be updated, probably hash -> fast_hash.
Done.
> > diff --git a/gdb/utils.h b/gdb/utils.h
> > index 79c8a6fc8d..68376dac83 100644
> > --- a/gdb/utils.h
> > +++ b/gdb/utils.h
> > @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
> > const gdb_byte *source, ULONGEST source_offset,
> > ULONGEST nbits, int bits_big_endian);
> >
> > -/* A fast hashing function. This can be used to hash strings in a fast way
> > +/* A fast hashing function. This can be used to hash data in a fast way
> > when the length is known. If no fast hashing library is available, falls
> > - back to iterative_hash from libiberty. */
> > + back to iterative_hash from libiberty. START_VALUE can be set to
> > + continue hashing from a previous value. */
> >
> > static inline unsigned int
> > -fast_hash (const char* str, size_t len)
> > +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)
>
> - const void* ptr
> + const void *ptr
Done, thanks.
Will push now with those fixes.
Christian
@@ -51,31 +51,6 @@ struct bstring
d;
};
-/* The old hash function was stolen from SDBM. This is what DB 3.0
- uses now, and is better than the old one. */
-
-unsigned long
-hash(const void *addr, int length)
-{
- return hash_continue (addr, length, 0);
-}
-
-/* Continue the calculation of the hash H at the given address. */
-
-unsigned long
-hash_continue (const void *addr, int length, unsigned long h)
-{
- const unsigned char *k, *e;
-
- k = (const unsigned char *)addr;
- e = k+length;
- for (; k< e;++k)
- {
- h *=16777619;
- h ^= *k;
- }
- return (h);
-}
/* Growing the bcache's hash table. */
@@ -138,13 +138,12 @@
struct bstring;
-/* The hash functions */
-extern unsigned long hash (const void *addr, int length);
-extern unsigned long hash_continue (const void *addr, int length,
- unsigned long h);
-
struct bcache
{
+ static unsigned long default_hash (const void *ptr, int length) {
+ return fast_hash (ptr, length, 0);
+ }
+
/* Allocate a bcache. HASH_FN and COMPARE_FN can be used to pass in
custom hash, and compare functions to be used by this bcache. If
HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is
@@ -154,7 +153,7 @@ struct bcache
int length) = nullptr,
int (*compare_fn)(const void *, const void *,
int length) = nullptr)
- : m_hash_function (hash_fn == nullptr ? hash : hash_fn),
+ : m_hash_function (hash_fn == nullptr ? default_hash : hash_fn),
m_compare_function (compare_fn == nullptr ? compare : compare_fn)
{
}
@@ -1530,14 +1530,13 @@ psymbol_hash (const void *addr, int length)
unsigned int domain = psymbol->domain;
unsigned int theclass = psymbol->aclass;
- h = hash_continue (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h);
- h = hash_continue (&lang, sizeof (unsigned int), h);
- h = hash_continue (&domain, sizeof (unsigned int), h);
- h = hash_continue (&theclass, sizeof (unsigned int), h);
+ h = fast_hash (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h);
+ h = fast_hash (&lang, sizeof (unsigned int), h);
+ h = fast_hash (&domain, sizeof (unsigned int), h);
+ h = fast_hash (&theclass, sizeof (unsigned int), h);
/* Note that psymbol names are interned via symbol_set_names, so
there's no need to hash the contents of the name here. */
- h = hash_continue (&psymbol->ginfo.name,
- sizeof (psymbol->ginfo.name), h);
+ h = fast_hash (&psymbol->ginfo.name, sizeof (psymbol->ginfo.name), h);
return h;
}
@@ -4778,7 +4778,7 @@ find_name_end (const char *name)
int
hashname (const char *name)
{
- return hash (name, strlen (name)) % HASHSIZE;
+ return fast_hash (name, strlen (name)) % HASHSIZE;
}
/* Initializer for this module. */
@@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset,
const gdb_byte *source, ULONGEST source_offset,
ULONGEST nbits, int bits_big_endian);
-/* A fast hashing function. This can be used to hash strings in a fast way
+/* A fast hashing function. This can be used to hash data in a fast way
when the length is known. If no fast hashing library is available, falls
- back to iterative_hash from libiberty. */
+ back to iterative_hash from libiberty. START_VALUE can be set to
+ continue hashing from a previous value. */
static inline unsigned int
-fast_hash (const char* str, size_t len)
+fast_hash (const void* ptr, size_t len, unsigned int start_value = 0)
{
#ifdef HAVE_LIBXXHASH
- return XXH64 (str, len, 0);
+ return XXH64 (ptr, len, start_value);
#else
- return iterative_hash (str, len, 0);
+ return iterative_hash (ptr, len, start_value);
#endif
}