Message ID | 20191203010207.63155-1-cbiesinger@google.com |
---|---|
State | New |
Headers | show |
On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote: > This function is not just slower than xxhash, it is slower than > even libiberty's iterative_hash, so there does not seem to be > a reason for it to exist. > > ------------------------------------------------------------ > Benchmark Time CPU Iterations > ------------------------------------------------------------ > BM_xxh3 11 ns 11 ns 66127192 > BM_xxh32 19 ns 19 ns 36792609 > BM_xxh64 16 ns 16 ns 42941328 > BM_city32 26 ns 26 ns 27028370 > BM_city64 17 ns 17 ns 40472793 > BM_iterative_hash 77 ns 77 ns 9088854 > BM_bcache_hash 125 ns 125 ns 5599232 > > gdb/ChangeLog: > > 2019-12-02 Christian Biesinger <cbiesinger@google.com> > > * bcache.c (hash): Remove. > (hash_continue): Remove. > * bcache.h (hash): Remove. > (hash_continue): Remove. > (struct bcache) <ctor>: Update. > * psymtab.c (psymbol_hash): Update. > * stabsread.c (hashname): Update. > * utils.h (fast_hash): Add an argument for a start value, > defaulting to zero. LGTM, with the nits below fixed. > diff --git a/gdb/bcache.h b/gdb/bcache.h > index 15dcc63440..96f6d6813f 100644 > --- a/gdb/bcache.h > +++ b/gdb/bcache.h > @@ -138,13 +138,12 @@ > > struct bstring; > > -/* The hash functions */ > -extern unsigned long hash (const void *addr, int length); > -extern unsigned long hash_continue (const void *addr, int length, > - unsigned long h); > - > struct bcache > { > + static unsigned long default_hash (const void *ptr, int length) { Brace on the next line. > + return fast_hash (ptr, length, 0); > + } Can this method be private, just like `compare` is? > + > /* Allocate a bcache. HASH_FN and COMPARE_FN can be used to pass in > custom hash, and compare functions to be used by this bcache. If > HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is This line of documentation should be updated, probably hash -> fast_hash. > diff --git a/gdb/utils.h b/gdb/utils.h > index 79c8a6fc8d..68376dac83 100644 > --- a/gdb/utils.h > +++ b/gdb/utils.h > @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset, > const gdb_byte *source, ULONGEST source_offset, > ULONGEST nbits, int bits_big_endian); > > -/* A fast hashing function. This can be used to hash strings in a fast way > +/* A fast hashing function. This can be used to hash data in a fast way > when the length is known. If no fast hashing library is available, falls > - back to iterative_hash from libiberty. */ > + back to iterative_hash from libiberty. START_VALUE can be set to > + continue hashing from a previous value. */ > > static inline unsigned int > -fast_hash (const char* str, size_t len) > +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0) - const void* ptr + const void *ptr Simon
On Tue, Dec 3, 2019 at 2:36 PM Simon Marchi <simark@simark.ca> wrote: > > On 2019-12-02 8:02 p.m., Christian Biesinger via gdb-patches wrote: > > This function is not just slower than xxhash, it is slower than > > even libiberty's iterative_hash, so there does not seem to be > > a reason for it to exist. > > > > ------------------------------------------------------------ > > Benchmark Time CPU Iterations > > ------------------------------------------------------------ > > BM_xxh3 11 ns 11 ns 66127192 > > BM_xxh32 19 ns 19 ns 36792609 > > BM_xxh64 16 ns 16 ns 42941328 > > BM_city32 26 ns 26 ns 27028370 > > BM_city64 17 ns 17 ns 40472793 > > BM_iterative_hash 77 ns 77 ns 9088854 > > BM_bcache_hash 125 ns 125 ns 5599232 > > > > gdb/ChangeLog: > > > > 2019-12-02 Christian Biesinger <cbiesinger@google.com> > > > > * bcache.c (hash): Remove. > > (hash_continue): Remove. > > * bcache.h (hash): Remove. > > (hash_continue): Remove. > > (struct bcache) <ctor>: Update. > > * psymtab.c (psymbol_hash): Update. > > * stabsread.c (hashname): Update. > > * utils.h (fast_hash): Add an argument for a start value, > > defaulting to zero. > > LGTM, with the nits below fixed. > > > diff --git a/gdb/bcache.h b/gdb/bcache.h > > index 15dcc63440..96f6d6813f 100644 > > --- a/gdb/bcache.h > > +++ b/gdb/bcache.h > > @@ -138,13 +138,12 @@ > > > > struct bstring; > > > > -/* The hash functions */ > > -extern unsigned long hash (const void *addr, int length); > > -extern unsigned long hash_continue (const void *addr, int length, > > - unsigned long h); > > - > > struct bcache > > { > > + static unsigned long default_hash (const void *ptr, int length) { > > Brace on the next line. Done. > > + return fast_hash (ptr, length, 0); > > + } > > Can this method be private, just like `compare` is? Done. > > + > > /* Allocate a bcache. HASH_FN and COMPARE_FN can be used to pass in > > custom hash, and compare functions to be used by this bcache. If > > HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is > > This line of documentation should be updated, probably hash -> fast_hash. Done. > > diff --git a/gdb/utils.h b/gdb/utils.h > > index 79c8a6fc8d..68376dac83 100644 > > --- a/gdb/utils.h > > +++ b/gdb/utils.h > > @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset, > > const gdb_byte *source, ULONGEST source_offset, > > ULONGEST nbits, int bits_big_endian); > > > > -/* A fast hashing function. This can be used to hash strings in a fast way > > +/* A fast hashing function. This can be used to hash data in a fast way > > when the length is known. If no fast hashing library is available, falls > > - back to iterative_hash from libiberty. */ > > + back to iterative_hash from libiberty. START_VALUE can be set to > > + continue hashing from a previous value. */ > > > > static inline unsigned int > > -fast_hash (const char* str, size_t len) > > +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0) > > - const void* ptr > + const void *ptr Done, thanks. Will push now with those fixes. Christian
diff --git a/gdb/bcache.c b/gdb/bcache.c index 3f0a63be22..497efe96cb 100644 --- a/gdb/bcache.c +++ b/gdb/bcache.c @@ -51,31 +51,6 @@ struct bstring d; }; -/* The old hash function was stolen from SDBM. This is what DB 3.0 - uses now, and is better than the old one. */ - -unsigned long -hash(const void *addr, int length) -{ - return hash_continue (addr, length, 0); -} - -/* Continue the calculation of the hash H at the given address. */ - -unsigned long -hash_continue (const void *addr, int length, unsigned long h) -{ - const unsigned char *k, *e; - - k = (const unsigned char *)addr; - e = k+length; - for (; k< e;++k) - { - h *=16777619; - h ^= *k; - } - return (h); -} /* Growing the bcache's hash table. */ diff --git a/gdb/bcache.h b/gdb/bcache.h index 15dcc63440..96f6d6813f 100644 --- a/gdb/bcache.h +++ b/gdb/bcache.h @@ -138,13 +138,12 @@ struct bstring; -/* The hash functions */ -extern unsigned long hash (const void *addr, int length); -extern unsigned long hash_continue (const void *addr, int length, - unsigned long h); - struct bcache { + static unsigned long default_hash (const void *ptr, int length) { + return fast_hash (ptr, length, 0); + } + /* Allocate a bcache. HASH_FN and COMPARE_FN can be used to pass in custom hash, and compare functions to be used by this bcache. If HASH_FUNCTION is NULL hash() is used and if COMPARE_FUNCTION is @@ -154,7 +153,7 @@ struct bcache int length) = nullptr, int (*compare_fn)(const void *, const void *, int length) = nullptr) - : m_hash_function (hash_fn == nullptr ? hash : hash_fn), + : m_hash_function (hash_fn == nullptr ? default_hash : hash_fn), m_compare_function (compare_fn == nullptr ? compare : compare_fn) { } diff --git a/gdb/psymtab.c b/gdb/psymtab.c index 7074a32956..2cbc6d4f65 100644 --- a/gdb/psymtab.c +++ b/gdb/psymtab.c @@ -1530,14 +1530,13 @@ psymbol_hash (const void *addr, int length) unsigned int domain = psymbol->domain; unsigned int theclass = psymbol->aclass; - h = hash_continue (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h); - h = hash_continue (&lang, sizeof (unsigned int), h); - h = hash_continue (&domain, sizeof (unsigned int), h); - h = hash_continue (&theclass, sizeof (unsigned int), h); + h = fast_hash (&psymbol->ginfo.value, sizeof (psymbol->ginfo.value), h); + h = fast_hash (&lang, sizeof (unsigned int), h); + h = fast_hash (&domain, sizeof (unsigned int), h); + h = fast_hash (&theclass, sizeof (unsigned int), h); /* Note that psymbol names are interned via symbol_set_names, so there's no need to hash the contents of the name here. */ - h = hash_continue (&psymbol->ginfo.name, - sizeof (psymbol->ginfo.name), h); + h = fast_hash (&psymbol->ginfo.name, sizeof (psymbol->ginfo.name), h); return h; } diff --git a/gdb/stabsread.c b/gdb/stabsread.c index 979df0266c..91a73dd10d 100644 --- a/gdb/stabsread.c +++ b/gdb/stabsread.c @@ -4778,7 +4778,7 @@ find_name_end (const char *name) int hashname (const char *name) { - return hash (name, strlen (name)) % HASHSIZE; + return fast_hash (name, strlen (name)) % HASHSIZE; } /* Initializer for this module. */ diff --git a/gdb/utils.h b/gdb/utils.h index 79c8a6fc8d..68376dac83 100644 --- a/gdb/utils.h +++ b/gdb/utils.h @@ -571,17 +571,18 @@ extern void copy_bitwise (gdb_byte *dest, ULONGEST dest_offset, const gdb_byte *source, ULONGEST source_offset, ULONGEST nbits, int bits_big_endian); -/* A fast hashing function. This can be used to hash strings in a fast way +/* A fast hashing function. This can be used to hash data in a fast way when the length is known. If no fast hashing library is available, falls - back to iterative_hash from libiberty. */ + back to iterative_hash from libiberty. START_VALUE can be set to + continue hashing from a previous value. */ static inline unsigned int -fast_hash (const char* str, size_t len) +fast_hash (const void* ptr, size_t len, unsigned int start_value = 0) { #ifdef HAVE_LIBXXHASH - return XXH64 (str, len, 0); + return XXH64 (ptr, len, start_value); #else - return iterative_hash (str, len, 0); + return iterative_hash (ptr, len, start_value); #endif }