diff mbox series

[07/11] elf: Implement tail merging of strings in ldconfig

Message ID 2323ee8769cd254461f0b43a5a8d094eda5f39af.1604946656.git.fweimer@redhat.com
State Committed
Headers show
Series glibc-hwcaps support | expand

Commit Message

Florian Weimer Nov. 9, 2020, 6:41 p.m. UTC
This simplifies the string table construction in elf/cache.c
because there is no more need to keep track of offsets explicitly;
the string table implementation does this internally.

This change slightly reduces the size of the cache on disk.  The
file format does not change as a result.  The strings are
null-terminated, without explicit length, so tail merging is
transparent to readers.
---
 elf/Makefile |  3 ++-
 elf/cache.c  | 76 ++++++++++++++++++++++++++--------------------------
 2 files changed, 40 insertions(+), 39 deletions(-)

Comments

Adhemerval Zanella Nov. 30, 2020, 6:41 p.m. UTC | #1
On 09/11/2020 15:41, Florian Weimer via Libc-alpha wrote:
> This simplifies the string table construction in elf/cache.c
> because there is no more need to keep track of offsets explicitly;
> the string table implementation does this internally.
> 
> This change slightly reduces the size of the cache on disk.  The
> file format does not change as a result.  The strings are
> null-terminated, without explicit length, so tail merging is
> transparent to readers.

Any idea of the size improvement here?

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
>  elf/Makefile |  3 ++-
>  elf/cache.c  | 76 ++++++++++++++++++++++++++--------------------------
>  2 files changed, 40 insertions(+), 39 deletions(-)
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index 5a8f116a67..e26ac16b44 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -118,7 +118,8 @@ others-static	+= ldconfig
>  others		+= ldconfig
>  install-rootsbin += ldconfig
>  
> -ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs
> +ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs \
> +  stringtable
>  extra-objs	+= $(ldconfig-modules:=.o)
>  others-extras   = $(ldconfig-modules)
>  endif

Ok.

> diff --git a/elf/cache.c b/elf/cache.c
> index 549e04ce21..5a6ee20e86 100644
> --- a/elf/cache.c
> +++ b/elf/cache.c
> @@ -35,11 +35,15 @@
>  #include <ldconfig.h>
>  #include <dl-cache.h>
>  #include <version.h>
> +#include <stringtable.h>
> +
> +/* Used to store library names, paths, and other strings.  */
> +static struct stringtable strings;
>  
>  struct cache_entry
>  {
> -  char *lib;			/* Library name.  */
> -  char *path;			/* Path to find library.  */
> +  struct stringtable_entry *lib; /* Library name.  */
> +  struct stringtable_entry *path; /* Path to find library.  */
>    int flags;			/* Flags to indicate kind of library.  */
>    unsigned int osversion;	/* Required OS version.  */
>    uint64_t hwcap;		/* Important hardware capabilities.  */
> @@ -300,7 +304,7 @@ static int

Ok.

>  compare (const struct cache_entry *e1, const struct cache_entry *e2)
>  {
>    /* We need to swap entries here to get the correct sort order.  */
> -  int res = _dl_cache_libcmp (e2->lib, e1->lib);
> +  int res = _dl_cache_libcmp (e2->lib->string, e1->lib->string);
>    if (res == 0)
>      {
>        if (e1->flags < e2->flags)

Ok.

> @@ -369,26 +373,24 @@ save_cache (const char *cache_name)
>  {
>    /* The cache entries are sorted already, save them in this order. */
>  
> -  /* Count the length of all strings.  */
> -  /* The old format doesn't contain hwcap entries and doesn't contain
> -     libraries in subdirectories with hwcaps entries.  Count therefore
> -     also all entries with hwcap == 0.  */
> -  size_t total_strlen = 0;
>    struct cache_entry *entry;
>    /* Number of cache entries.  */
>    int cache_entry_count = 0;
> -  /* Number of normal cache entries.  */
> +  /* The old format doesn't contain hwcap entries and doesn't contain
> +     libraries in subdirectories with hwcaps entries.  Count therefore
> +     also all entries with hwcap == 0.  */
>    int cache_entry_old_count = 0;
>  
>    for (entry = entries; entry != NULL; entry = entry->next)
>      {
> -      /* Account the final NULs.  */
> -      total_strlen += strlen (entry->lib) + strlen (entry->path) + 2;
>        ++cache_entry_count;
>        if (entry->hwcap == 0)
>  	++cache_entry_old_count;
>      }
>  
> +  struct stringtable_finalized strings_finalized;
> +  stringtable_finalize (&strings, &strings_finalized);
> +
>    /* Create the on disk cache structure.  */
>    struct cache_file *file_entries = NULL;
>    size_t file_entries_size = 0;
> @@ -432,7 +434,7 @@ save_cache (const char *cache_name)
>  	      sizeof CACHE_VERSION - 1);
>  
>        file_entries_new->nlibs = cache_entry_count;
> -      file_entries_new->len_strings = total_strlen;
> +      file_entries_new->len_strings = strings_finalized.size;
>        file_entries_new->flags = cache_file_new_flags_endian_current;
>      }
>  

Ok.

> @@ -449,20 +451,20 @@ save_cache (const char *cache_name)
>      str_offset = 0;
>  
>    /* An array for all strings.  */
> -  char *strings = xmalloc (total_strlen);
> -  char *str = strings;
>    int idx_old;
>    int idx_new;
>  
>    for (idx_old = 0, idx_new = 0, entry = entries; entry != NULL;
>         entry = entry->next, ++idx_new)
>      {
> -      /* First the library.  */
>        if (opt_format != opt_format_new && entry->hwcap == 0)
>  	{
>  	  file_entries->libs[idx_old].flags = entry->flags;
>  	  /* XXX: Actually we can optimize here and remove duplicates.  */
>  	  file_entries->libs[idx_old].key = str_offset + pad;
> +	  file_entries->libs[idx_new].key = str_offset + entry->lib->offset;
> +	  file_entries->libs[idx_new].value
> +	    = str_offset + entry->path->offset;
>  	}
>        if (opt_format != opt_format_old)
>  	{

Ok.

> @@ -473,20 +475,12 @@ save_cache (const char *cache_name)
>  	  file_entries_new->libs[idx_new].flags = entry->flags;
>  	  file_entries_new->libs[idx_new].osversion = entry->osversion;
>  	  file_entries_new->libs[idx_new].hwcap = entry->hwcap;
> -	  file_entries_new->libs[idx_new].key = str_offset;
> +	  file_entries_new->libs[idx_new].key
> +	    = str_offset + entry->lib->offset;
> +	  file_entries_new->libs[idx_new].value
> +	    = str_offset + entry->path->offset;
>  	}
>  
> -      size_t len = strlen (entry->lib) + 1;
> -      str = mempcpy (str, entry->lib, len);
> -      str_offset += len;
> -      /* Then the path.  */
> -      if (opt_format != opt_format_new && entry->hwcap == 0)
> -	file_entries->libs[idx_old].value = str_offset + pad;
> -      if (opt_format != opt_format_old)
> -	file_entries_new->libs[idx_new].value = str_offset;
> -      len = strlen (entry->path) + 1;
> -      str = mempcpy (str, entry->path, len);
> -      str_offset += len;
>        /* Ignore entries with hwcap for old format.  */
>        if (entry->hwcap == 0)
>  	++idx_old;

Ok.

> @@ -511,7 +505,7 @@ save_cache (const char *cache_name)
>  	extension_offset += pad;
>        extension_offset += file_entries_new_size;
>      }
> -  extension_offset += total_strlen;
> +  extension_offset += strings_finalized.size;
>    extension_offset = roundup (extension_offset, 4); /* Provide alignment.  */
>    if (opt_format != opt_format_old)
>      file_entries_new->extension_offset = extension_offset;

Ok.

> @@ -551,7 +545,8 @@ save_cache (const char *cache_name)
>  	error (EXIT_FAILURE, errno, _("Writing of cache data failed"));
>      }
>  
> -  if (write (fd, strings, total_strlen) != (ssize_t) total_strlen)
> +  if (write (fd, strings_finalized.strings, strings_finalized.size)
> +      != (ssize_t) strings_finalized.size)
>      error (EXIT_FAILURE, errno, _("Writing of cache data failed"));
>  
>    if (opt_format != opt_format_old)

Ok.

> @@ -580,7 +575,7 @@ save_cache (const char *cache_name)
>    /* Free all allocated memory.  */
>    free (file_entries_new);
>    free (file_entries);
> -  free (strings);
> +  free (strings_finalized.strings);
>  
>    while (entries)
>      {

Ok.

> @@ -596,14 +591,19 @@ void
>  add_to_cache (const char *path, const char *lib, int flags,
>  	      unsigned int osversion, uint64_t hwcap)
>  {
> -  size_t liblen = strlen (lib) + 1;
> -  size_t len = liblen + strlen (path) + 1;
> -  struct cache_entry *new_entry
> -    = xmalloc (sizeof (struct cache_entry) + liblen + len);
> -
> -  new_entry->lib = memcpy ((char *) (new_entry + 1), lib, liblen);
> -  new_entry->path = new_entry->lib + liblen;
> -  snprintf (new_entry->path, len, "%s/%s", path, lib);
> +  struct cache_entry *new_entry = xmalloc (sizeof (*new_entry));
> +
> +  struct stringtable_entry *path_interned;
> +  {
> +    char *p;
> +    if (asprintf (&p, "%s/%s", path, lib) < 0)
> +      error (EXIT_FAILURE, errno, _("Could not create library path"));
> +    path_interned = stringtable_add (&strings, p);
> +    free (p);
> +  }
> +
> +  new_entry->lib = stringtable_add (&strings, lib);
> +  new_entry->path = path_interned;
>    new_entry->flags = flags;
>    new_entry->osversion = osversion;
>    new_entry->hwcap = hwcap;
> 

Ok.
Florian Weimer Dec. 1, 2020, 10:28 a.m. UTC | #2
* Adhemerval Zanella via Libc-alpha:

> On 09/11/2020 15:41, Florian Weimer via Libc-alpha wrote:
>> This simplifies the string table construction in elf/cache.c
>> because there is no more need to keep track of offsets explicitly;
>> the string table implementation does this internally.
>> 
>> This change slightly reduces the size of the cache on disk.  The
>> file format does not change as a result.  The strings are
>> null-terminated, without explicit length, so tail merging is
>> transparent to readers.
>
> Any idea of the size improvement here?

It's smaller than expected.  Looks the tails aren't actually merged.
I'll figure out what is going on.

Thanks,
Florian
diff mbox series

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 5a8f116a67..e26ac16b44 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -118,7 +118,8 @@  others-static	+= ldconfig
 others		+= ldconfig
 install-rootsbin += ldconfig
 
-ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs
+ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs \
+  stringtable
 extra-objs	+= $(ldconfig-modules:=.o)
 others-extras   = $(ldconfig-modules)
 endif
diff --git a/elf/cache.c b/elf/cache.c
index 549e04ce21..5a6ee20e86 100644
--- a/elf/cache.c
+++ b/elf/cache.c
@@ -35,11 +35,15 @@ 
 #include <ldconfig.h>
 #include <dl-cache.h>
 #include <version.h>
+#include <stringtable.h>
+
+/* Used to store library names, paths, and other strings.  */
+static struct stringtable strings;
 
 struct cache_entry
 {
-  char *lib;			/* Library name.  */
-  char *path;			/* Path to find library.  */
+  struct stringtable_entry *lib; /* Library name.  */
+  struct stringtable_entry *path; /* Path to find library.  */
   int flags;			/* Flags to indicate kind of library.  */
   unsigned int osversion;	/* Required OS version.  */
   uint64_t hwcap;		/* Important hardware capabilities.  */
@@ -300,7 +304,7 @@  static int
 compare (const struct cache_entry *e1, const struct cache_entry *e2)
 {
   /* We need to swap entries here to get the correct sort order.  */
-  int res = _dl_cache_libcmp (e2->lib, e1->lib);
+  int res = _dl_cache_libcmp (e2->lib->string, e1->lib->string);
   if (res == 0)
     {
       if (e1->flags < e2->flags)
@@ -369,26 +373,24 @@  save_cache (const char *cache_name)
 {
   /* The cache entries are sorted already, save them in this order. */
 
-  /* Count the length of all strings.  */
-  /* The old format doesn't contain hwcap entries and doesn't contain
-     libraries in subdirectories with hwcaps entries.  Count therefore
-     also all entries with hwcap == 0.  */
-  size_t total_strlen = 0;
   struct cache_entry *entry;
   /* Number of cache entries.  */
   int cache_entry_count = 0;
-  /* Number of normal cache entries.  */
+  /* The old format doesn't contain hwcap entries and doesn't contain
+     libraries in subdirectories with hwcaps entries.  Count therefore
+     also all entries with hwcap == 0.  */
   int cache_entry_old_count = 0;
 
   for (entry = entries; entry != NULL; entry = entry->next)
     {
-      /* Account the final NULs.  */
-      total_strlen += strlen (entry->lib) + strlen (entry->path) + 2;
       ++cache_entry_count;
       if (entry->hwcap == 0)
 	++cache_entry_old_count;
     }
 
+  struct stringtable_finalized strings_finalized;
+  stringtable_finalize (&strings, &strings_finalized);
+
   /* Create the on disk cache structure.  */
   struct cache_file *file_entries = NULL;
   size_t file_entries_size = 0;
@@ -432,7 +434,7 @@  save_cache (const char *cache_name)
 	      sizeof CACHE_VERSION - 1);
 
       file_entries_new->nlibs = cache_entry_count;
-      file_entries_new->len_strings = total_strlen;
+      file_entries_new->len_strings = strings_finalized.size;
       file_entries_new->flags = cache_file_new_flags_endian_current;
     }
 
@@ -449,20 +451,20 @@  save_cache (const char *cache_name)
     str_offset = 0;
 
   /* An array for all strings.  */
-  char *strings = xmalloc (total_strlen);
-  char *str = strings;
   int idx_old;
   int idx_new;
 
   for (idx_old = 0, idx_new = 0, entry = entries; entry != NULL;
        entry = entry->next, ++idx_new)
     {
-      /* First the library.  */
       if (opt_format != opt_format_new && entry->hwcap == 0)
 	{
 	  file_entries->libs[idx_old].flags = entry->flags;
 	  /* XXX: Actually we can optimize here and remove duplicates.  */
 	  file_entries->libs[idx_old].key = str_offset + pad;
+	  file_entries->libs[idx_new].key = str_offset + entry->lib->offset;
+	  file_entries->libs[idx_new].value
+	    = str_offset + entry->path->offset;
 	}
       if (opt_format != opt_format_old)
 	{
@@ -473,20 +475,12 @@  save_cache (const char *cache_name)
 	  file_entries_new->libs[idx_new].flags = entry->flags;
 	  file_entries_new->libs[idx_new].osversion = entry->osversion;
 	  file_entries_new->libs[idx_new].hwcap = entry->hwcap;
-	  file_entries_new->libs[idx_new].key = str_offset;
+	  file_entries_new->libs[idx_new].key
+	    = str_offset + entry->lib->offset;
+	  file_entries_new->libs[idx_new].value
+	    = str_offset + entry->path->offset;
 	}
 
-      size_t len = strlen (entry->lib) + 1;
-      str = mempcpy (str, entry->lib, len);
-      str_offset += len;
-      /* Then the path.  */
-      if (opt_format != opt_format_new && entry->hwcap == 0)
-	file_entries->libs[idx_old].value = str_offset + pad;
-      if (opt_format != opt_format_old)
-	file_entries_new->libs[idx_new].value = str_offset;
-      len = strlen (entry->path) + 1;
-      str = mempcpy (str, entry->path, len);
-      str_offset += len;
       /* Ignore entries with hwcap for old format.  */
       if (entry->hwcap == 0)
 	++idx_old;
@@ -511,7 +505,7 @@  save_cache (const char *cache_name)
 	extension_offset += pad;
       extension_offset += file_entries_new_size;
     }
-  extension_offset += total_strlen;
+  extension_offset += strings_finalized.size;
   extension_offset = roundup (extension_offset, 4); /* Provide alignment.  */
   if (opt_format != opt_format_old)
     file_entries_new->extension_offset = extension_offset;
@@ -551,7 +545,8 @@  save_cache (const char *cache_name)
 	error (EXIT_FAILURE, errno, _("Writing of cache data failed"));
     }
 
-  if (write (fd, strings, total_strlen) != (ssize_t) total_strlen)
+  if (write (fd, strings_finalized.strings, strings_finalized.size)
+      != (ssize_t) strings_finalized.size)
     error (EXIT_FAILURE, errno, _("Writing of cache data failed"));
 
   if (opt_format != opt_format_old)
@@ -580,7 +575,7 @@  save_cache (const char *cache_name)
   /* Free all allocated memory.  */
   free (file_entries_new);
   free (file_entries);
-  free (strings);
+  free (strings_finalized.strings);
 
   while (entries)
     {
@@ -596,14 +591,19 @@  void
 add_to_cache (const char *path, const char *lib, int flags,
 	      unsigned int osversion, uint64_t hwcap)
 {
-  size_t liblen = strlen (lib) + 1;
-  size_t len = liblen + strlen (path) + 1;
-  struct cache_entry *new_entry
-    = xmalloc (sizeof (struct cache_entry) + liblen + len);
-
-  new_entry->lib = memcpy ((char *) (new_entry + 1), lib, liblen);
-  new_entry->path = new_entry->lib + liblen;
-  snprintf (new_entry->path, len, "%s/%s", path, lib);
+  struct cache_entry *new_entry = xmalloc (sizeof (*new_entry));
+
+  struct stringtable_entry *path_interned;
+  {
+    char *p;
+    if (asprintf (&p, "%s/%s", path, lib) < 0)
+      error (EXIT_FAILURE, errno, _("Could not create library path"));
+    path_interned = stringtable_add (&strings, p);
+    free (p);
+  }
+
+  new_entry->lib = stringtable_add (&strings, lib);
+  new_entry->path = path_interned;
   new_entry->flags = flags;
   new_entry->osversion = osversion;
   new_entry->hwcap = hwcap;