[12/22] libctf: rethink strtab writeout

Message ID 20240417202018.34966-13-nick.alcock@oracle.com
State New
Headers
Series more modifiable CTF dicts (and a few bugfixes) |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 fail Patch failed to apply

Commit Message

Nick Alcock April 17, 2024, 8:20 p.m. UTC
  This commit finally adjusts strtab writeout so that repeated writeouts, or
writeouts of a dict that was read in earlier, only sorts the portion of the
strtab that was newly added.

There are three intertwined changes here:

 - pull the contents of strtabs from newly ctf_bufopened dicts into the
   atoms table, so that future additions will reuse the existing offset etc
   rather than adding new identical strings
 - allow the internal ctf_bufopen done by serialization to contribute its
   existing atoms table, so that existing atoms can be used for the
   remainder of the open process (like name table construction): this atoms
   table currente gets thrown away in the mass reassignment done later in
   ctf_serialize in any case, but it needs to be there during the open.
 - rewrite ctf_str_write_strtab so that a) it uses iterators rather than
   ctf_*_iter, reducing pointless structures which serve no other purpose
   than to implement ordinary variable scope, but more clunkily, and b)
   retains the existing strtab on the front of the new one, with its sort
   retained, rather than resorting, so all existing already-written strtab
   offsets remain valid across the call.

This latter change finally permits repeated serializations, and
reserializations of ctf_open()ed dicts, to work, but for now we keep the
code that prevents that because serialization is about to change again in a
way that will make it more obvious that doing such things is safe, and we
can take it out then.

(There are also some smaller changes like moving the purge of the refs table
into ctf_str_write_strtab(), since that's where the changes happen that
invalidate it, rather than doing it in ctf_serialize().  We also prohibit
something that has never worked, opening a dict and then reporting symbols
to it via ctf_link_add_strtab() et al: you must do that to newly-created
dicts which have had stuff ctf_link()ed into them.  This is very unlikely
ever to be a problem in practice: linkers just don't do that sort of thing.)

libctf/

	* ctf-create.c (ctf_create): Add (temporary) atoms arg.
	* ctf-impl.h (struct ctf_dict.ctf_dynstrtab): New.
	(ctf_str_create_atoms): Adjust.
	(ctf_str_write_strtab): Likewise.
	(ctf_simple_open_internal): Likewise.
	* ctf-open.c (ctf_simple_open_internal): Add atoms arg.
	(ctf_bufopen): Likewise.
	(ctf_bufopen_internal): Initialize just enough of an
	atoms table: pre-init from the atoms arg if supplied.
	(ctf_simple_open): Adjust.
	* ctf-serialize.c (ctf_serialize): Constify the strtab.
	Move ref list purging into ctf_str_write_strtab.
	Initialize the new dict with the old dict's atoms table.
	Accept the new strtab from ctf_str_write_strtab.
	Adjust for addition of ctf_dynstrtab.
	* ctf-string.c (ctf_strraw_explicit): Improve comments.
	(ctf_str_create_atoms): Prepopulate from an existing atoms table,
	or alternatively pull in all strings from the strtab and turn
	them into atoms.
	(ctf_str_free_atoms): Free the dynstrtab and its strtab.
	(struct ctf_strtab_write_state): Remove.
	(ctf_str_count_strtab): Fold this...
	(ctf_str_populate_sorttab): ... and this...
	(ctf_str_write_strtab): ... into this.  Prepend existing strings
	to the strtab rather than resorting them (and wrecking their
	offsets).  Keep the dynstrtab updated.  Update refs for all
	atoms with refs, whether or not they are strings newly added
	to the strtab.
---
 libctf/ctf-create.c    |   2 +-
 libctf/ctf-impl.h      |   9 +-
 libctf/ctf-open.c      |  20 ++-
 libctf/ctf-serialize.c |  24 +--
 libctf/ctf-string.c    | 391 +++++++++++++++++++++++++++--------------
 5 files changed, 294 insertions(+), 152 deletions(-)
  

Patch

diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c
index e0558d28233..78fb0305c20 100644
--- a/libctf/ctf-create.c
+++ b/libctf/ctf-create.c
@@ -133,7 +133,7 @@  ctf_create (int *errp)
   cts.cts_size = sizeof (hdr);
   cts.cts_entsize = 1;
 
-  if ((fp = ctf_bufopen_internal (&cts, NULL, NULL, NULL, errp)) == NULL)
+  if ((fp = ctf_bufopen_internal (&cts, NULL, NULL, NULL, NULL, errp)) == NULL)
     goto err;
 
   /* These hashes will have been initialized with a starting size of zero,
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index f4611316f50..3eef232bea0 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -396,6 +396,7 @@  struct ctf_dict
   ctf_dynhash_t *ctf_names;	    /* Hash table of remaining type names.  */
   ctf_lookup_t ctf_lookups[5];	    /* Pointers to nametabs for name lookup.  */
   ctf_strs_t ctf_str[2];	    /* Array of string table base and bounds.  */
+  ctf_strs_writable_t *ctf_dynstrtab; /* Dynamically allocated string table, if any. */
   ctf_dynhash_t *ctf_str_atoms;	  /* Hash table of ctf_str_atoms_t.  */
   ctf_dynhash_t *ctf_str_movable_refs; /* Hash table of void * -> ctf_str_atom_ref_t.  */
   uint32_t ctf_str_prov_offset;	  /* Latest provisional offset assigned so far.  */
@@ -734,7 +735,7 @@  extern const char *ctf_strraw (ctf_dict_t *, uint32_t);
 extern const char *ctf_strraw_explicit (ctf_dict_t *, uint32_t,
 					ctf_strs_t *);
 extern const char *ctf_strptr_validate (ctf_dict_t *, uint32_t);
-extern int ctf_str_create_atoms (ctf_dict_t *);
+extern int ctf_str_create_atoms (ctf_dict_t *, ctf_dynhash_t *atoms);
 extern void ctf_str_free_atoms (ctf_dict_t *);
 extern uint32_t ctf_str_add (ctf_dict_t *, const char *);
 extern uint32_t ctf_str_add_ref (ctf_dict_t *, const char *, uint32_t *ref);
@@ -745,7 +746,7 @@  extern int ctf_str_add_external (ctf_dict_t *, const char *, uint32_t offset);
 extern void ctf_str_remove_ref (ctf_dict_t *, const char *, uint32_t *ref);
 extern void ctf_str_rollback (ctf_dict_t *, ctf_snapshot_id_t);
 extern void ctf_str_purge_refs (ctf_dict_t *);
-extern ctf_strs_writable_t ctf_str_write_strtab (ctf_dict_t *);
+extern const ctf_strs_writable_t *ctf_str_write_strtab (ctf_dict_t *);
 
 extern struct ctf_archive_internal *
 ctf_new_archive_internal (int is_archive, int unmap_on_close,
@@ -762,10 +763,10 @@  extern int ctf_flip (ctf_dict_t *, ctf_header_t *, unsigned char *, int);
 extern ctf_dict_t *ctf_simple_open_internal (const char *, size_t, const char *,
 					     size_t, size_t,
 					     const char *, size_t,
-					     ctf_dynhash_t *, int *);
+					     ctf_dynhash_t *, ctf_dynhash_t *, int *);
 extern ctf_dict_t *ctf_bufopen_internal (const ctf_sect_t *, const ctf_sect_t *,
 					 const ctf_sect_t *, ctf_dynhash_t *,
-					 int *);
+					 ctf_dynhash_t *, int *);
 extern int ctf_import_unref (ctf_dict_t *fp, ctf_dict_t *pfp);
 extern int ctf_serialize (ctf_dict_t *);
 
diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c
index 22475465fa8..6d7a276f2cd 100644
--- a/libctf/ctf-open.c
+++ b/libctf/ctf-open.c
@@ -1290,7 +1290,7 @@  ctf_dict_t *ctf_simple_open (const char *ctfsect, size_t ctfsect_size,
 {
   return ctf_simple_open_internal (ctfsect, ctfsect_size, symsect, symsect_size,
 				   symsect_entsize, strsect, strsect_size, NULL,
-				   errp);
+				   NULL, errp);
 }
 
 /* Open a CTF file, mocking up a suitable ctf_sect and overriding the external
@@ -1300,7 +1300,8 @@  ctf_dict_t *ctf_simple_open_internal (const char *ctfsect, size_t ctfsect_size,
 				      const char *symsect, size_t symsect_size,
 				      size_t symsect_entsize,
 				      const char *strsect, size_t strsect_size,
-				      ctf_dynhash_t *syn_strtab, int *errp)
+				      ctf_dynhash_t *syn_strtab,
+				      ctf_dynhash_t *atoms, int *errp)
 {
   ctf_sect_t skeleton;
 
@@ -1338,7 +1339,7 @@  ctf_dict_t *ctf_simple_open_internal (const char *ctfsect, size_t ctfsect_size,
     }
 
   return ctf_bufopen_internal (ctfsectp, symsectp, strsectp, syn_strtab,
-			       errp);
+			       atoms, errp);
 }
 
 /* Decode the specified CTF buffer and optional symbol table, and create a new
@@ -1350,7 +1351,7 @@  ctf_dict_t *
 ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
 	     const ctf_sect_t *strsect, int *errp)
 {
-  return ctf_bufopen_internal (ctfsect, symsect, strsect, NULL, errp);
+  return ctf_bufopen_internal (ctfsect, symsect, strsect, NULL, NULL, errp);
 }
 
 /* Like ctf_bufopen, but overriding the external strtab with a synthetic one.  */
@@ -1358,7 +1359,7 @@  ctf_bufopen (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
 ctf_dict_t *
 ctf_bufopen_internal (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
 		      const ctf_sect_t *strsect, ctf_dynhash_t *syn_strtab,
-		      int *errp)
+		      ctf_dynhash_t *atoms, int *errp)
 {
   const ctf_preamble_t *pp;
   size_t hdrsz = sizeof (ctf_header_t);
@@ -1615,7 +1616,14 @@  ctf_bufopen_internal (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
      ctf_set_base().  */
 
   ctf_set_version (fp, hp, hp->cth_version);
-  if (ctf_str_create_atoms (fp) < 0)
+
+  /* Temporary assignment, just enough to be able to initialize
+     the atoms table.  */
+
+  fp->ctf_str[CTF_STRTAB_0].cts_strs = (const char *) fp->ctf_buf
+    + hp->cth_stroff;
+  fp->ctf_str[CTF_STRTAB_0].cts_len = hp->cth_strlen;
+  if (ctf_str_create_atoms (fp, atoms) < 0)
     {
       err = ENOMEM;
       goto bad;
diff --git a/libctf/ctf-serialize.c b/libctf/ctf-serialize.c
index 6355d4225eb..82e5b7d705b 100644
--- a/libctf/ctf-serialize.c
+++ b/libctf/ctf-serialize.c
@@ -955,7 +955,7 @@  ctf_serialize (ctf_dict_t *fp)
   ctf_header_t hdr, *hdrp;
   ctf_dvdef_t *dvd;
   ctf_varent_t *dvarents;
-  ctf_strs_writable_t strtab;
+  const ctf_strs_writable_t *strtab;
   int err;
   int sym_functions = 0;
 
@@ -1090,36 +1090,34 @@  ctf_serialize (ctf_dict_t *fp)
   assert (t == (unsigned char *) buf + sizeof (ctf_header_t) + hdr.cth_stroff);
 
   /* Construct the final string table and fill out all the string refs with the
-     final offsets.  Then purge the refs list, because we're about to move this
-     strtab onto the end of the buf, invalidating all the offsets.  */
-  strtab = ctf_str_write_strtab (fp);
-  ctf_str_purge_refs (fp);
+     final offsets.  */
 
-  if (strtab.cts_strs == NULL)
+  strtab = ctf_str_write_strtab (fp);
+
+  if (strtab == NULL)
     goto oom;
 
   /* Now the string table is constructed, we can sort the buffer of
      ctf_varent_t's.  */
-  ctf_sort_var_arg_cb_t sort_var_arg = { fp, (ctf_strs_t *) &strtab };
+  ctf_sort_var_arg_cb_t sort_var_arg = { fp, (ctf_strs_t *) strtab };
   ctf_qsort_r (dvarents, nvars, sizeof (ctf_varent_t), ctf_sort_var,
 	       &sort_var_arg);
 
-  if ((newbuf = realloc (buf, buf_size + strtab.cts_len)) == NULL)
+  if ((newbuf = realloc (buf, buf_size + strtab->cts_len)) == NULL)
     goto oom;
 
   buf = newbuf;
-  memcpy (buf + buf_size, strtab.cts_strs, strtab.cts_len);
+  memcpy (buf + buf_size, strtab->cts_strs, strtab->cts_len);
   hdrp = (ctf_header_t *) buf;
-  hdrp->cth_strlen = strtab.cts_len;
+  hdrp->cth_strlen = strtab->cts_len;
   buf_size += hdrp->cth_strlen;
-  free (strtab.cts_strs);
 
   /* Finally, we are ready to ctf_simple_open() the new dict.  If this is
      successful, we then switch nfp and fp and free the old dict.  */
 
   if ((nfp = ctf_simple_open_internal ((char *) buf, buf_size, NULL, 0,
 				       0, NULL, 0, fp->ctf_syn_ext_strtab,
-				       &err)) == NULL)
+				       fp->ctf_str_atoms, &err)) == NULL)
     {
       free (buf);
       return (ctf_set_errno (fp, err));
@@ -1189,9 +1187,11 @@  ctf_serialize (ctf_dict_t *fp)
   ctf_str_free_atoms (nfp);
   nfp->ctf_str_atoms = fp->ctf_str_atoms;
   nfp->ctf_prov_strtab = fp->ctf_prov_strtab;
+  nfp->ctf_dynstrtab = fp->ctf_dynstrtab;
   nfp->ctf_str_movable_refs = fp->ctf_str_movable_refs;
   fp->ctf_str_atoms = NULL;
   fp->ctf_prov_strtab = NULL;
+  fp->ctf_dynstrtab = NULL;
   fp->ctf_str_movable_refs = NULL;
   memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t));
   memset (&fp->ctf_errs_warnings, 0, sizeof (ctf_list_t));
diff --git a/libctf/ctf-string.c b/libctf/ctf-string.c
index f25cd3abdeb..ccf36498eb9 100644
--- a/libctf/ctf-string.c
+++ b/libctf/ctf-string.c
@@ -20,10 +20,14 @@ 
 #include <assert.h>
 #include <ctf-impl.h>
 #include <string.h>
-#include <assert.h>
 
-/* Convert an encoded CTF string name into a pointer to a C string, using an
-  explicit internal strtab rather than the fp-based one.  */
+static ctf_str_atom_t *
+ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
+			  int flags, uint32_t *ref);
+
+/* Convert an encoded CTF string name into a pointer to a C string, possibly
+  using an explicit internal provisional strtab rather than the fp-based
+  one.  */
 const char *
 ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
 {
@@ -32,18 +36,20 @@  ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
   if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
     ctsp = strtab;
 
-  /* If this name is in the external strtab, and there is a synthetic strtab,
-     use it in preference.  */
+  /* If this name is in the external strtab, and there is a synthetic
+     strtab, use it in preference.  (This is used to add the set of strings
+     -- symbol names, etc -- the linker knows about before the strtab is
+     written out.)  */
 
   if (CTF_NAME_STID (name) == CTF_STRTAB_1
       && fp->ctf_syn_ext_strtab != NULL)
     return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
 			       (void *) (uintptr_t) name);
 
-  /* If the name is in the internal strtab, and the offset is beyond the end of
-     the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional
-     string added by ctf_str_add*() but not yet built into a real strtab: get
-     the value out of the ctf_prov_strtab.  */
+  /* If the name is in the internal strtab, and the name offset is beyond
+     the end of the ctsp->cts_len but below the ctf_str_prov_offset, this is
+     a provisional string added by ctf_str_add*() but not yet built into a
+     real strtab: get the value out of the ctf_prov_strtab.  */
 
   if (CTF_NAME_STID (name) == CTF_STRTAB_0
       && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
@@ -133,13 +139,25 @@  ctf_str_free_atom (void *a)
 }
 
 /* Create the atoms table.  There is always at least one atom in it, the null
-   string.  */
+   string: but also pull in atoms from the internal strtab.  (We rely on
+   calls to ctf_str_add_external to populate external strtab entries, since
+   these are often not quite the same as what appears in any external
+   strtab, and the external strtab is often huge and best not aggressively
+   pulled in.)
+
+   Alternatively, if passed, populate atoms from the passed-in table, but do
+   not propagate their flags or refs: they are all non-freeable and
+   non-movable.  (This is used when serializing a dict: this entire atoms
+   table will be thrown away shortly, so it is important that we not create
+   any new strings.)  */
 int
-ctf_str_create_atoms (ctf_dict_t *fp)
+ctf_str_create_atoms (ctf_dict_t *fp, ctf_dynhash_t *atoms)
 {
+  size_t i;
+
   fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
-					  free, ctf_str_free_atom);
-  if (fp->ctf_str_atoms == NULL)
+					  NULL, ctf_str_free_atom);
+  if (!fp->ctf_str_atoms)
     return -ENOMEM;
 
   if (!fp->ctf_prov_strtab)
@@ -160,6 +178,63 @@  ctf_str_create_atoms (ctf_dict_t *fp)
   if (errno == ENOMEM)
     goto oom_str_add;
 
+  /* Serializing.  We have existing strings in an existing atoms table with
+     possibly-live pointers to them which must be used unchanged.  Import
+     them into this atoms table.  */
+
+  if (atoms)
+    {
+      ctf_next_t *it = NULL;
+      void *k, *v;
+      int err;
+
+      while ((err = ctf_dynhash_next (atoms, &it, &k, &v)) == 0)
+	{
+	  ctf_str_atom_t *existing = v;
+	  ctf_str_atom_t *atom;
+
+	  if (existing->csa_str[0] == 0)
+	    continue;
+
+	  if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
+	    goto oom_str_add;
+	  memcpy (atom, existing, sizeof (struct ctf_str_atom));
+	  memset (&atom->csa_refs, 0, sizeof(ctf_list_t));
+	  atom->csa_flags = 0;
+
+	  if (ctf_dynhash_insert (fp->ctf_str_atoms, atom->csa_str, atom) < 0)
+	    {
+	      free (atom);
+	      goto oom_str_add;
+	    }
+	}
+    }
+  else
+    {
+      /* Not serializing.  Pull in all the strings in the strtab as new
+	 atoms.  The provisional strtab must be empty at this point, so
+	 there is no need to populate atoms from it as well.  Types in this
+	 subset are frozen and readonly, so the refs list and movable refs
+	 list need not be populated.  */
+
+      for (i = 0; i < fp->ctf_str[CTF_STRTAB_0].cts_len;
+	   i += strlen (&fp->ctf_str[CTF_STRTAB_0].cts_strs[i]) + 1)
+	{
+	  ctf_str_atom_t *atom;
+
+	  if (fp->ctf_str[CTF_STRTAB_0].cts_strs[i] == 0)
+	    continue;
+
+	  atom = ctf_str_add_ref_internal (fp, &fp->ctf_str[CTF_STRTAB_0].cts_strs[i],
+					   0, 0);
+
+	  if (!atom)
+	    goto oom_str_add;
+
+	  atom->csa_offset = i;
+	}
+    }
+
   return 0;
 
  oom_str_add:
@@ -181,6 +256,11 @@  ctf_str_free_atoms (ctf_dict_t *fp)
   ctf_dynhash_destroy (fp->ctf_prov_strtab);
   ctf_dynhash_destroy (fp->ctf_str_atoms);
   ctf_dynhash_destroy (fp->ctf_str_movable_refs);
+  if (fp->ctf_dynstrtab)
+    {
+      free (fp->ctf_dynstrtab->cts_strs);
+      free (fp->ctf_dynstrtab);
+    }
 }
 
 #define CTF_STR_ADD_REF 0x1
@@ -537,69 +617,6 @@  ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
     *(ref->caf_ref) = value;
 }
 
-/* State shared across the strtab write process.  */
-typedef struct ctf_strtab_write_state
-{
-  /* Strtab we are writing, and the number of strings in it.  */
-  ctf_strs_writable_t *strtab;
-  size_t strtab_count;
-
-  /* Pointers to (existing) atoms in the atoms table, for qsorting.  */
-  ctf_str_atom_t **sorttab;
-
-  /* Loop counter for sorttab population.  */
-  size_t i;
-
-  /* The null-string atom (skipped during population).  */
-  ctf_str_atom_t *nullstr;
-} ctf_strtab_write_state_t;
-
-/* Count the number of entries in the strtab, and its length.  */
-static void
-ctf_str_count_strtab (void *key _libctf_unused_, void *value,
-	      void *arg)
-{
-  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
-  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
-
-  /* We only factor in the length of items that have no offset and have refs:
-     other items are in the external strtab, or will simply not be written out
-     at all.  They still contribute to the total count, though, because we still
-     have to sort them.  We add in the null string's length explicitly, outside
-     this function, since it is explicitly written out even if it has no refs at
-     all.  */
-
-  if (s->nullstr == atom)
-    {
-      s->strtab_count++;
-      return;
-    }
-
-  if (!ctf_list_empty_p (&atom->csa_refs))
-    {
-      if (!atom->csa_external_offset)
-	s->strtab->cts_len += strlen (atom->csa_str) + 1;
-      s->strtab_count++;
-    }
-}
-
-/* Populate the sorttab with pointers to the strtab atoms.  */
-static void
-ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
-		  void *arg)
-{
-  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
-  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
-
-  /* Skip the null string.  */
-  if (s->nullstr == atom)
-    return;
-
-  /* Skip atoms with no refs.  */
-  if (!ctf_list_empty_p (&atom->csa_refs))
-    s->sorttab[s->i++] = atom;
-}
-
 /* Sort the strtab.  */
 static int
 ctf_str_sort_strtab (const void *a, const void *b)
@@ -611,79 +628,182 @@  ctf_str_sort_strtab (const void *a, const void *b)
 }
 
 /* Write out and return a strtab containing all strings with recorded refs,
-   adjusting the refs to refer to the corresponding string.  The returned strtab
-   may be NULL on error.  Also populate the synthetic strtab with mappings from
-   external strtab offsets to names, so we can look them up with ctf_strptr().
-   Only external strtab offsets with references are added.  */
-ctf_strs_writable_t
+   adjusting the refs to refer to the corresponding string.  The returned
+   strtab is already assigned to strtab 0 in this dict, is owned by this
+   dict, and may be NULL on error.  Also populate the synthetic strtab with
+   mappings from external strtab offsets to names, so we can look them up
+   with ctf_strptr().  Only external strtab offsets with references are
+   added.
+
+   As a side effect, replaces the strtab of the current dict with the newly-
+   generated strtab.  This is an exception to the general rule that
+   serialization does not change the dict passed in, because the alternative
+   is to copy the entire atoms table on every reserialization just to avoid
+   modifying the original, which is excessively costly for minimal gain.
+
+   We use the lazy man's approach and double memory costs by always storing
+   atoms as individually allocated entities whenever they come from anywhere
+   but a freshly-opened, mmapped dict, even though after serialization there
+   is another copy in the strtab; this ensures that ctf_strptr()-returned
+   pointers to them remain valid for the lifetime of the dict.
+
+   This is all rendered more complex because if a dict is ctf_open()ed it
+   will have a bunch of strings in its strtab already, and their strtab
+   offsets can never change (without piles of complexity to rescan the
+   entire dict just to get all the offsets to all of them into the atoms
+   table).  Entries below the existing strtab limit are just copied into the
+   new dict: entries above it are new, and are are sorted first, then
+   appended to it.  The sorting is purely a compression-efficiency
+   improvement, and we get nearly as good an improvement from sorting big
+   chunks like this as we would from sorting the whole thing.  */
+
+const ctf_strs_writable_t *
 ctf_str_write_strtab (ctf_dict_t *fp)
 {
-  ctf_strs_writable_t strtab;
-  ctf_str_atom_t *nullstr;
+  ctf_strs_writable_t *strtab;
+  size_t strtab_count = 0;
   uint32_t cur_stroff = 0;
-  ctf_strtab_write_state_t s;
   ctf_str_atom_t **sorttab;
+  ctf_next_t *it = NULL;
   size_t i;
+  void *v;
+  int err;
+  int new_strtab = 0;
   int any_external = 0;
 
-  memset (&strtab, 0, sizeof (struct ctf_strs_writable));
-  memset (&s, 0, sizeof (struct ctf_strtab_write_state));
-  s.strtab = &strtab;
+  strtab = calloc (1, sizeof (ctf_strs_writable_t));
+  if (!strtab)
+    return NULL;
 
-  nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, "");
-  if (!nullstr)
+  /* The strtab contains the existing string table at its start: figure out
+     how many new strings we need to add.  We only need to add new strings
+     that have no external offset, that have refs, and that are found in the
+     provisional strtab.  If the existing strtab is empty we also need to
+     add the null string at its start.  */
+
+  strtab->cts_len = fp->ctf_str[CTF_STRTAB_0].cts_len;
+
+  if (strtab->cts_len == 0)
     {
-      ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab"));
-      strtab.cts_strs = NULL;
-      return strtab;
+      new_strtab = 1;
+      strtab->cts_len++; 			/* For the \0.  */
     }
 
-  s.nullstr = nullstr;
-  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s);
-  strtab.cts_len++;				/* For the null string.  */
+  /* Count new entries in the strtab: i.e. entries in the provisional
+     strtab.  Ignore any entry for \0, entries which ended up in the
+     external strtab, and unreferenced entries.  */
 
-  ctf_dprintf ("%lu bytes of strings in strtab.\n",
-	       (unsigned long) strtab.cts_len);
+  while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
+    {
+      const char *str = (const char *) v;
+      ctf_str_atom_t *atom;
 
-  /* Sort the strtab.  Force the null string to be first.  */
-  sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *));
+      atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
+      if (!ctf_assert (fp, atom))
+	goto err_strtab;
+
+      if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
+	  atom->csa_external_offset)
+	continue;
+
+      strtab->cts_len += strlen (atom->csa_str) + 1;
+      strtab_count++;
+    }
+  if (err != ECTF_NEXT_END)
+    {
+      ctf_dprintf ("ctf_str_write_strtab: error counting strtab entries: %s\n",
+		   ctf_errmsg (err));
+      goto err_strtab;
+    }
+
+  ctf_dprintf ("%lu bytes of strings in strtab: %lu pre-existing.\n",
+	       (unsigned long) strtab->cts_len,
+	       (unsigned long) fp->ctf_str[CTF_STRTAB_0].cts_len);
+
+  /* Sort the new part of the strtab.  */
+
+  sorttab = calloc (strtab_count, sizeof (ctf_str_atom_t *));
   if (!sorttab)
-    goto oom;
+    {
+      ctf_set_errno (fp, ENOMEM);
+      goto err_strtab;
+    }
 
-  sorttab[0] = nullstr;
-  s.i = 1;
-  s.sorttab = sorttab;
-  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s);
+  i = 0;
+  while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
+    {
+      ctf_str_atom_t *atom;
 
-  qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *),
+      atom = ctf_dynhash_lookup (fp->ctf_str_atoms, v);
+      if (!ctf_assert (fp, atom))
+	goto err_sorttab;
+
+      if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
+	  atom->csa_external_offset)
+	continue;
+
+      sorttab[i++] = atom;
+    }
+
+  qsort (sorttab, strtab_count, sizeof (ctf_str_atom_t *),
 	 ctf_str_sort_strtab);
 
-  if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL)
-    goto oom_sorttab;
+  if ((strtab->cts_strs = malloc (strtab->cts_len)) == NULL)
+    goto err_sorttab;
 
-  /* Update all refs: also update the strtab appropriately.  */
-  for (i = 0; i < s.strtab_count; i++)
+  cur_stroff = fp->ctf_str[CTF_STRTAB_0].cts_len;
+
+  if (new_strtab)
     {
-      if (sorttab[i]->csa_external_offset)
-	{
-	  /* External strtab entry.  */
+      strtab->cts_strs[0] = 0;
+      cur_stroff++;
+    }
+  else
+    memcpy (strtab->cts_strs, fp->ctf_str[CTF_STRTAB_0].cts_strs,
+	    fp->ctf_str[CTF_STRTAB_0].cts_len);
 
-	  any_external = 1;
-	  ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset);
-	  sorttab[i]->csa_offset = sorttab[i]->csa_external_offset;
-	}
-      else
-	{
-	  /* Internal strtab entry with refs: actually add to the string
-	     table.  */
+  /* Work over the sorttab, add its strings to the strtab, and remember
+     where they are in the csa_offset for the appropriate atom.  No ref
+     updating is done at this point, because refs might well relate to
+     already-existing strings, or external strings, which do not need adding
+     to the strtab and may not be in the sorttab.  */
 
-	  ctf_str_update_refs (sorttab[i], cur_stroff);
-	  sorttab[i]->csa_offset = cur_stroff;
-	  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
-	  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
-	}
+  for (i = 0; i < strtab_count; i++)
+    {
+      sorttab[i]->csa_offset = cur_stroff;
+      strcpy (&strtab->cts_strs[cur_stroff], sorttab[i]->csa_str);
+      cur_stroff += strlen (sorttab[i]->csa_str) + 1;
     }
   free (sorttab);
+  sorttab = NULL;
+
+  /* Update all refs, then purge them as no longer necessary: also update
+     the strtab appropriately.  */
+
+  while ((err = ctf_dynhash_next (fp->ctf_str_atoms, &it, NULL, &v)) == 0)
+    {
+      ctf_str_atom_t *atom = (ctf_str_atom_t *) v;
+      uint32_t offset;
+
+      if (ctf_list_empty_p (&atom->csa_refs))
+	continue;
+
+      if (atom->csa_external_offset)
+	{
+	  any_external = 1;
+	  offset = atom->csa_external_offset;
+	}
+      else
+	offset = atom->csa_offset;
+      ctf_str_update_refs (atom, offset);
+    }
+  if (err != ECTF_NEXT_END)
+    {
+      ctf_dprintf ("ctf_str_write_strtab: error iterating over atoms while updating refs: %s\n",
+		   ctf_errmsg (err));
+      goto err_strtab;
+    }
+  ctf_str_purge_refs (fp);
 
   if (!any_external)
     {
@@ -691,16 +811,29 @@  ctf_str_write_strtab (ctf_dict_t *fp)
       fp->ctf_syn_ext_strtab = NULL;
     }
 
+  /* Replace the old strtab with the new one in this dict.  */
+
+  if (fp->ctf_dynstrtab)
+    {
+      free (fp->ctf_dynstrtab->cts_strs);
+      free (fp->ctf_dynstrtab);
+    }
+
+  fp->ctf_dynstrtab = strtab;
+  fp->ctf_str[CTF_STRTAB_0].cts_strs = strtab->cts_strs;
+  fp->ctf_str[CTF_STRTAB_0].cts_len = strtab->cts_len;
+
   /* All the provisional strtab entries are now real strtab entries, and
      ctf_strptr() will find them there.  The provisional offset now starts right
      beyond the new end of the strtab.  */
 
   ctf_dynhash_empty (fp->ctf_prov_strtab);
-  fp->ctf_str_prov_offset = strtab.cts_len + 1;
+  fp->ctf_str_prov_offset = strtab->cts_len + 1;
   return strtab;
 
- oom_sorttab:
+ err_sorttab:
   free (sorttab);
- oom:
-  return strtab;
+ err_strtab:
+  free (strtab);
+  return NULL;
 }