[06/19] libctf: fix CTF dict compression

Message ID 20240730153707.168357-7-nick.alcock@oracle.com
State New
Headers
Series libctf: various bugfixes (including a write into freed memory), and loosen constraints on enums |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 fail Patch failed to apply

Commit Message

Nick Alcock July 30, 2024, 3:36 p.m. UTC
  Commit 483546ce4f3 ("libctf: make ctf_serialize() actually serialize")
accidentally broke dict compression.  There were two bugs:

 - ctf_arc_write_one_ctf was still making its own decision about
   whether to compress the dict via direct ctf_size comparison, which is
   unfortunate because now that it no longer calls ctf_serialize itself,
   ctf_size is always zero when it does this: it should let the writing
   functions decide on the threshold, which they contain code to do which is
   simply not used for lack of one trivial wrapper to write to an fd and
   also provide a compression threshold

 - ctf_write_mem, the function underlying all writing as of the commit
   above, was calling zlib's compressBound and avoiding compression if this
   returned a value larger than the input.  Unfortunately compressBound does
   not do a trial compression and determine whether the result is
   compressible: it just adds zlib header sizes to the value passed in, so
   our test would *always* have concluded that the value was incompressible!
   Avoid by simply always compressing if the raw size is larger than the
   threshold: zlib is quite clever enough to avoid actually compressing
   if the data is incompressible.

Add a testcase for this.

libctf/
	* ctf-impl.h (ctf_write_thresholded): New...
	* ctf-serialize.c (ctf_write_thresholded): ... defined here,
        a wrapper around...
        (ctf_write_mem): ... this.  Don't check compressibility.
	(ctf_compress_write): Reimplement as a ctf_write_thresholded
        wrapper.
	(ctf_write): Likewise.
	* ctf-archive.c (arc_write_one_ctf): Just call
        ctf_write_thresholded rather than trying to work out whether
        to compress.
	* testsuite/libctf-writable/ctf-compressed.*: New test.
---
 libctf/ctf-archive.c                          |   8 +-
 libctf/ctf-impl.h                             |   2 +
 libctf/ctf-serialize.c                        |  51 ++----
 .../libctf-writable/ctf-compressed.c          | 158 ++++++++++++++++++
 .../libctf-writable/ctf-compressed.lk         |   2 +
 5 files changed, 179 insertions(+), 42 deletions(-)
 create mode 100644 libctf/testsuite/libctf-writable/ctf-compressed.c
 create mode 100644 libctf/testsuite/libctf-writable/ctf-compressed.lk
  

Patch

diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c
index c602705b310..0034bf0982a 100644
--- a/libctf/ctf-archive.c
+++ b/libctf/ctf-archive.c
@@ -265,16 +265,10 @@  arc_write_one_ctf (ctf_dict_t *f, int fd, size_t threshold)
   uint64_t ctfsz = 0;
   char *ctfszp;
   size_t ctfsz_len;
-  int (*writefn) (ctf_dict_t * fp, int fd);
 
   if ((off = lseek (fd, 0, SEEK_CUR)) < 0)
     return errno * -1;
 
-  if (f->ctf_size > threshold)
-    writefn = ctf_compress_write;
-  else
-    writefn = ctf_write;
-
   /* This zero-write turns into the size in a moment. */
   ctfsz_len = sizeof (ctfsz);
   ctfszp = (char *) &ctfsz;
@@ -287,7 +281,7 @@  arc_write_one_ctf (ctf_dict_t *f, int fd, size_t threshold)
       ctfszp += writelen;
     }
 
-  if (writefn (f, fd) != 0)
+  if (ctf_write_thresholded (f, fd, threshold) != 0)
     return f->ctf_errno * -1;
 
   if ((end_off = lseek (fd, 0, SEEK_CUR)) < 0)
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index b17a2d8699d..0d88e639884 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -762,6 +762,8 @@  extern int ctf_flip (ctf_dict_t *, ctf_header_t *, unsigned char *, int);
 
 extern int ctf_import_unref (ctf_dict_t *fp, ctf_dict_t *pfp);
 
+extern int ctf_write_thresholded (ctf_dict_t *fp, int fd, size_t threshold);
+
 _libctf_malloc_
 extern void *ctf_mmap (size_t length, size_t offset, int fd);
 extern void ctf_munmap (void *, size_t);
diff --git a/libctf/ctf-serialize.c b/libctf/ctf-serialize.c
index 2d1f014db28..9efdb16e0d6 100644
--- a/libctf/ctf-serialize.c
+++ b/libctf/ctf-serialize.c
@@ -1180,10 +1180,10 @@  ctf_write_mem (ctf_dict_t *fp, size_t *size, size_t threshold)
     alloc_len = compressBound (rawbufsiz - sizeof (ctf_header_t))
       + sizeof (ctf_header_t);
 
-  /* Trivial operation if the buffer is incompressible or too small to bother
-     compressing, and we're not doing a forced write-time flip.  */
+  /* Trivial operation if the buffer is too small to bother compressing, and
+     we're not doing a forced write-time flip.  */
 
-  if (rawbufsiz < threshold || rawbufsiz < alloc_len)
+  if (rawbufsiz < threshold)
     {
       alloc_len = rawbufsiz;
       uncompressed = 1;
@@ -1248,10 +1248,10 @@  err:
   return NULL;
 }
 
-/* Compress the specified CTF data stream and write it to the specified file
-   descriptor.  */
+/* Write the compressed CTF data stream to the specified file descriptor,
+   possibly compressed.  Internal only (for now).  */
 int
-ctf_compress_write (ctf_dict_t *fp, int fd)
+ctf_write_thresholded (ctf_dict_t *fp, int fd, size_t threshold)
 {
   unsigned char *buf;
   unsigned char *bp;
@@ -1260,7 +1260,7 @@  ctf_compress_write (ctf_dict_t *fp, int fd)
   ssize_t len;
   int err = 0;
 
-  if ((buf = ctf_write_mem (fp, &tmp, 0)) == NULL)
+  if ((buf = ctf_write_mem (fp, &tmp, threshold)) == NULL)
     return -1;					/* errno is set for us.  */
 
   buf_len = tmp;
@@ -1283,36 +1283,17 @@  ret:
   return err;
 }
 
+/* Compress the specified CTF data stream and write it to the specified file
+   descriptor.  */
+int
+ctf_compress_write (ctf_dict_t *fp, int fd)
+{
+  return ctf_write_thresholded (fp, fd, 0);
+}
+
 /* Write the uncompressed CTF data stream to the specified file descriptor.  */
 int
 ctf_write (ctf_dict_t *fp, int fd)
 {
-  unsigned char *buf;
-  unsigned char *bp;
-  size_t tmp;
-  ssize_t buf_len;
-  ssize_t len;
-  int err = 0;
-
-  if ((buf = ctf_write_mem (fp, &tmp, (size_t) -1)) == NULL)
-    return -1;					/* errno is set for us.  */
-
-  buf_len = tmp;
-  bp = buf;
-
-  while (buf_len > 0)
-    {
-      if ((len = write (fd, bp, buf_len)) < 0)
-	{
-	  err = ctf_set_errno (fp, errno);
-	  ctf_err_warn (fp, 0, 0, _("ctf_compress_write: error writing"));
-	  goto ret;
-	}
-      buf_len -= len;
-      bp += len;
-    }
-
-ret:
-  free (buf);
-  return err;
+  return ctf_write_thresholded (fp, fd, (size_t) -1);
 }
diff --git a/libctf/testsuite/libctf-writable/ctf-compressed.c b/libctf/testsuite/libctf-writable/ctf-compressed.c
new file mode 100644
index 00000000000..4769cdb97f2
--- /dev/null
+++ b/libctf/testsuite/libctf-writable/ctf-compressed.c
@@ -0,0 +1,158 @@ 
+/* Make sure linking and writing an archive with a low threshold correctly
+   compresses it.  (This tests for two bugs, one where archives weren't
+   serialized regardless of their threshold, and another where nothing was.)  */
+
+#include <ctf-api.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+main (int argc, char *argv[])
+{
+  ctf_dict_t *in1;
+  ctf_dict_t *in2;
+  ctf_dict_t *fp;
+  ctf_dict_t *dump_fp;
+  ctf_archive_t *arc1, *arc2;
+  ctf_archive_t *final_arc;
+  ctf_sect_t s1, s2;
+  ctf_encoding_t encoding = { CTF_INT_SIGNED, 0, sizeof (char) };
+  ctf_encoding_t encoding2 = { CTF_INT_SIGNED, 0, sizeof (long long) };
+  unsigned char *buf1, *buf2, *buf3;
+  size_t buf1_sz, buf2_sz, buf3_sz;
+  ctf_dump_state_t *dump_state = NULL;
+  ctf_next_t *i = NULL;
+  int err;
+
+  /* Linking does not currently work on mingw because of an unreliable tmpfile
+     implementation on that platform (see
+     https://github.com/msys2/MINGW-packages/issues/18878).  Simply skip for
+     now.  */
+
+#ifdef __MINGW32__
+  printf ("UNSUPPORTED: platform bug breaks ctf_link\n");
+  return 0;
+#else
+
+  if ((fp = ctf_create (&err)) == NULL)
+    goto create_err;
+
+  if ((in1 = ctf_create (&err)) == NULL)
+    goto create_err;
+
+  if ((in2 = ctf_create (&err)) == NULL)
+    goto create_err;
+
+  /* Force a conflict to get an archive created.  */
+
+  if ((ctf_add_integer (in1, CTF_ADD_ROOT, "foo", &encoding)) == CTF_ERR)
+    {
+      fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (in1)));
+      return 1;
+    }
+
+  if ((ctf_add_integer (in2, CTF_ADD_ROOT, "foo", &encoding2)) == CTF_ERR)
+    {
+      fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (in2)));
+      return 1;
+    }
+
+  /* Write them out and read them back in, to turn them into archives.
+     This would be unnecessary if ctf_link_add() were public :( */
+  if ((buf1 = ctf_write_mem (in1, &buf1_sz, -1)) == NULL)
+    {
+      fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (in1)));
+      return 1;
+    }
+
+  if ((buf2 = ctf_write_mem (in2, &buf2_sz, -1)) == NULL)
+    {
+      fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (in2)));
+      return 1;
+    }
+
+  s1.cts_name = "foo";
+  s2.cts_name = "bar";
+  s1.cts_data = (void *) buf1;
+  s2.cts_data = (void *) buf2;
+  s1.cts_size = buf1_sz;
+  s2.cts_size = buf2_sz;
+  s1.cts_entsize = 64; /* Unimportant.  */
+  s2.cts_entsize = 64; /* Unimportant.  */
+
+  if ((arc1 = ctf_arc_bufopen (&s1, NULL, NULL, &err)) == NULL ||
+      (arc2 = ctf_arc_bufopen (&s2, NULL, NULL, &err)) == NULL)
+    goto open_err;
+
+  ctf_dict_close (in1);
+  ctf_dict_close (in2);
+
+  /* Link them together.  */
+
+  if (ctf_link_add_ctf (fp, arc1, "a") < 0 ||
+      ctf_link_add_ctf (fp, arc2, "b") < 0)
+    goto link_err;
+
+  if (ctf_link (fp, 0) < 0)
+    goto link_err;
+
+  /* Write them out.  We need a new buf here, because the archives are still
+     using the other two bufs.  */
+
+  if ((buf3 = ctf_link_write (fp, &buf3_sz, 1)) == NULL)
+    goto link_err;
+
+  /* Read them back in.  */
+
+  s1.cts_data = (void *) buf3;
+  s1.cts_size = buf3_sz;
+
+  if ((final_arc = ctf_arc_bufopen (&s1, NULL, NULL, &err)) == NULL)
+    goto open_err;
+
+  if (ctf_archive_count (final_arc) != 2)
+    {
+      fprintf (stderr, "Archive is the wrong length: %zi.\n", ctf_archive_count (final_arc));
+      return -1;
+    }
+
+  /* Dump the header of each archive member, and search for CTF_F_COMPRESS in
+     the resulting dump.  */
+  while ((dump_fp = ctf_archive_next (final_arc, &i, NULL, 0, &err)) != NULL)
+    {
+      char *dumpstr;
+
+      while ((dumpstr = ctf_dump (dump_fp, &dump_state, CTF_SECT_HEADER,
+				  NULL, NULL)) != NULL)
+	{
+	  if (strstr (dumpstr, "CTF_F_COMPRESS") != NULL)
+	    printf ("Output is compressed.\n");
+	  free (dumpstr);
+	}
+      ctf_dict_close (dump_fp);
+    }
+  if (err != ECTF_NEXT_END)
+    {
+      fprintf (stderr, "Archive iteation error: %s\n", ctf_errmsg (err));
+      return 1;
+    }
+
+  ctf_arc_close (final_arc);
+  free (buf1);
+  free (buf2);
+  free (buf3);
+  ctf_dict_close (fp);
+  return 0;
+
+ create_err:
+  fprintf (stderr, "Cannot create: %s\n", ctf_errmsg (err));
+  return 1;
+ open_err:
+  fprintf (stderr, "Cannot open: %s\n", ctf_errmsg (err));
+  return 1;
+ link_err:
+  fprintf (stderr, "Cannot link: %s\n", ctf_errmsg (ctf_errno (fp)));
+  return 1;
+#endif
+}
diff --git a/libctf/testsuite/libctf-writable/ctf-compressed.lk b/libctf/testsuite/libctf-writable/ctf-compressed.lk
new file mode 100644
index 00000000000..cf611c36f97
--- /dev/null
+++ b/libctf/testsuite/libctf-writable/ctf-compressed.lk
@@ -0,0 +1,2 @@ 
+Output is compressed.
+Output is compressed.