extend dl-minimal malloc implementation

Message ID xnlgomzih2.fsf@greed.delorie.com
State New, archived
Headers

Commit Message

DJ Delorie June 20, 2017, 5:17 p.m. UTC
  This is a minor (hah!) enhancement to the trivial malloc
implementation in elf/dl-malloc.c to make free() work more
generically, and allow for reusing free'd chunks.  Like the core
malloc(), it uses a heap-based allocator with a header before each
chunk, and a free list.  However, unlike the core allocator, there's
only one free list and it's unsorted - the design goal here is to keep
the *code* simple and easy to understand, knowing that this code
should only be used during early program load (i.e. the free list
should typically be small enough that naive algorithms are sufficient)

Tested by running ls and blender, both via testrun (ld.so
implementation) and by extracting this code into a standalone library
and preloading it (runs slow due to free list scanning, but it runs).

Review notes:

I left in the DJTEST clauses for now, in case anyone wants to see how
it's working via the debug statements.  I don't intend to check those
in officially.

The tst-* testcase works by #include'ing dl-minimal.c instead of
running against the .so because I found it nearly impossible to link
against ld.so and not get libc.so's malloc implementation or some
random link or runtime error (ld.so is not a "normal" shared object ;)


From 478fb05972909d77373d86c1fcfe8e89cf1df993 Mon Sep 17 00:00:00 2001
From: DJ Delorie <dj@delorie.com>
Date: Tue, 20 Jun 2017 12:56:30 -0400
Subject: Extend dl-minimal's malloc() implementation

* elf/dl-minimal.c: Upgrade minimal malloc to allow for reusing
free'd memory.  Migrate malloc/calloc/realloc to morecore(),
add free list, optimize calloc zeroing.
* elf/tst-dl-minimal-malloc.c: New.
* elf/Makefile: Run it.
  

Comments

Florian Weimer June 22, 2017, 1:05 p.m. UTC | #1
On 06/20/2017 07:17 PM, DJ Delorie wrote:
> This is a minor (hah!) enhancement to the trivial malloc
> implementation in elf/dl-malloc.c to make free() work more
> generically, and allow for reusing free'd chunks.  Like the core
> malloc(), it uses a heap-based allocator with a header before each
> chunk, and a free list.  However, unlike the core allocator, there's
> only one free list and it's unsorted - the design goal here is to keep
> the *code* simple and easy to understand, knowing that this code
> should only be used during early program load (i.e. the free list
> should typically be small enough that naive algorithms are sufficient)

It will allow us to use dynarrays and scratch buffers in the dynamic
linker because we get a working realloc.

> +/* For each chunk of memory we work with in this minimal malloc, we
> +   store one 32-bit "size" field before it, and reuse the first word
> +   of the user-space memory to store a linked list of free chunks when
> +   the chunk is freed.  This is similar in concept to the regular
> +   malloc's design.
> + */
> +typedef struct free_chunk {
> +  /* This is just to avoid "packed" */
> +  uint32_t filler;
> +  /* This is the only variable that takes up space between user chunks.  */
> +  uint32_t size;
> +  /* The address of "next" is what we return to the user.  */
> +  struct free_chunk *next;
> +} free_chunk;

The struct is technically invalid C.  We might be better off with
pointer arithmetic.  But considering that malloc/malloc.c uses pretty
much the same construct, I'm not sure if it's worth changing this.

> +/* Convert between user and chunk pointers.  */
> +#define ptr2chunk(ptr) (free_chunk *)((char *)ptr - sizeof (uint32_t)*2)
> +#define chunk2ptr(ch) (void *)(& ch->next)
> +/* Extra bytes we need between chunks.  */
> +#define OVERHEAD sizeof(uint32_t)
> +#define TOOBIG (uint32_t)(~0L)
> +
> +/* Debugging - make sure the free_chunk struct doesn't have a 32-bit
> +   hole between size and next.  */
> +static char
> +  check_size [(sizeof(free_chunk) == sizeof(uint32_t) * 2 + sizeof(void *)) ? 1 : -1]
> +  __attribute__((used));

You can use _Static_assert for that.  (By the way: For such constructs,
should be __attribute__ ((unused)) to suppress the warning;
__attribute__ ((used)) suppresses the warning *and* emits the object to
the assembly file, which is not what we want.)

> +/* We have a singly linked list of free'd chunks.  Newly freed chunks
> +   are added at the head end, and chunks are removed from
> +   anywhere.  */
> +static free_chunk free_list = { 0, 0, NULL };

Can we turn this into a single pointer, to save .bss space?

> +static free_chunk *
> +find_on_free_list (void *old_pointer, size_t n, int zero_it)

zero_it should be bool (also in other places).

> +/* Combination malloc/calloc/realloc - allocates memory, possibly
> +   extending an existing chunk (realloc), possibly zeroing it if
> +   needed (calloc).  You shouldn't ask for both realloc and calloc at
> +   the same time, of course.  */
> +static void *
> +morecore(void *old_pointer, size_t n, int zero_it)
>  {
> +  free_chunk *rv;
> +
> +  djnl();
> +  djprintf("morecore\n", zero_it ? -n : n, old_pointer);
> +
> +  /* We round up the requested size so that we have room for the
> +     "size" of the next chunk after this one is properly aligned.
> +     This means the smallest chunk is 12 bytes on 64-bit
> +     platforms.  */
> +  if (n < sizeof(void *))
> +    n = sizeof(void *);
> +  n = ALIGN_UP (n + OVERHEAD, MALLOC_ALIGNMENT) - OVERHEAD;
> +
> +  /* Special cases for realloc.  */
> +  if (old_pointer)
> +    {
> +      free_chunk *old_chunk = ptr2chunk (old_pointer);
> +
> +      /* Shrinking - just reuse the old chunk.  */
> +      if (old_chunk->size >= n)
> +	return old_pointer;

Can we put the tail on the free list?

> -  /* Make sure the allocation pointer is ideally aligned.  */
> -  alloc_ptr = (void *) 0 + (((alloc_ptr - (void *) 0) + MALLOC_ALIGNMENT - 1)
> -			    & ~(MALLOC_ALIGNMENT - 1));
> +  /* If we're realloc'ing the last block, we either extend it (by
> +     re-allocating it over the old block) or we copy it (because a new
> +     noncontiguous mapping was needed).  */

What does “last” mean here?

>  /* This will rarely be called.  */
>  void weak_function
>  free (void *ptr)
>  {

> +  /* else if it's really big we can't store it on the free list.  */
> +  if (ch->size == TOOBIG)
> +    return;

I think we should just fail TOOBIG allocations.

> +
> +  /* else store the chunk on the free list.  */
> +  ch->next = free_list.next;
> +  free_list.next = ch;
> +  djchunk("free_list\n", &free_list);
> +  djchunk("free_list->next\n", free_list.next);
>  }

Would it make sense to keep the free list order by increasing addresses
and attempt to coalesce neighboring chunks?  The free list should be
really short, and this should help us to reduce fragmentation.

Another thing that might be worth considering is to keep the free list
pointer in a parameter.  Then we can use the same code to implement the
async-signal-safe malloc for TLS data (with a per-thread free list).
The free list is beneficial there because we have some space in the TCB
on many architectures which we could reuse for TLS data if the allocator
is sufficiently flexible.

Thanks,
Florian
  

Patch

diff --git a/ChangeLog b/ChangeLog
index ebab4ce..31eb8f2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@ 
+2017-06-20  DJ Delorie  <dj@delorie.com>
+
+	* elf/dl-minimal.c: Upgrade minimal malloc to allow for reusing
+	free'd memory.  Migrate malloc/calloc/realloc to morecore(),
+	add free list, optimize calloc zeroing.
+	* elf/tst-dl-minimal-malloc.c: New.
+	* elf/Makefile: Run it.
+
 2017-06-20  Wilco Dijkstra  <wdijkstr@arm.com>
 
 	* benchtests/powf-inputs: Add reduced trace from wrf.
diff --git a/elf/Makefile b/elf/Makefile
index 201b328..a926449 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -293,6 +293,7 @@  tests += vismain
 tests-pie += vismain
 CFLAGS-vismain.c = $(PIE-ccflag)
 endif
+tests += tst-dl-minimal-malloc
 modules-execstack-yes = tst-execstack-mod
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))
 
diff --git a/elf/dl-minimal.c b/elf/dl-minimal.c
index 59e159a..11765ad 100644
--- a/elf/dl-minimal.c
+++ b/elf/dl-minimal.c
@@ -16,6 +16,7 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef TESTING_MALLOC
 #include <errno.h>
 #include <limits.h>
 #include <stdio.h>
@@ -28,6 +29,7 @@ 
 #include <ldsodefs.h>
 #include <_itoa.h>
 #include <malloc/malloc-internal.h>
+#endif
 
 #include <assert.h>
 
@@ -43,10 +45,188 @@  extern void weak_function free (void *ptr);
 extern void * weak_function realloc (void *ptr, size_t n);
 
 
-/* Allocate an aligned memory block.  */
-void * weak_function
-malloc (size_t n)
+/* For each chunk of memory we work with in this minimal malloc, we
+   store one 32-bit "size" field before it, and reuse the first word
+   of the user-space memory to store a linked list of free chunks when
+   the chunk is freed.  This is similar in concept to the regular
+   malloc's design.
+ */
+typedef struct free_chunk {
+  /* This is just to avoid "packed" */
+  uint32_t filler;
+  /* This is the only variable that takes up space between user chunks.  */
+  uint32_t size;
+  /* The address of "next" is what we return to the user.  */
+  struct free_chunk *next;
+} free_chunk;
+
+/* Convert between user and chunk pointers.  */
+#define ptr2chunk(ptr) (free_chunk *)((char *)ptr - sizeof (uint32_t)*2)
+#define chunk2ptr(ch) (void *)(& ch->next)
+/* Extra bytes we need between chunks.  */
+#define OVERHEAD sizeof(uint32_t)
+#define TOOBIG (uint32_t)(~0L)
+
+/* Debugging - make sure the free_chunk struct doesn't have a 32-bit
+   hole between size and next.  */
+static char
+  check_size [(sizeof(free_chunk) == sizeof(uint32_t) * 2 + sizeof(void *)) ? 1 : -1]
+  __attribute__((used));
+
+/* We have a singly linked list of free'd chunks.  Newly freed chunks
+   are added at the head end, and chunks are removed from
+   anywhere.  */
+static free_chunk free_list = { 0, 0, NULL };
+
+#ifndef DJTEST
+#define DJTEST 0
+#endif
+
+#if DJTEST
+
+/* Strictly for debugging :-)  */
+static char *
+djhex (char *buf, int64_t v)
+{
+  const char hex[] = "0123456789abcdef";
+  int i;
+  for (i=0; i<16; i++)
+    buf[i] = hex[(v>>(4*(15-i))) & 15];
+  buf[16] = ' ';
+  return buf+17;
+}
+static void
+djprintf (const char *msg, size_t s, void *ptr)
+{
+  char buf[1000], *bp = buf;
+  bp = djhex (bp, (int64_t)s);
+  bp = djhex (bp, (int64_t)ptr);
+  write(2, buf, bp-buf);
+  write (2, msg, strlen(msg));
+}
+static void
+djchunk (const char *msg, free_chunk *ch)
+{
+  char buf[1000], *bp = buf;
+  write (2, "\033[30m", 5);
+  bp = djhex (bp, (int64_t)ch);
+  bp = djhex (bp, (int64_t)ch->size);
+  bp = djhex (bp, (int64_t)ch->next);
+  write(2, buf, bp-buf);
+  write (2, msg, strlen(msg));
+  write (2, "\033[0m", 4);
+}
+static void
+djnl(void) {
+  write (2, "\n", 1);
+}
+
+#else
+
+#define djprintf(a,b,c)
+#define djchunk(a,b)
+#define djnl()
+
+#endif
+
+static free_chunk *
+find_on_free_list (void *old_pointer, size_t n, int zero_it)
+{
+  free_chunk *old_chunk, *rv, *rvp, *best, *bestp;
+  size_t best_size;
+
+  rvp = &free_list;
+  best = NULL;
+  best_size = 0;
+
+  djchunk("free_list\n", &free_list);
+  for (rv = free_list.next; rv; rv = rv->next) {
+    djchunk("free list\n", rv);
+    if (rv->size >= n
+	&& (rv->size < best_size
+	    || best == NULL))
+      {
+	best = rv;
+	best_size = rv->size;
+	/* bestp points to the previous chunk, or &free_list */
+	bestp = rvp;
+	if (rv->size == n)
+	  /* It doesn't get any bester than this.  */
+	  break;
+      }
+    rvp = rv;
+  }
+
+  if (best == NULL)
+    return NULL;
+
+  bestp->next = best->next;
+  djprintf("malloc - reused\n", n, best);
+  djchunk ("bestp\n", bestp);
+  djchunk ("best\n", best);
+  best->next = NULL;
+
+  if (zero_it)
+    memset (chunk2ptr (best), '\0', n);
+
+  if (old_pointer)
+    {
+      old_chunk = ptr2chunk (old_pointer);
+      memcpy (chunk2ptr (best), old_pointer, old_chunk->size < n ? old_chunk->size : n);
+    }
+
+  return best;
+}
+
+/* Combination malloc/calloc/realloc - allocates memory, possibly
+   extending an existing chunk (realloc), possibly zeroing it if
+   needed (calloc).  You shouldn't ask for both realloc and calloc at
+   the same time, of course.  */
+static void *
+morecore(void *old_pointer, size_t n, int zero_it)
 {
+  free_chunk *rv;
+
+  djnl();
+  djprintf("morecore\n", zero_it ? -n : n, old_pointer);
+
+  /* We round up the requested size so that we have room for the
+     "size" of the next chunk after this one is properly aligned.
+     This means the smallest chunk is 12 bytes on 64-bit
+     platforms.  */
+  if (n < sizeof(void *))
+    n = sizeof(void *);
+  n = ALIGN_UP (n + OVERHEAD, MALLOC_ALIGNMENT) - OVERHEAD;
+
+  /* Special cases for realloc.  */
+  if (old_pointer)
+    {
+      free_chunk *old_chunk = ptr2chunk (old_pointer);
+
+      /* Shrinking - just reuse the old chunk.  */
+      if (old_chunk->size >= n)
+	return old_pointer;
+
+      /* See if we can realloc using existing space at the end of our
+	 current mapping.  */
+      if (alloc_end != 0
+	  && old_pointer == alloc_last_block
+	  && (char *)alloc_end - (char *)old_pointer < n)
+	{
+	  /* We have space at the end of the current page, use it.  */
+	  old_chunk->size = n;
+	  alloc_ptr = alloc_last_block + n;
+	  return old_pointer;
+	}
+    }
+
+  /* Do a best-fit search of the free list first... */
+  rv = find_on_free_list (old_pointer, n, zero_it);
+  if (rv)
+    return chunk2ptr(rv);
+
+  /* Nothing on the free list, allocate a new chunk.  */
+
   if (alloc_end == 0)
     {
       /* Consume any unused space in the last page of our data segment.  */
@@ -57,9 +237,16 @@  malloc (size_t n)
 				& ~(GLRO(dl_pagesize) - 1));
     }
 
-  /* Make sure the allocation pointer is ideally aligned.  */
-  alloc_ptr = (void *) 0 + (((alloc_ptr - (void *) 0) + MALLOC_ALIGNMENT - 1)
-			    & ~(MALLOC_ALIGNMENT - 1));
+  /* If we're realloc'ing the last block, we either extend it (by
+     re-allocating it over the old block) or we copy it (because a new
+     noncontiguous mapping was needed).  */
+  if (old_pointer && old_pointer == alloc_last_block)
+    /* Note we do NOT pad/align here, because we're using the
+       padding/alignment from back when we first allocated it.  */
+    alloc_ptr = alloc_last_block;
+  else
+    /* Make sure the allocation pointer is ideally aligned.  */
+    alloc_ptr = (void *) PTR_ALIGN_UP ((char *) alloc_ptr + OVERHEAD, MALLOC_ALIGNMENT);
 
   if (alloc_ptr + n >= alloc_end || n >= -(uintptr_t) alloc_ptr)
     {
@@ -75,13 +262,46 @@  malloc (size_t n)
       if (page == MAP_FAILED)
 	return NULL;
       if (page != alloc_end)
-	alloc_ptr = page;
+	{
+	  alloc_ptr = page;
+	  /* Make sure the allocation pointer is ideally aligned.  */
+	  alloc_ptr = (void *) PTR_ALIGN_UP ((char *) alloc_ptr + OVERHEAD, MALLOC_ALIGNMENT);
+	}
       alloc_end = page + nup;
     }
 
+  if (old_pointer && alloc_ptr != old_pointer)
+    {
+      free_chunk *old_chunk = ptr2chunk (old_pointer);
+      /* We need to copy the data, since we didn't just extend our mapping.  */
+      memcpy (alloc_ptr, old_pointer, old_chunk->size < n ? old_chunk->size : n);
+    }
+  /* The else case here is that we extended our mapping contiguously,
+     and/or reused existing space, so old_pointer == alloc_ptr (or
+     it's not set) and we don't need to do the copy.  */
+
+  /* Remember the chunk size here.  */
+  rv = ptr2chunk (alloc_ptr);
+  if (n < UINT32_MAX)
+    rv->size = n;
+  else
+    rv->size = TOOBIG;
+
   alloc_last_block = (void *) alloc_ptr;
   alloc_ptr += n;
-  return alloc_last_block;
+  djprintf("\033[32mmalloc\033[0m\n", n, alloc_last_block);
+  djchunk ("returning\n", rv);
+
+  /* We don't need to zero this, even if desired, since it's never
+     been used since we mmap'd it.  */
+  return chunk2ptr (rv);
+}
+
+/* Allocate an aligned memory block.  */
+void * weak_function
+malloc (size_t n)
+{
+  return morecore (NULL, n, 0);
 }
 
 /* We use this function occasionally since the real implementation may
@@ -90,9 +310,6 @@  malloc (size_t n)
 void * weak_function
 calloc (size_t nmemb, size_t size)
 {
-  /* New memory from the trivial malloc above is always already cleared.
-     (We make sure that's true in the rare occasion it might not be,
-     by clearing memory in free, below.)  */
   size_t bytes = nmemb * size;
 
 #define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2))
@@ -100,36 +317,51 @@  calloc (size_t nmemb, size_t size)
       && size != 0 && bytes / size != nmemb)
     return NULL;
 
-  return malloc (bytes);
+  return morecore (NULL, bytes, 1);
+}
+
+/* This is only called with the most recent block returned by malloc.  */
+void * weak_function
+realloc (void *ptr, size_t n)
+{
+  return morecore (ptr, n, 0);
 }
 
 /* This will rarely be called.  */
 void weak_function
 free (void *ptr)
 {
-  /* We can free only the last block allocated.  */
+  free_chunk *ch = ptr2chunk(ptr);
+  djnl();
+  djprintf("\033[31mfree\033[0m\n", ch->size, ptr);
+
+  if (ptr == NULL)
+    return;
+
+  /* paranoia, and debugging - we want a core dump, not a text message.  For testing. */
+  if (ch->size == 0)
+    *(int *)(-1) = 0;
+
+  /* We can easily free the last block in the heap.  */
   if (ptr == alloc_last_block)
     {
-      /* Since this is rare, we clear the freed block here
-	 so that calloc can presume malloc returns cleared memory.  */
-      memset (alloc_last_block, '\0', alloc_ptr - alloc_last_block);
-      alloc_ptr = alloc_last_block;
+      /* Undo the alignment and padding we do in morecore().  */
+      alloc_ptr = (void *) ((char *)alloc_last_block - OVERHEAD);
+      return;
     }
-}
 
-/* This is only called with the most recent block returned by malloc.  */
-void * weak_function
-realloc (void *ptr, size_t n)
-{
-  if (ptr == NULL)
-    return malloc (n);
-  assert (ptr == alloc_last_block);
-  size_t old_size = alloc_ptr - alloc_last_block;
-  alloc_ptr = alloc_last_block;
-  void *new = malloc (n);
-  return new != ptr ? memcpy (new, ptr, old_size) : new;
+  /* else if it's really big we can't store it on the free list.  */
+  if (ch->size == TOOBIG)
+    return;
+
+  /* else store the chunk on the free list.  */
+  ch->next = free_list.next;
+  free_list.next = ch;
+  djchunk("free_list\n", &free_list);
+  djchunk("free_list->next\n", free_list.next);
 }
 
+#ifndef TESTING_MALLOC
 /* Avoid signal frobnication in setjmp/longjmp.  Keeps things smaller.  */
 
 #include <setjmp.h>
@@ -294,3 +526,5 @@  __strsep (char **stringp, const char *delim)
 }
 weak_alias (__strsep, strsep)
 strong_alias (__strsep, __strsep_g)
+
+#endif /* TESTING_MALLOC */
diff --git a/elf/tst-dl-minimal-malloc.c b/elf/tst-dl-minimal-malloc.c
new file mode 100644
index 0000000..9523b9a
--- /dev/null
+++ b/elf/tst-dl-minimal-malloc.c
@@ -0,0 +1,162 @@ 
+/* Minimal testsuite for dl-minimal's malloc/free implementation
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <libc-lock.h>
+#include <stdarg.h>
+#include <libc-pointer-arith.h>
+
+/*----------------------------------------*/
+
+/* All this hackery is just because we're trying to use dl-minimal.os
+   directly to ensure we get the right copy of malloc to test, but
+   dl-minimal.os is designed to be used in ld.so, not other
+   programs.  */
+
+#define internal_function
+#define attribute_hidden
+#define attribute_relro
+#define weak_function
+#define __rtld_lock_define_recursive(a,b)
+#define rtld_hidden_proto(x)
+#define libc_hidden_proto(x)
+#define weak_extern(x)
+#define __libc_enable_secure 1
+
+#define __mmap mmap
+#define MALLOC_ALIGNMENT 16
+#define GLRO(x) x
+
+int dl_pagesize;
+
+/*----------------------------------------*/
+
+#define TESTING_MALLOC 1
+#include "./dl-minimal.c"
+
+/*----------------------------------------*/
+
+#define SZ(p) *(uint32_t *)((p)-4)
+
+static void
+fill(unsigned char *ptr, int count)
+{
+  memset (ptr, '5', count);
+}
+
+static int
+check_calloc (int sz)
+{
+  int i;
+  char *ptr = calloc (sz, 1);
+  for (i=0; i<sz; i++)
+    if (ptr[i] != '\0')
+      {
+	printf("calloc(%d) had nonzero at %d\n", sz, i);
+	return 1;
+      }
+  return 0;
+}
+
+
+static int
+do_test(void)
+{
+  unsigned char *p, *p2, *p3;
+  size_t rest;
+  int rv = 0;
+
+  dl_pagesize = getpagesize();
+
+  p = malloc(4000);
+  if (SZ(p) != 4012)
+    {
+      printf("size of malloc(4000) not 4012\n");
+      return 1;
+    }
+
+  /* Force the next one after this to a new page.  */
+  rest = 4095 - (((size_t) p) & 4095);
+  p2 = realloc (p, rest);
+
+  p = malloc(12);
+  p2 = realloc(p, 12);
+  if (p != p2)
+    {
+      printf("realloc in place: pointer changed\n");
+      return 1;
+    }
+  p2 = realloc(p, 24);
+  if (p != p2)
+    {
+      printf("realloc in place: pointer changed\n");
+      return 1;
+    }
+
+  /* Block more reallocs-in-place.  */
+  p2 = malloc(1);
+
+  p2 = realloc(p, 40);
+  if (p2 == p)
+    {
+      printf("realloc copy: pointer not moved\n");
+      return 1;
+    }
+
+  free (p2);
+  p = malloc(40);
+  if (p != p2)
+    {
+      printf("free list: free'd chunk not reused\n");
+      return 1;
+    }
+
+  p = malloc(30);
+  p2 = malloc(50);
+  p3 = malloc(70);
+
+  fill(p, 30);
+  fill(p2, 50);
+  fill(p3, 70);
+
+  free (p);
+  free (p2);
+  free (p3);
+
+  p = malloc (45);
+  if (p != p2)
+    {
+      printf("free list: best fit not used\n");
+      return 1;
+    }
+
+  rv += check_calloc (30);
+  rv += check_calloc (50);
+  rv += check_calloc (70);
+  rv += check_calloc (90);
+
+  return rv;
+}
+
+#include <support/test-driver.c>