Benchtests: Improve large memcpy/memset benchmarks

Message ID PAWPR08MB8982673949A0B2FECD47F92983879@PAWPR08MB8982.eurprd08.prod.outlook.com
State Superseded
Headers
Series Benchtests: Improve large memcpy/memset benchmarks |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Wilco Dijkstra March 23, 2023, 11:41 a.m. UTC
  Adjust sizes between 64KB and 16MB and iterations based on length.  
Remove incorrect uses of alloc_bufs (since we're not interested in measuring
Linux clear_page time).

---
  

Comments

Noah Goldstein March 23, 2023, 6:17 p.m. UTC | #1
On Thu, Mar 23, 2023 at 6:41 AM Wilco Dijkstra via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
>
> Adjust sizes between 64KB and 16MB and iterations based on length.
> Remove incorrect uses of alloc_bufs (since we're not interested in measuring
> Linux clear_page time).
>
> ---
>
> diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
> --- a/benchtests/bench-bzero-large.c
> +++ b/benchtests/bench-bzero-large.c
> @@ -22,9 +22,8 @@
>  #else
>  # define TEST_NAME "bzero"
>  #endif
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>
>  #include "json-lib.h"
> @@ -52,7 +51,7 @@ IMPL (memset_zero, 0)
>  static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -74,20 +73,13 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, size_t len)
>  {
> -  align &= 63;
Can you replace with `align &= getpagesize () - 1`?
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;

Is there a reason to remove the out of bounds check?
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
>    json_array_begin (json_ctx, "timings");
>
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
>
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
> diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
> index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
> --- a/benchtests/bench-memcpy-large.c
> +++ b/benchtests/bench-memcpy-large.c
> @@ -19,10 +19,9 @@
>  #ifndef MEMCPY_RESULT
>  # define MEMCPY_RESULT(dst, len) dst
>  # define START_SIZE (64 * 1024)
> -# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
> +# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  # define TEST_MAIN
>  # define TEST_NAME "memcpy"
> -# define TIMEOUT (20 * 60)
>  # include "bench-string.h"
>
>  IMPL (memcpy, 1)
> @@ -36,7 +35,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
>              size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -59,12 +58,7 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
>    char *s1, *s2;
>    size_t repeats;
>    align1 &= 4095;
> -  if (align1 + len >= page_size)
> -    return;
> -
>    align2 &= 4095;
Might as well fix these up, can you replace 4095 with `getpagesize () - 1`?
> -  if (align2 + len >= page_size)
> -    return;
>
>    s1 = (char *) (buf1 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
> index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
> --- a/benchtests/bench-memmove-large.c
> +++ b/benchtests/bench-memmove-large.c
> @@ -16,12 +16,10 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define BASE_PAGE_SIZE (1024 * 1024)
> -#define START_SIZE (4 * 1024)
> +#define START_SIZE (64 * 1024)
>  #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #define TEST_MAIN
>  #define TEST_NAME "memmove"
> -#define TIMEOUT (20 * 60)
>  #include "bench-string.h"
>  #include "json-lib.h"
>
> @@ -33,7 +31,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>              size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -54,13 +52,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
>    size_t i, j;
>    char *s1, *s2;
>
> -  align1 &= 127;
> -  if (align1 + len >= page_size)
> -    return;
> -
> -  align2 &= 127;
> -  if (align2 + len >= page_size)
> -    return;
> +  align1 &= 4095;
> +  align2 &= 4095;
`getpagesize () - 1`
>
>    s1 = (char *) (buf2 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
> --- a/benchtests/bench-memset-large.c
> +++ b/benchtests/bench-memset-large.c
> @@ -18,9 +18,8 @@
>
>  #define TEST_MAIN
>  #define TEST_NAME "memset"
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>
>  #include "json-lib.h"
> @@ -35,7 +34,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>              int c __attribute ((unused)), size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
this takes [.5, 10] seconds?
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -53,10 +52,6 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>  {
> -  align &= 63;
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
> @@ -64,10 +59,7 @@ do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>    json_array_begin (json_ctx, "timings");
>
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
>
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
>
>
  
Wilco Dijkstra March 23, 2023, 11:01 p.m. UTC | #2
Hi Noah,


>> -  align &= 63;
>
> Can you replace with `align &= getpagesize () - 1`?

Yes that would make more sense indeed.

>> -  if ((align + len) * sizeof (CHAR) > page_size)
>> -    return;
>
> Is there a reason to remove the out of bounds check?

Well it can't ever go out of bounds. The allocated buffer size is
2 * page_size (ie. 4 times real page size or 2 times MIN_PAGE_SIZE
due to the incorrect logic in init_sizes).

And if it could go out of bounds it should be an assert so we don't
silently benchmark a bounds check!

>>    align2 &= 4095;
>
> Might as well fix these up, can you replace 4095 with `getpagesize () - 1`?

Sure.

>> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>
> this takes [.5, 10] seconds?

Yes, it's only 1 GB written per test. It takes ~8.5 seconds on an old slow
Cortex-A72.

Cheers,
Wilco
  
Noah Goldstein March 24, 2023, 4:29 p.m. UTC | #3
On Thu, Mar 23, 2023 at 6:01 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
>
> >> -  align &= 63;
> >
> > Can you replace with `align &= getpagesize () - 1`?
>
> Yes that would make more sense indeed.
>
> >> -  if ((align + len) * sizeof (CHAR) > page_size)
> >> -    return;
> >
> > Is there a reason to remove the out of bounds check?
>
> Well it can't ever go out of bounds. The allocated buffer size is
> 2 * page_size (ie. 4 times real page size or 2 times MIN_PAGE_SIZE
> due to the incorrect logic in init_sizes).
>

I disagree, we have a bounds check in nearly all other benchmarks
and I think its 1) a convenience so you don't need to add the equivalent
code in many other places and 2) a portability thing as page size varies
by system but people often write benchmark bounds with constant size
bounds.

> And if it could go out of bounds it should be an assert so we don't
> silently benchmark a bounds check!

Then I think the approach would be to output the json parameters and
report invalid in some way for the times.

In other places benchmarks rely on this bounds check and it seems more
error prone than anything else to just leave it unchecked. I would much
rather the benchmark not run or fail in some way than report a potentially
incorrect time as true.

>
> >>    align2 &= 4095;
> >
> > Might as well fix these up, can you replace 4095 with `getpagesize () - 1`?
>
> Sure.
>
> >> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
> >
> > this takes [.5, 10] seconds?
>
> Yes, it's only 1 GB written per test. It takes ~8.5 seconds on an old slow
> Cortex-A72.
>
> Cheers,
> Wilco
>
>
  
Carlos O'Donell June 19, 2024, 2:20 p.m. UTC | #4
On 3/23/23 7:41 AM, Wilco Dijkstra via Libc-alpha wrote:
> 
> Adjust sizes between 64KB and 16MB and iterations based on length.  
> Remove incorrect uses of alloc_bufs (since we're not interested in measuring
> Linux clear_page time).

Wilco,

Would it be possible for you to post a v2 that rolls up the feedback
from Noah and HJ here?

I know that some of their feedback you have chosen not to integrate,
but any other changes you made would be good to get into the tree.

Thank you!

> ---
> 
> diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
> --- a/benchtests/bench-bzero-large.c
> +++ b/benchtests/bench-bzero-large.c
> @@ -22,9 +22,8 @@
>  #else
>  # define TEST_NAME "bzero"
>  #endif
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>  
>  #include "json-lib.h"
> @@ -52,7 +51,7 @@ IMPL (memset_zero, 0)
>  static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>    timing_t start, stop, cur;
>  
>    TIMING_NOW (start);
> @@ -74,20 +73,13 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, size_t len)
>  {
> -  align &= 63;
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
>    json_array_begin (json_ctx, "timings");
>  
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
>  
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
> diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
> index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
> --- a/benchtests/bench-memcpy-large.c
> +++ b/benchtests/bench-memcpy-large.c
> @@ -19,10 +19,9 @@
>  #ifndef MEMCPY_RESULT
>  # define MEMCPY_RESULT(dst, len) dst
>  # define START_SIZE (64 * 1024)
> -# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
> +# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  # define TEST_MAIN
>  # define TEST_NAME "memcpy"
> -# define TIMEOUT (20 * 60)
>  # include "bench-string.h"
>  
>  IMPL (memcpy, 1)
> @@ -36,7 +35,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
>  	     size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>  
>    TIMING_NOW (start);
> @@ -59,12 +58,7 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
>    char *s1, *s2;
>    size_t repeats;
>    align1 &= 4095;
> -  if (align1 + len >= page_size)
> -    return;
> -
>    align2 &= 4095;
> -  if (align2 + len >= page_size)
> -    return;
>  
>    s1 = (char *) (buf1 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
> index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
> --- a/benchtests/bench-memmove-large.c
> +++ b/benchtests/bench-memmove-large.c
> @@ -16,12 +16,10 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> -#define BASE_PAGE_SIZE (1024 * 1024)
> -#define START_SIZE (4 * 1024)
> +#define START_SIZE (64 * 1024)
>  #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #define TEST_MAIN
>  #define TEST_NAME "memmove"
> -#define TIMEOUT (20 * 60)
>  #include "bench-string.h"
>  #include "json-lib.h"
>  
> @@ -33,7 +31,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>  	     size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>  
>    TIMING_NOW (start);
> @@ -54,13 +52,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
>    size_t i, j;
>    char *s1, *s2;
>  
> -  align1 &= 127;
> -  if (align1 + len >= page_size)
> -    return;
> -
> -  align2 &= 127;
> -  if (align2 + len >= page_size)
> -    return;
> +  align1 &= 4095;
> +  align2 &= 4095;
>  
>    s1 = (char *) (buf2 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
> --- a/benchtests/bench-memset-large.c
> +++ b/benchtests/bench-memset-large.c
> @@ -18,9 +18,8 @@
>  
>  #define TEST_MAIN
>  #define TEST_NAME "memset"
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>  
>  #include "json-lib.h"
> @@ -35,7 +34,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>  	     int c __attribute ((unused)), size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>    timing_t start, stop, cur;
>  
>    TIMING_NOW (start);
> @@ -53,10 +52,6 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>  {
> -  align &= 63;
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
> @@ -64,10 +59,7 @@ do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>    json_array_begin (json_ctx, "timings");
>  
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
>  
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
> 
> 
> 
>
  
Wilco Dijkstra June 21, 2024, 3:49 p.m. UTC | #5
Hi Carlos,

> Would it be possible for you to post a v2 that rolls up the feedback
> from Noah and HJ here?
>
> I know that some of their feedback you have chosen not to integrate,
> but any other changes you made would be good to get into the tree.

Sure v2 is below. This doesn't include fixes for page_size = getpagesize ()
since that affects a lot more benchmarks and incorrect range checks
(there are many uses of 4095 and 4096 for page alignment), so I've kept the
rest the same as v1.

Cheers,
Wilco


v2: Use getpagesize () as suggested.

Adjust sizes between 64KB and 16MB and iterations based on length.
Remove incorrect uses of alloc_bufs since we're not interested in measuring
Linux clear_page time.  Use getpagesize() - 1 instead of 4095 when
aligning within a page.

---

diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
index 71300e878afbfcc92c378d41487e469fde08093a..2eebd08315d75a23f19a7b750493d1a63ce0319d 100644
--- a/benchtests/bench-bzero-large.c
+++ b/benchtests/bench-bzero-large.c
@@ -22,9 +22,8 @@
 #else
 # define TEST_NAME "bzero"
 #endif
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -52,7 +51,7 @@ IMPL (memset_zero, 0)
 static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -74,20 +73,13 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 static void
 do_test (json_ctx_t *json_ctx, size_t align, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);
diff --git a/benchtests/bench-bzero.c b/benchtests/bench-bzero.c
index d0497dd8b0ab852ab661cac6df82202b0d5969f5..637b4f765eeac2a7bcdac35a785060e8afbbbc35 100644
--- a/benchtests/bench-bzero.c
+++ b/benchtests/bench-bzero.c
@@ -73,7 +73,7 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 static void
 do_test (json_ctx_t *json_ctx, size_t align, size_t len)
 {
-  align &= 4095;
+  align &= getpagesize () - 1;
   if ((align + len) * sizeof (CHAR) > page_size)
     return;
 
diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
index 713588e8ce8835d382b9e8e781e351ad21b54418..acdc175a7843acade1defa0244b35f003bac8241 100644
--- a/benchtests/bench-memcpy-large.c
+++ b/benchtests/bench-memcpy-large.c
@@ -19,10 +19,9 @@
 #ifndef MEMCPY_RESULT
 # define MEMCPY_RESULT(dst, len) dst
 # define START_SIZE (64 * 1024)
-# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
+# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 # define TEST_MAIN
 # define TEST_NAME "memcpy"
-# define TIMEOUT (20 * 60)
 # include "bench-string.h"
 
 IMPL (memcpy, 1)
@@ -36,7 +35,7 @@ static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -58,13 +57,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
   size_t i, j;
   char *s1, *s2;
   size_t repeats;
-  align1 &= 4095;
-  if (align1 + len >= page_size)
-    return;
-
-  align2 &= 4095;
-  if (align2 + len >= page_size)
-    return;
+  align1 &= getpagesize () - 1;
+  align2 &= getpagesize () - 1;
 
   s1 = (char *) (buf1 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
index cc0145534de690652ce05e07608470ea3cf0efd1..c660288f96d5eaeddc5b83ef53a17efe1c572ae8 100644
--- a/benchtests/bench-memmove-large.c
+++ b/benchtests/bench-memmove-large.c
@@ -16,12 +16,10 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define BASE_PAGE_SIZE (1024 * 1024)
-#define START_SIZE (4 * 1024)
+#define START_SIZE (64 * 1024)
 #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #define TEST_MAIN
 #define TEST_NAME "memmove"
-#define TIMEOUT (20 * 60)
 #include "bench-string.h"
 #include "json-lib.h"
 
@@ -33,7 +31,7 @@ static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -54,13 +52,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
   size_t i, j;
   char *s1, *s2;
 
-  align1 &= 127;
-  if (align1 + len >= page_size)
-    return;
-
-  align2 &= 127;
-  if (align2 + len >= page_size)
-    return;
+  align1 &= getpagesize () - 1;
+  align2 &= getpagesize () - 1;
 
   s1 = (char *) (buf2 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
index 22ecea4da8a726c4751056a1cbabc0a505046625..3a5169940b94b7cd118ca2b461c15f5e53fc7b46 100644
--- a/benchtests/bench-memset-large.c
+++ b/benchtests/bench-memset-large.c
@@ -18,9 +18,8 @@
 
 #define TEST_MAIN
 #define TEST_NAME "memset"
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -35,7 +34,7 @@ static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 	     int c __attribute ((unused)), size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -53,10 +52,6 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 static void
 do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
@@ -64,10 +59,7 @@ do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);
diff --git a/benchtests/bench-memset.c b/benchtests/bench-memset.c
index cbeb6b942b3a0fcf712cefd7c1a75b97336fa677..5682f7298b7fbaaaebb607dead7ea005a3d4a790 100644
--- a/benchtests/bench-memset.c
+++ b/benchtests/bench-memset.c
@@ -61,7 +61,7 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 static void
 do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
 {
-  align &= 4095;
+  align &= getpagesize () - 1;
   if ((align + len) * sizeof (CHAR) > page_size)
     return;
  

Patch

diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
--- a/benchtests/bench-bzero-large.c
+++ b/benchtests/bench-bzero-large.c
@@ -22,9 +22,8 @@ 
 #else
 # define TEST_NAME "bzero"
 #endif
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -52,7 +51,7 @@  IMPL (memset_zero, 0)
 static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -74,20 +73,13 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 static void
 do_test (json_ctx_t *json_ctx, size_t align, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);
diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
--- a/benchtests/bench-memcpy-large.c
+++ b/benchtests/bench-memcpy-large.c
@@ -19,10 +19,9 @@ 
 #ifndef MEMCPY_RESULT
 # define MEMCPY_RESULT(dst, len) dst
 # define START_SIZE (64 * 1024)
-# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
+# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 # define TEST_MAIN
 # define TEST_NAME "memcpy"
-# define TIMEOUT (20 * 60)
 # include "bench-string.h"
 
 IMPL (memcpy, 1)
@@ -36,7 +35,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -59,12 +58,7 @@  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
   char *s1, *s2;
   size_t repeats;
   align1 &= 4095;
-  if (align1 + len >= page_size)
-    return;
-
   align2 &= 4095;
-  if (align2 + len >= page_size)
-    return;
 
   s1 = (char *) (buf1 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
--- a/benchtests/bench-memmove-large.c
+++ b/benchtests/bench-memmove-large.c
@@ -16,12 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define BASE_PAGE_SIZE (1024 * 1024)
-#define START_SIZE (4 * 1024)
+#define START_SIZE (64 * 1024)
 #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #define TEST_MAIN
 #define TEST_NAME "memmove"
-#define TIMEOUT (20 * 60)
 #include "bench-string.h"
 #include "json-lib.h"
 
@@ -33,7 +31,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -54,13 +52,8 @@  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
   size_t i, j;
   char *s1, *s2;
 
-  align1 &= 127;
-  if (align1 + len >= page_size)
-    return;
-
-  align2 &= 127;
-  if (align2 + len >= page_size)
-    return;
+  align1 &= 4095;
+  align2 &= 4095;
 
   s1 = (char *) (buf2 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
--- a/benchtests/bench-memset-large.c
+++ b/benchtests/bench-memset-large.c
@@ -18,9 +18,8 @@ 
 
 #define TEST_MAIN
 #define TEST_NAME "memset"
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -35,7 +34,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 	     int c __attribute ((unused)), size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -53,10 +52,6 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 static void
 do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
@@ -64,10 +59,7 @@  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);