[v4,1/3] posix: add (failing) test for REG_STARTEND

Message ID 1d5642ecb4bb477c9fd7e1ebaee868fe4ccbefc7.1683500149.git.nabijaczleweli@nabijaczleweli.xyz
State Superseded
Headers
Series [v4,1/3] posix: add (failing) test for REG_STARTEND |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Ahelenia Ziemiańska May 7, 2023, 10:56 p.m. UTC
  This test passes on NetBSD, the illumos gate, and musl
with https://www.openwall.com/lists/musl/2023/04/20/2;
it's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it currently fails with
  tst-reg-startend.c: ^a: a^@c: no match$
  tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a: abc: no match$
  tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a.c$: a^@c: no match$
  tst-reg-startend.c: ^a.c$: abc: no match$
  tst-reg-startend.c: ^a.*c$: a^@c: no match$
  tst-reg-startend.c: ^a.*c$: abc: no match$
  tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
  tst-reg-startend.c: ^a[^c]c$: abc: no match$
  tst-reg-startend.c: ^a..: a^@c: no match$
  tst-reg-startend.c: ^a..: abc: no match$
  tst-reg-startend.c: ..c: a^@c: no match$

The test may also be compiled stand-alone (-DSTANDALONE)
and on all platforms that have the interface
(hence the macro to initialise regmatch_ts,
 which start with pointer fields on the illumos gate),
for ease of testing and inclusion in other test suites.

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
Resending after a week; clean rebase.

 posix/Makefile           |   1 +
 posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 posix/tst-reg-startend.c
  

Comments

Adhemerval Zanella Netto May 29, 2023, 5:37 p.m. UTC | #1
On 07/05/23 19:56, наб via Libc-alpha wrote:
> This test passes on NetBSD, the illumos gate, and musl
> with https://www.openwall.com/lists/musl/2023/04/20/2;
> it's nothing revolutionary and the behaviour it tests
> is largely guaranteed by the 4.4BSD-Lite manual;
> nevertheless, it currently fails with
>   tst-reg-startend.c: ^a: a^@c: no match$
>   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a: abc: no match$
>   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a.c$: a^@c: no match$
>   tst-reg-startend.c: ^a.c$: abc: no match$
>   tst-reg-startend.c: ^a.*c$: a^@c: no match$
>   tst-reg-startend.c: ^a.*c$: abc: no match$
>   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
>   tst-reg-startend.c: ^a[^c]c$: abc: no match$
>   tst-reg-startend.c: ^a..: a^@c: no match$
>   tst-reg-startend.c: ^a..: abc: no match$
>   tst-reg-startend.c: ..c: a^@c: no match$
> 
> The test may also be compiled stand-alone (-DSTANDALONE)
> and on all platforms that have the interface
> (hence the macro to initialise regmatch_ts,
>  which start with pointer fields on the illumos gate),
> for ease of testing and inclusion in other test suites.

Tests that trigger new regressions should either be marked as XFAIL or,
in this case, be moved after the patch that actually fixes them.

> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
> ---
> Resending after a week; clean rebase.
> 
>  posix/Makefile           |   1 +
>  posix/tst-reg-startend.c | 124 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 125 insertions(+)
>  create mode 100644 posix/tst-reg-startend.c
> 
> diff --git a/posix/Makefile b/posix/Makefile
> index cc77e939ad..24aeb781ca 100644
> --- a/posix/Makefile
> +++ b/posix/Makefile
> @@ -295,6 +295,7 @@ tests := \
>    tst-posix_spawn-setsid \
>    tst-preadwrite \
>    tst-preadwrite64 \
> +  tst-reg-startend \
>    tst-regcomp-truncated \
>    tst-regex \
>    tst-regex2 \
> diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
> new file mode 100644
> index 0000000000..c3bfac0359
> --- /dev/null
> +++ b/posix/tst-reg-startend.c
> @@ -0,0 +1,124 @@
> +/* Permission to use, copy, modify, and/or distribute this software for any
> +   purpose with or without fee is hereby granted.
> +
> +   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> +   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> +   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */

I am not sure if we can accept such a license. It is not the one currently
used for newer submissions, including tests (LGPL 2.1).

> +
> +#include <assert.h>
> +#include <locale.h>
> +#include <string.h>
> +#include <regex.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +
> +
> +#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
> +#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
> +
> +static const regmatch_t bound = M(1, 4);
> +
> +static const char *const regex_ac[] =
> +  {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
> +static const char *const regex_aa[] =
> +  {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
> +static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
> +static const char *const data_aa[] = {"_a\0adef", "_abadef"};
> +static const regmatch_t results_ac[] =
> +  {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
> +static const regmatch_t results_aa[] =
> +  {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
> +static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
> +              sizeof(results_ac) / sizeof(*results_ac), "");
> +static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
> +              sizeof(results_aa) / sizeof(*results_aa), "");


Instead of the static_assert, why not put the input arguments and the
expected result in the same struct?

> +
> +
> +static bool
> +testbunch (const char *const *regexes, const char *const data[static 2],
> +           const regmatch_t *results)
> +{
> +#define BASEERR(data)                              \
> +  err = true,                                      \
> +    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
> +    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)


We have macros in support/check.h that already log and handle the required
boilerplate to report test issues.  Newer tests should use them.

> +
> +  bool err = false;
> +  for (; *regexes; ++regexes, ++results)
> +    {
> +      regex_t rgx;
> +      assert (!regcomp (&rgx, *regexes, 0));
> +
> +      for (size_t i = 0; i < 2; ++i)
> +        {
> +          regmatch_t match = bound;
> +          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
> +            BASEERR(data), fputs (": no match\n", stdout);
> +
> +          if (!MEQ(match, *results))
> +            BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                    (int)results->rm_so, (int)results->rm_eo,
> +                                    (int)match.rm_so, (int)match.rm_eo);
> +        }
> +
> +      regfree(&rgx);
> +    }
> +
> +  return err;
> +}
> +
> +
> +static const char *const mb_data[2] = {"_aaćdef", "_aćdef"};
> +static const bool mb_exp[] = {false, true};
> +
> +static bool
> +testmb (void)
> +{
> +  bool err = false;
> +  regex_t rgx;
> +  const char *const regexes[] = {"ać"};
> +  assert (!regcomp (&rgx, *regexes, 0));
> +
> +  for (size_t i = 0; i < 2; ++i)

We have the array_length macro to avoid putting array sizes everywhere (and it
works better if we want to extend the tests).

> +    {
> +      regmatch_t match = bound;
> +      if (regexec (&rgx, mb_data[i], 1, &match, REG_STARTEND) == mb_exp[i])
> +        BASEERR(mb_data), fprintf (stdout, ": %s match\n",
> +                                   mb_exp[i] ? "no" : "yes");
> +
> +      if (!MEQ(match, bound))
> +        BASEERR(mb_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                   (int)bound.rm_so, (int)bound.rm_eo,
> +                                   (int)match.rm_so, (int)match.rm_eo);
> +    }
> +
> +  regfree(&rgx);
> +  return err;
> +}
> +
> +
> +static int
> +do_test (int argc, char **argv)
> +{
> +  (void) argc, (void) argv;

Not really needed here.

> +  assert (setlocale (LC_ALL, "C.UTF-8"));
> +
> +  return testbunch (regex_ac, data_ac, results_ac) ||
> +         testbunch (regex_aa, data_aa, results_aa) ||
> +         testmb ();
> +}
> +
> +
> +#ifndef STANDALONE
> +#include "../test-skeleton.c"

Use #include <support/test-driver.c> instead.

> +#else
> +int
> +main(int argc, char **argv)
> +{
> +  return do_test(argc, argv);
> +}
> +#endif
  
Ahelenia Ziemiańska May 29, 2023, 8:10 p.m. UTC | #2
On Mon, May 29, 2023 at 02:37:39PM -0300, Adhemerval Zanella Netto wrote:
> On 07/05/23 19:56, наб via Libc-alpha wrote:
> > This test passes on NetBSD, the illumos gate, and musl
> > with https://www.openwall.com/lists/musl/2023/04/20/2;
> > it's nothing revolutionary and the behaviour it tests
> > is largely guaranteed by the 4.4BSD-Lite manual;
> > nevertheless, it currently fails with
> >   tst-reg-startend.c: ^a: a^@c: no match$
> >   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
> >   tst-reg-startend.c: ^a: abc: no match$
> >   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
> >   tst-reg-startend.c: ^a.c$: a^@c: no match$
> >   tst-reg-startend.c: ^a.c$: abc: no match$
> >   tst-reg-startend.c: ^a.*c$: a^@c: no match$
> >   tst-reg-startend.c: ^a.*c$: abc: no match$
> >   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
> >   tst-reg-startend.c: ^a[^c]c$: abc: no match$
> >   tst-reg-startend.c: ^a..: a^@c: no match$
> >   tst-reg-startend.c: ^a..: abc: no match$
> >   tst-reg-startend.c: ..c: a^@c: no match$
> > 
> > The test may also be compiled stand-alone (-DSTANDALONE)
> > and on all platforms that have the interface
> > (hence the macro to initialise regmatch_ts,
> >  which start with pointer fields on the illumos gate),
> > for ease of testing and inclusion in other test suites.
> Tests that should triggers newer regressions should be either marks as XFAIL,
> or in this case, move after the patch that actually fixes it. 
I've moved it to the end as 3/3 locally.

> > diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
> > new file mode 100644
> > index 0000000000..c3bfac0359
> > --- /dev/null
> > +++ b/posix/tst-reg-startend.c
> > @@ -0,0 +1,124 @@
> > +/* Permission to use, copy, modify, and/or distribute this software for any
> > +   purpose with or without fee is hereby granted.
> > +
> > +   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > +   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > +   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > +   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
> I am not sure if we can accept such license. It not the current one used for
> newer submission, including tests (LGPL 2.1).
There are tests and other glibc code under ISC-style licences already
but sure; I've slapped Unlicense text on this, which appears to be
preferred by the Foundation since I'm an EU citizen
(and the Unlicense is also already used in glibc).

> > +static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
> > +              sizeof(results_ac) / sizeof(*results_ac), "");
> > +static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
> > +              sizeof(results_aa) / sizeof(*results_aa), "");
> Instead of the static_assert, why not add the input arguments and the
> expect result on same struct?
Applied.

> > +static bool
> > +testbunch (const char *const *regexes, const char *const data[static 2],
> > +           const regmatch_t *results)
> > +{
> > +#define BASEERR(data)                              \
> > +  err = true,                                      \
> > +    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
> > +    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
> We have macros that already log and handle the required boilerplate to
> report tests issues on support/check.h.  Newer tests should use it.
I've used those for recording errors, but since the error logging
takes format strings, it doesn't allow arbitrary data to be logged.

> > +  for (size_t i = 0; i < 2; ++i)
> We have array_length macro to avoid putting array sizes everywhere (and they
> work better if we want to extend the tests).
Converted everything into 0-terminated arrays.

> > +static int
> > +do_test (int argc, char **argv)
> > +{
> > +  (void) argc, (void) argv;
> Not really need here.
And forbidden by the new wrapper.

> > +  assert (setlocale (LC_ALL, "C.UTF-8"));
> > +
> > +  return testbunch (regex_ac, data_ac, results_ac) ||
> > +         testbunch (regex_aa, data_aa, results_aa) ||
> > +         testmb ();
> > +}
> > +
> > +
> > +#ifndef STANDALONE
> > +#include "../test-skeleton.c"
> Use #include <support/test-driver.c> instead.
Applied.

v6 scissor-patch follows.

Best,
-- >8 --
Date: Fri, 21 Apr 2023 23:57:16 +0200
Subject: [PATCH v5 3/3] posix: add test for REG_STARTEND

This test passes on NetBSD, the illumos gate, musl with
https://www.openwall.com/lists/musl/2023/05/14/1,
and now glibc.
It's nothing revolutionary and the behaviour it tests
is largely guaranteed by the 4.4BSD-Lite manual;
nevertheless, it used to fail with
  tst-reg-startend.c: ^a: a^@c: no match$
  tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a: abc: no match$
  tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
  tst-reg-startend.c: ^a.c$: a^@c: no match$
  tst-reg-startend.c: ^a.c$: abc: no match$
  tst-reg-startend.c: ^a.*c$: a^@c: no match$
  tst-reg-startend.c: ^a.*c$: abc: no match$
  tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
  tst-reg-startend.c: ^a[^c]c$: abc: no match$
  tst-reg-startend.c: ^a..: a^@c: no match$
  tst-reg-startend.c: ^a..: abc: no match$
  tst-reg-startend.c: ..c: a^@c: no match$

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
---
 posix/Makefile           |   1 +
 posix/tst-reg-startend.c | 142 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100644 posix/tst-reg-startend.c

diff --git a/posix/Makefile b/posix/Makefile
index e19b74cd67..abc0ff1f60 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@ tests := \
   tst-posix_spawn-setsid \
   tst-preadwrite \
   tst-preadwrite64 \
+  tst-reg-startend \
   tst-regcomp-truncated \
   tst-regex \
   tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..854d430676
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,142 @@
+/* This is free and unencumbered software released into the public domain.
+
+   Anyone is free to copy, modify, publish, use, compile, sell, or
+   distribute this software, either in source code form or as a compiled
+   binary, for any purpose, commercial or non-commercial, and by any
+   means.
+
+   In jurisdictions that recognize copyright laws, the author or authors
+   of this software dedicate any and all copyright interest in the
+   software to the public domain. We make this dedication for the benefit
+   of the public at large and to the detriment of our heirs and
+   successors. We intend this dedication to be an overt act of
+   relinquishment in perpetuity of all present and future rights to this
+   software under copyright law.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.  */
+
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <support/check.h>
+
+
+static const regmatch_t bound = {1, 4};
+
+
+struct reg_res {
+  const char *regex;
+  regmatch_t result;
+};
+static const struct reg_res reg_res_ac[] = {
+  {"^a",       {1, 2}},
+  {"c$",       {3, 4}},
+  {"^a.c$",    {1, 4}},
+  {"^a.*c$",   {1, 4}},
+  {"^a[^c]c$", {1, 4}},
+  {"^a..",     {1, 4}},
+  {"..c",      {1, 4}},
+  {"[^z]c",    {2, 4}},
+  {}
+};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef", NULL};
+
+static const struct reg_res reg_res_aa[] = {
+  {"^",             {1, 1}},
+  {"^a",            {1, 2}},
+  {"a$",            {3, 4}},
+  {"^\\(a\\).\\1$", {1, 4}},
+  {"^a[^a]*" ,      {1, 3}},
+  {}
+};
+static const char *const data_aa[] = {"_a\0adef", "_abadef", NULL};
+
+
+static void
+testbunch (const struct reg_res *reg_reses, const char *const *const data)
+{
+#define BASEERR(data)                                      \
+  support_record_failure (),                               \
+    fprintf (stdout, __FILE__ ": %s: ", reg_reses->regex), \
+    fwrite (data + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+  for (; reg_reses->regex; ++reg_reses)
+    {
+      regex_t rgx;
+      assert (!regcomp (&rgx, reg_reses->regex, 0));
+
+      for (const char *const *dt = data; *dt; ++dt)
+        {
+          regmatch_t match = bound;
+          if (regexec (&rgx, *dt, 1, &match, REG_STARTEND))
+            BASEERR(dt), fputs (": no match\n", stdout);
+
+          if (memcmp(&match, &reg_reses->result, sizeof (regmatch_t)))
+            BASEERR(dt), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                  (int)reg_reses->result.rm_so,
+                                  (int)reg_reses->result.rm_eo,
+                                  (int)match.rm_so, (int)match.rm_eo);
+        }
+
+      regfree(&rgx);
+    }
+}
+
+
+struct mb_data_exp {
+  const char *data;
+  bool exp;
+};
+static const struct mb_data_exp mb_data_exp[] = {
+  {"_aaćdef", false},
+  {"_aćdef", true},
+  {}
+};
+
+static void
+testmb (void)
+{
+  regex_t rgx;
+  const struct reg_res reg_reses[] = {{"ać"}};
+  assert (!regcomp (&rgx, reg_reses->regex, 0));
+
+  for (const struct mb_data_exp *de = mb_data_exp; de->data; ++de)
+    {
+      regmatch_t match = bound;
+      if (regexec (&rgx, de->data, 1, &match, REG_STARTEND) == de->exp)
+        BASEERR(de->data), fprintf (stdout, ": %s match\n",
+                                    de->exp ? "no" : "yes");
+
+      if (memcmp(&match, &bound, sizeof (regmatch_t)))
+        BASEERR(de->data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                    (int)bound.rm_so, (int)bound.rm_eo,
+                                    (int)match.rm_so, (int)match.rm_eo);
+    }
+
+  regfree(&rgx);
+}
+
+
+static int
+do_test (void)
+{
+  assert (setlocale (LC_ALL, "C.UTF-8"));
+
+  testbunch (reg_res_ac, data_ac);
+  testbunch (reg_res_aa, data_aa);
+  testmb ();
+  return 0;
+}
+
+
+#include <support/test-driver.c>
  
Adhemerval Zanella Netto May 29, 2023, 8:23 p.m. UTC | #3
On 29/05/23 17:10, наб wrote:
> On Mon, May 29, 2023 at 02:37:39PM -0300, Adhemerval Zanella Netto wrote:
>> On 07/05/23 19:56, наб via Libc-alpha wrote:
>>> This test passes on NetBSD, the illumos gate, and musl
>>> with https://www.openwall.com/lists/musl/2023/04/20/2;
>>> it's nothing revolutionary and the behaviour it tests
>>> is largely guaranteed by the 4.4BSD-Lite manual;
>>> nevertheless, it currently fails with
>>>   tst-reg-startend.c: ^a: a^@c: no match$
>>>   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
>>>   tst-reg-startend.c: ^a: abc: no match$
>>>   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
>>>   tst-reg-startend.c: ^a.c$: a^@c: no match$
>>>   tst-reg-startend.c: ^a.c$: abc: no match$
>>>   tst-reg-startend.c: ^a.*c$: a^@c: no match$
>>>   tst-reg-startend.c: ^a.*c$: abc: no match$
>>>   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
>>>   tst-reg-startend.c: ^a[^c]c$: abc: no match$
>>>   tst-reg-startend.c: ^a..: a^@c: no match$
>>>   tst-reg-startend.c: ^a..: abc: no match$
>>>   tst-reg-startend.c: ..c: a^@c: no match$
>>>
>>> The test may also be compiled stand-alone (-DSTANDALONE)
>>> and on all platforms that have the interface
>>> (hence the macro to initialise regmatch_ts,
>>>  which start with pointer fields on the illumos gate),
>>> for ease of testing and inclusion in other test suites.
>> Tests that should triggers newer regressions should be either marks as XFAIL,
>> or in this case, move after the patch that actually fixes it. 
> I've moved it to the end as 3/3 locally.
> 
>>> diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
>>> new file mode 100644
>>> index 0000000000..c3bfac0359
>>> --- /dev/null
>>> +++ b/posix/tst-reg-startend.c
>>> @@ -0,0 +1,124 @@
>>> +/* Permission to use, copy, modify, and/or distribute this software for any
>>> +   purpose with or without fee is hereby granted.
>>> +
>>> +   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>>> +   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>>> +   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>>> +   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>>> +   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>>> +   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>>> +   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
>> I am not sure if we can accept such license. It not the current one used for
>> newer submission, including tests (LGPL 2.1).
> There are tests and other glibc code under ISC-style licences already
> but sure; I've slapped Unlicense text on this, which appears to be
> preferred by the Foundation since I'm an EU citizen
> (and the Unlicense is also already used in glibc).

Those are usually pretty old tests that were glued together from other projects
and added before we had a clear definition of how tests should be added.

At least from our current Contribution Checklist [1], new code should follow
LGPL 2.1; but other maintainers can correct me (I am not really an expert in
this area).

[1] https://sourceware.org/glibc/wiki/Contribution%20checklist#Copyright_and_license

> 
>>> +static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
>>> +              sizeof(results_ac) / sizeof(*results_ac), "");
>>> +static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
>>> +              sizeof(results_aa) / sizeof(*results_aa), "");
>> Instead of the static_assert, why not add the input arguments and the
>> expect result on same struct?
> Applied.
> 
>>> +static bool
>>> +testbunch (const char *const *regexes, const char *const data[static 2],
>>> +           const regmatch_t *results)
>>> +{
>>> +#define BASEERR(data)                              \
>>> +  err = true,                                      \
>>> +    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
>>> +    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
>> We have macros that already log and handle the required boilerplate to
>> report tests issues on support/check.h.  Newer tests should use it.
> I've used those for recording errors, but since the error logging
> takes format strings, it don't allow arbitrary data being logged.

Fair enough.

> 
>>> +  for (size_t i = 0; i < 2; ++i)
>> We have array_length macro to avoid putting array sizes everywhere (and they
>> work better if we want to extend the tests).
> Converted everything into 0-terminated arrays.
> 
>>> +static int
>>> +do_test (int argc, char **argv)
>>> +{
>>> +  (void) argc, (void) argv;
>> Not really need here.
> And forbidden by the new wrapper.
> 
>>> +  assert (setlocale (LC_ALL, "C.UTF-8"));
>>> +
>>> +  return testbunch (regex_ac, data_ac, results_ac) ||
>>> +         testbunch (regex_aa, data_aa, results_aa) ||
>>> +         testmb ();
>>> +}
>>> +
>>> +
>>> +#ifndef STANDALONE
>>> +#include "../test-skeleton.c"
>> Use #include <support/test-driver.c> instead.
> Applied.
> 
> v6 scissor-patch follows.

So the only thing I am not really sure about is whether we can accept tests that
do not follow LGPL 2.1.  Carlos, do you know if we have any blocker regarding it?

> 
> Best,
> -- >8 --
> Date: Fri, 21 Apr 2023 23:57:16 +0200
> Subject: [PATCH v5 3/3] posix: add test for REG_STARTEND
> 
> This test passes on NetBSD, the illumos gate, musl with
> https://www.openwall.com/lists/musl/2023/05/14/1,
> and now glibc.
> It's nothing revolutionary and the behaviour it tests
> is largely guaranteed by the 4.4BSD-Lite manual;
> nevertheless, it used to fail with
>   tst-reg-startend.c: ^a: a^@c: no match$
>   tst-reg-startend.c: ^a: a^@c: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a: abc: no match$
>   tst-reg-startend.c: ^a: abc: wanted {1, 2}, got {1, 4}$
>   tst-reg-startend.c: ^a.c$: a^@c: no match$
>   tst-reg-startend.c: ^a.c$: abc: no match$
>   tst-reg-startend.c: ^a.*c$: a^@c: no match$
>   tst-reg-startend.c: ^a.*c$: abc: no match$
>   tst-reg-startend.c: ^a[^c]c$: a^@c: no match$
>   tst-reg-startend.c: ^a[^c]c$: abc: no match$
>   tst-reg-startend.c: ^a..: a^@c: no match$
>   tst-reg-startend.c: ^a..: abc: no match$
>   tst-reg-startend.c: ..c: a^@c: no match$
> 
> Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
> ---
>  posix/Makefile           |   1 +
>  posix/tst-reg-startend.c | 142 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 143 insertions(+)
>  create mode 100644 posix/tst-reg-startend.c
> 
> diff --git a/posix/Makefile b/posix/Makefile
> index e19b74cd67..abc0ff1f60 100644
> --- a/posix/Makefile
> +++ b/posix/Makefile
> @@ -295,6 +295,7 @@ tests := \
>    tst-posix_spawn-setsid \
>    tst-preadwrite \
>    tst-preadwrite64 \
> +  tst-reg-startend \
>    tst-regcomp-truncated \
>    tst-regex \
>    tst-regex2 \
> diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
> new file mode 100644
> index 0000000000..854d430676
> --- /dev/null
> +++ b/posix/tst-reg-startend.c
> @@ -0,0 +1,142 @@
> +/* This is free and unencumbered software released into the public domain.
> +
> +   Anyone is free to copy, modify, publish, use, compile, sell, or
> +   distribute this software, either in source code form or as a compiled
> +   binary, for any purpose, commercial or non-commercial, and by any
> +   means.
> +
> +   In jurisdictions that recognize copyright laws, the author or authors
> +   of this software dedicate any and all copyright interest in the
> +   software to the public domain. We make this dedication for the benefit
> +   of the public at large and to the detriment of our heirs and
> +   successors. We intend this dedication to be an overt act of
> +   relinquishment in perpetuity of all present and future rights to this
> +   software under copyright law.
> +
> +   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> +   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> +   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> +   OTHER DEALINGS IN THE SOFTWARE.  */
> +
> +
> +#include <assert.h>
> +#include <locale.h>
> +#include <string.h>
> +#include <regex.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <support/check.h>
> +
> +
> +static const regmatch_t bound = {1, 4};
> +
> +
> +struct reg_res {
> +  const char *regex;
> +  regmatch_t result;
> +};
> +static const struct reg_res reg_res_ac[] = {
> +  {"^a",       {1, 2}},
> +  {"c$",       {3, 4}},
> +  {"^a.c$",    {1, 4}},
> +  {"^a.*c$",   {1, 4}},
> +  {"^a[^c]c$", {1, 4}},
> +  {"^a..",     {1, 4}},
> +  {"..c",      {1, 4}},
> +  {"[^z]c",    {2, 4}},
> +  {}
> +};
> +static const char *const data_ac[] = {"_a\0cdef", "_abcdef", NULL};
> +
> +static const struct reg_res reg_res_aa[] = {
> +  {"^",             {1, 1}},
> +  {"^a",            {1, 2}},
> +  {"a$",            {3, 4}},
> +  {"^\\(a\\).\\1$", {1, 4}},
> +  {"^a[^a]*" ,      {1, 3}},
> +  {}
> +};
> +static const char *const data_aa[] = {"_a\0adef", "_abadef", NULL};
> +
> +
> +static void
> +testbunch (const struct reg_res *reg_reses, const char *const *const data)
> +{
> +#define BASEERR(data)                                      \
> +  support_record_failure (),                               \
> +    fprintf (stdout, __FILE__ ": %s: ", reg_reses->regex), \
> +    fwrite (data + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
> +
> +  for (; reg_reses->regex; ++reg_reses)
> +    {
> +      regex_t rgx;
> +      assert (!regcomp (&rgx, reg_reses->regex, 0));
> +
> +      for (const char *const *dt = data; *dt; ++dt)
> +        {
> +          regmatch_t match = bound;
> +          if (regexec (&rgx, *dt, 1, &match, REG_STARTEND))
> +            BASEERR(dt), fputs (": no match\n", stdout);
> +
> +          if (memcmp(&match, &reg_reses->result, sizeof (regmatch_t)))
> +            BASEERR(dt), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                  (int)reg_reses->result.rm_so,
> +                                  (int)reg_reses->result.rm_eo,
> +                                  (int)match.rm_so, (int)match.rm_eo);
> +        }
> +
> +      regfree(&rgx);
> +    }
> +}
> +
> +
> +struct mb_data_exp {
> +  const char *data;
> +  bool exp;
> +};
> +static const struct mb_data_exp mb_data_exp[] = {
> +  {"_aaćdef", false},
> +  {"_aćdef", true},
> +  {}
> +};
> +
> +static void
> +testmb (void)
> +{
> +  regex_t rgx;
> +  const struct reg_res reg_reses[] = {{"ać"}};
> +  assert (!regcomp (&rgx, reg_reses->regex, 0));
> +
> +  for (const struct mb_data_exp *de = mb_data_exp; de->data; ++de)
> +    {
> +      regmatch_t match = bound;
> +      if (regexec (&rgx, de->data, 1, &match, REG_STARTEND) == de->exp)
> +        BASEERR(de->data), fprintf (stdout, ": %s match\n",
> +                                    de->exp ? "no" : "yes");
> +
> +      if (memcmp(&match, &bound, sizeof (regmatch_t)))
> +        BASEERR(de->data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
> +                                    (int)bound.rm_so, (int)bound.rm_eo,
> +                                    (int)match.rm_so, (int)match.rm_eo);
> +    }
> +
> +  regfree(&rgx);
> +}
> +
> +
> +static int
> +do_test (void)
> +{
> +  assert (setlocale (LC_ALL, "C.UTF-8"));
> +
> +  testbunch (reg_res_ac, data_ac);
> +  testbunch (reg_res_aa, data_aa);
> +  testmb ();
> +  return 0;
> +}
> +
> +
> +#include <support/test-driver.c>
  

Patch

diff --git a/posix/Makefile b/posix/Makefile
index cc77e939ad..24aeb781ca 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -295,6 +295,7 @@  tests := \
   tst-posix_spawn-setsid \
   tst-preadwrite \
   tst-preadwrite64 \
+  tst-reg-startend \
   tst-regcomp-truncated \
   tst-regex \
   tst-regex2 \
diff --git a/posix/tst-reg-startend.c b/posix/tst-reg-startend.c
new file mode 100644
index 0000000000..c3bfac0359
--- /dev/null
+++ b/posix/tst-reg-startend.c
@@ -0,0 +1,124 @@ 
+/* Permission to use, copy, modify, and/or distribute this software for any
+   purpose with or without fee is hereby granted.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+
+#define M(s, e) (regmatch_t) {.rm_so = s, .rm_eo = e}
+#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)
+
+static const regmatch_t bound = M(1, 4);
+
+static const char *const regex_ac[] =
+  {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
+static const char *const regex_aa[] =
+  {"^", "^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};
+static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
+static const char *const data_aa[] = {"_a\0adef", "_abadef"};
+static const regmatch_t results_ac[] =
+  {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
+static const regmatch_t results_aa[] =
+  {M(1, 1), M(1, 2), M(3, 4), M(1, 4), M(1, 3)};
+static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
+              sizeof(results_ac) / sizeof(*results_ac), "");
+static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
+              sizeof(results_aa) / sizeof(*results_aa), "");
+
+
+static bool
+testbunch (const char *const *regexes, const char *const data[static 2],
+           const regmatch_t *results)
+{
+#define BASEERR(data)                              \
+  err = true,                                      \
+    fprintf (stdout, __FILE__ ": %s: ", *regexes), \
+    fwrite (data[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stdout)
+
+  bool err = false;
+  for (; *regexes; ++regexes, ++results)
+    {
+      regex_t rgx;
+      assert (!regcomp (&rgx, *regexes, 0));
+
+      for (size_t i = 0; i < 2; ++i)
+        {
+          regmatch_t match = bound;
+          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
+            BASEERR(data), fputs (": no match\n", stdout);
+
+          if (!MEQ(match, *results))
+            BASEERR(data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                    (int)results->rm_so, (int)results->rm_eo,
+                                    (int)match.rm_so, (int)match.rm_eo);
+        }
+
+      regfree(&rgx);
+    }
+
+  return err;
+}
+
+
+static const char *const mb_data[2] = {"_aaćdef", "_aćdef"};
+static const bool mb_exp[] = {false, true};
+
+static bool
+testmb (void)
+{
+  bool err = false;
+  regex_t rgx;
+  const char *const regexes[] = {"ać"};
+  assert (!regcomp (&rgx, *regexes, 0));
+
+  for (size_t i = 0; i < 2; ++i)
+    {
+      regmatch_t match = bound;
+      if (regexec (&rgx, mb_data[i], 1, &match, REG_STARTEND) == mb_exp[i])
+        BASEERR(mb_data), fprintf (stdout, ": %s match\n",
+                                   mb_exp[i] ? "no" : "yes");
+
+      if (!MEQ(match, bound))
+        BASEERR(mb_data), fprintf (stdout, ": wanted {%d, %d}, got {%d, %d}\n",
+                                   (int)bound.rm_so, (int)bound.rm_eo,
+                                   (int)match.rm_so, (int)match.rm_eo);
+    }
+
+  regfree(&rgx);
+  return err;
+}
+
+
+static int
+do_test (int argc, char **argv)
+{
+  (void) argc, (void) argv;
+  assert (setlocale (LC_ALL, "C.UTF-8"));
+
+  return testbunch (regex_ac, data_ac, results_ac) ||
+         testbunch (regex_aa, data_aa, results_aa) ||
+         testmb ();
+}
+
+
+#ifndef STANDALONE
+#include "../test-skeleton.c"
+#else
+int
+main(int argc, char **argv)
+{
+  return do_test(argc, argv);
+}
+#endif