[1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)

Message ID 20200810204856.2111211-1-adhemerval.zanella@linaro.org
State Superseded
Headers
Series [1/3] stdlib: Use fixed buffer size for realpath (BZ #26241) |

Commit Message

Adhemerval Zanella Netto Aug. 10, 2020, 8:48 p.m. UTC
  It uses both a fixed internal buffer with PATH_MAX size to read and
copy the results of the readlink call.

Also, if PATH_MAX is not defined it uses a default value of 1024
as for other stdlib implementations.

The expected stack usage is about 8k on Linux where PATH_MAX is
define as 4096 (plus some internal function usage for local
variable).

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 stdlib/Makefile                               |   3 +-
 stdlib/canonicalize.c                         |  38 +++---
 stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
 support/support_set_small_thread_stack_size.c |  12 +-
 support/xthread.h                             |   2 +
 5 files changed, 138 insertions(+), 25 deletions(-)
 create mode 100644 stdlib/tst-canon-bz26341.c
  

Comments

Matt Turner Aug. 11, 2020, 12:32 a.m. UTC | #1
> [PATCH 1/3] stdlib: Use fixed buffer size for realpath (BZ #26241)

The correct bug number is #26341.
  
Xiaoming Ni Aug. 11, 2020, 3 a.m. UTC | #2
On 2020/8/11 4:48, Adhemerval Zanella wrote:
> It uses both a fixed internal buffer with PATH_MAX size to read and
> copy the results of the readlink call.
> 
> Also, if PATH_MAX is not defined it uses a default value of 1024
> as for other stdlib implementations.
> 
> The expected stack usage is about 8k on Linux where PATH_MAX is
> define as 4096 (plus some internal function usage for local
> variable).
> 
> Checked on x86_64-linux-gnu and i686-linux-gnu.
> ---
>   stdlib/Makefile                               |   3 +-
>   stdlib/canonicalize.c                         |  38 +++---
>   stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>   support/support_set_small_thread_stack_size.c |  12 +-
>   support/xthread.h                             |   2 +
>   5 files changed, 138 insertions(+), 25 deletions(-)
>   create mode 100644 stdlib/tst-canon-bz26341.c
> 
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index 4615f6dfe7..7093b8a584 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
>   		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>   		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>   		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
> -		   tst-setcontext9 tst-bz20544
> +		   tst-setcontext9 tst-bz20544 tst-canon-bz26341
>   
>   tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>   		   tst-tls-atexit tst-tls-atexit-nodelete
> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>   LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>   LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>   LDLIBS-test-on_exit-race = $(shared-thread-library)
> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>   
>   LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>   LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
> index cbd885a3c5..554ba221e4 100644
> --- a/stdlib/canonicalize.c
> +++ b/stdlib/canonicalize.c
> @@ -28,6 +28,14 @@
>   #include <eloop-threshold.h>
>   #include <shlib-compat.h>
>   
> +#ifndef PATH_MAX
> +# ifdef MAXPATHLEN
> +#  define PATH_MAX MAXPATHLEN
> +# else
> +#  define PATH_MAX 1024
> +# endif
> +#endif
> +
>   /* Return the canonical absolute name of file NAME.  A canonical name
>      does not contain any `.', `..' components nor any repeated path
>      separators ('/') or symlinks.  All path components must exist.  If
> @@ -42,9 +50,8 @@
>   char *
>   __realpath (const char *name, char *resolved)
>   {
> -  char *rpath, *dest, *extra_buf = NULL;
> +  char *rpath, *dest, extra_buf[PATH_MAX];
Why does the 4 KB stack space need to be occupied?  Even if there are no 
linked files ?
  
Adhemerval Zanella Netto Aug. 11, 2020, 2:57 p.m. UTC | #3
On 11/08/2020 00:00, Xiaoming Ni wrote:
> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>> It uses both a fixed internal buffer with PATH_MAX size to read and
>> copy the results of the readlink call.
>>
>> Also, if PATH_MAX is not defined it uses a default value of 1024
>> as for other stdlib implementations.
>>
>> The expected stack usage is about 8k on Linux where PATH_MAX is
>> define as 4096 (plus some internal function usage for local
>> variable).
>>
>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>> ---
>>   stdlib/Makefile                               |   3 +-
>>   stdlib/canonicalize.c                         |  38 +++---
>>   stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>>   support/support_set_small_thread_stack_size.c |  12 +-
>>   support/xthread.h                             |   2 +
>>   5 files changed, 138 insertions(+), 25 deletions(-)
>>   create mode 100644 stdlib/tst-canon-bz26341.c
>>
>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>> index 4615f6dfe7..7093b8a584 100644
>> --- a/stdlib/Makefile
>> +++ b/stdlib/Makefile
>> @@ -87,7 +87,7 @@ tests        := tst-strtol tst-strtod testmb testrand testsort testdiv   \
>>              tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>              tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>              tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>> -           tst-setcontext9 tst-bz20544
>> +           tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>     tests-internal    := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>              tst-tls-atexit tst-tls-atexit-nodelete
>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>   LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>   LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>   LDLIBS-test-on_exit-race = $(shared-thread-library)
>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>     LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>   LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>> index cbd885a3c5..554ba221e4 100644
>> --- a/stdlib/canonicalize.c
>> +++ b/stdlib/canonicalize.c
>> @@ -28,6 +28,14 @@
>>   #include <eloop-threshold.h>
>>   #include <shlib-compat.h>
>>   +#ifndef PATH_MAX
>> +# ifdef MAXPATHLEN
>> +#  define PATH_MAX MAXPATHLEN
>> +# else
>> +#  define PATH_MAX 1024
>> +# endif
>> +#endif
>> +
>>   /* Return the canonical absolute name of file NAME.  A canonical name
>>      does not contain any `.', `..' components nor any repeated path
>>      separators ('/') or symlinks.  All path components must exist.  If
>> @@ -42,9 +50,8 @@
>>   char *
>>   __realpath (const char *name, char *resolved)
>>   {
>> -  char *rpath, *dest, *extra_buf = NULL;
>> +  char *rpath, *dest, extra_buf[PATH_MAX];
> Why does the 4 KB stack space need to be occupied?  Even if there are no linked files ?

It does not, it is a simplification to avoid to decompose the function
and handle symlinks in a special case.  To avoid the stack allocation
for common case would need to either to use dynamic allocation or
adjust the function that once it founds a symlink, it calls another
function to handle the loop with a stack allocated provided buffer.  
I don't think this extra code complexity really pays off.
  
Xiaoming Ni Aug. 12, 2020, 1:38 a.m. UTC | #4
On 2020/8/11 22:57, Adhemerval Zanella wrote:
> 
> 
> On 11/08/2020 00:00, Xiaoming Ni wrote:
>> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>>> It uses both a fixed internal buffer with PATH_MAX size to read and
>>> copy the results of the readlink call.
>>>
>>> Also, if PATH_MAX is not defined it uses a default value of 1024
>>> as for other stdlib implementations.
>>>
>>> The expected stack usage is about 8k on Linux where PATH_MAX is
>>> define as 4096 (plus some internal function usage for local
>>> variable).
>>>
>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>> ---
>>>    stdlib/Makefile                               |   3 +-
>>>    stdlib/canonicalize.c                         |  38 +++---
>>>    stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>>>    support/support_set_small_thread_stack_size.c |  12 +-
>>>    support/xthread.h                             |   2 +
>>>    5 files changed, 138 insertions(+), 25 deletions(-)
>>>    create mode 100644 stdlib/tst-canon-bz26341.c
>>>
>>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>>> index 4615f6dfe7..7093b8a584 100644
>>> --- a/stdlib/Makefile
>>> +++ b/stdlib/Makefile
>>> @@ -87,7 +87,7 @@ tests        := tst-strtol tst-strtod testmb testrand testsort testdiv   \
>>>               tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>>               tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>>               tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>>> -           tst-setcontext9 tst-bz20544
>>> +           tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>>      tests-internal    := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>>               tst-tls-atexit tst-tls-atexit-nodelete
>>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>>    LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>>    LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>>    LDLIBS-test-on_exit-race = $(shared-thread-library)
>>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>>      LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>>    LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>>> index cbd885a3c5..554ba221e4 100644
>>> --- a/stdlib/canonicalize.c
>>> +++ b/stdlib/canonicalize.c
>>> @@ -28,6 +28,14 @@
>>>    #include <eloop-threshold.h>
>>>    #include <shlib-compat.h>
>>>    +#ifndef PATH_MAX
>>> +# ifdef MAXPATHLEN
>>> +#  define PATH_MAX MAXPATHLEN
>>> +# else
>>> +#  define PATH_MAX 1024
>>> +# endif
>>> +#endif
>>> +
>>>    /* Return the canonical absolute name of file NAME.  A canonical name
>>>       does not contain any `.', `..' components nor any repeated path
>>>       separators ('/') or symlinks.  All path components must exist.  If
>>> @@ -42,9 +50,8 @@
>>>    char *
>>>    __realpath (const char *name, char *resolved)
>>>    {
>>> -  char *rpath, *dest, *extra_buf = NULL;
>>> +  char *rpath, *dest, extra_buf[PATH_MAX];
>> Why does the 4 KB stack space need to be occupied?  Even if there are no linked files ?
> 
> It does not, it is a simplification to avoid to decompose the function
> and handle symlinks in a special case.  To avoid the stack allocation
> for common case would need to either to use dynamic allocation or
> adjust the function that once it founds a symlink, it calls another
> function to handle the loop with a stack allocated provided buffer.
> I don't think this extra code complexity really pays off.


Extract the symlinks processing as an independent function and move 
extra_buf and buf to the new independent function to avoid wasting 8 KB 
stack space when the realpath is used to process unlinked files.
Is this better?

Thanks
Xiaoming Ni
  
Adhemerval Zanella Netto Aug. 12, 2020, 11:04 p.m. UTC | #5
On 11/08/2020 22:38, Xiaoming Ni wrote:
> On 2020/8/11 22:57, Adhemerval Zanella wrote:
>>
>>
>> On 11/08/2020 00:00, Xiaoming Ni wrote:
>>> On 2020/8/11 4:48, Adhemerval Zanella wrote:
>>>> It uses both a fixed internal buffer with PATH_MAX size to read and
>>>> copy the results of the readlink call.
>>>>
>>>> Also, if PATH_MAX is not defined it uses a default value of 1024
>>>> as for other stdlib implementations.
>>>>
>>>> The expected stack usage is about 8k on Linux where PATH_MAX is
>>>> define as 4096 (plus some internal function usage for local
>>>> variable).
>>>>
>>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>>> ---
>>>>    stdlib/Makefile                               |   3 +-
>>>>    stdlib/canonicalize.c                         |  38 +++---
>>>>    stdlib/tst-canon-bz26341.c                    | 108 ++++++++++++++++++
>>>>    support/support_set_small_thread_stack_size.c |  12 +-
>>>>    support/xthread.h                             |   2 +
>>>>    5 files changed, 138 insertions(+), 25 deletions(-)
>>>>    create mode 100644 stdlib/tst-canon-bz26341.c
>>>>
>>>> diff --git a/stdlib/Makefile b/stdlib/Makefile
>>>> index 4615f6dfe7..7093b8a584 100644
>>>> --- a/stdlib/Makefile
>>>> +++ b/stdlib/Makefile
>>>> @@ -87,7 +87,7 @@ tests        := tst-strtol tst-strtod testmb testrand testsort testdiv   \
>>>>               tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>>>>               tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>>>>               tst-setcontext6 tst-setcontext7 tst-setcontext8 \
>>>> -           tst-setcontext9 tst-bz20544
>>>> +           tst-setcontext9 tst-bz20544 tst-canon-bz26341
>>>>      tests-internal    := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>>>>               tst-tls-atexit tst-tls-atexit-nodelete
>>>> @@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-thread-library)
>>>>    LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
>>>>    LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
>>>>    LDLIBS-test-on_exit-race = $(shared-thread-library)
>>>> +LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
>>>>      LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
>>>>    LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
>>>> diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
>>>> index cbd885a3c5..554ba221e4 100644
>>>> --- a/stdlib/canonicalize.c
>>>> +++ b/stdlib/canonicalize.c
>>>> @@ -28,6 +28,14 @@
>>>>    #include <eloop-threshold.h>
>>>>    #include <shlib-compat.h>
>>>>    +#ifndef PATH_MAX
>>>> +# ifdef MAXPATHLEN
>>>> +#  define PATH_MAX MAXPATHLEN
>>>> +# else
>>>> +#  define PATH_MAX 1024
>>>> +# endif
>>>> +#endif
>>>> +
>>>>    /* Return the canonical absolute name of file NAME.  A canonical name
>>>>       does not contain any `.', `..' components nor any repeated path
>>>>       separators ('/') or symlinks.  All path components must exist.  If
>>>> @@ -42,9 +50,8 @@
>>>>    char *
>>>>    __realpath (const char *name, char *resolved)
>>>>    {
>>>> -  char *rpath, *dest, *extra_buf = NULL;
>>>> +  char *rpath, *dest, extra_buf[PATH_MAX];
>>> Why does the 4 KB stack space need to be occupied?  Even if there are no linked files ?
>>
>> It does not, it is a simplification to avoid to decompose the function
>> and handle symlinks in a special case.  To avoid the stack allocation
>> for common case would need to either to use dynamic allocation or
>> adjust the function that once it founds a symlink, it calls another
>> function to handle the loop with a stack allocated provided buffer.
>> I don't think this extra code complexity really pays off.
> 
> 
> Extract the symlinks processing as an independent function and move extra_buf and buf to the new independent function to avoid wasting 8 KB stack space when the realpath is used to process unlinked files.
> Is this better?

Yes, my only reservation is the complexity and possible code duplication
to handle it.  I can't see no easy way to accomplish it without duplicate
the loop code (minus the 'extra_buf' alloca) and make the default loop
calling it with the stack allocated extra_buf (and I would like to avoid
this approach).

Another possibility which I think it would be better it to use a scratch
buffer and make some compromise with stack usage and heap allocation.
The default 1024 bytes of the scratch buffer should hit mostly of the
common calls (it is 1/4 of PATH_MAX), so malloc would be called only for
large paths (which should be uncommon).  We can also use a scratch buffer
for the readlink as well, since we might infer the required size from
the previous lstat call.

With something like below we can make the realpath uses a stack of about
~1024 and ~2048 if the path contains symbolic link:

---

diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index cbd885a3c5..dca160f523 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -25,9 +25,56 @@
 #include <errno.h>
 #include <stddef.h>
 
+#include <scratch_buffer.h>
 #include <eloop-threshold.h>
 #include <shlib-compat.h>
 
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+#  define PATH_MAX MAXPATHLEN
+# else
+#  define PATH_MAX 1024
+# endif
+#endif
+
+static bool
+realpath_readlink (const char *rpath, const char *end, size_t path_max,
+		   size_t st_size, struct scratch_buffer *extra_buf)
+{
+  bool r = false;
+
+  struct scratch_buffer buf;
+  scratch_buffer_init (&buf);
+  /* Add the terminating null byte.  */
+  if (!scratch_buffer_set_array_size (&buf, st_size + 1,  sizeof (char)))
+    return false;
+
+  ssize_t n = __readlink (rpath, buf.data, buf.length - 1);
+  if (n < 0)
+    goto out;
+  ((char *) buf.data)[n] = '\0';
+
+  size_t len = strlen (end);
+  if (path_max - n <= len)
+    {
+      __set_errno (ENAMETOOLONG);
+      goto out;
+    }
+
+  if (!scratch_buffer_set_array_size (extra_buf, n + len + 1, sizeof (char)))
+    goto out;
+
+  /* Careful here, end may be a pointer into extra_buf... */
+  memmove ((char *) extra_buf->data + n, end, len + 1);
+  memcpy (extra_buf->data, buf.data, n);
+
+  r = true;
+
+out:
+  scratch_buffer_free (&buf);
+  return r;
+}
+
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any `.', `..' components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -42,10 +89,13 @@
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, *extra_buf = NULL;
+  char *rpath, *dest;
   const char *start, *end, *rpath_limit;
-  long int path_max;
+  const size_t path_max = PATH_MAX;
   int num_links = 0;
+  struct scratch_buffer extra_buf;
+
+  scratch_buffer_init (&extra_buf);
 
   if (name == NULL)
     {
@@ -65,14 +115,6 @@ __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-#ifdef PATH_MAX
-  path_max = PATH_MAX;
-#else
-  path_max = __pathconf (name, _PC_PATH_MAX);
-  if (path_max <= 0)
-    path_max = 1024;
-#endif
-
   if (resolved == NULL)
     {
       rpath = malloc (path_max);
@@ -101,7 +143,6 @@ __realpath (const char *name, char *resolved)
   for (start = end = name; *start; start = end)
     {
       struct stat64 st;
-      int n;
 
       /* Skip sequence of multiple path-separators.  */
       while (*start == '/')
@@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved)
 
 	  if (S_ISLNK (st.st_mode))
 	    {
-	      char *buf = __alloca (path_max);
-	      size_t len;
-
 	      if (++num_links > __eloop_threshold ())
 		{
 		  __set_errno (ELOOP);
 		  goto error;
 		}
 
-	      n = __readlink (rpath, buf, path_max - 1);
-	      if (n < 0)
+	      if (! realpath_readlink (rpath, end, path_max, st.st_size,
+				       &extra_buf))
 		goto error;
-	      buf[n] = '\0';
-
-	      if (!extra_buf)
-		extra_buf = __alloca (path_max);
 
-	      len = strlen (end);
-	      if (path_max - n <= len)
-		{
-		  __set_errno (ENAMETOOLONG);
-		  goto error;
-		}
-
-	      /* Careful here, end may be a pointer into extra_buf... */
-	      memmove (&extra_buf[n], end, len + 1);
-	      name = end = memcpy (extra_buf, buf, n);
+	      name = end = extra_buf.data;
 
-	      if (buf[0] == '/')
+	      if (((char *)extra_buf.data)[0] == '/')
 		dest = rpath + 1;	/* It's an absolute symlink */
 	      else
 		/* Back up to previous component, ignore if at root already: */
@@ -209,6 +234,8 @@ __realpath (const char *name, char *resolved)
     --dest;
   *dest = '\0';
 
+  scratch_buffer_free (&extra_buf);
+
   assert (resolved == NULL || resolved == rpath);
   return rpath;
 
@@ -216,6 +243,7 @@ error:
   assert (resolved == NULL || resolved == rpath);
   if (resolved == NULL)
     free (rpath);
+  scratch_buffer_free (&extra_buf);
   return NULL;
 }
 libc_hidden_def (__realpath)
  
Adhemerval Zanella Netto Aug. 13, 2020, 8:29 p.m. UTC | #6
On 12/08/2020 20:04, Adhemerval Zanella wrote:
> 
> 
> On 11/08/2020 22:38, Xiaoming Ni wrote:

> With something like below we can make the realpath uses a stack of about
> ~1024 and ~2048 if the path contains symbolic link:
> 


> @@ -163,35 +204,19 @@ __realpath (const char *name, char *resolved)
>  
>  	  if (S_ISLNK (st.st_mode))
>  	    {
> -	      char *buf = __alloca (path_max);
> -	      size_t len;
> -
>  	      if (++num_links > __eloop_threshold ())
>  		{
>  		  __set_errno (ELOOP);
>  		  goto error;
>  		}
>  
> -	      n = __readlink (rpath, buf, path_max - 1);
> -	      if (n < 0)
> +	      if (! realpath_readlink (rpath, end, path_max, st.st_size,
> +				       &extra_buf))

Scratch that, unfortunately some Linux filesystems do not return the symlink
target file size with a lstat call (procfs and sysfs for instance).  So
we need to either issue multiple readlink with different increasing buffers
or assume PATH_MAX (as current algorithms does).

I will send a v3 of my patch to use a scratch buffer.  What I am not sure is
if the Linux optimization is worth if the idea is use a small stack as possible
for common cases since it trades some syscall by a higher stack usage.
  

Patch

diff --git a/stdlib/Makefile b/stdlib/Makefile
index 4615f6dfe7..7093b8a584 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -87,7 +87,7 @@  tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
 		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
 		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
-		   tst-setcontext9 tst-bz20544
+		   tst-setcontext9 tst-bz20544 tst-canon-bz26341
 
 tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
 		   tst-tls-atexit tst-tls-atexit-nodelete
@@ -102,6 +102,7 @@  LDLIBS-test-atexit-race = $(shared-thread-library)
 LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
 LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
 LDLIBS-test-on_exit-race = $(shared-thread-library)
+LDLIBS-tst-canon-bz26341 = $(shared-thread-library)
 
 LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
 LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)
diff --git a/stdlib/canonicalize.c b/stdlib/canonicalize.c
index cbd885a3c5..554ba221e4 100644
--- a/stdlib/canonicalize.c
+++ b/stdlib/canonicalize.c
@@ -28,6 +28,14 @@ 
 #include <eloop-threshold.h>
 #include <shlib-compat.h>
 
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+#  define PATH_MAX MAXPATHLEN
+# else
+#  define PATH_MAX 1024
+# endif
+#endif
+
 /* Return the canonical absolute name of file NAME.  A canonical name
    does not contain any `.', `..' components nor any repeated path
    separators ('/') or symlinks.  All path components must exist.  If
@@ -42,9 +50,8 @@ 
 char *
 __realpath (const char *name, char *resolved)
 {
-  char *rpath, *dest, *extra_buf = NULL;
+  char *rpath, *dest, extra_buf[PATH_MAX];
   const char *start, *end, *rpath_limit;
-  long int path_max;
   int num_links = 0;
 
   if (name == NULL)
@@ -65,27 +72,19 @@  __realpath (const char *name, char *resolved)
       return NULL;
     }
 
-#ifdef PATH_MAX
-  path_max = PATH_MAX;
-#else
-  path_max = __pathconf (name, _PC_PATH_MAX);
-  if (path_max <= 0)
-    path_max = 1024;
-#endif
-
   if (resolved == NULL)
     {
-      rpath = malloc (path_max);
+      rpath = malloc (PATH_MAX);
       if (rpath == NULL)
 	return NULL;
     }
   else
     rpath = resolved;
-  rpath_limit = rpath + path_max;
+  rpath_limit = rpath + PATH_MAX;
 
   if (name[0] != '/')
     {
-      if (!__getcwd (rpath, path_max))
+      if (!__getcwd (rpath, PATH_MAX))
 	{
 	  rpath[0] = '\0';
 	  goto error;
@@ -142,10 +141,10 @@  __realpath (const char *name, char *resolved)
 		  goto error;
 		}
 	      new_size = rpath_limit - rpath;
-	      if (end - start + 1 > path_max)
+	      if (end - start + 1 > PATH_MAX)
 		new_size += end - start + 1;
 	      else
-		new_size += path_max;
+		new_size += PATH_MAX;
 	      new_rpath = (char *) realloc (rpath, new_size);
 	      if (new_rpath == NULL)
 		goto error;
@@ -163,7 +162,7 @@  __realpath (const char *name, char *resolved)
 
 	  if (S_ISLNK (st.st_mode))
 	    {
-	      char *buf = __alloca (path_max);
+	      char buf[PATH_MAX];
 	      size_t len;
 
 	      if (++num_links > __eloop_threshold ())
@@ -172,16 +171,13 @@  __realpath (const char *name, char *resolved)
 		  goto error;
 		}
 
-	      n = __readlink (rpath, buf, path_max - 1);
+	      n = __readlink (rpath, buf, sizeof (buf) - 1);
 	      if (n < 0)
 		goto error;
 	      buf[n] = '\0';
 
-	      if (!extra_buf)
-		extra_buf = __alloca (path_max);
-
 	      len = strlen (end);
-	      if (path_max - n <= len)
+	      if (PATH_MAX - n <= len)
 		{
 		  __set_errno (ENAMETOOLONG);
 		  goto error;
diff --git a/stdlib/tst-canon-bz26341.c b/stdlib/tst-canon-bz26341.c
new file mode 100644
index 0000000000..63474bddaa
--- /dev/null
+++ b/stdlib/tst-canon-bz26341.c
@@ -0,0 +1,108 @@ 
+/* Check if realpath does not consume extra stack space based on symlink
+   existance in the path (BZ #26341)
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/xunistd.h>
+#include <support/xthread.h>
+
+static char *filename;
+static size_t filenamelen;
+static char *linkname;
+
+static int
+maxsymlinks (void)
+{
+#ifdef MAXSYMLINKS
+  return MAXSYMLINKS;
+#else
+  long int sysconf_symloop_max = sysconf (_SC_SYMLOOP_MAX);
+  return sysconf_symloop_max <= 0
+	 ? _POSIX_SYMLOOP_MAX
+	 : sysconf_symloop_max;
+#endif
+}
+
+#ifndef PATH_MAX
+# define PATH_MAX 1024
+#endif
+
+static void
+create_link (void)
+{
+  int fd = create_temp_file ("tst-canon-bz26341", &filename);
+  TEST_VERIFY_EXIT (fd != -1);
+  xclose (fd);
+
+  char *prevlink = filename;
+  int maxlinks = maxsymlinks ();
+  for (int i = 0; i < maxlinks; i++)
+    {
+      linkname = xasprintf ("%s%d", filename, i);
+      xsymlink (prevlink, linkname);
+      add_temp_file (linkname);
+      prevlink = linkname;
+    }
+
+  filenamelen = strlen (filename);
+}
+
+static void *
+do_realpath (void *arg)
+{
+  /* Old implementation of realpath allocates a PATH_MAX using alloca
+     for each symlink in the path, leading to MAXSYMLINKS times PATH_MAX
+     maximum stack usage.
+     This stack allocations tries fill the thread allocated stack minus
+     both the thread (plus some slack) and the realpath (plus some slack).
+     If realpath uses more than 2 * PATH_MAX plus some slack it will trigger
+     a stackoverflow.  */
+
+  const size_t realpath_usage = 2 * PATH_MAX + 1024;
+  const size_t thread_usage = 1 * PATH_MAX + 1024;
+  size_t stack_size = support_small_thread_stack_size ()
+		      - realpath_usage - thread_usage;
+  char stack[stack_size];
+  char *resolved = stack + stack_size - thread_usage + 1024;
+
+  char *p = realpath (linkname, resolved);
+  TEST_VERIFY (p != NULL);
+  TEST_COMPARE_BLOB (resolved, filenamelen, filename, filenamelen);
+
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  create_link ();
+
+  pthread_t th = xpthread_create (support_small_stack_thread_attribute (),
+				  do_realpath, NULL);
+  xpthread_join (th);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/support/support_set_small_thread_stack_size.c b/support/support_set_small_thread_stack_size.c
index 69d66e97db..74a0e38a72 100644
--- a/support/support_set_small_thread_stack_size.c
+++ b/support/support_set_small_thread_stack_size.c
@@ -20,8 +20,8 @@ 
 #include <pthread.h>
 #include <support/xthread.h>
 
-void
-support_set_small_thread_stack_size (pthread_attr_t *attr)
+size_t
+support_small_thread_stack_size (void)
 {
   /* Some architectures have too small values for PTHREAD_STACK_MIN
      which cannot be used for creating threads.  Ensure that the stack
@@ -31,5 +31,11 @@  support_set_small_thread_stack_size (pthread_attr_t *attr)
   if (stack_size < PTHREAD_STACK_MIN)
     stack_size = PTHREAD_STACK_MIN;
 #endif
-  xpthread_attr_setstacksize (attr, stack_size);
+  return stack_size;
+}
+
+void
+support_set_small_thread_stack_size (pthread_attr_t *attr)
+{
+  xpthread_attr_setstacksize (attr, support_small_thread_stack_size ());
 }
diff --git a/support/xthread.h b/support/xthread.h
index 05f8d4a7d9..6ba2f5a18b 100644
--- a/support/xthread.h
+++ b/support/xthread.h
@@ -78,6 +78,8 @@  void xpthread_attr_setguardsize (pthread_attr_t *attr,
 /* Set the stack size in ATTR to a small value, but still large enough
    to cover most internal glibc stack usage.  */
 void support_set_small_thread_stack_size (pthread_attr_t *attr);
+/* Return the stack size used on support_set_small_thread_stack_size.  */
+size_t support_small_thread_stack_size (void);
 
 /* Return a pointer to a thread attribute which requests a small
    stack.  The caller must not free this pointer.  */