glob: Avoid copying the d_name field of struct dirent [BZ #19779]

Message ID 607be14d-f158-5603-81e1-6d9f3e4bcb58@cs.ucla.edu
State New, archived
Headers

Commit Message

Paul Eggert May 4, 2016, 5:39 p.m. UTC
  I merged those changes into gnulib's glob.c here:

http://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=36cc6c33ade715a844662cefd256f8f8fdd8a05d

Gnulib still differs from glibc glob.c in that (a) Gnulib uses spaces 
and not tabs, (b) Gnulib's comments quote 'like this' and not `like 
this', and (c) Gnulib contains several other portability changes briefly 
described below and detailed in the attached patch. The first bullet is 
probably worth a more-detailed look in glibc; the other stuff doesn't 
appear to be urgent.

* glibc glob.c has an assignment 'name = alloca_account (buflen, 
alloca_used)' that is not guarded by __libc_use_alloca.

* glibc glob.c is missing some Gnulib-only portability hacks, protected 
inside "#ifndef _LIBC", "#ifndef  __attribute_noinline__", etc.

* glibc glob.c has some now-unnecessary "#if _LIBC"s that can be omitted 
to simplify the code.

* glibc glob.c has some unnecessary casts, e.g., the cast in 'char *new 
= (char *) malloc (dirlen + 1 + eltlen);'.

* A glibc glob.c comment says that something is "illegal" where it is 
merely invalid. (This is a pet peeve of RMS's.)

* glibc glob.c assumes C99 statements-before-decls in a couple of places.

* glibc glob.c has a complicated cast '*(const char *const * const) a' 
that confuses some compilers.

* There is some complicated stuff in the link_exists2_p area that 
doesn't port well to platforms with fstatat.
  

Patch

--- -	2016-05-04 10:25:26.318937701 -0700
+++ glob.c	2016-05-04 10:13:21.859892515 -0700
@@ -15,7 +15,10 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifdef  HAVE_CONFIG_H
+#ifndef _LIBC
+/* Don't use __attribute__ __nonnull__ in this compilation unit.  Otherwise gcc
+   optimizes away the pattern == NULL || pglob == NULL tests below.  */
+# define _GL_ARG_NONNULL(params)
 # include <config.h>
 #endif
 
@@ -34,22 +37,19 @@ 
 
 #include <stdio.h>              /* Needed on stupid SunOS for assert.  */
 
-#if !defined _LIBC || !defined GLOB_ONLY_P
-#if defined HAVE_UNISTD_H || defined _LIBC
-# include <unistd.h>
-# ifndef POSIX
-#  ifdef _POSIX_VERSION
-#   define POSIX
-#  endif
-# endif
+#ifndef GLOB_ONLY_P
+
+#include <unistd.h>
+#if !defined POSIX && defined _POSIX_VERSION
+# define POSIX
 #endif
 
-#include <pwd.h>
+#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
+# define WINDOWS32
+#endif
 
-#if defined HAVE_STDINT_H || defined _LIBC
-# include <stdint.h>
-#elif !defined UINTPTR_MAX
-# define UINTPTR_MAX (~((size_t) 0))
+#ifndef WINDOWS32
+# include <pwd.h>
 #endif
 
 #include <errno.h>
@@ -57,24 +57,7 @@ 
 # define __set_errno(val) errno = (val)
 #endif
 
-#if defined HAVE_DIRENT_H || defined __GNU_LIBRARY__
-# include <dirent.h>
-#else
-# define dirent direct
-# ifdef HAVE_SYS_NDIR_H
-#  include <sys/ndir.h>
-# endif
-# ifdef HAVE_SYS_DIR_H
-#  include <sys/dir.h>
-# endif
-# ifdef HAVE_NDIR_H
-#  include <ndir.h>
-# endif
-# ifdef HAVE_VMSDIR_H
-#  include "vmsdir.h"
-# endif /* HAVE_VMSDIR_H */
-#endif
-
+#include <dirent.h>
 #include <stdlib.h>
 #include <string.h>
 #include <alloca.h>
@@ -93,17 +76,14 @@ 
 # endif
 # define struct_stat64          struct stat64
 #else /* !_LIBC */
-# include "getlogin_r.h"
-# include "mempcpy.h"
-# include "stat-macros.h"
-# include "strdup.h"
+# define __getlogin_r(buf, len) getlogin_r (buf, len)
 # define __stat64(fname, buf)   stat (fname, buf)
+# define __fxstatat64(_, d, f, st, flag) fstatat (d, f, st, flag)
 # define struct_stat64          struct stat
-# define __stat(fname, buf)     stat (fname, buf)
 # define __alloca               alloca
 # define __readdir              readdir
-# define __readdir64            readdir64
 # define __glob_pattern_p       glob_pattern_p
+# define COMPILE_GLOB64
 #endif /* _LIBC */
 
 #include <fnmatch.h>
@@ -186,7 +166,7 @@ 
     D_INO_TO_RESULT (source)               \
   }
 
-#endif /* !defined _LIBC || !defined GLOB_ONLY_P */
+#endif /* !defined GLOB_ONLY_P */
 
 /* Call gl_readdir on STREAM.  This macro can be overridden to reduce
    type safety if an old interface version needs to be supported.  */
@@ -230,13 +210,49 @@ 
 # define attribute_hidden
 #endif
 
+#ifndef __attribute_noinline__
+# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 1)
+#  define __attribute_noinline__ /* Ignore */
+#else
+#  define __attribute_noinline__ __attribute__ ((__noinline__))
+# endif
+#endif
+
+#if ! defined __builtin_expect && __GNUC__ < 3
+# define __builtin_expect(expr, expected) (expr)
+#endif
+
+#ifndef __glibc_unlikely
+# define __glibc_unlikely(expr) __builtin_expect (expr, 0)
+#endif
+
+#ifndef _LIBC
+/* The results of opendir() in this file are not used with dirfd and fchdir,
+   and we do not leak fds to any single-threaded code that could use stdio,
+   therefore save some unnecessary recursion in fchdir.c and opendir_safer.c.
+   FIXME - if the kernel ever adds support for multi-thread safety for
+   avoiding standard fds, then we should use opendir_safer.  */
+# ifdef GNULIB_defined_opendir
+#  undef opendir
+# endif
+# ifdef GNULIB_defined_closedir
+#  undef closedir
+# endif
+
+/* Just use malloc.  */
+# define __libc_use_alloca(n) false
+# define alloca_account(len, avar) ((void) (len), (void) (avar), (void *) 0)
+# define extend_alloca_account(buf, len, newlen, avar) \
+    ((void) (buf), (void) (len), (void) (newlen), (void) (avar), (void *) 0)
+#endif
+
 static int glob_in_dir (const char *pattern, const char *directory,
                         int flags, int (*errfunc) (const char *, int),
                         glob_t *pglob, size_t alloca_used);
 extern int __glob_pattern_type (const char *pattern, int quote)
     attribute_hidden;
 
-#if !defined _LIBC || !defined GLOB_ONLY_P
+#ifndef GLOB_ONLY_P
 static int prefix_array (const char *prefix, char **array, size_t n) __THROWNL;
 static int collated_compare (const void *, const void *) __THROWNL;
 
@@ -265,7 +281,7 @@ 
   return *cp != '\0' ? cp : NULL;
 }
 
-#endif /* !defined _LIBC || !defined GLOB_ONLY_P */
+#endif /* !defined GLOB_ONLY_P */
 
 /* Do glob searching for PATTERN, placing results in PGLOB.
    The bits defined above may be set in FLAGS.
@@ -292,9 +308,7 @@ 
   int malloc_dirname = 0;
   glob_t dirs;
   int retval = 0;
-#ifdef _LIBC
   size_t alloca_used = 0;
-#endif
 
   if (pattern == NULL || pglob == NULL || (flags & ~__GLOB_FLAGS) != 0)
     {
@@ -350,14 +364,12 @@ 
           size_t rest_len;
           char *onealt;
           size_t pattern_len = strlen (pattern) - 1;
-#ifdef _LIBC
           int alloca_onealt = __libc_use_alloca (alloca_used + pattern_len);
           if (alloca_onealt)
             onealt = alloca_account (pattern_len, alloca_used);
           else
-#endif
             {
-              onealt = (char *) malloc (pattern_len);
+              onealt = malloc (pattern_len);
               if (onealt == NULL)
                 {
                   if (!(flags & GLOB_APPEND))
@@ -377,11 +389,9 @@ 
           next = next_brace_sub (begin + 1, flags);
           if (next == NULL)
             {
-              /* It is an illegal expression.  */
+              /* It is an invalid expression.  */
             illegal_brace:
-#ifdef _LIBC
               if (__glibc_unlikely (!alloca_onealt))
-#endif
                 free (onealt);
               return glob (pattern, flags & ~GLOB_BRACE, errfunc, pglob);
             }
@@ -429,9 +439,7 @@ 
               /* If we got an error, return it.  */
               if (result && result != GLOB_NOMATCH)
                 {
-#ifdef _LIBC
                   if (__glibc_unlikely (!alloca_onealt))
-#endif
                     free (onealt);
                   if (!(flags & GLOB_APPEND))
                     {
@@ -450,9 +458,7 @@ 
               assert (next != NULL);
             }
 
-#ifdef _LIBC
           if (__glibc_unlikely (!alloca_onealt))
-#endif
             free (onealt);
 
           if (pglob->gl_pathc != firstc)
@@ -475,8 +481,7 @@ 
           if (pglob->gl_offs >= ~((size_t) 0) / sizeof (char *))
             return GLOB_NOSPACE;
 
-          pglob->gl_pathv = (char **) malloc ((pglob->gl_offs + 1)
-                                              * sizeof (char *));
+          pglob->gl_pathv = malloc ((pglob->gl_offs + 1) * sizeof (char *));
           if (pglob->gl_pathv == NULL)
             return GLOB_NOSPACE;
 
@@ -549,7 +554,7 @@ 
           char *drive_spec;
 
           ++dirlen;
-          drive_spec = (char *) __alloca (dirlen + 1);
+          drive_spec = __alloca (dirlen + 1);
           *((char *) mempcpy (drive_spec, pattern, dirlen)) = '\0';
           /* For now, disallow wildcards in the drive spec, to
              prevent infinite recursion in glob.  */
@@ -560,11 +565,9 @@ 
              from "d:/", since "d:" and "d:/" are not the same.*/
         }
 #endif
-#ifdef _LIBC
       if (__libc_use_alloca (alloca_used + dirlen + 1))
         newp = alloca_account (dirlen + 1, alloca_used);
       else
-#endif
         {
           newp = malloc (dirlen + 1);
           if (newp == NULL)
@@ -585,6 +588,7 @@ 
         /* "pattern/".  Expand "pattern", appending slashes.  */
         {
           int orig_flags = flags;
+          int val;
           if (!(flags & GLOB_NOESCAPE) && dirname[dirlen - 1] == '\\')
             {
               /* "pattern\\/".  Remove the final backslash if it hasn't
@@ -598,7 +602,7 @@ 
                   flags &= ~(GLOB_NOCHECK | GLOB_NOMAGIC);
                 }
             }
-          int val = glob (dirname, flags | GLOB_MARK, errfunc, pglob);
+          val = glob (dirname, flags | GLOB_MARK, errfunc, pglob);
           if (val == 0)
             pglob->gl_flags = ((pglob->gl_flags & ~GLOB_MARK)
                                | (flags & GLOB_MARK));
@@ -615,7 +619,6 @@ 
         }
     }
 
-#ifndef VMS
   if ((flags & (GLOB_TILDE|GLOB_TILDE_CHECK)) && dirname[0] == '~')
     {
       if (dirname[1] == '\0' || dirname[1] == '/'
@@ -630,20 +633,50 @@ 
             home_dir = "SYS:";
 # else
 #  ifdef WINDOWS32
+          /* Windows NT defines HOMEDRIVE and HOMEPATH.  But give preference
+             to HOME, because the user can change HOME.  */
           if (home_dir == NULL || home_dir[0] == '\0')
-            home_dir = "c:/users/default"; /* poor default */
+            {
+              const char *home_drive = getenv ("HOMEDRIVE");
+              const char *home_path = getenv ("HOMEPATH");
+
+              if (home_drive != NULL && home_path != NULL)
+                {
+                  size_t home_drive_len = strlen (home_drive);
+                  size_t home_path_len = strlen (home_path);
+                  char *mem = alloca (home_drive_len + home_path_len + 1);
+
+                  memcpy (mem, home_drive, home_drive_len);
+                  memcpy (mem + home_drive_len, home_path, home_path_len + 1);
+                  home_dir = mem;
+                }
+              else
+                home_dir = "c:/users/default"; /* poor default */
+            }
 #  else
           if (home_dir == NULL || home_dir[0] == '\0')
             {
               int success;
               char *name;
+              int malloc_name = 0;
               size_t buflen = GET_LOGIN_NAME_MAX () + 1;
 
               if (buflen == 0)
                 /* 'sysconf' does not support _SC_LOGIN_NAME_MAX.  Try
                    a moderate value.  */
                 buflen = 20;
-              name = alloca_account (buflen, alloca_used);
+              if (__libc_use_alloca (alloca_used + buflen))
+                name = alloca_account (buflen, alloca_used);
+              else
+                {
+                  name = malloc (buflen);
+                  if (name == NULL)
+                    {
+                      retval = GLOB_NOSPACE;
+                      goto out;
+                    }
+                  malloc_name = 1;
+                }
 
               success = __getlogin_r (name, buflen) == 0;
               if (success)
@@ -711,6 +744,8 @@ 
 #   else
                   p = getpwnam (name);
 #   endif
+                  if (__glibc_unlikely (malloc_name))
+                    free (name);
                   if (p != NULL)
                     {
                       if (!malloc_pwtmpbuf)
@@ -974,7 +1009,6 @@ 
         }
 # endif /* Not Amiga && not WINDOWS32.  */
     }
-#endif  /* Not VMS.  */
 
   /* Now test whether we looked for "~" or "~NAME".  In this case we
      can give the answer now.  */
@@ -1003,9 +1037,8 @@ 
               return GLOB_NOSPACE;
             }
 
-          new_gl_pathv
-            = (char **) realloc (pglob->gl_pathv,
-                                 (newcount + 1 + 1) * sizeof (char *));
+          new_gl_pathv = realloc (pglob->gl_pathv,
+                                  (newcount + 1 + 1) * sizeof (char *));
           if (new_gl_pathv == NULL)
             goto nospace;
           pglob->gl_pathv = new_gl_pathv;
@@ -1091,7 +1124,7 @@ 
         {
           size_t old_pathc;
 
-#ifdef  SHELL
+#ifdef SHELL
           {
             /* Make globbing interruptible in the bash shell. */
             extern int interrupt_state;
@@ -1155,14 +1188,13 @@ 
                   return GLOB_NOSPACE;
                 }
 
-              new_gl_pathv = (char **) realloc (pglob->gl_pathv,
-                                                (newcount + 2)
-                                                * sizeof (char *));
+              new_gl_pathv = realloc (pglob->gl_pathv,
+                                      (newcount + 2) * sizeof (char *));
               if (new_gl_pathv == NULL)
                 goto nospace2;
               pglob->gl_pathv = new_gl_pathv;
 
-              pglob->gl_pathv[newcount] = __strdup (pattern);
+              pglob->gl_pathv[newcount] = strdup (pattern);
               if (pglob->gl_pathv[newcount] == NULL)
                 {
                   globfree (&dirs);
@@ -1289,7 +1321,7 @@ 
 #endif
 
 
-#if !defined _LIBC || !defined GLOB_ONLY_P
+#ifndef GLOB_ONLY_P
 
 /* Free storage allocated in PGLOB by a previous 'glob' call.  */
 void
@@ -1313,8 +1345,8 @@ 
 static int
 collated_compare (const void *a, const void *b)
 {
-  const char *const s1 = *(const char *const * const) a;
-  const char *const s2 = *(const char *const * const) b;
+  char *const *ps1 = a; char *s1 = *ps1;
+  char *const *ps2 = b; char *s2 = *ps2;
 
   if (s1 == s2)
     return 0;
@@ -1364,7 +1396,7 @@ 
   for (i = 0; i < n; ++i)
     {
       size_t eltlen = strlen (array[i]) + 1;
-      char *new = (char *) malloc (dirlen + 1 + eltlen);
+      char *new = malloc (dirlen + 1 + eltlen);
       if (new == NULL)
         {
           while (i > 0)
@@ -1386,7 +1418,7 @@ 
 
 
 /* We must not compile this function twice.  */
-#if !defined _LIBC || !defined NO_GLOB_PATTERN_P
+#ifndef NO_GLOB_PATTERN_P
 int
 __glob_pattern_type (const char *pattern, int quote)
 {
@@ -1434,50 +1466,55 @@ 
 # endif
 #endif
 
-#endif /* !GLOB_ONLY_P */
-
 
 /* We put this in a separate function mainly to allow the memory
    allocated with alloca to be recycled.  */
-#if !defined _LIBC || !defined GLOB_ONLY_P
 static int
 __attribute_noinline__
 link_exists2_p (const char *dir, size_t dirlen, const char *fname,
                glob_t *pglob
-# ifndef _LIBC
+# if !defined _LIBC && !HAVE_FSTATAT
                 , int flags
 # endif
                 )
 {
   size_t fnamelen = strlen (fname);
-  char *fullname = (char *) __alloca (dirlen + 1 + fnamelen + 1);
+  char *fullname = __alloca (dirlen + 1 + fnamelen + 1);
   struct stat st;
-# ifndef _LIBC
-  struct_stat64 st64;
-# endif
 
   mempcpy (mempcpy (mempcpy (fullname, dir, dirlen), "/", 1),
            fname, fnamelen + 1);
 
-# ifdef _LIBC
-  return (*pglob->gl_stat) (fullname, &st) == 0;
-# else
-  return ((__builtin_expect (flags & GLOB_ALTDIRFUNC, 0)
-           ? (*pglob->gl_stat) (fullname, &st)
-           : __stat64 (fullname, &st64)) == 0);
+# if !defined _LIBC && !HAVE_FSTATAT
+  if (__builtin_expect ((flags & GLOB_ALTDIRFUNC) == 0, 1))
+    {
+      struct_stat64 st64;
+      return __stat64 (fullname, &st64) == 0;
+    }
 # endif
+  return (*pglob->gl_stat) (fullname, &st) == 0;
 }
-# ifdef _LIBC
-#  define link_exists_p(dfd, dirname, dirnamelen, fname, pglob, flags) \
-  (__builtin_expect (flags & GLOB_ALTDIRFUNC, 0)                              \
-   ? link_exists2_p (dirname, dirnamelen, fname, pglob)                       \
-   : ({ struct stat64 st64;                                                   \
-       __fxstatat64 (_STAT_VER, dfd, fname, &st64, 0) == 0; }))
+
+/* Return true if DIR/FNAME exists.  */
+static int
+link_exists_p (int dfd, const char *dir, size_t dirlen, const char *fname,
+               glob_t *pglob, int flags)
+{
+# if defined _LIBC || HAVE_FSTATAT
+  if (__builtin_expect (flags & GLOB_ALTDIRFUNC, 0))
+    return link_exists2_p (dir, dirlen, fname, pglob);
+  else
+    {
+      /* dfd cannot be -1 here, because dirfd never returns -1 on
+         glibc, or on hosts that have fstatat.  */
+      struct_stat64 st64;
+      return __fxstatat64 (_STAT_VER, dfd, fname, &st64, 0) == 0;
+    }
 # else
-#  define link_exists_p(dfd, dirname, dirnamelen, fname, pglob, flags) \
-  link_exists2_p (dirname, dirnamelen, fname, pglob, flags)
+  return link_exists2_p (dir, dirlen, fname, pglob, flags);
 # endif
-#endif
+}
+#endif /* !defined GLOB_ONLY_P */
 
 
 /* Like 'glob', but PATTERN is a final pathname component,
@@ -1505,6 +1542,7 @@ 
   size_t cur = 0;
   int meta;
   int save;
+  int result;
 
   alloca_used += sizeof (init_names);
 
@@ -1568,10 +1606,8 @@ 
         }
       else
         {
-#ifdef _LIBC
           int dfd = (__builtin_expect (flags & GLOB_ALTDIRFUNC, 0)
                      ? -1 : dirfd ((DIR *) stream));
-#endif
           int fnm_flags = ((!(flags & GLOB_PERIOD) ? FNM_PERIOD : 0)
                            | ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
 #if defined _AMIGA || defined VMS
@@ -1646,15 +1682,16 @@ 
     {
       size_t len = strlen (pattern);
       nfound = 1;
-      names->name[cur] = (char *) malloc (len + 1);
+      names->name[cur] = malloc (len + 1);
       if (names->name[cur] == NULL)
         goto memory_error;
       *((char *) mempcpy (names->name[cur++], pattern, len)) = '\0';
     }
 
-  int result = GLOB_NOMATCH;
+  result = GLOB_NOMATCH;
   if (nfound != 0)
     {
+      char **new_gl_pathv;
       result = 0;
 
       if (pglob->gl_pathc > UINTPTR_MAX - pglob->gl_offs
@@ -1664,18 +1701,19 @@ 
               > UINTPTR_MAX / sizeof (char *)))
         goto memory_error;
 
-      char **new_gl_pathv;
       new_gl_pathv
-        = (char **) realloc (pglob->gl_pathv,
-                             (pglob->gl_pathc + pglob->gl_offs + nfound + 1)
-                             * sizeof (char *));
+        = realloc (pglob->gl_pathv,
+                   (pglob->gl_pathc + pglob->gl_offs + nfound + 1)
+                   * sizeof (char *));
+
       if (new_gl_pathv == NULL)
         {
         memory_error:
           while (1)
             {
               struct globnames *old = names;
-              for (size_t i = 0; i < cur; ++i)
+              size_t i;
+              for (i = 0; i < cur; ++i)
                 free (names->name[i]);
               names = names->next;
               /* NB: we will not leak memory here if we exit without
@@ -1700,7 +1738,8 @@ 
           while (1)
             {
               struct globnames *old = names;
-              for (size_t i = 0; i < cur; ++i)
+              size_t i;
+              for (i = 0; i < cur; ++i)
                 new_gl_pathv[pglob->gl_offs + pglob->gl_pathc++]
                   = names->name[i];
               names = names->next;