[04/18] posix: Allow glob to match dangling symlinks [BZ #866]

Message ID ecdf122b-842a-15a0-2518-96ce9c52d45d@cs.ucla.edu
State New, archived
Headers

Commit Message

Paul Eggert Aug. 31, 2017, 10:11 p.m. UTC
  Thanks for working to clear up this longstanding problem in glibc glob. 
Here are the issues I found with the proposed patch:

* glob_in_dir calls gl_stat, which is a typo; it should call gl_lstat.

* The GLOB_MARK checks should use stat not lstat, since symlinks to 
directories should be marked.

* For GLOB_TILDE and GLOB_TILDE_CHECK, the revised code still tests for 
existence of the home directory as a directory. Other glob 
implementations merely expand the ~ or ~foo and treat the result as a 
literal string, and glibc should be consistent here.  This is simpler 
and avoids a stat call.

* __lstat64 needs to be defined in the !_LIBC case too.

* While looking into this I noticed that glob ignores directory entries 
with d_ino == 0. Although currently GNU/Linux cannot return such 
entries, POSIX allows d_ino == 0 so this assumption is not portable, and 
this bug is in the neighborhood so we might as well fix it in a 
separated patch (which simplifies the code).

* commit message says "remove tst-glob3" but the patch actually removes 
bug-glob1-ARGS.

I fixed these problems (except for the last one) while merging the patch 
into Gnulib, and this resulted in the attached patches which I installed 
into Gnulib master. I'll CC: this to bug-gnulib accordingly. Please 
consider merging the glibc-relevant parts of these patches back into 
glibc, for your next iteration of this glibc proposal. As always, the 
goal is for glob.c and related files to be identical in glibc and Gnulib.
  

Patch

From f43e4827d2ae03d01979d89c9bdf294bf10af7e1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 31 Aug 2017 14:34:25 -0700
Subject: [PATCH 3/3] glob: match dangling symlinks

This fixes a bug I inadvertently introduced to Gnulib when I
merged glibc glob back into gnulib on 2007-10-16.  This fix is
inspired by a patch proposed for glibc by Adhemerval Zanella in:
https://sourceware.org/ml/libc-alpha/2017-08/msg00446.html
* doc/posix-functions/glob.texi: Update list of affected platforms.
* lib/glob.c (__lstat64): New macro.
(is_dir): New function.
(glob, glob_in_dir): Match symlinks even if they are dangling.
(link_stat, link_exists_p): Remove.  All uses removed.
* lib/glob.in.h (__attribute_noinline__): Remove; no longer used.
* m4/glob.m4 (gl_PREREQ_GLOB): Do not check for fstatat.
* modules/glob (Depends-on): Remove dirfd.
* modules/glob-tests (Depends-on): Add symlink.
* tests/test-glob.c: Include errno.h, unistd.h.
(BASE): New macro.
(main): Test dangling symlinks, if symlinks are supported.
---
 ChangeLog                     |  17 ++++
 doc/posix-functions/glob.texi |   2 +-
 lib/glob.c                    | 227 +++++++++++++++---------------------------
 lib/glob.in.h                 |   8 --
 m4/glob.m4                    |   4 +-
 modules/glob                  |   1 -
 modules/glob-tests            |   1 +
 tests/test-glob.c             |  20 ++++
 8 files changed, 121 insertions(+), 159 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 53945d434..960c56029 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@ 
 2017-08-31  Paul Eggert  <eggert@cs.ucla.edu>
 
+	glob: match dangling symlinks
+	This fixes a bug I inadvertently introduced to Gnulib when I
+	merged glibc glob back into gnulib on 2007-10-16.  This fix is
+	inspired by a patch proposed for glibc by Adhemerval Zanella in:
+	https://sourceware.org/ml/libc-alpha/2017-08/msg00446.html
+	* doc/posix-functions/glob.texi: Update list of affected platforms.
+	* lib/glob.c (__lstat64): New macro.
+	(is_dir): New function.
+	(glob, glob_in_dir): Match symlinks even if they are dangling.
+	(link_stat, link_exists_p): Remove.  All uses removed.
+	* lib/glob.in.h (__attribute_noinline__): Remove; no longer used.
+	* m4/glob.m4 (gl_PREREQ_GLOB): Do not check for fstatat.
+	* modules/glob-tests (Depends-on): Add symlink.
+	* tests/test-glob.c: Include errno.h, unistd.h.
+	(BASE): New macro.
+	(main): Test dangling symlinks, if symlinks are supported.
+
 	glob, backupfile: inode 0 is a valid inode number
 	* doc/posix-functions/readdir.texi (readdir):
 	* doc/posix-headers/dirent.texi (dirent.h):
diff --git a/doc/posix-functions/glob.texi b/doc/posix-functions/glob.texi
index 382fe5234..5ea89a87f 100644
--- a/doc/posix-functions/glob.texi
+++ b/doc/posix-functions/glob.texi
@@ -14,7 +14,7 @@  IRIX 5.3, mingw, MSVC 14, BeOS.
 @item
 This function does not list symbolic links to nonexistent files among the results,
 on some platforms:
-glibc 2.14, AIX 7.1, HP-UX 11, Solaris 11 2011-11.
+glibc 2.26, AIX 7.2, HP-UX 11, Solaris 11 2011-11.
 @item
 On platforms where @code{off_t} is a 32-bit type, this function may not
 work correctly on huge directories larger than 2 GB.
diff --git a/lib/glob.c b/lib/glob.c
index 106f4cb26..610d50a7f 100644
--- a/lib/glob.c
+++ b/lib/glob.c
@@ -57,6 +57,9 @@ 
 # define readdir(str) __readdir64 (str)
 # define getpwnam_r(name, bufp, buf, len, res) \
     __getpwnam_r (name, bufp, buf, len, res)
+# ifndef __lstat64
+#  define __lstat64(fname, buf) __lxstat64 (_STAT_VER, fname, buf)
+# endif
 # ifndef __stat64
 #  define __stat64(fname, buf) __xstat64 (_STAT_VER, fname, buf)
 # endif
@@ -64,6 +67,7 @@ 
 # define FLEXIBLE_ARRAY_MEMBER
 #else /* !_LIBC */
 # define __getlogin_r(buf, len) getlogin_r (buf, len)
+# define __lstat64(fname, buf)  lstat (fname, buf)
 # define __stat64(fname, buf)   stat (fname, buf)
 # define __fxstatat64(_, d, f, st, flag) fstatat (d, f, st, flag)
 # define struct_stat64          struct stat
@@ -226,6 +230,18 @@  static int prefix_array (const char *prefix, char **array, size_t n) __THROWNL;
 static int collated_compare (const void *, const void *) __THROWNL;
 
 
+/* Return true if FILENAME is a directory or a symbolic link to a directory.
+   Use FLAGS and PGLOB to resolve the filename.  */
+static bool
+is_dir (char const *filename, int flags, glob_t const *pglob)
+{
+  struct stat st;
+  struct_stat64 st64;
+  return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC)
+          ? pglob->gl_stat (filename, &st) == 0 && S_ISDIR (st.st_mode)
+          : __stat64 (filename, &st64) == 0 && S_ISDIR (st64.st_mode));
+}
+
 /* Find the end of the sub-pattern in a brace expression.  */
 static const char *
 next_brace_sub (const char *cp, int flags)
@@ -976,68 +992,53 @@  glob (const char *pattern, int flags, int (*errfunc) (const char *, int),
      can give the answer now.  */
   if (filename == NULL)
     {
-      struct stat st;
-      struct_stat64 st64;
-
-      /* Return the directory if we don't check for error or if it exists.  */
-      if ((flags & GLOB_NOCHECK)
-          || (((__builtin_expect (flags & GLOB_ALTDIRFUNC, 0))
-               ? ((*pglob->gl_stat) (dirname, &st) == 0
-                  && S_ISDIR (st.st_mode))
-               : (__stat64 (dirname, &st64) == 0 && S_ISDIR (st64.st_mode)))))
+      size_t newcount = pglob->gl_pathc + pglob->gl_offs;
+      char **new_gl_pathv;
+
+      if (newcount > SIZE_MAX / sizeof (char *) - 2)
         {
-          size_t newcount = pglob->gl_pathc + pglob->gl_offs;
-          char **new_gl_pathv;
+        nospace:
+          free (pglob->gl_pathv);
+          pglob->gl_pathv = NULL;
+          pglob->gl_pathc = 0;
+          retval = GLOB_NOSPACE;
+          goto out;
+        }
 
-          if (newcount > SIZE_MAX / sizeof (char *) - 2)
-            {
-            nospace:
-              free (pglob->gl_pathv);
-              pglob->gl_pathv = NULL;
-              pglob->gl_pathc = 0;
-              retval = GLOB_NOSPACE;
-              goto out;
-            }
+      new_gl_pathv = realloc (pglob->gl_pathv,
+                              (newcount + 2) * sizeof (char *));
+      if (new_gl_pathv == NULL)
+        goto nospace;
+      pglob->gl_pathv = new_gl_pathv;
 
-          new_gl_pathv = realloc (pglob->gl_pathv,
-                                  (newcount + 2) * sizeof (char *));
-          if (new_gl_pathv == NULL)
+      if (flags & GLOB_MARK && is_dir (dirname, flags, pglob))
+        {
+          char *p;
+          pglob->gl_pathv[newcount] = malloc (dirlen + 2);
+          if (pglob->gl_pathv[newcount] == NULL)
             goto nospace;
-          pglob->gl_pathv = new_gl_pathv;
-
-          if (flags & GLOB_MARK)
+          p = mempcpy (pglob->gl_pathv[newcount], dirname, dirlen);
+          p[0] = '/';
+          p[1] = '\0';
+          if (__glibc_unlikely (malloc_dirname))
+            free (dirname);
+        }
+      else
+        {
+          if (__glibc_unlikely (malloc_dirname))
+            pglob->gl_pathv[newcount] = dirname;
+          else
             {
-              char *p;
-              pglob->gl_pathv[newcount] = malloc (dirlen + 2);
+              pglob->gl_pathv[newcount] = strdup (dirname);
               if (pglob->gl_pathv[newcount] == NULL)
                 goto nospace;
-              p = mempcpy (pglob->gl_pathv[newcount], dirname, dirlen);
-              p[0] = '/';
-              p[1] = '\0';
-              if (__glibc_unlikely (malloc_dirname))
-                free (dirname);
             }
-          else
-            {
-              if (__glibc_unlikely (malloc_dirname))
-                pglob->gl_pathv[newcount] = dirname;
-              else
-                {
-                  pglob->gl_pathv[newcount] = strdup (dirname);
-                  if (pglob->gl_pathv[newcount] == NULL)
-                    goto nospace;
-                }
-            }
-          pglob->gl_pathv[++newcount] = NULL;
-          ++pglob->gl_pathc;
-          pglob->gl_flags = flags;
-
-          return 0;
         }
+      pglob->gl_pathv[++newcount] = NULL;
+      ++pglob->gl_pathc;
+      pglob->gl_flags = flags;
 
-      /* Not found.  */
-      retval = GLOB_NOMATCH;
-      goto out;
+      return 0;
     }
 
   meta = __glob_pattern_type (dirname, !(flags & GLOB_NOESCAPE));
@@ -1245,15 +1246,9 @@  glob (const char *pattern, int flags, int (*errfunc) (const char *, int),
     {
       /* Append slashes to directory names.  */
       size_t i;
-      struct stat st;
-      struct_stat64 st64;
 
       for (i = oldcount; i < pglob->gl_pathc + pglob->gl_offs; ++i)
-        if ((__builtin_expect (flags & GLOB_ALTDIRFUNC, 0)
-             ? ((*pglob->gl_stat) (pglob->gl_pathv[i], &st) == 0
-                && S_ISDIR (st.st_mode))
-             : (__stat64 (pglob->gl_pathv[i], &st64) == 0
-                && S_ISDIR (st64.st_mode))))
+        if (is_dir (pglob->gl_pathv[i], flags, pglob))
           {
             size_t len = strlen (pglob->gl_pathv[i]) + 2;
             char *new = realloc (pglob->gl_pathv[i], len);
@@ -1359,56 +1354,6 @@  prefix_array (const char *dirname, char **array, size_t n)
   return 0;
 }
 
-/* We put this in a separate function mainly to allow the memory
-   allocated with alloca to be recycled.  */
-static int
-__attribute_noinline__
-link_stat (const char *dir, size_t dirlen, const char *fname,
-           glob_t *pglob
-# if !defined _LIBC && !HAVE_FSTATAT
-           , int flags
-# endif
-           )
-{
-  size_t fnamelen = strlen (fname);
-  char *fullname = __alloca (dirlen + 1 + fnamelen + 1);
-  struct stat st;
-
-  mempcpy (mempcpy (mempcpy (fullname, dir, dirlen), "/", 1),
-           fname, fnamelen + 1);
-
-# if !defined _LIBC && !HAVE_FSTATAT
-  if (__builtin_expect ((flags & GLOB_ALTDIRFUNC) == 0, 1))
-    {
-      struct_stat64 st64;
-      return __stat64 (fullname, &st64);
-    }
-# endif
-  return (*pglob->gl_stat) (fullname, &st);
-}
-
-/* Return true if DIR/FNAME exists.  */
-static int
-link_exists_p (int dfd, const char *dir, size_t dirlen, const char *fname,
-               glob_t *pglob, int flags)
-{
-  int status;
-# if defined _LIBC || HAVE_FSTATAT
-  if (__builtin_expect (flags & GLOB_ALTDIRFUNC, 0))
-    status = link_stat (dir, dirlen, fname, pglob);
-  else
-    {
-      /* dfd cannot be -1 here, because dirfd never returns -1 on
-         glibc, or on hosts that have fstatat.  */
-      struct_stat64 st64;
-      status = __fxstatat64 (_STAT_VER, dfd, fname, &st64, 0);
-    }
-# else
-  status = link_stat (dir, dirlen, fname, pglob, flags);
-# endif
-  return status == 0 || errno == EOVERFLOW;
-}
-
 /* Like 'glob', but PATTERN is a final pathname component,
    and matches are searched for in DIRECTORY.
    The GLOB_NOSORT bit in FLAGS is ignored.  No sorting is ever done.
@@ -1450,8 +1395,6 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
     }
   else if (meta == 0)
     {
-      /* Since we use the normal file functions we can also use stat()
-         to verify the file is there.  */
       union
       {
         struct stat st;
@@ -1476,8 +1419,8 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
                         "/", 1),
                pattern, patlen + 1);
       if (((__builtin_expect (flags & GLOB_ALTDIRFUNC, 0)
-            ? (*pglob->gl_stat) (fullname, &ust.st)
-            : __stat64 (fullname, &ust.st64))
+            ? (*pglob->gl_lstat) (fullname, &ust.st)
+            : __lstat64 (fullname, &ust.st64))
            == 0)
           || errno == EOVERFLOW)
         /* We found this file to be existing.  Now tell the rest
@@ -1501,8 +1444,6 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
         }
       else
         {
-          int dfd = (__builtin_expect (flags & GLOB_ALTDIRFUNC, 0)
-                     ? -1 : dirfd ((DIR *) stream));
           int fnm_flags = ((!(flags & GLOB_PERIOD) ? FNM_PERIOD : 0)
                            | ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0));
           flags |= GLOB_MAGCHAR;
@@ -1536,42 +1477,34 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
 
               if (fnmatch (pattern, d.name, fnm_flags) == 0)
                 {
-                  /* If the file we found is a symlink we have to
-                     make sure the target file exists.  */
-                  dirent_type type = readdir_result_type (d);
-                  if (! (type == DT_LNK || type == DT_UNKNOWN)
-                      || link_exists_p (dfd, directory, dirlen, d.name,
-                                        pglob, flags))
+                  if (cur == names->count)
                     {
-                      if (cur == names->count)
-                        {
-                          struct globnames *newnames;
-                          size_t count = names->count * 2;
-                          size_t nameoff = offsetof (struct globnames, name);
-                          size_t size = FLEXSIZEOF (struct globnames, name,
-                                                    count * sizeof (char *));
-                          if ((SIZE_MAX - nameoff) / 2 / sizeof (char *)
-                              < names->count)
-                            goto memory_error;
-                          if (glob_use_alloca (alloca_used, size))
-                            newnames = names_alloca
-                              = alloca_account (size, alloca_used);
-                          else if ((newnames = malloc (size))
-                                   == NULL)
-                            goto memory_error;
-                          newnames->count = count;
-                          newnames->next = names;
-                          names = newnames;
-                          cur = 0;
-                        }
-                      names->name[cur] = strdup (d.name);
-                      if (names->name[cur] == NULL)
+                      struct globnames *newnames;
+                      size_t count = names->count * 2;
+                      size_t nameoff = offsetof (struct globnames, name);
+                      size_t size = FLEXSIZEOF (struct globnames, name,
+                                                count * sizeof (char *));
+                      if ((SIZE_MAX - nameoff) / 2 / sizeof (char *)
+                          < names->count)
                         goto memory_error;
-                      ++cur;
-                      ++nfound;
-                      if (SIZE_MAX - pglob->gl_offs <= nfound)
+                      if (glob_use_alloca (alloca_used, size))
+                        newnames = names_alloca
+                          = alloca_account (size, alloca_used);
+                      else if ((newnames = malloc (size))
+                               == NULL)
                         goto memory_error;
+                      newnames->count = count;
+                      newnames->next = names;
+                      names = newnames;
+                      cur = 0;
                     }
+                  names->name[cur] = strdup (d.name);
+                  if (names->name[cur] == NULL)
+                    goto memory_error;
+                  ++cur;
+                  ++nfound;
+                  if (SIZE_MAX - pglob->gl_offs <= nfound)
+                    goto memory_error;
                 }
             }
         }
diff --git a/lib/glob.in.h b/lib/glob.in.h
index cfb7e4996..b0d27cff6 100644
--- a/lib/glob.in.h
+++ b/lib/glob.in.h
@@ -49,14 +49,6 @@ 
 
 #define attribute_hidden
 
-#ifndef __attribute_noinline__
-# if 3 < __GNUC__ + (1 <= __GNUC_MINOR__)
-#  define __attribute_noinline__ __attribute__ ((__noinline__))
-#else
-#  define __attribute_noinline__ /* Ignore */
-# endif
-#endif
-
 #if __GNUC__ < 3
 # define __glibc_unlikely(cond) (cond)
 #else
diff --git a/m4/glob.m4 b/m4/glob.m4
index 23fc80219..189cd3a18 100644
--- a/m4/glob.m4
+++ b/m4/glob.m4
@@ -1,4 +1,4 @@ 
-# glob.m4 serial 14
+# glob.m4 serial 15
 dnl Copyright (C) 2005-2007, 2009-2017 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -72,5 +72,5 @@  AC_DEFUN([gl_PREREQ_GLOB],
     HAVE_SYS_CDEFS_H=0
   fi
   AC_SUBST([HAVE_SYS_CDEFS_H])
-  AC_CHECK_FUNCS_ONCE([fstatat getlogin_r getpwnam_r])dnl
+  AC_CHECK_FUNCS_ONCE([getlogin_r getpwnam_r])dnl
 ])
diff --git a/modules/glob b/modules/glob
index 0d6209de6..98d49e34f 100644
--- a/modules/glob
+++ b/modules/glob
@@ -21,7 +21,6 @@  alloca          [test -n "$GLOB_H"]
 builtin-expect  [test -n "$GLOB_H"]
 closedir        [test -n "$GLOB_H"]
 d-type          [test -n "$GLOB_H"]
-dirfd           [test -n "$GLOB_H"]
 flexmember      [test -n "$GLOB_H"]
 fnmatch         [test -n "$GLOB_H"]
 getlogin_r      [test -n "$GLOB_H"]
diff --git a/modules/glob-tests b/modules/glob-tests
index 151bc06b3..abc3666ed 100644
--- a/modules/glob-tests
+++ b/modules/glob-tests
@@ -5,6 +5,7 @@  tests/macros.h
 
 Depends-on:
 glob-c++-tests
+symlink
 
 configure.ac:
 
diff --git a/tests/test-glob.c b/tests/test-glob.c
index 7e755d9c4..eb3472c6e 100644
--- a/tests/test-glob.c
+++ b/tests/test-glob.c
@@ -20,6 +20,9 @@ 
 
 #include <glob.h>
 
+#include <errno.h>
+#include <unistd.h>
+
 #include "signature.h"
 SIGNATURE_CHECK (glob, int, (char const *, int, int (*) (char const *, int),
                              glob_t *));
@@ -29,6 +32,7 @@  SIGNATURE_CHECK (globfree, void, (glob_t *));
 
 #include "macros.h"
 
+#define BASE "test-glob.t"
 #define GL_NO_SUCH_FILE "/gnulib-magic-does-not-exist"
 
 int
@@ -73,5 +77,21 @@  main ()
   ASSERT (strcmp (g.gl_pathv[0], GL_NO_SUCH_FILE) == 0);
   globfree (&g);
 
+  if ((symlink (GL_NO_SUCH_FILE, BASE "globlink1") == 0 || errno == EEXIST)
+      && (symlink (".", BASE "globlink2") == 0 || errno == EEXIST))
+    {
+      res = glob (BASE "globlink[12]", 0, NULL, &g);
+      ASSERT (res == 0 && g.gl_pathc == 2);
+      ASSERT (strcmp (g.gl_pathv[0], BASE "globlink1") == 0);
+      ASSERT (strcmp (g.gl_pathv[1], BASE "globlink2") == 0);
+      globfree (&g);
+
+      res = glob (BASE "globlink[12]", GLOB_MARK, NULL, &g);
+      ASSERT (res == 0 && g.gl_pathc == 2);
+      ASSERT (strcmp (g.gl_pathv[0], BASE "globlink1") == 0);
+      ASSERT (strcmp (g.gl_pathv[1], BASE "globlink2/") == 0);
+      globfree (&g);
+    }
+
   return 0;
 }
-- 
2.13.5