glob: Simplify the interface for the GLOB_ALTDIRFUNC callback gl_readdir

Message ID 57068276.9000503@redhat.com
State Superseded
Headers

Commit Message

Florian Weimer April 7, 2016, 3:53 p.m. UTC
  Previously, application code had to set up d_ino and d_namlen members
if the platform supported them, involving conditional compilation.
The changes cause glob to ignore d_ino in GLOB_ALTDIRFUNC mode, and
always rely on the string length instead of using d_namlen.  DT_UNKNOWN
is zero, so zero-initialization of struct dirent will set the d_type
member to a conservative value.

Florian
  

Comments

Roland McGrath April 8, 2016, 6:44 p.m. UTC | #1
This is a potentially breaking ABI and API change.  The documentation does
not say anything specific about what the gl_readdir function's protocol is.
It just says "an alternative implementation of readdir".  The way I'd read
that, and the reality of the status quo ante, is that a trivial wrapper
around readdir will behave identically to not using GLOB_ALTDIRFUNC at all.
That means that d_ino==0 results must be ignored.

IMHO this means we need symbol versioning to make a change to the treatment
of d_ino.

It's a sufficiently conservative change to start ignoring d_namlen,
so you could do that separately.


Thanks,
Roland
  

Patch

2016-04-07  Florian Weimer  <fweimer@redhat.com>

	glob: Simplify and document the interface for the GLOB_ALTDIRFUNC
	callback function gl_readdir.
	* posix/glob.c (NAMELEN, CONVERT_D_NAMLEN): Remove.
	(CONVERT_DIRENT_DIRENT64): Use strcpy instead of memcpy.
	(glob_in_dir): Remove len.  Only skip non-real entries if
	GLOB_ALTDIRFUNC is not set.  Use strdup instead of malloc and
	memcpy to copy the name.
	* posix/bug-glob2.c (my_readdir): Clear struct dirent object.  Do
	not set d_ino explicitly.
	* posix/tst-gnuglob.c (my_readdir): Likewise.
	* manual/pattern.texi (Calling Glob): Document requirements for
	implementations of the gl_readdir callback function.

diff --git a/manual/pattern.texi b/manual/pattern.texi
index d1b9275..c026e68 100644
--- a/manual/pattern.texi
+++ b/manual/pattern.texi
@@ -237,6 +237,17 @@  function used to read the contents of a directory.  It is used if the
 @code{GLOB_ALTDIRFUNC} bit is set in the flag parameter.  The type of
 this field is @w{@code{struct dirent *(*) (void *)}}.
 
+An implementation of @code{gl_readdir} should initialize the
+@code{struct dirent} object to zero, up to the @code{d_name} member.  As
+an optimization, it may set the @code{d_type} member if the file type of
+the entry is known.  The @code{d_name} member must be null-terminated;
+the entire string is used by @code{glob}.
+
+The @code{struct dirent} object can be overwritten by a subsequent call
+to the @code{gl_readdir} callback function on the same directory stream
+created by the @code{gl_opendir} callback function.  It should be
+deallocated by the @code{gl_closedir} callback function.
+
 This is a GNU extension.
 
 @item gl_opendir
diff --git a/posix/bug-glob2.c b/posix/bug-glob2.c
index ddf5ec9..3726f83 100644
--- a/posix/bug-glob2.c
+++ b/posix/bug-glob2.c
@@ -193,7 +193,7 @@  my_readdir (void *gdir)
       return NULL;
     }
 
-  dir->d.d_ino = dir->idx;
+  memset (&dir->d, 0, offsetof (struct dirent, d_name));
 
 #ifdef _DIRENT_HAVE_D_TYPE
   dir->d.d_type = filesystem[dir->idx].type;
@@ -202,13 +202,11 @@  my_readdir (void *gdir)
   strcpy (dir->d.d_name, filesystem[dir->idx].name);
 
 #ifdef _DIRENT_HAVE_D_TYPE
-  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_ino: %ld, d_type: %d, d_name: \"%s\" }\n",
-	  dir->level, (long int) dir->idx, dir->d.d_ino, dir->d.d_type,
-	  dir->d.d_name);
+  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_type: %d, d_name: \"%s\" }\n",
+	  dir->level, (long int) dir->idx, dir->d.d_type, dir->d.d_name);
 #else
-  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_ino: %ld, d_name: \"%s\" }\n",
-	  dir->level, (long int) dir->idx, dir->d.d_ino,
-	  dir->d.d_name);
+  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_name: \"%s\" }\n",
+	  dir->level, (long int) dir->idx, dir->d.d_name);
 #endif
 
   ++dir->idx;
diff --git a/posix/glob.c b/posix/glob.c
index 0c04c3c..21fd242 100644
--- a/posix/glob.c
+++ b/posix/glob.c
@@ -57,10 +57,8 @@ 
 
 #if defined HAVE_DIRENT_H || defined __GNU_LIBRARY__
 # include <dirent.h>
-# define NAMLEN(dirent) strlen((dirent)->d_name)
 #else
 # define dirent direct
-# define NAMLEN(dirent) (dirent)->d_namlen
 # ifdef HAVE_SYS_NDIR_H
 #  include <sys/ndir.h>
 # endif
@@ -76,12 +74,6 @@ 
 #endif
 
 
-/* In GNU systems, <dirent.h> defines this macro for us.  */
-#ifdef _D_NAMLEN
-# undef NAMLEN
-# define NAMLEN(d) _D_NAMLEN(d)
-#endif
-
 /* When used in the GNU libc the symbol _DIRENT_HAVE_D_TYPE is available
    if the `d_type' member for `struct dirent' is available.
    HAVE_STRUCT_DIRENT_D_TYPE plays the same role in GNULIB.  */
@@ -105,12 +97,6 @@ 
 
 /* If the system has the `struct dirent64' type we use it internally.  */
 #if defined _LIBC && !defined COMPILE_GLOB64
-# if defined HAVE_DIRENT_H || defined __GNU_LIBRARY__
-#  define CONVERT_D_NAMLEN(d64, d32)
-# else
-#  define CONVERT_D_NAMLEN(d64, d32) \
-  (d64)->d_namlen = (d32)->d_namlen;
-# endif
 
 # if (defined POSIX || defined WINDOWS32) && !defined __GNU_LIBRARY__
 #  define CONVERT_D_INO(d64, d32)
@@ -127,8 +113,7 @@ 
 # endif
 
 # define CONVERT_DIRENT_DIRENT64(d64, d32) \
-  memcpy ((d64)->d_name, (d32)->d_name, NAMLEN (d32) + 1);		      \
-  CONVERT_D_NAMLEN (d64, d32)						      \
+  strcpy ((d64)->d_name, (d32)->d_name);				      \
   CONVERT_D_INO (d64, d32)						      \
   CONVERT_D_TYPE (d64, d32)
 #endif
@@ -1554,7 +1539,6 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
 	  while (1)
 	    {
 	      const char *name;
-	      size_t len;
 #if defined _LIBC && !defined COMPILE_GLOB64
 	      struct dirent64 *d;
 	      union
@@ -1586,7 +1570,10 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
 #endif
 	      if (d == NULL)
 		break;
-	      if (! REAL_DIR_ENTRY (d))
+	      /* If the GLOB_ALTDIRFUNC callback is used, we expect
+		 the callback to return only existing entries.  */
+	      if (! REAL_DIR_ENTRY (d)
+		  && __builtin_expect (flags & GLOB_ALTDIRFUNC, 0) == 0)
 		continue;
 
 	      /* If we shall match only directories use the information
@@ -1622,12 +1609,10 @@  glob_in_dir (const char *pattern, const char *directory, int flags,
 			  names = newnames;
 			  cur = 0;
 			}
-		      len = NAMLEN (d);
-		      names->name[cur] = (char *) malloc (len + 1);
+		      names->name[cur] = strdup (d->d_name);
 		      if (names->name[cur] == NULL)
 			goto memory_error;
-		      *((char *) mempcpy (names->name[cur++], name, len))
-			= '\0';
+		      ++cur;
 		      ++nfound;
 		    }
 		}
diff --git a/posix/tst-gnuglob.c b/posix/tst-gnuglob.c
index 992b997..4eae689 100644
--- a/posix/tst-gnuglob.c
+++ b/posix/tst-gnuglob.c
@@ -211,7 +211,7 @@  my_readdir (void *gdir)
       return NULL;
     }
 
-  dir->d.d_ino = dir->idx;
+  memset (&dir->d, 0, offsetof (struct dirent, d_name));
 
 #ifdef _DIRENT_HAVE_D_TYPE
   dir->d.d_type = filesystem[dir->idx].type;
@@ -220,13 +220,11 @@  my_readdir (void *gdir)
   strcpy (dir->d.d_name, filesystem[dir->idx].name);
 
 #ifdef _DIRENT_HAVE_D_TYPE
-  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_ino: %ld, d_type: %d, d_name: \"%s\" }\n",
-	  dir->level, (long int) dir->idx, dir->d.d_ino, dir->d.d_type,
-	  dir->d.d_name);
+  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_type: %d, d_name: \"%s\" }\n",
+	  dir->level, (long int) dir->idx, dir->d.d_type, dir->d.d_name);
 #else
-  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_ino: %ld, d_name: \"%s\" }\n",
-	  dir->level, (long int) dir->idx, dir->d.d_ino,
-	  dir->d.d_name);
+  PRINTF ("my_readdir ({ level: %d, idx: %ld }) = { d_name: \"%s\" }\n",
+	  dir->level, (long int) dir->idx, dir->d.d_name);
 #endif
 
   ++dir->idx;