Patchwork Fix readdir_r with long file names

login
register
mail settings
Submitter Paul Eggert
Date March 2, 2016, 5:44 p.m.
Message ID <56D72683.6010302@cs.ucla.edu>
Download mbox | patch
Permalink /patch/11169/
State New
Headers show

Comments

Paul Eggert - March 2, 2016, 5:44 p.m.
Florian Weimer wrote:

> Just to be clear, you looked at the wrong struct dirent definition for
> GNU/Linux, there is a sysdeps override.

Yes, we'd need a similar fix in sysdeps/unix/sysv/linux/bits/dirent.h.

> Right now, most programs relying on sizeof (struct dirent) work well in
> almost all cases.  We really don't want to break that.  There appears to
> be an overlap between these programs and users of readdir_r, so once we
> remove that from the API, we should have better story for struct dirent
> declarators as well.

I see your point in worrying about GNU/Linux programs that use 'sizeof 
(struct dirent)', even though these programs are not portable and won't 
work on other POSIX platforms.

How about something like the attached (untested) patch, then? It keeps 
the structure the same size, while still using flexible arrays to 
indicate to analyzers that the array in question may be larger. It also 
adds a compile-time option (default off) to simply use flexible arrays 
without the backwards-compatibility hack. We could use that option in 
Gnulib.
Joseph Myers - March 3, 2016, 10:39 p.m.
On Wed, 2 Mar 2016, Paul Eggert wrote:

> analyzers that the array in question may be larger. It also adds a
> compile-time option (default off) to simply use flexible arrays without the
> backwards-compatibility hack. We could use that option in Gnulib.

I don't think such a compile-time option is desirable (especially not 
undocumented, all feature test macros should be documented in 
creature.texi); we shouldn't profilerate feature-test macros for things 
that aren't well-defined API groups.
Florian Weimer - March 8, 2016, 12:20 p.m.
On 03/02/2016 06:44 PM, Paul Eggert wrote:
> +/* Declare a structure member named ID, with type "array of TYPE", and
> +   with actual size unspecified but with nominal size SIZE for the
> +   benefit of traditional applications.  If the application is
> +   compiled with _FLEXARRAY_SOURCE and if the compiler is C99 or
> +   better, model this as a flexible array.  Otherwise, model it as a
> +   fixed-size array of size SIZE (followed by a flexible array
> +   afterwards, if C99, for the benefit of source-code analyzers that
> +   can use this as a cue that the array is really varying-size).  The
> +   fixed-size array is for compatibility with programs that unwisely
> +   take the size of structures that contain flexible arrays, and which
> +   rely on traditional glibc which uses fixed-size arrays.  */
> +#ifndef __flexarray
> +# ifdef __USE_FLEXARRAY
> +#  define __flexarray(id, type, size) type id[]
> +# else
> +#  define __flexarray(id, type, size) type id[size]; type __flexarray __flexarr
> +# endif
> +#endif

Sorry, I'm not convinced this is good compiler hint.  I suspect it moves
things in the opposite direction.  Some compilers likely have hacks to
support arrays of length 1 as flexible arrays if the array is at the end
of a struct, and after your change, the member is no longer at the end,
potentially disabling the flexible array kludge.

I think we should interact better with compilers in this area, but this
needs fairly explicit mark-up, showing what we need, and not something
that requires the compiler to guess.

Thanks,
Florian

Patch

diff --git a/bits/dirent.h b/bits/dirent.h
index 7b79a53..69a7892 100644
--- a/bits/dirent.h
+++ b/bits/dirent.h
@@ -32,7 +32,7 @@  struct dirent
     unsigned char d_namlen;	/* Length of the file name.  */
 
     /* Only this member is in the POSIX standard.  */
-    char d_name[1];		/* File name (actually longer).  */
+    __flexarray (d_name, char, 1); /* File name.  */
   };
 
 #ifdef __USE_LARGEFILE64
@@ -42,8 +42,7 @@  struct dirent64
     unsigned short int d_reclen;
     unsigned char d_type;
     unsigned char d_namlen;
-
-    char d_name[1];
+    __flexarray (d_name, char, 1); /* File name.  */
   };
 #endif
 
diff --git a/include/features.h b/include/features.h
index 9514d35..ca162d3 100644
--- a/include/features.h
+++ b/include/features.h
@@ -45,6 +45,7 @@ 
    _THREAD_SAFE		Same as _REENTRANT, often used by other systems.
    _FORTIFY_SOURCE	If set to numeric value > 0 additional security
 			measures are defined, according to level.
+   _FLEXARRAY_SOURCE	Select flexible array data structures when applicable.
 
    The `-ansi' switch to the GNU C compiler, and standards conformance
    options such as `-std=c99', define __STRICT_ANSI__.  If none of
@@ -80,6 +81,7 @@ 
    __USE_GNU		Define GNU extensions.
    __USE_REENTRANT	Define reentrant/thread-safe *_r functions.
    __USE_FORTIFY_LEVEL	Additional security measures used, according to level.
+   __USE_FLEXARRAY	Use flexible arrays for data structures.
 
    The macros `__GNU_LIBRARY__', `__GLIBC__', and `__GLIBC_MINOR__' are
    defined by this file unconditionally.  `__GNU_LIBRARY__' is provided
@@ -117,6 +119,7 @@ 
 #undef	__USE_GNU
 #undef	__USE_REENTRANT
 #undef	__USE_FORTIFY_LEVEL
+#undef	__USE_FLEXARRAY
 #undef	__KERNEL_STRICT_NAMES
 
 /* Suppress kernel-name space pollution unless user expressedly asks
@@ -341,6 +344,10 @@ 
 # define __USE_FORTIFY_LEVEL 0
 #endif
 
+#ifdef _FLEXARRAY_SOURCE
+# define __USE_FLEXARRAY 1
+#endif
+
 /* Get definitions of __STDC_* predefined macros, if the compiler has
    not preincluded this header automatically.  */
 #include <stdc-predef.h>
diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
index 7fd4154..facfdc3 100644
--- a/misc/sys/cdefs.h
+++ b/misc/sys/cdefs.h
@@ -157,10 +157,29 @@ 
 #  else
 /* Some other non-C99 compiler.  Approximate with [1].  */
 #   define __flexarr	[1]
+#   define __flexarray(id, type, size) type id[size]
 #  endif
 # endif
 #endif
 
+/* Declare a structure member named ID, with type "array of TYPE", and
+   with actual size unspecified but with nominal size SIZE for the
+   benefit of traditional applications.  If the application is
+   compiled with _FLEXARRAY_SOURCE and if the compiler is C99 or
+   better, model this as a flexible array.  Otherwise, model it as a
+   fixed-size array of size SIZE (followed by a flexible array
+   afterwards, if C99, for the benefit of source-code analyzers that
+   can use this as a cue that the array is really varying-size).  The
+   fixed-size array is for compatibility with programs that unwisely
+   take the size of structures that contain flexible arrays, and which
+   rely on traditional glibc which uses fixed-size arrays.  */
+#ifndef __flexarray
+# ifdef __USE_FLEXARRAY
+#  define __flexarray(id, type, size) type id[]
+# else
+#  define __flexarray(id, type, size) type id[size]; type __flexarray __flexarr
+# endif
+#endif
 
 /* __asm__ ("xyz") is used throughout the headers to rename functions
    at the assembly language level.  This is wrapped by the __REDIRECT
diff --git a/sysdeps/nacl/bits/dirent.h b/sysdeps/nacl/bits/dirent.h
index d4eb7fe..2e38a58 100644
--- a/sysdeps/nacl/bits/dirent.h
+++ b/sysdeps/nacl/bits/dirent.h
@@ -30,7 +30,7 @@  struct dirent
     unsigned short int d_reclen; /* Length of the whole `struct dirent'.  */
 
     /* Only this member is in the POSIX standard.  */
-    char d_name[256];		/* We must not include limits.h! */
+    __flexarray (d_name, char, 256); /* We must not include limits.h! */
   };
 
 #ifdef __USE_LARGEFILE64
@@ -42,7 +42,7 @@  struct dirent64
     unsigned short int d_reclen; /* Length of the whole `struct dirent'.  */
 
     /* Only this member is in the POSIX standard.  */
-    char d_name[256];		/* We must not include limits.h! */
+    __flexarray (d_name, char, 256); /* We must not include limits.h! */
   };
 #endif
 
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/dirent.h b/sysdeps/unix/sysv/linux/alpha/bits/dirent.h
index 98d297e..2470d0f 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/dirent.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/dirent.h
@@ -29,7 +29,7 @@  struct dirent
     __off_t d_off;
     unsigned short int d_reclen;
     unsigned char d_type;
-    char d_name[256];		/* We must not include limits.h! */
+    __flexarray (d_name, char, 256); /* We must not include limits.h! */
   };
 
 #ifdef __USE_LARGEFILE64
@@ -40,7 +40,7 @@  struct dirent64
     __off64_t d_off;
     unsigned short int d_reclen;
     unsigned char d_type;
-    char d_name[256];		/* We must not include limits.h! */
+    __flexarray (d_name, char, 256); /* We must not include limits.h! */
   };
 #endif
 
diff --git a/sysdeps/unix/sysv/linux/bits/dirent.h b/sysdeps/unix/sysv/linux/bits/dirent.h
index 31b1961..b7137e0 100644
--- a/sysdeps/unix/sysv/linux/bits/dirent.h
+++ b/sysdeps/unix/sysv/linux/bits/dirent.h
@@ -30,7 +30,7 @@  struct dirent
 #endif
     unsigned short int d_reclen;
     unsigned char d_type;
-    char d_name[256];		/* We must not include limits.h! */
+    __flexarray (d_name, char, 256); /* We must not include limits.h! */
   };
 
 #ifdef __USE_LARGEFILE64
@@ -40,7 +40,7 @@  struct dirent64
     __off64_t d_off;
     unsigned short int d_reclen;
     unsigned char d_type;
-    char d_name[256];		/* We must not include limits.h! */
+    __flexarray (d_name, char, 256); /* We must not include limits.h! */
   };
 #endif