[v2,1/5] mips: Do not malloc on getdents64 fallback

Message ID 703d65b6-82bd-f557-7257-0fdd9cedb2f4@linaro.org
State Dropped
Headers

Commit Message

Adhemerval Zanella Netto Oct. 8, 2019, 5:37 p.m. UTC
  On 07/10/2019 15:29, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> +#define copy_field(dst, src)			\
>> +  ({						\
>> +     typeof (src) _src;				\
>> +     memcpy (&_src, &(src), sizeof (src));	\
>> +     typeof (dst) _dst = _src;			\
>> +     memcpy (&(dst), &_dst, sizeof (dst));	\
>> +  })
>> +
>> +      copy_field (dp->d_ino, kdp->d_ino);
>> +      copy_field (dp->d_off, kdp->d_off);
>> +      copy_field (last_offset, kdp->d_off);
>> +      copy_field (dp->d_reclen, new_reclen);
>>        dp->d_type = *((char *) kdp + kdp->d_reclen - 1);
> 
> I believe this still asserts the dynamic type of *dp, which is not what
> we want.  The truly portable way probably involves using offsetof and
> not -> dereferencing. 8-(
> 
> Considering that, I would probably drop copy_field, compile the file
> with -fno-strict-aliasing, and add a comment to the (now plain
> assignments) that this is okay due to -fno-strict-aliasing.
> 
> But this is really up to you, I do not want to discuss this patch to
> death.
> 

I chatted with Rich Felker yesterday and we couldn't be sure that
'&((T*)buf)->member' is indeed UB.  In any case I changed to a portable
way now and I think it should be ok to push.

--
  

Comments

Florian Weimer Oct. 8, 2019, 6:52 p.m. UTC | #1
* Adhemerval Zanella:

> +#define DP_MEMBER(src, type, member)			     \
> +    (__typeof__((type){0}.member) *)			     \
> +      memcpy (&((__typeof__((type){0}.member)){0}),          \
> +	      ((char *)(src) + offsetof (type, member)),     \
> +	      sizeof ((type){0}.member))

Please add a comment that this is used to avoid an aliasing violation.

> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_ino)),
> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
> +	      sizeof ((struct dirent64){0}.d_ino));
> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_off)),
> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
> +	      sizeof ((struct dirent64){0}.d_ino));
> +      memcpy (&last_offset,
> +	      DP_MEMBER (kdp, struct kernel_dirent, d_off),
> +	      sizeof (last_offset));

I think you should be able to use:

   last_offset = *DP_MEMBER (kdp, struct kernel_dirent, d_off);

last_offset has the correct type.

> +      memcpy (DP_MEMBER (dp, struct dirent64, d_reclen), &new_reclen,
> +	      sizeof ((struct dirent64){0}.d_reclen));

That looks wrong.  DP_MEMBER (dp, struct dirent64, d_reclen) is a
temporary object, so the outer memcpy is dead.

Thanks,
Florian
  
Adhemerval Zanella Netto Oct. 8, 2019, 7:52 p.m. UTC | #2
On 08/10/2019 15:52, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> +#define DP_MEMBER(src, type, member)			     \
>> +    (__typeof__((type){0}.member) *)			     \
>> +      memcpy (&((__typeof__((type){0}.member)){0}),          \
>> +	      ((char *)(src) + offsetof (type, member)),     \
>> +	      sizeof ((type){0}.member))
> 
> Please add a comment that this is used to avoid an aliasing violation.

Ack.

> 
>> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_ino)),
>> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
>> +	      sizeof ((struct dirent64){0}.d_ino));
>> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_off)),
>> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
>> +	      sizeof ((struct dirent64){0}.d_ino));
>> +      memcpy (&last_offset,
>> +	      DP_MEMBER (kdp, struct kernel_dirent, d_off),
>> +	      sizeof (last_offset));
> 
> I think you should be able to use:
> 
>    last_offset = *DP_MEMBER (kdp, struct kernel_dirent, d_off);
> 
> last_offset has the correct type.

Ack.

> 
>> +      memcpy (DP_MEMBER (dp, struct dirent64, d_reclen), &new_reclen,
>> +	      sizeof ((struct dirent64){0}.d_reclen));
> 
> That looks wrong.  DP_MEMBER (dp, struct dirent64, d_reclen) is a
> temporary object, so the outer memcpy is dead.

Sigh, indeed. I changed to:

   memcpy (((char *)(dp) + offsetof (struct dirent64, d_reclen)),
           &new_reclen, sizeof ((struct dirent64){0}.d_reclen));

> 
> Thanks,
> Florian
> 

I hope I got this right this time...
  
Florian Weimer Oct. 8, 2019, 7:59 p.m. UTC | #3
* Adhemerval Zanella:

>>> +      memcpy (DP_MEMBER (dp, struct dirent64, d_reclen), &new_reclen,
>>> +	      sizeof ((struct dirent64){0}.d_reclen));
>> 
>> That looks wrong.  DP_MEMBER (dp, struct dirent64, d_reclen) is a
>> temporary object, so the outer memcpy is dead.
>
> Sigh, indeed. I changed to:
>
>    memcpy (((char *)(dp) + offsetof (struct dirent64, d_reclen)),
>            &new_reclen, sizeof ((struct dirent64){0}.d_reclen));

sizeof ((struct dirent64){0}.d_reclen) could just be
sizeof (new_reclen).  After all, this only works if they are the same.

I guess -fno-strict-aliasing looks more attractive now. 8-/

You probably should write ((char *) dp) instead of (char *)(dp) if you
want to make the operator precedence explicit, or at least drop the
parentheses around dp.  (I think the cast binds tighter than the +,
but I can't really remember.  I tend to write the paranetheses.)
  
Adhemerval Zanella Netto Oct. 9, 2019, 1:02 p.m. UTC | #4
On 08/10/2019 16:59, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>>> +      memcpy (DP_MEMBER (dp, struct dirent64, d_reclen), &new_reclen,
>>>> +	      sizeof ((struct dirent64){0}.d_reclen));
>>>
>>> That looks wrong.  DP_MEMBER (dp, struct dirent64, d_reclen) is a
>>> temporary object, so the outer memcpy is dead.
>>
>> Sigh, indeed. I changed to:
>>
>>    memcpy (((char *)(dp) + offsetof (struct dirent64, d_reclen)),
>>            &new_reclen, sizeof ((struct dirent64){0}.d_reclen));
> 
> sizeof ((struct dirent64){0}.d_reclen) could just be
> sizeof (new_reclen).  After all, this only works if they are the same.

Ack.

> 
> I guess -fno-strict-aliasing looks more attractive now. 8-/
> 
> You probably should write ((char *) dp) instead of (char *)(dp) if you
> want to make the operator precedence explicit, or at least drop the
> parentheses around dp.  (I think the cast binds tighter than the +,
> but I can't really remember.  I tend to write the paranetheses.)
> 

Ack.
  
Joseph Myers Nov. 2, 2020, 7:51 p.m. UTC | #5
On Tue, 8 Oct 2019, Adhemerval Zanella wrote:

> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_ino)),
> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
> +	      sizeof ((struct dirent64){0}.d_ino));
> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_off)),
> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
> +	      sizeof ((struct dirent64){0}.d_ino));

(This is slightly different from the version of the code that ended up 
getting committed.)

GCC mainline now gives a rather cryptic error about this code:

../sysdeps/unix/sysv/linux/mips/mips64/getdents64.c: In function '__getdents64':
../sysdeps/unix/sysv/linux/mips/mips64/getdents64.c:121:7: error: 'memcpy' forming offset [4, 7] is out of the bounds [0, 4] [-Werror=array-bounds]
  121 |       memcpy (((char *) dp + offsetof (struct dirent64, d_ino)),
      |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  122 |               KDP_MEMBER (kdp, d_ino), sizeof ((struct dirent64){0}.d_ino));
      |               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../sysdeps/unix/sysv/linux/mips/mips64/getdents64.c:123:7: error: 'memcpy' forming offset [4, 7] is out of the bounds [0, 4] [-Werror=array-bounds]
  123 |       memcpy (((char *) dp + offsetof (struct dirent64, d_off)),
      |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  124 |               KDP_MEMBER (kdp, d_off), sizeof ((struct dirent64){0}.d_off));
      |               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

I think what this error is pointing out is that the field in 
kernel_dirent, for non-n64, is 32-bit, while this is using memcpy to copy 
64 bits from it into the glibc dirent64, which obviously doesn't work.
  
Adhemerval Zanella Netto Nov. 2, 2020, 10:10 p.m. UTC | #6
On 02/11/2020 16:51, Joseph Myers wrote:
> On Tue, 8 Oct 2019, Adhemerval Zanella wrote:
> 
>> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_ino)),
>> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
>> +	      sizeof ((struct dirent64){0}.d_ino));
>> +      memcpy (((char *)(dp) + offsetof (struct dirent64, d_off)),
>> +	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
>> +	      sizeof ((struct dirent64){0}.d_ino));
> 
> (This is slightly different from the version of the code that ended up 
> getting committed.)
> 
> GCC mainline now gives a rather cryptic error about this code:
> 
> ../sysdeps/unix/sysv/linux/mips/mips64/getdents64.c: In function '__getdents64':
> ../sysdeps/unix/sysv/linux/mips/mips64/getdents64.c:121:7: error: 'memcpy' forming offset [4, 7] is out of the bounds [0, 4] [-Werror=array-bounds]
>   121 |       memcpy (((char *) dp + offsetof (struct dirent64, d_ino)),
>       |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>   122 |               KDP_MEMBER (kdp, d_ino), sizeof ((struct dirent64){0}.d_ino));
>       |               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> ../sysdeps/unix/sysv/linux/mips/mips64/getdents64.c:123:7: error: 'memcpy' forming offset [4, 7] is out of the bounds [0, 4] [-Werror=array-bounds]
>   123 |       memcpy (((char *) dp + offsetof (struct dirent64, d_off)),
>       |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>   124 |               KDP_MEMBER (kdp, d_off), sizeof ((struct dirent64){0}.d_off));
>       |               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> I think what this error is pointing out is that the field in 
> kernel_dirent, for non-n64, is 32-bit, while this is using memcpy to copy 
> 64 bits from it into the glibc dirent64, which obviously doesn't work.
> 

I was trying to be too clever to avoid a temporary variable to handle 
mips64n32.  I think the below should handle the issue raised by GCC11,
I will just check some on mips64 machine from gcc farm before send the
fix.

---

diff --git a/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c b/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
index d18a5297dc..2ea1369ef4 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
+++ b/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
@@ -90,16 +90,29 @@ __getdents64 (int fd, void *buf, size_t nbytes)
 
   while ((char *) kdp < (char *) skdp + r)
     {
-      /* This macro is used to avoid aliasing violation.  */
-#define KDP_MEMBER(src, member)			     			\
-    (__typeof__((struct kernel_dirent){0}.member) *)			\
-      memcpy (&((__typeof__((struct kernel_dirent){0}.member)){0}),	\
-	      ((char *)(src) + offsetof (struct kernel_dirent, member)),\
-	      sizeof ((struct kernel_dirent){0}.member))
+#define KDP_MEMBER(src, member)						     \
+      ({								     \
+	__typeof ((struct kernel_dirent){0}.member) kdp_tmp;		     \
+	memcpy (&kdp_tmp,						     \
+		((char *)(src) + offsetof (struct kernel_dirent, member)),   \
+		sizeof (kdp_tmp));					     \
+	kdp_tmp;							     \
+      })
+
+      /* Copy the MEMBER from SRC kernel_dirent to DST dirent64.  It handles
+	 the different size of d_off/d_ino for mips64-n32 by using temporary
+	 variables.  */
+#define COPY_MEMBER(src, dst, member)					     \
+      ({								     \
+	__typeof ((struct dirent64){0}.member) dp_tmp			     \
+	  = KDP_MEMBER (src, member);					     \
+	memcpy ((char *) dp + offsetof (struct dirent64, d_off),	     \
+		&dp_tmp, sizeof (dp_tmp));				     \
+      })
 
       /* This is a conservative approximation, since some of size_diff might
 	 fit into the existing padding for alignment.  */
-      unsigned short int k_reclen = *KDP_MEMBER (kdp, d_reclen);
+      unsigned short int k_reclen = KDP_MEMBER (kdp, d_reclen);
       unsigned short int new_reclen = ALIGN_UP (k_reclen + size_diff,
 						_Alignof (struct dirent64));
       if (nb + new_reclen > nbytes)
@@ -118,11 +131,10 @@ __getdents64 (int fd, void *buf, size_t nbytes)
 	}
       nb += new_reclen;
 
-      memcpy (((char *) dp + offsetof (struct dirent64, d_ino)),
-	      KDP_MEMBER (kdp, d_ino), sizeof ((struct dirent64){0}.d_ino));
-      memcpy (((char *) dp + offsetof (struct dirent64, d_off)),
-	      KDP_MEMBER (kdp, d_off), sizeof ((struct dirent64){0}.d_off));
-      last_offset = *KDP_MEMBER (kdp, d_off);
+      COPY_MEMBER (dp, kdp, d_off);
+      COPY_MEMBER (dp, kdp, d_ino);
+
+      last_offset = KDP_MEMBER (kdp, d_off);
       memcpy (((char *) dp + offsetof (struct dirent64, d_reclen)),
 	      &new_reclen, sizeof (new_reclen));
       dp->d_type = *((char *) kdp + k_reclen - 1);
  
Florian Weimer Nov. 3, 2020, 10:27 a.m. UTC | #7
* Adhemerval Zanella:

> I was trying to be too clever to avoid a temporary variable to handle 
> mips64n32.  I think the below should handle the issue raised by GCC11,
> I will just check some on mips64 machine from gcc farm before send the
> fix.

I think at this point it might be clearer two have two new structs with
the first few fields of the actual structs, use memcpy on the whole
structs, and a field-by-field copy between the structs to perform the
type adjustment.

It's not that we expect the layout of the structs involved to change.

thanks,
Florian
  

Patch

diff --git a/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c b/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
index a8c65cccbf..905239cad9 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
+++ b/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
@@ -22,88 +22,113 @@ 
 #include <assert.h>
 #include <sys/param.h>
 #include <unistd.h>
-#include <scratch_buffer.h>
 #include <limits.h>
 
+#include <include/libc-pointer-arith.h>
+
 ssize_t
-__getdents64 (int fd, void *buf0, size_t nbytes)
+__getdents64 (int fd, void *buf, size_t nbytes)
 {
-  char *buf = buf0;
-
   /* The system call takes an unsigned int argument, and some length
      checks in the kernel use an int type.  */
   if (nbytes > INT_MAX)
     nbytes = INT_MAX;
 
 #ifdef __NR_getdents64
-  ssize_t ret = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes);
-  if (ret != -1)
-    return ret;
+  static int getdents64_supported = true;
+  if (atomic_load_relaxed (&getdents64_supported))
+    {
+      ssize_t ret = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes);
+      if (ret >= 0 || errno != ENOSYS)
+	return ret;
+
+      atomic_store_relaxed (&getdents64_supported, false);
+    }
 #endif
 
   /* Unfortunately getdents64 was only wire-up for MIPS n64 on Linux 3.10.
-     If syscall is not available it need to fallback to non-LFS one.  */
+     If the syscall is not available it need to fallback to the non-LFS one.
+     Also to avoid an unbounded allocation through VLA/alloca or malloc (which
+     would make the syscall non async-signal-safe) it uses a limited buffer.
+     This is sub-optimal for large NBYTES, however this is a fallback
+     mechanism to emulate a syscall that kernel should provide.   */
 
   struct kernel_dirent
-    {
-      unsigned long d_ino;
-      unsigned long d_off;
-      unsigned short int d_reclen;
-      char d_name[256];
-    };
+  {
+#if _MIPS_SIM == _ABI64
+    uint64_t d_ino;
+    uint64_t d_off;
+#else
+    uint32_t d_ino;
+    uint32_t d_off;
+#endif
+    unsigned short int d_reclen;
+    char d_name[1];
+  };
+
+  /* The largest possible practical length of the d_name member are 255
+     Unicode characters in UTF-8 encoding, so d_name is 766 bytes long, plus
+     18 (mips64) / 10 (mips64n32) bytes from header, for total of 784 (mips64)
+     / 776 (mips64n32) bytes total.  Ensure that the minimum size holds at
+     least one entry.  */
+  enum { KBUF_SIZE = 1024 };
+  char kbuf[KBUF_SIZE];
+  size_t kbuf_size = nbytes < KBUF_SIZE ? nbytes : KBUF_SIZE;
 
   const size_t size_diff = (offsetof (struct dirent64, d_name)
-			   - offsetof (struct kernel_dirent, d_name));
+                           - offsetof (struct kernel_dirent, d_name));
 
-  size_t red_nbytes = MIN (nbytes
-			   - ((nbytes / (offsetof (struct dirent64, d_name)
-					 + 14)) * size_diff),
-			   nbytes - size_diff);
+  struct dirent64 *dp = (struct dirent64 *) buf;
 
-  struct scratch_buffer tmpbuf;
-  scratch_buffer_init (&tmpbuf);
-  if (!scratch_buffer_set_array_size (&tmpbuf, red_nbytes, sizeof (uint8_t)))
-    INLINE_SYSCALL_ERROR_RETURN_VALUE (ENOMEM);
+  size_t nb = 0;
+  off64_t last_offset = -1;
 
-  struct kernel_dirent *skdp, *kdp;
-  skdp = kdp = tmpbuf.data;
+  ssize_t r = INLINE_SYSCALL_CALL (getdents, fd, kbuf, kbuf_size);
+  if (r <= 0)
+    return r;
 
-  ssize_t retval = INLINE_SYSCALL_CALL (getdents, fd, kdp, red_nbytes);
-  if (retval == -1)
-    {
-      scratch_buffer_free (&tmpbuf);
-      return -1;
-    }
+  struct kernel_dirent *skdp, *kdp;
+  skdp = kdp = (struct kernel_dirent *) kbuf;
 
-  off64_t last_offset = -1;
-  struct dirent64 *dp = (struct dirent64 *) buf;
-  while ((char *) kdp < (char *) skdp + retval)
+  while ((char *) kdp < (char *) skdp + r)
     {
-      const size_t alignment = _Alignof (struct dirent64);
-      /* Since kdp->d_reclen is already aligned for the kernel structure
-	 this may compute a value that is bigger than necessary.  */
-      size_t new_reclen = ((kdp->d_reclen + size_diff + alignment - 1)
-			   & ~(alignment - 1));
-      if ((char *) dp + new_reclen > buf + nbytes)
-        {
-	  /* Our heuristic failed.  We read too many entries.  Reset
-	     the stream.  */
-	  assert (last_offset != -1);
-	  __lseek64 (fd, last_offset, SEEK_SET);
-
-	  if ((char *) dp == buf)
+      /* This is a conservative approximation, since some of size_diff might
+	 fit into the existing padding for alignment.  */
+      size_t new_reclen = ALIGN_UP (kdp->d_reclen + size_diff,
+				    _Alignof (struct dirent64));
+      if (nb + new_reclen > nbytes)
+	{
+	  /* Entry is too large for the fixed-size buffer.  */
+	  if (last_offset == -1)
 	    {
-	      scratch_buffer_free (&tmpbuf);
-	      return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+	      __set_errno (EINVAL);
+	      return -1;
 	    }
 
-	  break;
+	  /* The new entry will overflow the input buffer, rewind to last
+	     obtained entry and return.  */
+	  __lseek64 (fd, last_offset, SEEK_SET);
+	  return (char *) dp - (char *) buf;
 	}
-
-      last_offset = kdp->d_off;
-      dp->d_ino = kdp->d_ino;
-      dp->d_off = kdp->d_off;
-      dp->d_reclen = new_reclen;
+      nb += new_reclen;
+
+#define DP_MEMBER(src, type, member)			     \
+    (__typeof__((type){0}.member) *)			     \
+      memcpy (&((__typeof__((type){0}.member)){0}),          \
+	      ((char *)(src) + offsetof (type, member)),     \
+	      sizeof ((type){0}.member))
+
+      memcpy (((char *)(dp) + offsetof (struct dirent64, d_ino)),
+	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
+	      sizeof ((struct dirent64){0}.d_ino));
+      memcpy (((char *)(dp) + offsetof (struct dirent64, d_off)),
+	      DP_MEMBER (kdp, struct kernel_dirent, d_ino),
+	      sizeof ((struct dirent64){0}.d_ino));
+      memcpy (&last_offset,
+	      DP_MEMBER (kdp, struct kernel_dirent, d_off),
+	      sizeof (last_offset));
+      memcpy (DP_MEMBER (dp, struct dirent64, d_reclen), &new_reclen,
+	      sizeof ((struct dirent64){0}.d_reclen));
       dp->d_type = *((char *) kdp + kdp->d_reclen - 1);
       memcpy (dp->d_name, kdp->d_name,
 	      kdp->d_reclen - offsetof (struct kernel_dirent, d_name));
@@ -112,8 +137,7 @@  __getdents64 (int fd, void *buf0, size_t nbytes)
       kdp = (struct kernel_dirent *) (((char *) kdp) + kdp->d_reclen);
     }
 
-  scratch_buffer_free (&tmpbuf);
-  return (char *) dp - buf;
+  return (char *) dp - (char *) buf;
 }
 libc_hidden_def (__getdents64)
 weak_alias (__getdents64, getdents64)