Patchwork [v2,1/5] mips: Do not malloc on getdents64 fallback

login
register
mail settings
Submitter Adhemerval Zanella Netto
Date Aug. 28, 2019, 9:02 p.m.
Message ID <4298ae27-5ba3-e06c-de8b-a338b3547ad7@linaro.org>
Download mbox | patch
Permalink /patch/34317/
State New
Headers show

Comments

Adhemerval Zanella Netto - Aug. 28, 2019, 9:02 p.m.
On 28/08/2019 11:42, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>    struct kernel_dirent
>> +  {
>> +    unsigned long d_ino;
>> +    unsigned long d_off;
>> +    unsigned short int d_reclen;
>> +    char d_name[1];
>> +  } kbuf[KBUF_SIZE / sizeof (struct kernel_dirent)];
> 
> I think it's still not clear to me in which cases we actually need to
> move the dirent entries in the buffer.  My impression is that we just
> need to move d_name by one byte because before, d_type was after the
> name, and afterwards, it comes before the name.  But the record
> boundaries are unchanged.

My understanding is the record boundary would be same as long the 
d_name fits on the alignment padding space minus the size of the
d_type.  Otherwise the dirent64 will need to be extended.

This leads to possible memmoves the whole buffer obtained from kernel
on which iteration (which for large buffer is another performance
drain) and possible lseek for the case where registers are slide out
the buffer.  I strongly think using an auxiliary buffer is still
simpler than operating in-place.

Also, mips64-n32 has another issue I found recently: it uses the
compat syscall which uses the compat dirent with both d_ino/d_off
as 32 bits.  For in-place getdents call it will require even more
memory moves. Below is an updated patch to fix it:

---
Florian Weimer - Aug. 28, 2019, 9:23 p.m.
* Adhemerval Zanella:

> On 28/08/2019 11:42, Florian Weimer wrote:
>> * Adhemerval Zanella:
>> 
>>>    struct kernel_dirent
>>> +  {
>>> +    unsigned long d_ino;
>>> +    unsigned long d_off;
>>> +    unsigned short int d_reclen;
>>> +    char d_name[1];
>>> +  } kbuf[KBUF_SIZE / sizeof (struct kernel_dirent)];
>> 
>> I think it's still not clear to me in which cases we actually need to
>> move the dirent entries in the buffer.  My impression is that we just
>> need to move d_name by one byte because before, d_type was after the
>> name, and afterwards, it comes before the name.  But the record
>> boundaries are unchanged.
>
> My understanding is the record boundary would be same as long the 
> d_name fits on the alignment padding space minus the size of the
> d_type.  Otherwise the dirent64 will need to be extended.

Hmm.  The problem is mips64 n32, right?  Where unsigned long is 32 bits?

Thanks,
Florian
Adhemerval Zanella Netto - Aug. 29, 2019, 11:04 a.m.
On 28/08/2019 18:23, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> On 28/08/2019 11:42, Florian Weimer wrote:
>>> * Adhemerval Zanella:
>>>
>>>>    struct kernel_dirent
>>>> +  {
>>>> +    unsigned long d_ino;
>>>> +    unsigned long d_off;
>>>> +    unsigned short int d_reclen;
>>>> +    char d_name[1];
>>>> +  } kbuf[KBUF_SIZE / sizeof (struct kernel_dirent)];
>>>
>>> I think it's still not clear to me in which cases we actually need to
>>> move the dirent entries in the buffer.  My impression is that we just
>>> need to move d_name by one byte because before, d_type was after the
>>> name, and afterwards, it comes before the name.  But the record
>>> boundaries are unchanged.
>>
>> My understanding is the record boundary would be same as long the 
>> d_name fits on the alignment padding space minus the size of the
>> d_type.  Otherwise the dirent64 will need to be extended.
> 
> Hmm.  The problem is mips64 n32, right?  Where unsigned long is 32 bits?

Yes, it would require either a mips64-n32 specific path or implementation.

Patch

diff --git a/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c b/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
index 8bf3abb0e0..881b5eb651 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
+++ b/sysdeps/unix/sysv/linux/mips/mips64/getdents64.c
@@ -22,98 +22,92 @@ 
 #include <assert.h>
 #include <sys/param.h>
 #include <unistd.h>
-#include <scratch_buffer.h>
 #include <limits.h>
 
 ssize_t
-__getdents64 (int fd, void *buf0, size_t nbytes)
+__getdents64 (int fd, void *buf, size_t nbytes)
 {
-  char *buf = buf0;
-
   /* The system call takes an unsigned int argument, and some length
      checks in the kernel use an int type.  */
   if (nbytes > INT_MAX)
     nbytes = INT_MAX;
 
 #ifdef __NR_getdents64
-  ssize_t ret = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes);
-  if (ret != -1)
-    return ret;
+  static bool getdents64_supported = true;
+  if (atomic_load_relaxed (&getdents64_supported))
+    {
+      ssize_t ret = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes);
+      if (ret >= 0 || errno != ENOSYS)
+	return ret;
+
+      atomic_store_relaxed (&getdents64_supported, false);
+    }
 #endif
 
   /* Unfortunately getdents64 was only wire-up for MIPS n64 on Linux 3.10.
-     If syscall is not available it need to fallback to non-LFS one.  */
+     If the syscall is not available it need to fallback to the non-LFS one.
+     Also to avoid an unbounded allocation through VLA/alloca or malloc (which
+     would make the syscall non async-signal-safe) it uses a limited buffer.
+     This is sub-optimal for large NBYTES, however this is a fallback
+     mechanism to emulate a syscall that kernel should provide.   */
 
+  enum { KBUF_SIZE = 1024 };
   struct kernel_dirent
-    {
-      unsigned long d_ino;
-      unsigned long d_off;
-      unsigned short int d_reclen;
-      char d_name[256];
-    };
+  {
+#if _MIPS_SIM == _ABI64
+    uint64_t d_ino;
+    uint64_t d_off;
+#else
+    uint32_t d_ino;
+    uint32_t d_off;
+#endif
+    unsigned short int d_reclen;
+    char d_name[1];
+  } kbuf[KBUF_SIZE / sizeof (struct kernel_dirent)];
+  size_t kbuf_size = nbytes < KBUF_SIZE ? nbytes : KBUF_SIZE;
 
   const size_t size_diff = (offsetof (struct dirent64, d_name)
-			   - offsetof (struct kernel_dirent, d_name));
-
-  size_t red_nbytes = MIN (nbytes
-			   - ((nbytes / (offsetof (struct dirent64, d_name)
-					 + 14)) * size_diff),
-			   nbytes - size_diff);
-
-  struct scratch_buffer tmpbuf;
-  scratch_buffer_init (&tmpbuf);
-  if (!scratch_buffer_set_array_size (&tmpbuf, red_nbytes, sizeof (uint8_t)))
-    INLINE_SYSCALL_ERROR_RETURN_VALUE (ENOMEM);
-
-  struct kernel_dirent *skdp, *kdp;
-  skdp = kdp = tmpbuf.data;
+                           - offsetof (struct kernel_dirent, d_name));
 
-  ssize_t retval = INLINE_SYSCALL_CALL (getdents, fd, kdp, red_nbytes);
-  if (retval == -1)
-    {
-      scratch_buffer_free (&tmpbuf);
-      return -1;
-    }
+  struct dirent64 *dp = (struct dirent64 *) buf;
 
+  size_t nb = 0;
   off64_t last_offset = -1;
-  struct dirent64 *dp = (struct dirent64 *) buf;
-  while ((char *) kdp < (char *) skdp + retval)
+
+  ssize_t r;
+  while ((r = INLINE_SYSCALL_CALL (getdents, fd, kbuf, kbuf_size)) > 0)
     {
-      const size_t alignment = _Alignof (struct dirent64);
-      /* Since kdp->d_reclen is already aligned for the kernel structure
-	 this may compute a value that is bigger than necessary.  */
-      size_t new_reclen = ((kdp->d_reclen + size_diff + alignment - 1)
-			   & ~(alignment - 1));
-      if ((char *) dp + new_reclen > buf + nbytes)
-        {
-	  /* Our heuristic failed.  We read too many entries.  Reset
-	     the stream.  */
-	  assert (last_offset != -1);
-	  __lseek64 (fd, last_offset, SEEK_SET);
-
-	  if ((char *) dp == buf)
+      struct kernel_dirent *skdp, *kdp;
+      skdp = kdp = kbuf;
+
+      while ((char *) kdp < (char *) skdp + r)
+	{
+	  const size_t alignment = _Alignof (struct dirent64);
+	  size_t new_reclen = ((kdp->d_reclen + size_diff + alignment - 1)
+			      & ~(alignment - 1));
+	  if (nb + new_reclen > nbytes)
 	    {
-	      scratch_buffer_free (&tmpbuf);
-	      return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+		/* The new entry will overflow the input buffer, rewind to
+		   last obtained entry and return.  */
+	       __lseek64 (fd, last_offset, SEEK_SET);
+	       goto out;
 	    }
+	  nb += new_reclen;
 
-	  break;
-	}
-
-      last_offset = kdp->d_off;
-      dp->d_ino = kdp->d_ino;
-      dp->d_off = kdp->d_off;
-      dp->d_reclen = new_reclen;
-      dp->d_type = *((char *) kdp + kdp->d_reclen - 1);
-      memcpy (dp->d_name, kdp->d_name,
-	      kdp->d_reclen - offsetof (struct kernel_dirent, d_name));
+	  dp->d_ino = kdp->d_ino;
+	  dp->d_off = last_offset = kdp->d_off;
+	  dp->d_reclen = new_reclen;
+	  dp->d_type = *((char *) kdp + kdp->d_reclen - 1);
+	  memcpy (dp->d_name, kdp->d_name,
+		  kdp->d_reclen - offsetof (struct kernel_dirent, d_name));
 
-      dp = (struct dirent64 *) ((char *) dp + new_reclen);
-      kdp = (struct kernel_dirent *) (((char *) kdp) + kdp->d_reclen);
+	  dp = (struct dirent64 *) ((char *) dp + new_reclen);
+	  kdp = (struct kernel_dirent *) (((char *) kdp) + kdp->d_reclen);
+	}
     }
 
-  scratch_buffer_free (&tmpbuf);
-  return (char *) dp - buf;
+out:
+  return (char *) dp - (char *) buf;
 }
 libc_hidden_def (__getdents64)
 weak_alias (__getdents64, getdents64)