Patchwork [BZ,#19371] Properly handle x32 syscall

login
register
mail settings
Submitter H.J. Lu
Date Dec. 16, 2015, 3:01 p.m.
Message ID <20151216150139.GA24629@gmail.com>
Download mbox | patch
Permalink /patch/10035/
State New
Headers show

Comments

H.J. Lu - Dec. 16, 2015, 3:01 p.m.
X32 syscall() may return 64-bit integer as lseek, time and times.  Its
return type should be __syscall_slong_t instead of long int.  We need
to properly return 64-bit error value.

Before the patch:

Dump of assembler code for function syscall:
   0x000dab20 <+0>:	mov    %rdi,%rax
   0x000dab23 <+3>:	mov    %rsi,%rdi
   0x000dab26 <+6>:	mov    %rdx,%rsi
   0x000dab29 <+9>:	mov    %rcx,%rdx
   0x000dab2c <+12>:	mov    %r8,%r10
   0x000dab2f <+15>:	mov    %r9,%r8
   0x000dab32 <+18>:	mov    0x8(%rsp),%r9
   0x000dab37 <+23>:	syscall
   0x000dab39 <+25>:	cmp    $0xfffffffffffff001,%rax
   0x000dab3f <+31>:	jae    0xdab42 <syscall+34>
   0x000dab41 <+33>:	retq
   0x000dab42 <+34>:	mov    0x2b3367(%rip),%rcx        # 0x38deb0
   0x000dab49 <+41>:	neg    %eax
   0x000dab4b <+43>:	mov    %eax,%fs:(%rcx)
   0x000dab4e <+46>:	or     $0xffffffff,%eax
                        ^^^^^^^^^^^^^^^^^^ This is 32-bit error return.
   0x000dab51 <+49>:	retq
End of assembler dump.

After the patch:

Dump of assembler code for function syscall:
   0x000daaf0 <+0>:	mov    %rdi,%rax
   0x000daaf3 <+3>:	mov    %rsi,%rdi
   0x000daaf6 <+6>:	mov    %rdx,%rsi
   0x000daaf9 <+9>:	mov    %rcx,%rdx
   0x000daafc <+12>:	mov    %r8,%r10
   0x000daaff <+15>:	mov    %r9,%r8
   0x000dab02 <+18>:	mov    0x8(%rsp),%r9
   0x000dab07 <+23>:	syscall
   0x000dab09 <+25>:	cmp    $0xfffffffffffff001,%rax
   0x000dab0f <+31>:	jae    0xdab12 <syscall+34>
   0x000dab11 <+33>:	retq
   0x000dab12 <+34>:	mov    0x2b3397(%rip),%rcx        # 0x38deb0
   0x000dab19 <+41>:	neg    %eax
   0x000dab1b <+43>:	mov    %eax,%fs:(%rcx)
   0x000dab1e <+46>:	or     $0xffffffffffffffff,%rax
   0x000dab22 <+50>:	retq
End of assembler dump.

OK for master?

H.J.
---
	[BZ #19371]
	* posix/unistd.h (syscall): Use __syscall_slong_t for return
	type.
	* sysdeps/unix/sysv/linux/x86_64/x32/syscall.S: New file.
---
 posix/unistd.h                               |  2 +-
 sysdeps/unix/sysv/linux/x86_64/x32/syscall.S | 33 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/x32/syscall.S
Florian Weimer - Dec. 16, 2015, 4:35 p.m.
On 12/16/2015 04:01 PM, H.J. Lu wrote:
> -extern long int syscall (long int __sysno, ...) __THROW;
> +extern __syscall_slong_t syscall (long int __sysno, ...) __THROW;

syscall is documented in the manual, so this change needs to be
reflected there in some way.

Florian
H.J. Lu - Dec. 16, 2015, 4:36 p.m.
On Wed, Dec 16, 2015 at 8:35 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 12/16/2015 04:01 PM, H.J. Lu wrote:
>> -extern long int syscall (long int __sysno, ...) __THROW;
>> +extern __syscall_slong_t syscall (long int __sysno, ...) __THROW;
>
> syscall is documented in the manual, so this change needs to be
> reflected there in some way.

I will send a patch to Linux man pages after it is fixed in glibc.
Florian Weimer - Dec. 16, 2015, 4:38 p.m.
On 12/16/2015 05:36 PM, H.J. Lu wrote:
> On Wed, Dec 16, 2015 at 8:35 AM, Florian Weimer <fweimer@redhat.com> wrote:
>> On 12/16/2015 04:01 PM, H.J. Lu wrote:
>>> -extern long int syscall (long int __sysno, ...) __THROW;
>>> +extern __syscall_slong_t syscall (long int __sysno, ...) __THROW;
>>
>> syscall is documented in the manual, so this change needs to be
>> reflected there in some way.
> 
> I will send a patch to Linux man pages after it is fixed in glibc.

Sorry, I was talking about manual/startup.texi:

manual/startup.texi:727:@deftypefun {long int} syscall (long int
@var{sysno}, @dots{})
manual/startup.texi-728-@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
manual/startup.texi-729-
manual/startup.texi-730-@code{syscall} performs a generic system call.

Florian
Dmitry Levin - Dec. 16, 2015, 4:48 p.m.
On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
> return type should be __syscall_slong_t instead of long int.  We need
> to properly return 64-bit error value.
[...]
> +#include <sysdep.h>
> +
> +/* Return -1LL in a full 64 bits.  */
> +#undef SYSCALL_ERROR_HANDLER
> +#define SYSCALL_ERROR_HANDLER	\
> +0:				\
> +	SYSCALL_SET_ERRNO;	\
> +	orq $-1, %rax;		\
> +	ret;
> +
> +/* Always use our own error handler.  */
> +#undef SYSCALL_ERROR_LABEL
> +#define SYSCALL_ERROR_LABEL 0f
> +
> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>

While this looks technically OK, I don't understand why RAX_LP is used
at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
would be simpler if glibc finally admitted that return code of any
x32 linux syscall is stored in %rax and not in %eax.

Also, I wish this and times bugs were fixed long time ago.
Now I have to apply workarounds like this:
https://github.com/strace/strace/commit/b6b38fa73b9e0810c68a19dcb3ade2974db20f8c
H.J. Lu - Dec. 16, 2015, 4:52 p.m.
On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
> On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
>> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
>> return type should be __syscall_slong_t instead of long int.  We need
>> to properly return 64-bit error value.
> [...]
>> +#include <sysdep.h>
>> +
>> +/* Return -1LL in a full 64 bits.  */
>> +#undef SYSCALL_ERROR_HANDLER
>> +#define SYSCALL_ERROR_HANDLER        \
>> +0:                           \
>> +     SYSCALL_SET_ERRNO;      \
>> +     orq $-1, %rax;          \
>> +     ret;
>> +
>> +/* Always use our own error handler.  */
>> +#undef SYSCALL_ERROR_LABEL
>> +#define SYSCALL_ERROR_LABEL 0f
>> +
>> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
>
> While this looks technically OK, I don't understand why RAX_LP is used
> at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
> would be simpler if glibc finally admitted that return code of any
> x32 linux syscall is stored in %rax and not in %eax.

Only 3 system calls return 64-bit value.  %eax is one-byte shorter.
Adhemerval Zanella Netto - Dec. 16, 2015, 4:54 p.m.
On 16-12-2015 14:52, H.J. Lu wrote:
> On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
>> On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
>>> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
>>> return type should be __syscall_slong_t instead of long int.  We need
>>> to properly return 64-bit error value.
>> [...]
>>> +#include <sysdep.h>
>>> +
>>> +/* Return -1LL in a full 64 bits.  */
>>> +#undef SYSCALL_ERROR_HANDLER
>>> +#define SYSCALL_ERROR_HANDLER        \
>>> +0:                           \
>>> +     SYSCALL_SET_ERRNO;      \
>>> +     orq $-1, %rax;          \
>>> +     ret;
>>> +
>>> +/* Always use our own error handler.  */
>>> +#undef SYSCALL_ERROR_LABEL
>>> +#define SYSCALL_ERROR_LABEL 0f
>>> +
>>> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
>>
>> While this looks technically OK, I don't understand why RAX_LP is used
>> at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
>> would be simpler if glibc finally admitted that return code of any
>> x32 linux syscall is stored in %rax and not in %eax.
> 
> Only 3 system calls return 64-bit value.  %eax is one-byte shorter.
> 

I still fail the see the performance gains over the re-engineering and
code duplication required to avoid make the syscall return 64-bit values.
H.J. Lu - Dec. 16, 2015, 4:58 p.m.
On Wed, Dec 16, 2015 at 8:54 AM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
> On 16-12-2015 14:52, H.J. Lu wrote:
>> On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
>>> On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
>>>> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
>>>> return type should be __syscall_slong_t instead of long int.  We need
>>>> to properly return 64-bit error value.
>>> [...]
>>>> +#include <sysdep.h>
>>>> +
>>>> +/* Return -1LL in a full 64 bits.  */
>>>> +#undef SYSCALL_ERROR_HANDLER
>>>> +#define SYSCALL_ERROR_HANDLER        \
>>>> +0:                           \
>>>> +     SYSCALL_SET_ERRNO;      \
>>>> +     orq $-1, %rax;          \
>>>> +     ret;
>>>> +
>>>> +/* Always use our own error handler.  */
>>>> +#undef SYSCALL_ERROR_LABEL
>>>> +#define SYSCALL_ERROR_LABEL 0f
>>>> +
>>>> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
>>>
>>> While this looks technically OK, I don't understand why RAX_LP is used
>>> at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
>>> would be simpler if glibc finally admitted that return code of any
>>> x32 linux syscall is stored in %rax and not in %eax.
>>
>> Only 3 system calls return 64-bit value.  %eax is one-byte shorter.
>>
>
> I still fail the see the performance gains over the re-engineering and
> code duplication required to avoid make the syscall return 64-bit values.

What code duplication?
H.J. Lu - Dec. 16, 2015, 5:01 p.m.
On Wed, Dec 16, 2015 at 8:58 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Dec 16, 2015 at 8:54 AM, Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>> On 16-12-2015 14:52, H.J. Lu wrote:
>>> On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
>>>> On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
>>>>> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
>>>>> return type should be __syscall_slong_t instead of long int.  We need
>>>>> to properly return 64-bit error value.
>>>> [...]
>>>>> +#include <sysdep.h>
>>>>> +
>>>>> +/* Return -1LL in a full 64 bits.  */
>>>>> +#undef SYSCALL_ERROR_HANDLER
>>>>> +#define SYSCALL_ERROR_HANDLER        \
>>>>> +0:                           \
>>>>> +     SYSCALL_SET_ERRNO;      \
>>>>> +     orq $-1, %rax;          \
>>>>> +     ret;
>>>>> +
>>>>> +/* Always use our own error handler.  */
>>>>> +#undef SYSCALL_ERROR_LABEL
>>>>> +#define SYSCALL_ERROR_LABEL 0f
>>>>> +
>>>>> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
>>>>
>>>> While this looks technically OK, I don't understand why RAX_LP is used
>>>> at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
>>>> would be simpler if glibc finally admitted that return code of any
>>>> x32 linux syscall is stored in %rax and not in %eax.
>>>
>>> Only 3 system calls return 64-bit value.  %eax is one-byte shorter.
>>>
>>
>> I still fail the see the performance gains over the re-engineering and
>> code duplication required to avoid make the syscall return 64-bit values.
>
> What code duplication?
>

The whole point of x32 is it is ILP32.  Some system calls return
64-bit is just an exception.
Adhemerval Zanella Netto - Dec. 16, 2015, 5:08 p.m.
On 16-12-2015 15:01, H.J. Lu wrote:
> On Wed, Dec 16, 2015 at 8:58 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Dec 16, 2015 at 8:54 AM, Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>>>
>>>
>>> On 16-12-2015 14:52, H.J. Lu wrote:
>>>> On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
>>>>> On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
>>>>>> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
>>>>>> return type should be __syscall_slong_t instead of long int.  We need
>>>>>> to properly return 64-bit error value.
>>>>> [...]
>>>>>> +#include <sysdep.h>
>>>>>> +
>>>>>> +/* Return -1LL in a full 64 bits.  */
>>>>>> +#undef SYSCALL_ERROR_HANDLER
>>>>>> +#define SYSCALL_ERROR_HANDLER        \
>>>>>> +0:                           \
>>>>>> +     SYSCALL_SET_ERRNO;      \
>>>>>> +     orq $-1, %rax;          \
>>>>>> +     ret;
>>>>>> +
>>>>>> +/* Always use our own error handler.  */
>>>>>> +#undef SYSCALL_ERROR_LABEL
>>>>>> +#define SYSCALL_ERROR_LABEL 0f
>>>>>> +
>>>>>> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
>>>>>
>>>>> While this looks technically OK, I don't understand why RAX_LP is used
>>>>> at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
>>>>> would be simpler if glibc finally admitted that return code of any
>>>>> x32 linux syscall is stored in %rax and not in %eax.
>>>>
>>>> Only 3 system calls return 64-bit value.  %eax is one-byte shorter.
>>>>
>>>
>>> I still fail the see the performance gains over the re-engineering and
>>> code duplication required to avoid make the syscall return 64-bit values.
>>
>> What code duplication?
>>
> 
> The whole point of x32 is it is ILP32.  Some system calls return
> 64-bit is just an exception.
> 

And my point is to avoid the creation of multiple implementation of
same syscall, it only adds maintenance burden and complication on
GLIBC code (for instance the pread/pwrite case).

For this specific case if x32 SYSCALL_XXX macros retuned 64-bit values
there would be no need for another syscall.S implementation. Same for
the time.c patch.

And again I see no compelling performance reason to add such complexity.
Rich Felker - Dec. 16, 2015, 5:12 p.m.
On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
> return type should be __syscall_slong_t instead of long int.  We need
> to properly return 64-bit error value.

Where is this actually needed? I don't think changing the type of a
public function to be arch-specific is an appropriate API change to
make, much less an appropriate ABI change.

Rich
Dmitry Levin - Dec. 16, 2015, 5:17 p.m.
On Wed, Dec 16, 2015 at 08:52:34AM -0800, H.J. Lu wrote:
> On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
> > On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
> >> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
> >> return type should be __syscall_slong_t instead of long int.  We need
> >> to properly return 64-bit error value.
> > [...]
> >> +#include <sysdep.h>
> >> +
> >> +/* Return -1LL in a full 64 bits.  */
> >> +#undef SYSCALL_ERROR_HANDLER
> >> +#define SYSCALL_ERROR_HANDLER        \
> >> +0:                           \
> >> +     SYSCALL_SET_ERRNO;      \
> >> +     orq $-1, %rax;          \
> >> +     ret;
> >> +
> >> +/* Always use our own error handler.  */
> >> +#undef SYSCALL_ERROR_LABEL
> >> +#define SYSCALL_ERROR_LABEL 0f
> >> +
> >> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
> >
> > While this looks technically OK, I don't understand why RAX_LP is used
> > at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
> > would be simpler if glibc finally admitted that return code of any
> > x32 linux syscall is stored in %rax and not in %eax.
> 
> Only 3 system calls return 64-bit value.

What do you mean by saying that?
Every bit of %rax is initialized on return from any syscall.

> %eax is one-byte shorter.

In your proposed fix for times syscall wrapper, the new __times is 2 bytes
shorter than the old one.  Could it happen that other syscall wrappers
will also win after switching from %eax to %rax?
Dmitry Levin - Dec. 16, 2015, 5:37 p.m.
On Wed, Dec 16, 2015 at 12:12:12PM -0500, Rich Felker wrote:
> On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
> > X32 syscall() may return 64-bit integer as lseek, time and times.  Its
> > return type should be __syscall_slong_t instead of long int.  We need
> > to properly return 64-bit error value.
> 
> Where is this actually needed?

On linux x32, at least lseek, time, and times syscalls return values that
cannot be properly stored in a 32-bit long int.  If libc syscall wrapper
is unable to forward values returned by these syscalls, you hardly can use
such a syscall wrapper with __NR_lseek, __NR_time, or __NR_times.
H.J. Lu - Dec. 16, 2015, 9:21 p.m.
On Wed, Dec 16, 2015 at 9:17 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
> On Wed, Dec 16, 2015 at 08:52:34AM -0800, H.J. Lu wrote:
>> On Wed, Dec 16, 2015 at 8:48 AM, Dmitry V. Levin <ldv@altlinux.org> wrote:
>> > On Wed, Dec 16, 2015 at 07:01:39AM -0800, H.J. Lu wrote:
>> >> X32 syscall() may return 64-bit integer as lseek, time and times.  Its
>> >> return type should be __syscall_slong_t instead of long int.  We need
>> >> to properly return 64-bit error value.
>> > [...]
>> >> +#include <sysdep.h>
>> >> +
>> >> +/* Return -1LL in a full 64 bits.  */
>> >> +#undef SYSCALL_ERROR_HANDLER
>> >> +#define SYSCALL_ERROR_HANDLER        \
>> >> +0:                           \
>> >> +     SYSCALL_SET_ERRNO;      \
>> >> +     orq $-1, %rax;          \
>> >> +     ret;
>> >> +
>> >> +/* Always use our own error handler.  */
>> >> +#undef SYSCALL_ERROR_LABEL
>> >> +#define SYSCALL_ERROR_LABEL 0f
>> >> +
>> >> +#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>
>> >
>> > While this looks technically OK, I don't understand why RAX_LP is used
>> > at all in default SYSCALL_ERROR_HANDLER in place for rax.  Everything
>> > would be simpler if glibc finally admitted that return code of any
>> > x32 linux syscall is stored in %rax and not in %eax.
>>
>> Only 3 system calls return 64-bit value.
>
> What do you mean by saying that?
> Every bit of %rax is initialized on return from any syscall.
>
>> %eax is one-byte shorter.
>
> In your proposed fix for times syscall wrapper, the new __times is 2 bytes
> shorter than the old one.  Could it happen that other syscall wrappers
> will also win after switching from %eax to %rax?
>

You are asking for trouble.   How do you extend 64-bit syscall
return value, 0x80000000, to 64-bit?

Patch

diff --git a/posix/unistd.h b/posix/unistd.h
index 1b52930..bb61ad7 100644
--- a/posix/unistd.h
+++ b/posix/unistd.h
@@ -1058,7 +1058,7 @@  extern void *sbrk (intptr_t __delta) __THROW;
 
    In Mach, all system calls take normal arguments and always return an
    error code (zero for success).  */
-extern long int syscall (long int __sysno, ...) __THROW;
+extern __syscall_slong_t syscall (long int __sysno, ...) __THROW;
 
 #endif	/* Use misc.  */
 
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/syscall.S b/sysdeps/unix/sysv/linux/x86_64/x32/syscall.S
new file mode 100644
index 0000000..cc5255f
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/syscall.S
@@ -0,0 +1,33 @@ 
+/* The syscall system call.  Linux/x32 version.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Return -1LL in a full 64 bits.  */
+#undef SYSCALL_ERROR_HANDLER
+#define SYSCALL_ERROR_HANDLER	\
+0:				\
+	SYSCALL_SET_ERRNO;	\
+	orq $-1, %rax;		\
+	ret;
+
+/* Always use our own error handler.  */
+#undef SYSCALL_ERROR_LABEL
+#define SYSCALL_ERROR_LABEL 0f
+
+#include <sysdeps/unix/sysv/linux/x86_64/syscall.S>