diff mbox series

[2/3] x32: Properly pass long to syscall [BZ #25810]

Message ID 20200413142515.16619-3-hjl.tools@gmail.com
State Superseded
Headers show
Series x32: Properly pass long to syscall [BZ #25810] | expand

Commit Message

H.J. Lu April 13, 2020, 2:25 p.m. UTC
X32 has 32-bit long and pointer with 64-bit off_t.  Since x32 psABI
requires that pointers passed in registers must be zero-extended to
64bit, x32 can share many syscall interfaces with LP64.  When a LP64
syscall with long and unsigned long arguments is used for x32, these
arguments must be properly extended to 64-bit.  Otherwise if the upper
32 bits of the register have undefined value, such a syscall will be
rejected by kernel.

For x32 INLINE_SYSCALLs, cast

1. pointer to unsigned long (32 bit).
2. 32-bit unsigned integer to unsigned long long (64 bit).
3. 32-bit integer to long long (64 bit).

For

       void *mmap(void *addr, size_t length, int prot, int flags,
                  int fd, off_t offset);

we now generate

   0:	41 f7 c1 ff 0f 00 00 	test   $0xfff,%r9d
   7:	75 1f                	jne    28 <__mmap64+0x28>
   9:	48 63 d2             	movslq %edx,%rdx
   c:	89 f6                	mov    %esi,%esi
   e:	4d 63 c0             	movslq %r8d,%r8
  11:	4c 63 d1             	movslq %ecx,%r10
  14:	b8 09 00 00 40       	mov    $0x40000009,%eax
  19:	0f 05                	syscall

That is

1. addr is unchanged.
2. length is zero-extend to 64 bits.
3. prot is sign-extend to 64 bits.
4. flags is sign-extend to 64 bits.
5. fd is sign-extend to 64 bits.
6. offset is unchanged.

For int arguments, since kernel uses only the lower 32 bits and ignores
the upper 32 bits in 64-bit registers, these work correctly.

Tested on x86-64 and x32.  There are no code changes on x86-64.
---
 sysdeps/unix/sysv/linux/x86_64/sysdep.h     | 10 ++++++----
 sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

Comments

Florian Weimer April 13, 2020, 2:43 p.m. UTC | #1
* H. J. Lu via Libc-alpha:

> diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> index 06e39212ee..93dad97c8f 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> @@ -219,12 +219,14 @@
>  /* Registers clobbered by syscall.  */
>  # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
>  
> -/* Create a variable 'name' based on type 'X' to avoid explicit types.
> -   This is mainly used set use 64-bits arguments in x32.   */
> -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name
> +/* This also works when X is an array.  */
> +#define TYPEFY1(X) __typeof__ ((X) - (X))

I think the comment is now misleading.  For an array X, (X) - (X) is
of type ptrdiff_t, which is signed, so it triggers sign extension, not
zero extension, in the x32 case.  I think this is the reason why my
proposed construct went astray in your experiments.

Is it really necessary to support arrays for system call arguments?

> diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
> index 2b5cf0c0ea..f3bc7e89df 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
> @@ -44,6 +44,24 @@
>  # define ZERO_EXTEND_5 movl %r8d, %r8d;
>  # undef ZERO_EXTEND_6
>  # define ZERO_EXTEND_6 movl %r9d, %r9d;
> +#else /*! __ASSEMBLER__ */
> +# undef ARGIFY
> +/* For x32, cast
> +   1. pointer to unsigned long (32 bit).
> +   2. 32-bit unsigned integer to unsigned long long (64 bit).
> +   3. 32-bit integer to long long (64 bit).
> + */
> +# define ARGIFY(X) \
> + ({									\
> +    _Pragma ("GCC diagnostic push");					\
> +    _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\"");	\
> +    (__builtin_classify_type (X) == 5					\
> +     ? (unsigned long) (X)						\
> +     : ((sizeof (X) <= 4 && (TYPEFY1 (X)) 0 < (TYPEFY1 (X)) -1)		\
> +	? (unsigned long long) (X)					\
> +	: (long long) (X)));						\
> +    _Pragma ("GCC diagnostic pop");					\
> +  })
>  #endif	/* __ASSEMBLER__ */

This should use long int instead long everywhere.
__builtin_classify_type is undocumented.  I'm not sure if its behavior
and the constant 5 is actually stable across GCC versions, but the
powerpc uses it as well, for a slightly difference purpose (to
recognize array arguments, it seems).
H.J. Lu April 13, 2020, 3:03 p.m. UTC | #2
On Mon, Apr 13, 2020 at 7:43 AM Florian Weimer <fw@deneb.enyo.de> wrote:
>
> * H. J. Lu via Libc-alpha:
>
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > index 06e39212ee..93dad97c8f 100644
> > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > @@ -219,12 +219,14 @@
> >  /* Registers clobbered by syscall.  */
> >  # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
> >
> > -/* Create a variable 'name' based on type 'X' to avoid explicit types.
> > -   This is mainly used set use 64-bits arguments in x32.   */
> > -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name
> > +/* This also works when X is an array.  */
> > +#define TYPEFY1(X) __typeof__ ((X) - (X))
>
> I think the comment is now misleading.  For an array X, (X) - (X) is
> of type ptrdiff_t, which is signed, so it triggers sign extension, not

I will update the comments.

> zero extension, in the x32 case.  I think this is the reason why my
> proposed construct went astray in your experiments.
>
> Is it really necessary to support arrays for system call arguments?

int
fexecve (int fd, char *const argv[], char *const envp[])
{
  if (fd < 0 || argv == NULL || envp == NULL)
    {
      __set_errno (EINVAL);
      return -1;
    }

#ifdef __NR_execveat
  /* Avoid implicit array coercion in syscall macros.  */
  INLINE_SYSCALL (execveat, 5, fd, "", &argv[0], &envp[0], AT_EMPTY_PATH);

"" is an array.

> > diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
> > index 2b5cf0c0ea..f3bc7e89df 100644
> > --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
> > +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
> > @@ -44,6 +44,24 @@
> >  # define ZERO_EXTEND_5 movl %r8d, %r8d;
> >  # undef ZERO_EXTEND_6
> >  # define ZERO_EXTEND_6 movl %r9d, %r9d;
> > +#else /*! __ASSEMBLER__ */
> > +# undef ARGIFY
> > +/* For x32, cast
> > +   1. pointer to unsigned long (32 bit).
> > +   2. 32-bit unsigned integer to unsigned long long (64 bit).
> > +   3. 32-bit integer to long long (64 bit).
> > + */
> > +# define ARGIFY(X) \
> > + ({                                                                  \
> > +    _Pragma ("GCC diagnostic push");                                 \
> > +    _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\"");    \
> > +    (__builtin_classify_type (X) == 5                                        \
> > +     ? (unsigned long) (X)                                           \
> > +     : ((sizeof (X) <= 4 && (TYPEFY1 (X)) 0 < (TYPEFY1 (X)) -1)              \
> > +     ? (unsigned long long) (X)                                      \
> > +     : (long long) (X)));                                            \
> > +    _Pragma ("GCC diagnostic pop");                                  \
> > +  })
> >  #endif       /* __ASSEMBLER__ */
>
> This should use long int instead long everywhere.

I will update.

> __builtin_classify_type is undocumented.  I'm not sure if its behavior
> and the constant 5 is actually stable across GCC versions, but the
> powerpc uses it as well, for a slightly difference purpose (to
> recognize array arguments, it seems).

include/libc-pointer-arith.h has

/* 1 if 'type' is a pointer type, 0 otherwise.  */
# define __pointer_type(type) (__builtin_classify_type ((type) 0) == 5)

If __builtin_classify_type ever changes, glibc won't build correctly.
Florian Weimer April 13, 2020, 3:17 p.m. UTC | #3
* H. J. Lu:

> On Mon, Apr 13, 2020 at 7:43 AM Florian Weimer <fw@deneb.enyo.de> wrote:
>>
>> * H. J. Lu via Libc-alpha:
>>
>> > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
>> > index 06e39212ee..93dad97c8f 100644
>> > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
>> > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
>> > @@ -219,12 +219,14 @@
>> >  /* Registers clobbered by syscall.  */
>> >  # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
>> >
>> > -/* Create a variable 'name' based on type 'X' to avoid explicit types.
>> > -   This is mainly used set use 64-bits arguments in x32.   */
>> > -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name
>> > +/* This also works when X is an array.  */
>> > +#define TYPEFY1(X) __typeof__ ((X) - (X))
>>
>> I think the comment is now misleading.  For an array X, (X) - (X) is
>> of type ptrdiff_t, which is signed, so it triggers sign extension, not
>
> I will update the comments.

Thanks.

>> zero extension, in the x32 case.  I think this is the reason why my
>> proposed construct went astray in your experiments.
>>
>> Is it really necessary to support arrays for system call arguments?
>
> int
> fexecve (int fd, char *const argv[], char *const envp[])
> {
>   if (fd < 0 || argv == NULL || envp == NULL)
>     {
>       __set_errno (EINVAL);
>       return -1;
>     }
>
> #ifdef __NR_execveat
>   /* Avoid implicit array coercion in syscall macros.  */
>   INLINE_SYSCALL (execveat, 5, fd, "", &argv[0], &envp[0], AT_EMPTY_PATH);
>
> "" is an array.

If that were the only reason for enforcing pointer decay, I'd say we'd
change the fexecve implementation.  But see below.

>> > diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
>> > index 2b5cf0c0ea..f3bc7e89df 100644
>> > --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
>> > +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
>> > @@ -44,6 +44,24 @@
>> >  # define ZERO_EXTEND_5 movl %r8d, %r8d;
>> >  # undef ZERO_EXTEND_6
>> >  # define ZERO_EXTEND_6 movl %r9d, %r9d;
>> > +#else /*! __ASSEMBLER__ */
>> > +# undef ARGIFY
>> > +/* For x32, cast
>> > +   1. pointer to unsigned long (32 bit).
>> > +   2. 32-bit unsigned integer to unsigned long long (64 bit).
>> > +   3. 32-bit integer to long long (64 bit).
>> > + */
>> > +# define ARGIFY(X) \
>> > + ({                                                                  \
>> > +    _Pragma ("GCC diagnostic push");                                 \
>> > +    _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\"");    \
>> > +    (__builtin_classify_type (X) == 5                                        \
>> > +     ? (unsigned long) (X)                                           \
>> > +     : ((sizeof (X) <= 4 && (TYPEFY1 (X)) 0 < (TYPEFY1 (X)) -1)              \
>> > +     ? (unsigned long long) (X)                                      \
>> > +     : (long long) (X)));                                            \
>> > +    _Pragma ("GCC diagnostic pop");                                  \
>> > +  })
>> >  #endif       /* __ASSEMBLER__ */
>>
>> This should use long int instead long everywhere.
>
> I will update.
>
>> __builtin_classify_type is undocumented.  I'm not sure if its behavior
>> and the constant 5 is actually stable across GCC versions, but the
>> powerpc uses it as well, for a slightly difference purpose (to
>> recognize array arguments, it seems).
>
> include/libc-pointer-arith.h has
>
> /* 1 if 'type' is a pointer type, 0 otherwise.  */
> # define __pointer_type(type) (__builtin_classify_type ((type) 0) == 5)
>
> If __builtin_classify_type ever changes, glibc won't build correctly.

I see.  In this case, can we solely rely on __builtin_classify_type?
I dont't think the inner ternary operator is necessary.

/* Enforce zero-extension for pointers and array system call
   arguments.  For integer types, extend to long long int (the full
   register) using a regular cast, resulting in zero or sign extension
   based on the signed-ness of the original type.  */
#define ARGIFY(X)                                                    \
  ({                                                                 \
    _Pragma ("GCC diagnostic push");                                 \
    _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\"");    \
    (__builtin_classify_type (X) == 5                                \
     ? (long long int) (uintptr_t) (X)                               \
     : (long long int) (X)));                                        \
    _Pragma ("GCC diagnostic pop");                                  \
  })

(Untested, sorry.)
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index 06e39212ee..93dad97c8f 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -219,12 +219,14 @@ 
 /* Registers clobbered by syscall.  */
 # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
 
-/* Create a variable 'name' based on type 'X' to avoid explicit types.
-   This is mainly used set use 64-bits arguments in x32.   */
-#define TYPEFY(X, name) __typeof__ ((X) - (X)) name
+/* This also works when X is an array.  */
+#define TYPEFY1(X) __typeof__ ((X) - (X))
 /* Explicit cast the argument to avoid integer from pointer warning on
    x32.  */
-#define ARGIFY(X) ((__typeof__ ((X) - (X))) (X))
+#define ARGIFY(X) ((TYPEFY1 (X)) (X))
+/* Create a variable 'name' based on type 'X' to avoid explicit types.
+   This is mainly used set use 64-bits arguments in x32.   */
+#define TYPEFY(X, name) __typeof__ (ARGIFY (X) - ARGIFY (X)) name
 
 #undef INTERNAL_SYSCALL
 #define INTERNAL_SYSCALL(name, nr, args...)				\
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
index 2b5cf0c0ea..f3bc7e89df 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
@@ -44,6 +44,24 @@ 
 # define ZERO_EXTEND_5 movl %r8d, %r8d;
 # undef ZERO_EXTEND_6
 # define ZERO_EXTEND_6 movl %r9d, %r9d;
+#else /*! __ASSEMBLER__ */
+# undef ARGIFY
+/* For x32, cast
+   1. pointer to unsigned long (32 bit).
+   2. 32-bit unsigned integer to unsigned long long (64 bit).
+   3. 32-bit integer to long long (64 bit).
+ */
+# define ARGIFY(X) \
+ ({									\
+    _Pragma ("GCC diagnostic push");					\
+    _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\"");	\
+    (__builtin_classify_type (X) == 5					\
+     ? (unsigned long) (X)						\
+     : ((sizeof (X) <= 4 && (TYPEFY1 (X)) 0 < (TYPEFY1 (X)) -1)		\
+	? (unsigned long long) (X)					\
+	: (long long) (X)));						\
+    _Pragma ("GCC diagnostic pop");					\
+  })
 #endif	/* __ASSEMBLER__ */
 
 #endif /* linux/x86_64/x32/sysdep.h */