[v6,1/3] x86_64: Set the syscall register right before doing the syscall.

Message ID 20230424150353.1469397-2-josimmon@redhat.com
State Rejected
Headers
Series x86_64: aarch64: Set call number just before syscall |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Joe Simmons-Talbott April 24, 2023, 3:03 p.m. UTC
  To make identifying syscalls easier during call tree analysis, load the
syscall number just before performing the syscall.

Compiler optimizations can place quite a few instructions between the
setting of the syscall number and the syscall instruction.  During call
tree analysis, the distance between the two makes it harder for both
tools and humans to properly identify the syscall number.  Setting the
syscall number in the instruction immediately before the syscall
instruction makes this task easier and less error prone.  Being able to
reliably identify syscalls made by a given API will make it easier to
understand and verify the safety and security of glibc.
---
 sysdeps/unix/sysv/linux/x86_64/sysdep.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
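
As an illustration, here is a minimal standalone sketch of the change
for a one-argument syscall (hypothetical wrapper functions, not the
actual sysdep.h macros; REGISTERS_CLOBBERED_BY_SYSCALL is written out
by hand as "cc", "rcx", "r11"):

/* Before: the "0" matching constraint lets the compiler load NUMBER
   into %rax anywhere ahead of the asm, possibly far from the syscall
   instruction.  */
long int
syscall1_before (long int number, long int arg1)
{
  unsigned long int resultvar;
  register long int _a1 asm ("rdi") = arg1;
  asm volatile ("syscall\n\t"
                : "=a" (resultvar)
                : "0" (number), "r" (_a1)
                : "memory", "cc", "rcx", "r11");
  return (long int) resultvar;
}

/* After: the asm template itself writes %eax ("%k0" is the 32-bit name
   of the output register), so the load is pinned to the instruction
   immediately before the syscall.  */
long int
syscall1_after (long int number, long int arg1)
{
  unsigned long int resultvar;
  register long int _a1 asm ("rdi") = arg1;
  asm volatile ("movl %1, %k0\n\tsyscall\n\t"
                : "=a" (resultvar)
                : "g" (number), "r" (_a1)
                : "memory", "cc", "rcx", "r11");
  return (long int) resultvar;
}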
  

Comments

Joe Simmons-Talbott May 15, 2023, 2:15 p.m. UTC | #1
Hi H.J.,

Is there anything else you are looking for on x86_64 WRT this patch?

Thanks,
Joe
On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> [...]
  
H.J. Lu May 15, 2023, 4:20 p.m. UTC | #2
On Mon, May 15, 2023 at 7:15 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
>
> Hi H.J.,
>
> Is there anything else you are looking for on x86_64 WRT this patch?
>
> Thanks,
> Joe
> On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> > [...]

I have no more comments.
  
Joe Simmons-Talbott May 25, 2023, 6:07 p.m. UTC | #3
ping.

Thanks,
Joe
On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> [...]
  
Noah Goldstein May 25, 2023, 6:40 p.m. UTC | #4
On Thu, May 25, 2023 at 1:07 PM Joe Simmons-Talbott via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> ping.
>
> Thanks,
> Joe
> On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> > [...]

I'm mildly opposed to this patch. Even if GLIBC guarantees all
syscalls will set the number in the instruction before, that's no
guarantee for the entire program. Furthermore, in the event of:
   `movl $VAL, %eax; syscall`
it's still not safe to *always* assume that `VAL` corresponds to the
syscall number, as a jump (direct or indirect) could still land between
the instructions (i.e. there is no guarantee in the assembly that the
`mov` dominates the `syscall`).
So at the end of the day, we are bloating the library without, AFAICT,
providing any real guarantee. Maybe I'm missing something?
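
To make the dominance point concrete, here is a hand-written sketch
(illustrative only, not glibc source or compiler output) where the
number-setting mov is adjacent to the syscall and yet does not
dominate it:

        movl    $60, %eax       /* looks like exit(2) ...            */
.Lshared:
        syscall                 /* ... but control can also arrive   */
        ret                     /* here via the jmp below, with %eax */
                                /* holding 39 (getpid) instead.      */
elsewhere:
        movl    $39, %eax
        jmp     .Lshared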
  
Florian Weimer May 26, 2023, 7:04 a.m. UTC | #5
* Noah Goldstein via Libc-alpha:

> I'm mildly opposed to this patch. Even if GLIBC guarantees all
> syscalls will set the number in the instruction before, that's no
> guarantee for the entire program. Furthermore, in the event of:
>    `movl $VAL, %eax; syscall`
> it's still not safe to *always* assume that `VAL` corresponds to the
> syscall number, as a jump (direct or indirect) could still land between
> the instructions (i.e. there is no guarantee in the assembly that the
> `mov` dominates the `syscall`).
> So at the end of the day, we are bloating the library without, AFAICT,
> providing any real guarantee. Maybe I'm missing something?

Joe, is there a size change to libc.so.6 as the result of this change?

Thanks,
Florian
  
Joe Simmons-Talbott May 26, 2023, 12:59 p.m. UTC | #6
On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
> * Noah Goldstein via Libc-alpha:
> 
> > [...]
> 
> Joe, is there a size change to libc.so.6 as the result of this change?

No, the size is the same with and without this patchset on x86_64.

Thanks,
Joe
  
Noah Goldstein May 26, 2023, 9:18 p.m. UTC | #7
On Fri, May 26, 2023 at 5:59 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
>
> On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
> > * Noah Goldstein via Libc-alpha:
> >
> > > [...]
> >
> > Joe, is there a size change to libc.so.6 as the result of this change?
>
> No, the size is the same with and without this patchset on x86_64.
>
There aren't many syscalls, so it's only a minor cost (hence only
mild opposition), but I don't see the value this provides, given that it
still won't be safe to assume the syscall number is always set by the
instruction immediately beforehand for any robust purpose. So it still
feels like: why take any cost at all?

> Thanks,
> Joe
>
  
Florian Weimer May 30, 2023, 10:13 a.m. UTC | #8
* Noah Goldstein:

> [...]
>
> There aren't many syscalls, so it's only a minor cost (hence only
> mild opposition), but I don't see the value this provides, given that it
> still won't be safe to assume the syscall number is always set by the
> instruction immediately beforehand for any robust purpose. So it still
> feels like: why take any cost at all?

I don't think there is any run-time cost at all, only the increased
source complexity.

It's much easier to change glibc than to add full register tracking to
the static analysis tool that discovers system calls in the disassembly.

Thanks,
Florian
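
For context, a sketch of the adjacency-based scan such a tool could
then rely on (a hypothetical stand-in written for this thread, not the
actual analysis tool): it reads `objdump -d' output on stdin and
reports a number only when the instruction immediately before a
syscall is a mov of an immediate into %eax:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  char line[512];
  long int pending = -1;  /* Immediate moved into %eax by the previous insn.  */

  while (fgets (line, sizeof line, stdin) != NULL)
    {
      /* objdump -d lines look like "  3eca5:\t0f 05\tsyscall"; skip the
         address and opcode-byte columns by finding the second tab.  */
      char *insn = strchr (line, '\t');
      insn = insn == NULL ? NULL : strchr (insn + 1, '\t');
      if (insn == NULL)
        continue;
      ++insn;
      if (strncmp (insn, "mov", 3) == 0 && strstr (insn, ",%eax") != NULL)
        {
          /* strtol with base 0 accepts the 0x-prefixed immediate and
             stops at the ',' before the register name.  */
          char *imm = strchr (insn, '$');
          pending = imm != NULL ? strtol (imm + 1, NULL, 0) : -1;
        }
      else if (strncmp (insn, "syscall", 7) == 0)
        {
          if (pending >= 0)
            printf ("syscall nr %ld\n", pending);
          else
            puts ("syscall nr unknown");
          pending = -1;
        }
      else
        pending = -1;  /* Any other instruction breaks the adjacency.  */
    }
  return 0;
}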
  
Noah Goldstein May 31, 2023, 6:23 p.m. UTC | #9
On Tue, May 30, 2023 at 5:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Noah Goldstein:
>
> > [...]
>
> I don't think there is any run-time cost at all, only the increased
> source complexity.
>
> It's much easier to change glibc than to add full register tracking to
> the static analysis tool that discovers system calls in the disassembly.
>

Is the aim only to verify libc.so, or to verify arbitrary binaries? If
the former, then sure, I guess we know we only use the syscall wrapper,
so this may help (more on that below), but if it's arbitrary binaries
there is no guarantee they are using the GLIBC syscall wrapper for
their syscalls.

If it really is just GLIBC, then this change seems unnecessary. Even
if there can be separation between setting rax and the syscall, the way
we have the wrappers set up we know there will always be a dominating
write to rax with the syscall number, so I would rather see the case
where that isn't trivial to find as a motivator first. Or we could just
do source-code-level analysis, as we will always have the syscall
number in the macro invocation.

> Thanks,
> Florian
>
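
At the source level the number is indeed always visible in the macro
invocation.  A hypothetical call site (illustrative only; buffer and
length are placeholder names, and getrandom is syscall 318, i.e. 0x13e,
on x86_64, matching the arc4random disassembly shown later in the
thread):

/* What source-code-level analysis would match on.  */
long int ret = INTERNAL_SYSCALL_CALL (getrandom, buffer, length, 0);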
  
Joe Simmons-Talbott June 28, 2023, 7:17 p.m. UTC | #10
On Wed, May 31, 2023 at 01:23:44PM -0500, Noah Goldstein wrote:
> [...]
>
> Is the aim only to verify libc.so, or to verify arbitrary binaries? If
> the former, then sure, I guess we know we only use the syscall wrapper,
> so this may help (more on that below), but if it's arbitrary binaries
> there is no guarantee they are using the GLIBC syscall wrapper for
> their syscalls.
> 
> If it really is just GLIBC, then this change seems unnecessary. Even
> if there can be separation between setting rax and the syscall, the way
> we have the wrappers set up we know there will always be a dominating
> write to rax with the syscall number, so I would rather see the case
> where that isn't trivial to find as a motivator first. Or we could just
> do source-code-level analysis, as we will always have the syscall
> number in the macro invocation.
> 

Right now we are only concerned with libc.so.  Here's an example case
from my installed libc.so.

000000000003ec70 <__GI___arc4random_buf.part.0>:
   3ec70:       55                      push   %rbp
   3ec71:       41 b8 3e 01 00 00       mov    $0x13e,%r8d
   3ec77:       48 89 e5                mov    %rsp,%rbp
   3ec7a:       41 56                   push   %r14
   3ec7c:       41 55                   push   %r13
   3ec7e:       41 54                   push   %r12
   3ec80:       49 89 fc                mov    %rdi,%r12
   3ec83:       53                      push   %rbx
   3ec84:       48 89 f3                mov    %rsi,%rbx
   3ec87:       48 83 ec 10             sub    $0x10,%rsp
   3ec8b:       64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
   3ec92:       00 00 
   3ec94:       48 89 45 d8             mov    %rax,-0x28(%rbp)
   3ec98:       31 c0                   xor    %eax,%eax
   3ec9a:       31 d2                   xor    %edx,%edx
   3ec9c:       48 89 de                mov    %rbx,%rsi
   3ec9f:       4c 89 e7                mov    %r12,%rdi
   3eca2:       44 89 c0                mov    %r8d,%eax
   3eca5:       0f 05                   syscall

And with my patchset:

0000000000039480 <__GI___arc4random_buf.part.0>:
   39480:       41 55                   push   %r13
   39482:       41 54                   push   %r12
   39484:       55                      push   %rbp
   39485:       48 89 fd                mov    %rdi,%rbp
   39488:       53                      push   %rbx
   39489:       48 89 f3                mov    %rsi,%rbx
   3948c:       48 83 ec 18             sub    $0x18,%rsp
   39490:       31 d2                   xor    %edx,%edx
   39492:       48 89 de                mov    %rbx,%rsi
   39495:       48 89 ef                mov    %rbp,%rdi
   39498:       b8 3e 01 00 00          mov    $0x13e,%eax
   3949d:       0f 05                   syscall 


Thanks,
Joe
  

Patch

diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index cfb51be8c5..0db8660531 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -257,9 +257,9 @@ 
     TYPEFY (arg1, __arg1) = ARGIFY (arg1);			 	\
     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
     asm volatile (							\
-    "syscall\n\t"							\
+    "movl %1, %k0\n\tsyscall\n\t"					\
     : "=a" (resultvar)							\
-    : "0" (number), "r" (_a1)						\
+    : "g" (number), "r" (_a1)						\
     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
     (long int) resultvar;						\
 })
@@ -273,9 +273,9 @@ 
     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
     asm volatile (							\
-    "syscall\n\t"							\
+    "movl %1, %k0\n\tsyscall\n\t"					\
     : "=a" (resultvar)							\
-    : "0" (number), "r" (_a1), "r" (_a2)				\
+    : "g" (number), "r" (_a1), "r" (_a2)				\
     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
     (long int) resultvar;						\
 })
@@ -291,9 +291,9 @@ 
     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
     asm volatile (							\
-    "syscall\n\t"							\
+    "movl %1, %k0\n\tsyscall\n\t"					\
     : "=a" (resultvar)							\
-    : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3)			\
+    : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3)			\
     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
     (long int) resultvar;						\
 })
@@ -311,9 +311,9 @@ 
     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
     asm volatile (							\
-    "syscall\n\t"							\
+    "movl %1, %k0\n\tsyscall\n\t"					\
     : "=a" (resultvar)							\
-    : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4)		\
+    : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4)		\
     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
     (long int) resultvar;						\
 })
@@ -333,9 +333,9 @@ 
     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
     asm volatile (							\
-    "syscall\n\t"							\
+    "movl %1, %k0\n\tsyscall\n\t"					\
     : "=a" (resultvar)							\
-    : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4),		\
+    : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4),		\
       "r" (_a5)								\
     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
     (long int) resultvar;						\
@@ -358,9 +358,9 @@ 
     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
     asm volatile (							\
-    "syscall\n\t"							\
+    "movl %1, %k0\n\tsyscall\n\t"					\
     : "=a" (resultvar)							\
-    : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4),		\
+    : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4),		\
       "r" (_a5), "r" (_a6)						\
     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
     (long int) resultvar;						\