[v6,1/3] x86_64: Set the syscall register right before doing the syscall.
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
To make identifying syscalls easier during call tree analysis load the
syscall number just before performing the syscall.
Compiler optimizations can place quite a few instructions between the
setting of the syscall number and the syscall instruction. During call
tree analysis the number of instructions between the two can lead to
more difficulty for both tools and humans in properly identifying the
syscall number. Having the syscall number set in the prior instruction
to the syscall instruction makes this task easier and less error prone.
Being able to reliably identify syscalls made by a given API will make
it easier to understand and verify the safety and security of glibc.
---
sysdeps/unix/sysv/linux/x86_64/sysdep.h | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
Comments
Hi H.J.,
Is there anything else you are looking for on x86_64 WRT this patch?
Thanks,
Joe
On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> To make identifying syscalls easier during call tree analysis load the
> syscall number just before performing the syscall.
>
> Compiler optimizations can place quite a few instructions between the
> setting of the syscall number and the syscall instruction. During call
> tree analysis the number of instructions between the two can lead to
> more difficulty for both tools and humans in properly identifying the
> syscall number. Having the syscall number set in the prior instruction
> to the syscall instruction makes this task easier and less error prone.
> Being able to reliably identify syscalls made by a given API will make
> it easier to understand and verify the safety and security of glibc.
> ---
> sysdeps/unix/sysv/linux/x86_64/sysdep.h | 24 ++++++++++++------------
> 1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> index cfb51be8c5..0db8660531 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> @@ -257,9 +257,9 @@
> TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1) \
> + : "g" (number), "r" (_a1) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -273,9 +273,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2) \
> + : "g" (number), "r" (_a1), "r" (_a2) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -291,9 +291,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -311,9 +311,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -333,9 +333,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> "r" (_a5) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> @@ -358,9 +358,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> "r" (_a5), "r" (_a6) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> --
> 2.39.2
>
On Mon, May 15, 2023 at 7:15 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
>
> Hi H.J.,
>
> Is there anything else you are looking for on x86_64 WRT this patch?
>
> Thanks,
> Joe
> On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> > To make identifying syscalls easier during call tree analysis load the
> > syscall number just before performing the syscall.
> >
> > Compiler optimizations can place quite a few instructions between the
> > setting of the syscall number and the syscall instruction. During call
> > tree analysis the number of instructions between the two can lead to
> > more difficulty for both tools and humans in properly identifying the
> > syscall number. Having the syscall number set in the prior instruction
> > to the syscall instruction makes this task easier and less error prone.
> > Being able to reliably identify syscalls made by a given API will make
> > it easier to understand and verify the safety and security of glibc.
> > ---
> > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 24 ++++++++++++------------
> > 1 file changed, 12 insertions(+), 12 deletions(-)
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > index cfb51be8c5..0db8660531 100644
> > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > @@ -257,9 +257,9 @@
> > TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1) \
> > + : "g" (number), "r" (_a1) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -273,9 +273,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2) \
> > + : "g" (number), "r" (_a1), "r" (_a2) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -291,9 +291,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -311,9 +311,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -333,9 +333,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > "r" (_a5) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > @@ -358,9 +358,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > "r" (_a5), "r" (_a6) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > --
> > 2.39.2
> >
>
I have no more comments.
ping.
Thanks,
Joe
On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> To make identifying syscalls easier during call tree analysis load the
> syscall number just before performing the syscall.
>
> Compiler optimizations can place quite a few instructions between the
> setting of the syscall number and the syscall instruction. During call
> tree analysis the number of instructions between the two can lead to
> more difficulty for both tools and humans in properly identifying the
> syscall number. Having the syscall number set in the prior instruction
> to the syscall instruction makes this task easier and less error prone.
> Being able to reliably identify syscalls made by a given API will make
> it easier to understand and verify the safety and security of glibc.
> ---
> sysdeps/unix/sysv/linux/x86_64/sysdep.h | 24 ++++++++++++------------
> 1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> index cfb51be8c5..0db8660531 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> @@ -257,9 +257,9 @@
> TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1) \
> + : "g" (number), "r" (_a1) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -273,9 +273,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2) \
> + : "g" (number), "r" (_a1), "r" (_a2) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -291,9 +291,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -311,9 +311,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> })
> @@ -333,9 +333,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> "r" (_a5) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> @@ -358,9 +358,9 @@
> register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> asm volatile ( \
> - "syscall\n\t" \
> + "movl %1, %k0\n\tsyscall\n\t" \
> : "=a" (resultvar) \
> - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> "r" (_a5), "r" (_a6) \
> : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> (long int) resultvar; \
> --
> 2.39.2
>
On Thu, May 25, 2023 at 1:07 PM Joe Simmons-Talbott via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> ping.
>
> Thanks,
> Joe
> On Mon, Apr 24, 2023 at 11:03:51AM -0400, Joe Simmons-Talbott wrote:
> > To make identifying syscalls easier during call tree analysis load the
> > syscall number just before performing the syscall.
> >
> > Compiler optimizations can place quite a few instructions between the
> > setting of the syscall number and the syscall instruction. During call
> > tree analysis the number of instructions between the two can lead to
> > more difficulty for both tools and humans in properly identifying the
> > syscall number. Having the syscall number set in the prior instruction
> > to the syscall instruction makes this task easier and less error prone.
> > Being able to reliably identify syscalls made by a given API will make
> > it easier to understand and verify the safety and security of glibc.
> > ---
> > sysdeps/unix/sysv/linux/x86_64/sysdep.h | 24 ++++++++++++------------
> > 1 file changed, 12 insertions(+), 12 deletions(-)
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > index cfb51be8c5..0db8660531 100644
> > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > @@ -257,9 +257,9 @@
> > TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1) \
> > + : "g" (number), "r" (_a1) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -273,9 +273,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2) \
> > + : "g" (number), "r" (_a1), "r" (_a2) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -291,9 +291,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -311,9 +311,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > })
> > @@ -333,9 +333,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > "r" (_a5) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > @@ -358,9 +358,9 @@
> > register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
> > register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
> > asm volatile ( \
> > - "syscall\n\t" \
> > + "movl %1, %k0\n\tsyscall\n\t" \
> > : "=a" (resultvar) \
> > - : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > + : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
> > "r" (_a5), "r" (_a6) \
> > : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
> > (long int) resultvar; \
> > --
> > 2.39.2
> >
>
I'm minorly opposed to this patch. Even if GLIBC guarantees all
syscalls will set the number the instruction before, that's no guarantee
for the entire program. Furthermore in the event of:
`movl $VAL, %eax; syscall`
It's still not safe to *always* assume that `VAL` corresponds to the
syscall number as a jump (direct or indirect) could still go between
the instructions (i.e. there is no guarantee in the assembly that the
`mov` dominates the `syscall`).
So at the end of the day, we are bloating the library without, AFAICT,
providing any real guarantee. Maybe I'm missing something?
* Noah Goldstein via Libc-alpha:
> I'm minorly opposed to this patch. Even if GLIBC guarantees all
> syscalls will set the number the instruction before, that's no guarantee
> for the entire program. Furthermore in the event of:
> `movl $VAL, %eax; syscall`
> It's still not safe to *always* assume that `VAL` correspond to the
> syscall number as a jump (direct or indirect) could still go between
> the instructions (i.e there is no guarantee in the assembly that the
> `mov` dominates the `syscall).
> So at the end of the day, we are bloating the library without, AFAICT,
> providing any real guarantee. Maybe I'm missing something?
Joe, is there a size change to libc.so.6 as the result of this change?
Thanks,
Florian
On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
> * Noah Goldstein via Libc-alpha:
>
> > I'm minorly opposed to this patch. Even if GLIBC guarantees all
> > syscalls will set the number the instruction before, that's no guarantee
> > for the entire program. Furthermore in the event of:
> > `movl $VAL, %eax; syscall`
> > It's still not safe to *always* assume that `VAL` correspond to the
> > syscall number as a jump (direct or indirect) could still go between
> > the instructions (i.e there is no guarantee in the assembly that the
> > `mov` dominates the `syscall).
> > So at the end of the day, we are bloating the library without, AFAICT,
> > providing any real guarantee. Maybe I'm missing something?
>
> Joe, is there a size change to libc.so.6 as the result of this change?
No, the size is the same with and without this patchset on x86_64.
Thanks,
Joe
On Fri, May 26, 2023 at 5:59 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
>
> On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
> > * Noah Goldstein via Libc-alpha:
> >
> > > I'm minorly opposed to this patch. Even if GLIBC guarantees all
> > > syscalls will set the number the instruction before, that's no guarantee
> > > for the entire program. Furthermore in the event of:
> > > `movl $VAL, %eax; syscall`
> > > It's still not safe to *always* assume that `VAL` correspond to the
> > > syscall number as a jump (direct or indirect) could still go between
> > > the instructions (i.e there is no guarantee in the assembly that the
> > > `mov` dominates the `syscall).
> > > So at the end of the day, we are bloating the library without, AFAICT,
> > > providing any real guarantee. Maybe I'm missing something?
> >
> > Joe, is there a size change to libc.so.6 as the result of this change?
>
> No, the size is the same with and with out this patchset on x86_64.
>
There aren't many syscalls so it's only a minor cost (hence the only
minor opposition), but I don't see the value this provides given that it
still won't be safe to assume the syscall number is always set the
instruction beforehand for any robust purpose. So it still feels like
why take any cost at all?
> Thanks,
> Joe
>
* Noah Goldstein:
> On Fri, May 26, 2023 at 5:59 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
>>
>> On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
>> > * Noah Goldstein via Libc-alpha:
>> >
>> > > I'm minorly opposed to this patch. Even if GLIBC guarantees all
>> > > syscalls will set the number the instruction before, that's no guarantee
>> > > for the entire program. Furthermore in the event of:
>> > > `movl $VAL, %eax; syscall`
>> > > It's still not safe to *always* assume that `VAL` correspond to the
>> > > syscall number as a jump (direct or indirect) could still go between
>> > > the instructions (i.e there is no guarantee in the assembly that the
>> > > `mov` dominates the `syscall).
>> > > So at the end of the day, we are bloating the library without, AFAICT,
>> > > providing any real guarantee. Maybe I'm missing something?
>> >
>> > Joe, is there a size change to libc.so.6 as the result of this change?
>>
>> No, the size is the same with and with out this patchset on x86_64.
>>
> There aren't many syscalls so it's only a minor cost (hence the only
> minor opposition), but I don't see the value this provides given that it
> still won't be safe to assume the syscall number is always set the
> instruction beforehand for any robust purpose. So it still feels like
> why take any cost at all?
I don't think there is any run-time cost at all, only the increased source
complexity.
It's much easier to change glibc than to add full register tracking to the
static analysis tool that discovers system calls in the disassembly.
Thanks,
Florian
On Tue, May 30, 2023 at 5:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Noah Goldstein:
>
> > On Fri, May 26, 2023 at 5:59 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
> >>
> >> On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
> >> > * Noah Goldstein via Libc-alpha:
> >> >
> >> > > I'm minorly opposed to this patch. Even if GLIBC guarantees all
> >> > > syscalls will set the number the instruction before, that's no guarantee
> >> > > for the entire program. Furthermore in the event of:
> >> > > `movl $VAL, %eax; syscall`
> >> > > It's still not safe to *always* assume that `VAL` correspond to the
> >> > > syscall number as a jump (direct or indirect) could still go between
> >> > > the instructions (i.e there is no guarantee in the assembly that the
> >> > > `mov` dominates the `syscall).
> >> > > So at the end of the day, we are bloating the library without, AFAICT,
> >> > > providing any real guarantee. Maybe I'm missing something?
> >> >
> >> > Joe, is there a size change to libc.so.6 as the result of this change?
> >>
> >> No, the size is the same with and with out this patchset on x86_64.
> >>
> > There aren't many syscalls so it's only a minor cost (hence the only
> > minor opposition), but I don't see the value this provides given that it
> > still won't be safe to assume the syscall number is always set the
> > instruction beforehand for any robust purpose. So it still feels like
> > why take any cost at all?
>
> I think there is any run-time cost at all, only the increased source
> complexity.
>
> It's much easier to change glibc than to add full register tracking to a
> the static analysis tool that discovers system calls in the disassembly.
>
Is the aim only to verify libc.so or to verify arbitrary binaries? If the
former then sure I guess we know we only use the syscall wrapper
so this may help (more on that), but if it's arbitrary binaries there
is no guarantee they are using the GLIBC syscall wrapper for their
syscalls.
If it really is just GLIBC then this change seems unnecessary. Even
if there can be separation between setting rax and the syscall, the
way we have the wrappers setup we know there will always be a
dominating write to rax with the syscall number so would rather see
the case where that isn't trivial to find as a motivator first. Or we could
just do source code level analysis as we will always have the
syscall number in macro invocation.
> Thanks,
> Florian
>
On Wed, May 31, 2023 at 01:23:44PM -0500, Noah Goldstein wrote:
> On Tue, May 30, 2023 at 5:13 AM Florian Weimer <fweimer@redhat.com> wrote:
> >
> > * Noah Goldstein:
> >
> > > On Fri, May 26, 2023 at 5:59 AM Joe Simmons-Talbott <josimmon@redhat.com> wrote:
> > >>
> > >> On Fri, May 26, 2023 at 09:04:06AM +0200, Florian Weimer wrote:
> > >> > * Noah Goldstein via Libc-alpha:
> > >> >
> > >> > > I'm minorly opposed to this patch. Even if GLIBC guarantees all
> > >> > > syscalls will set the number the instruction before, that's no guarantee
> > >> > > for the entire program. Furthermore in the event of:
> > >> > > `movl $VAL, %eax; syscall`
> > >> > > It's still not safe to *always* assume that `VAL` correspond to the
> > >> > > syscall number as a jump (direct or indirect) could still go between
> > >> > > the instructions (i.e there is no guarantee in the assembly that the
> > >> > > `mov` dominates the `syscall).
> > >> > > So at the end of the day, we are bloating the library without, AFAICT,
> > >> > > providing any real guarantee. Maybe I'm missing something?
> > >> >
> > >> > Joe, is there a size change to libc.so.6 as the result of this change?
> > >>
> > >> No, the size is the same with and with out this patchset on x86_64.
> > >>
> > > There aren't many syscalls so it's only a minor cost (hence the only
> > > minor opposition), but I don't see the value this provides given that it
> > > still won't be safe to assume the syscall number is always set the
> > > instruction beforehand for any robust purpose. So it still feels like
> > > why take any cost at all?
> >
> > I think there is any run-time cost at all, only the increased source
> > complexity.
> >
> > It's much easier to change glibc than to add full register tracking to a
> > the static analysis tool that discovers system calls in the disassembly.
> >
>
> Is the aim only to verify libc.so or to verify arbitrary binaries? If the
> former then sure I guess we know we only use the syscall wrapper
> so this may help (more on that), but if it's arbitrary binaries there
> is no guarantee they are using the GLIBC syscall wrapper for their
> syscalls.
>
> If it really is just GLIBC then this change seems unnecessary. Even
> if there can be separation between setting rax and the syscall, the
> way we have the wrappers setup we know there will always be a
> dominating write to rax with the syscall number so would rather see
> the case where that isn't trivial to find as a motivator first. Or we could
> just do source code level analysis as we will always have the
> syscall number in macro invocation.
>
Right now we are only concerned with libc.so. Here's an example case
from my installed libc.so.
000000000003ec70 <__GI___arc4random_buf.part.0>:
3ec70: 55 push %rbp
3ec71: 41 b8 3e 01 00 00 mov $0x13e,%r8d
3ec77: 48 89 e5 mov %rsp,%rbp
3ec7a: 41 56 push %r14
3ec7c: 41 55 push %r13
3ec7e: 41 54 push %r12
3ec80: 49 89 fc mov %rdi,%r12
3ec83: 53 push %rbx
3ec84: 48 89 f3 mov %rsi,%rbx
3ec87: 48 83 ec 10 sub $0x10,%rsp
3ec8b: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
3ec92: 00 00
3ec94: 48 89 45 d8 mov %rax,-0x28(%rbp)
3ec98: 31 c0 xor %eax,%eax
3ec9a: 31 d2 xor %edx,%edx
3ec9c: 48 89 de mov %rbx,%rsi
3ec9f: 4c 89 e7 mov %r12,%rdi
3eca2: 44 89 c0 mov %r8d,%eax
3eca5: 0f 05 syscall
And with my patchset:
0000000000039480 <__GI___arc4random_buf.part.0>:
39480: 41 55 push %r13
39482: 41 54 push %r12
39484: 55 push %rbp
39485: 48 89 fd mov %rdi,%rbp
39488: 53 push %rbx
39489: 48 89 f3 mov %rsi,%rbx
3948c: 48 83 ec 18 sub $0x18,%rsp
39490: 31 d2 xor %edx,%edx
39492: 48 89 de mov %rbx,%rsi
39495: 48 89 ef mov %rbp,%rdi
39498: b8 3e 01 00 00 mov $0x13e,%eax
3949d: 0f 05 syscall
Thanks,
Joe
@@ -257,9 +257,9 @@
TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
- "syscall\n\t" \
+ "movl %1, %k0\n\tsyscall\n\t" \
: "=a" (resultvar) \
- : "0" (number), "r" (_a1) \
+ : "g" (number), "r" (_a1) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
@@ -273,9 +273,9 @@
register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
- "syscall\n\t" \
+ "movl %1, %k0\n\tsyscall\n\t" \
: "=a" (resultvar) \
- : "0" (number), "r" (_a1), "r" (_a2) \
+ : "g" (number), "r" (_a1), "r" (_a2) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
@@ -291,9 +291,9 @@
register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
- "syscall\n\t" \
+ "movl %1, %k0\n\tsyscall\n\t" \
: "=a" (resultvar) \
- : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
+ : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
@@ -311,9 +311,9 @@
register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
- "syscall\n\t" \
+ "movl %1, %k0\n\tsyscall\n\t" \
: "=a" (resultvar) \
- : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
+ : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
@@ -333,9 +333,9 @@
register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
- "syscall\n\t" \
+ "movl %1, %k0\n\tsyscall\n\t" \
: "=a" (resultvar) \
- : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
+ : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
"r" (_a5) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
@@ -358,9 +358,9 @@
register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
- "syscall\n\t" \
+ "movl %1, %k0\n\tsyscall\n\t" \
: "=a" (resultvar) \
- : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
+ : "g" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
"r" (_a5), "r" (_a6) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \