From patchwork Wed Aug 12 22:12:03 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Lu, Hongjiu" X-Patchwork-Id: 8171 Received: (qmail 96353 invoked by alias); 12 Aug 2015 22:12:06 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 96343 invoked by uid 89); 12 Aug 2015 22:12:06 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.8 required=5.0 tests=AWL, BAYES_50, KAM_LAZY_DOMAIN_SECURITY, NO_DNS_FOR_FROM, RP_MATCHES_RCVD autolearn=no version=3.3.2 X-HELO: mga14.intel.com X-ExtLoop1: 1 Date: Wed, 12 Aug 2015 15:12:03 -0700 From: "H.J. Lu" To: GNU C Library Subject: Re: [PATCH] Optimize i386 syscall inlining for GCC 5 Message-ID: <20150812221203.GA4224@intel.com> Reply-To: "H.J. Lu" References: <20150812192001.GA12730@intel.com> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20150812192001.GA12730@intel.com> User-Agent: Mutt/1.5.23 (2014-03-12) On Wed, Aug 12, 2015 at 12:20:01PM -0700, H.J. Lu wrote: > Since GCC 5 and above can properly spill %ebx when needed, we can inline > syscalls with 6 arguments if GCC 5 or above is used to compile glibc. We > also skip __libc_do_syscall for GCC 5. Tested with -march=i486 and > -march=i686. > > OK for master? > Here is the updated version. I optimized out one register move instruction. OK for master? H.J. --- Since GCC 5 and above can properly spill %ebx when needed, we can inline syscalls with 6 arguments if GCC 5 or above is used to compile glibc. This patch rewrites INTERNAL_SYSCALL macros and skips __libc_do_syscall for GCC 5. 
For sysdeps/unix/sysv/linux/i386/brk.c, with -O2 -march=i686 -mtune=generic, GCC 5.2 now generates: <__brk>: 0: push %ebx 1: mov $0x2d,%eax 6: mov 0x8(%esp),%ebx a: call b <__brk+0xb> b: R_386_PC32 __x86.get_pc_thunk.cx f: add $0x2,%ecx 11: R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 15: call *%gs:0x10 1c: mov %eax,%edx 1e: mov 0x0(%ecx),%eax 20: R_386_GOT32 __curbrk 24: mov %edx,(%eax) 26: xor %eax,%eax 28: cmp %edx,%ebx 2a: ja 30 <__brk+0x30> 2c: pop %ebx 2d: ret instead of <__brk>: 0: push %ebx 1: mov 0x8(%esp),%ecx 5: call 6 <__brk+0x6> 6: R_386_PC32 __x86.get_pc_thunk.bx a: add $0x2,%ebx c: R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 10: xchg %ecx,%ebx 12: mov $0x2d,%eax 17: call *%gs:0x10 1e: xchg %ecx,%ebx 20: mov %eax,%edx 22: mov 0x0(%ebx),%eax 24: R_386_GOT32 __curbrk 28: mov %edx,(%eax) 2a: xor %eax,%eax 2c: cmp %edx,%ecx 2e: ja 38 <__brk+0x38> 30: pop %ebx 31: ret The new one is shorter by 2 instructions. * sysdeps/unix/sysv/linux/i386/libc-do-syscall.S (__libc_do_syscall): Defined only if !__GNUC_PREREQ (5,0). * sysdeps/unix/sysv/linux/i386/sysdep.h: Define assembler macros only if !__GNUC_PREREQ (5,0). (INTERNAL_SYSCALL_MAIN_6): Optimize for GCC 5. (INTERNAL_SYSCALL_MAIN_INLINE): Likewise. (INTERNAL_SYSCALL_NCS): Likewise. (LOADREGS_0): New. (ASMARGS_0): Likewise. (LOADREGS_1): Likewise. (ASMARGS_1): Likewise. (LOADREGS_2): Likewise. (ASMARGS_2): Likewise. (LOADREGS_3): Likewise. (ASMARGS_3): Likewise. (LOADREGS_4): Likewise. (ASMARGS_4): Likewise. (LOADREGS_5): Likewise. (ASMARGS_5): Likewise. (LOADREGS_6): Likewise. (ASMARGS_6): Likewise. 
--- sysdeps/unix/sysv/linux/i386/libc-do-syscall.S | 3 + sysdeps/unix/sysv/linux/i386/sysdep.h | 115 ++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 11 deletions(-) diff --git a/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S b/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S index af5c6f0..cdef3d5 100644 --- a/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S +++ b/sysdeps/unix/sysv/linux/i386/libc-do-syscall.S @@ -18,6 +18,8 @@ #include +#if !__GNUC_PREREQ (5,0) + /* %eax, %ecx, %edx and %esi contain the values expected by the kernel. %edi points to a structure with the values of %ebx, %edi and %ebp. */ @@ -48,3 +50,4 @@ ENTRY (__libc_do_syscall) cfi_restore (ebx) ret END (__libc_do_syscall) +#endif diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h index d76aca5..92e7e7a 100644 --- a/sysdeps/unix/sysv/linux/i386/sysdep.h +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h @@ -275,6 +275,7 @@ #else /* !__ASSEMBLER__ */ +#if !__GNUC_PREREQ (5,0) /* We need some help from the assembler to generate optimal code. We define some macros here which later will be used. */ asm (".L__X'%ebx = 1\n\t" @@ -314,6 +315,7 @@ struct libc_do_syscall_args { int ebx, edi, ebp; }; +#endif /* Define a macro which expands inline into the wrapper code for a system call. */ @@ -354,8 +356,12 @@ struct libc_do_syscall_args INTERNAL_SYSCALL_MAIN_INLINE(name, err, 5, args) /* Each object using 6-argument inline syscalls must include a definition of __libc_do_syscall. */ -#define INTERNAL_SYSCALL_MAIN_6(name, err, arg1, arg2, arg3, \ - arg4, arg5, arg6) \ +#if __GNUC_PREREQ (5,0) +# define INTERNAL_SYSCALL_MAIN_6(name, err, args...) 
\ + INTERNAL_SYSCALL_MAIN_INLINE(name, err, 6, args) +#else /* GCC 5 */ +# define INTERNAL_SYSCALL_MAIN_6(name, err, arg1, arg2, arg3, \ + arg4, arg5, arg6) \ struct libc_do_syscall_args _xv = \ { \ (int) (arg1), \ @@ -368,14 +374,52 @@ struct libc_do_syscall_args : "=a" (resultvar) \ : "i" (__NR_##name), "c" (arg2), "d" (arg3), "S" (arg4), "D" (&_xv) \ : "memory", "cc") +#endif /* GCC 5 */ #define INTERNAL_SYSCALL(name, err, nr, args...) \ ({ \ register unsigned int resultvar; \ INTERNAL_SYSCALL_MAIN_##nr (name, err, args); \ (int) resultvar; }) #ifdef I386_USE_SYSENTER -# ifdef SHARED -# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ +# if __GNUC_PREREQ (5,0) +# ifdef SHARED +# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ + LOADREGS_##nr(args) \ + asm volatile ( \ + "call *%%gs:%P2" \ + : "=a" (resultvar) \ + : "a" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \ + ASMARGS_##nr(args) : "memory", "cc") +# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ + ({ \ + register unsigned int resultvar; \ + LOADREGS_##nr(args) \ + asm volatile ( \ + "call *%%gs:%P2" \ + : "=a" (resultvar) \ + : "a" (name), "i" (offsetof (tcbhead_t, sysinfo)) \ + ASMARGS_##nr(args) : "memory", "cc"); \ + (int) resultvar; }) +# else +# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ + LOADREGS_##nr(args) \ + asm volatile ( \ + "call *_dl_sysinfo" \ + : "=a" (resultvar) \ + : "a" (__NR_##name) ASMARGS_##nr(args) : "memory", "cc") +# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ + ({ \ + register unsigned int resultvar; \ + LOADREGS_##nr(args) \ + asm volatile ( \ + "call *_dl_sysinfo" \ + : "=a" (resultvar) \ + : "a" (name) ASMARGS_##nr(args) : "memory", "cc"); \ + (int) resultvar; }) +# endif +# else /* GCC 5 */ +# ifdef SHARED +# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) 
\ EXTRAVAR_##nr \ asm volatile ( \ LOADARGS_##nr \ @@ -385,7 +429,7 @@ struct libc_do_syscall_args : "=a" (resultvar) \ : "i" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \ ASMFMT_##nr(args) : "memory", "cc") -# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ +# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ ({ \ register unsigned int resultvar; \ EXTRAVAR_##nr \ @@ -397,8 +441,8 @@ struct libc_do_syscall_args : "0" (name), "i" (offsetof (tcbhead_t, sysinfo)) \ ASMFMT_##nr(args) : "memory", "cc"); \ (int) resultvar; }) -# else -# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ +# else +# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ EXTRAVAR_##nr \ asm volatile ( \ LOADARGS_##nr \ @@ -407,7 +451,7 @@ struct libc_do_syscall_args RESTOREARGS_##nr \ : "=a" (resultvar) \ : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc") -# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ +# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ ({ \ register unsigned int resultvar; \ EXTRAVAR_##nr \ @@ -418,9 +462,27 @@ struct libc_do_syscall_args : "=a" (resultvar) \ : "0" (name) ASMFMT_##nr(args) : "memory", "cc"); \ (int) resultvar; }) -# endif +# endif +# endif /* GCC 5 */ #else -# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ +# if __GNUC_PREREQ (5,0) +# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) \ + LOADREGS_##nr(args) \ + asm volatile ( \ + "int $0x80" \ + : "=a" (resultvar) \ + : "a" (__NR_##name) ASMARGS_##nr(args) : "memory", "cc") +# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ + ({ \ + register unsigned int resultvar; \ + LOADREGS_##nr(args) \ + asm volatile ( \ + "int $0x80" \ + : "=a" (resultvar) \ + : "a" (name) ASMARGS_##nr(args) : "memory", "cc"); \ + (int) resultvar; }) +# else /* GCC 5 */ +# define INTERNAL_SYSCALL_MAIN_INLINE(name, err, nr, args...) 
\ EXTRAVAR_##nr \ asm volatile ( \ LOADARGS_##nr \ @@ -429,7 +491,7 @@ struct libc_do_syscall_args RESTOREARGS_##nr \ : "=a" (resultvar) \ : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc") -# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ +# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ ({ \ register unsigned int resultvar; \ EXTRAVAR_##nr \ @@ -440,6 +502,7 @@ struct libc_do_syscall_args : "=a" (resultvar) \ : "0" (name) ASMFMT_##nr(args) : "memory", "cc"); \ (int) resultvar; }) +# endif /* GCC 5 */ #endif #undef INTERNAL_SYSCALL_DECL @@ -504,6 +567,36 @@ struct libc_do_syscall_args # define RESTOREARGS_5 #endif +#if __GNUC_PREREQ (5,0) +# define LOADREGS_0() +# define ASMARGS_0() +# define LOADREGS_1(arg1) \ + LOADREGS_0 () +# define ASMARGS_1(arg1) \ + ASMARGS_0 (), "b" ((unsigned int) (arg1)) +# define LOADREGS_2(arg1, arg2) \ + LOADREGS_1 (arg1) +# define ASMARGS_2(arg1, arg2) \ + ASMARGS_1 (arg1), "c" ((unsigned int) (arg2)) +# define LOADREGS_3(arg1, arg2, arg3) \ + LOADREGS_2 (arg1, arg2) +# define ASMARGS_3(arg1, arg2, arg3) \ + ASMARGS_2 (arg1, arg2), "d" ((unsigned int) (arg3)) +# define LOADREGS_4(arg1, arg2, arg3, arg4) \ + LOADREGS_3 (arg1, arg2, arg3) +# define ASMARGS_4(arg1, arg2, arg3, arg4) \ + ASMARGS_3 (arg1, arg2, arg3), "S" ((unsigned int) (arg4)) +# define LOADREGS_5(arg1, arg2, arg3, arg4, arg5) \ + LOADREGS_4 (arg1, arg2, arg3, arg4) +# define ASMARGS_5(arg1, arg2, arg3, arg4, arg5) \ + ASMARGS_4 (arg1, arg2, arg3, arg4), "D" ((unsigned int) (arg5)) +# define LOADREGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \ + register unsigned int _a6 asm ("ebp") = (unsigned int) (arg6); \ + LOADREGS_5 (arg1, arg2, arg3, arg4, arg5) +# define ASMARGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \ + ASMARGS_5 (arg1, arg2, arg3, arg4, arg5), "r" (_a6) +#endif /* GCC 5 */ + #define ASMFMT_0() #ifdef __PIC__ # define ASMFMT_1(arg1) \