LoongArch: Add Syscall Assembly Implementation

Message ID 20230323084013.1100656-1-caiyinyu@loongson.cn
State Superseded
Headers
Series LoongArch: Add Syscall Assembly Implementation |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

caiyinyu March 23, 2023, 8:40 a.m. UTC
  ---
 sysdeps/unix/sysv/linux/loongarch/syscall.S | 48 +++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/syscall.S
  

Comments

Xi Ruoyao March 23, 2023, 9:25 a.m. UTC | #1
General question: is there a notable benefit optimizing syscall with
assembly?  AFAIK nobody will put syscall on a hot path, and the cycles
saved by using the assembly implementation should be negligible
comparing with all the cost of context switch etc.

On Thu, 2023-03-23 at 16:40 +0800, caiyinyu wrote:
> + ENTRY (syscall)
> +        move	t0, a7
> +        move	a7, a0		/* Syscall number -> a0.  */
> +        move	a0, a1		/* shift arg1 - arg6.  */
> +        move	a1, a2
> +        move	a2, a3
> +        move	a3, a4
> +        move	a4, a5
> +        move	a5, a6
> +        move	a6, t0		/* arg7 is saved in t0.  */
> +        syscall 0		/* Do the system call.  */
> +       lu12i.w t0, -1

"li.w t0, -4096" will do the same thing, and be more readable.

And this line seems indented with a tab, while other lines are indented
with 8 spaces.

> +        bltu   t0, a0, L (error)
> +        ret                     /* Return to caller.  */

"ret" is not recognized by GNU assembler <= 2.39, it's better to use
the old-style "jr      ra" for backward compatibility.

> +
> +L (error):
> +        b      __syscall_error
  
caiyinyu March 23, 2023, 12:01 p.m. UTC | #2
在 2023/3/23 下午5:25, Xi Ruoyao 写道:
> General question: is there a notable benefit optimizing syscall with
> assembly?  AFAIK nobody will put syscall on a hot path, and the cycles
> saved by using the assembly implementation should be negligible
> comparing with all the cost of context switch etc.

Yes.

New patch:

https://sourceware.org/pipermail/libc-alpha/2023-March/146588.html

Without this patch(objdump -d libc.so...):

00000000000dd45c <syscall>:
    dd45c:       02fec063        addi.d          $sp, $sp, -80(0xfb0)
    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)
    dd480:       0015008b        move            $a7, $a0
    dd484:       29c0206c        st.d            $t0, $sp, 8(0x8)
    dd488:       001500a4        move            $a0, $a1
    dd48c:       001500c5        move            $a1, $a2
    dd490:       001500e6        move            $a2, $a3
    dd494:       00150107        move            $a3, $a4
    dd498:       00150128        move            $a4, $a5
    dd49c:       00150149        move            $a5, $a6
    dd4a0:       002b0000        syscall         0x0
    dd4a4:       15ffffec        lu12i.w         $t0, -1(0xfffff)
    dd4a8:       68000d84        bltu            $t0, $a0, 12(0xc)       
# dd4b4 <syscall+0x58>
    dd4ac:       02c14063        addi.d          $sp, $sp, 80(0x50)
    dd4b0:       4c000020        jirl            $zero, $ra, 0
    dd4b4:       1a00128c        pcalau12i       $t0, 148(0x94)
    dd4b8:       28d9e18c        ld.d            $t0, $t0, 1656(0x678)
    dd4bc:       0011100d        sub.w           $t1, $zero, $a0
    dd4c0:       02bffc04        addi.w          $a0, $zero, -1(0xfff)
    dd4c4:       3818098d        stx.w           $t1, $t0, $tp
    dd4c8:       02c14063        addi.d          $sp, $sp, 80(0x50)
    dd4cc:       4c000020        jirl            $zero, $ra, 0


>
> On Thu, 2023-03-23 at 16:40 +0800, caiyinyu wrote:
>> + ENTRY (syscall)
>> +        move	t0, a7
>> +        move	a7, a0		/* Syscall number -> a0.  */
>> +        move	a0, a1		/* shift arg1 - arg6.  */
>> +        move	a1, a2
>> +        move	a2, a3
>> +        move	a3, a4
>> +        move	a4, a5
>> +        move	a5, a6
>> +        move	a6, t0		/* arg7 is saved in t0.  */
>> +        syscall 0		/* Do the system call.  */
>> +       lu12i.w t0, -1
> "li.w t0, -4096" will do the same thing, and be more readable.
>
> And this line seems indented with a tab, while other lines are indented
> with 8 spaces.

Fixed


>> +        bltu   t0, a0, L (error)
>> +        ret                     /* Return to caller.  */
> "ret" is not recognized by GNU assembler <= 2.39, it's better to use
> the old-style "jr      ra" for backward compatibility.

Fixed.



>
>> +
>> +L (error):
>> +        b      __syscall_error
  
Andreas Schwab March 23, 2023, 1:12 p.m. UTC | #3
On Mär 23 2023, caiyinyu wrote:

> Without this patch(objdump -d libc.so...):
>
> 00000000000dd45c <syscall>:
>    dd45c:       02fec063        addi.d          $sp, $sp, -80(0xfb0)
>    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
>    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
>    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
>    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
>    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
>    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
>    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
>    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)

If the argument registers are call-clobbbered, why does the compiler
need to save them?
  
Xi Ruoyao March 23, 2023, 1:12 p.m. UTC | #4
On Thu, 2023-03-23 at 20:01 +0800, caiyinyu wrote:
> Without this patch(objdump -d libc.so...):
> 
> 00000000000dd45c <syscall>:
>     dd45c:       02fec063        addi.d          $sp, $sp, -80(0xfb0)
>     dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)

/* snip */

Yes, I know this will save some cycles.  The problem is during a
syscall, the context switch etc. uses much more time than the
instruction sequence in syscall() function itself.

I wrote a program which calls syscall(SYS_getpid) 1000000 times. 
Without the (unpatched) Glibc syscall(), the time consumed is 1.376s;
with a syscall() in asm (similar to the one in the patch but I changed
the __syscall_error tail call to a "break 0"), the time consumed is
1.203s (0.173s faster).  So we can say it saves 17.3us (about 430
cycles) per syscall.

I doubt if the improvement is really worthy for real life applications:
is there any application invoking syscalls so intensively that the
17.3us will accumulate into something we can feel?
  
Xi Ruoyao March 23, 2023, 1:34 p.m. UTC | #5
On Thu, 2023-03-23 at 14:12 +0100, Andreas Schwab wrote:
> On Mär 23 2023, caiyinyu wrote:
> 
> > Without this patch(objdump -d libc.so...):
> > 
> > 00000000000dd45c <syscall>:
> >    dd45c:       02fec063        addi.d          $sp, $sp, -80(0xfb0)
> >    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
> >    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
> >    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
> >    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
> >    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
> >    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
> >    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
> >    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)
> 
> If the argument registers are call-clobbbered, why does the compiler
> need to save them?

It seems triggered by va_start.  If I don't use "..." and replace it
with "a0, a1, a2, ..., a5", and remove va_start ... va_end, the compiled
code won't save registers.

I'll try to investigate further.
  
Xi Ruoyao March 23, 2023, 1:43 p.m. UTC | #6
On Thu, 2023-03-23 at 21:34 +0800, Xi Ruoyao wrote:
> On Thu, 2023-03-23 at 14:12 +0100, Andreas Schwab wrote:
> > On Mär 23 2023, caiyinyu wrote:
> > 
> > > Without this patch(objdump -d libc.so...):
> > > 
> > > 00000000000dd45c <syscall>:
> > >    dd45c:       02fec063        addi.d          $sp, $sp, -
> > > 80(0xfb0)
> > >    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
> > >    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
> > >    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
> > >    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
> > >    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
> > >    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
> > >    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
> > >    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)
> > 
> > If the argument registers are call-clobbbered, why does the compiler
> > need to save them?
> 
> It seems triggered by va_start.  If I don't use "..." and replace it
> with "a0, a1, a2, ..., a5", and remove va_start ... va_end, the
> compiled
> code won't save registers.
> 
> I'll try to investigate further.

Similar to GCC PR100955.
  
Xi Ruoyao March 23, 2023, 5:34 p.m. UTC | #7
On Thu, 2023-03-23 at 21:43 +0800, Xi Ruoyao wrote:
> On Thu, 2023-03-23 at 21:34 +0800, Xi Ruoyao wrote:
> > On Thu, 2023-03-23 at 14:12 +0100, Andreas Schwab wrote:
> > > On Mär 23 2023, caiyinyu wrote:
> > > 
> > > > Without this patch(objdump -d libc.so...):
> > > > 
> > > > 00000000000dd45c <syscall>:
> > > >    dd45c:       02fec063        addi.d          $sp, $sp, -
> > > > 80(0xfb0)
> > > >    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
> > > >    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
> > > >    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
> > > >    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
> > > >    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
> > > >    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
> > > >    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
> > > >    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)
> > > 
> > > If the argument registers are call-clobbbered, why does the compiler
> > > need to save them?
> > 
> > It seems triggered by va_start.  If I don't use "..." and replace it
> > with "a0, a1, a2, ..., a5", and remove va_start ... va_end, the
> > compiled
> > code won't save registers.
> > 
> > I'll try to investigate further.
> 
> Similar to GCC PR100955.

Nope, it's not PR100955.  PR100955 is about AArch64 but syscall is
compiled to almost perfect assemble code on AArch64.

It looks like caused by the lack of [TARGET_SETUP_INCOMING_VARARGS][1]
in GCC config/loongarch.  I'll try to add it...

[1]: https://gcc.gnu.org/onlinedocs/gccint/Varargs.html#index-TARGET_005fSETUP_005fINCOMING_005fVARARGS
  
Xi Ruoyao March 23, 2023, 6:26 p.m. UTC | #8
On Fri, 2023-03-24 at 01:34 +0800, Xi Ruoyao via Libc-alpha wrote:
> On Thu, 2023-03-23 at 21:43 +0800, Xi Ruoyao wrote:
> > On Thu, 2023-03-23 at 21:34 +0800, Xi Ruoyao wrote:
> > > On Thu, 2023-03-23 at 14:12 +0100, Andreas Schwab wrote:
> > > > On Mär 23 2023, caiyinyu wrote:
> > > > 
> > > > > Without this patch(objdump -d libc.so...):
> > > > > 
> > > > > 00000000000dd45c <syscall>:
> > > > >    dd45c:       02fec063        addi.d          $sp, $sp, -
> > > > > 80(0xfb0)
> > > > >    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
> > > > >    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
> > > > >    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
> > > > >    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
> > > > >    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
> > > > >    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
> > > > >    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
> > > > >    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)
> > > > 
> > > > If the argument registers are call-clobbbered, why does the compiler
> > > > need to save them?
> > > 
> > > It seems triggered by va_start.  If I don't use "..." and replace it
> > > with "a0, a1, a2, ..., a5", and remove va_start ... va_end, the
> > > compiled
> > > code won't save registers.
> > > 
> > > I'll try to investigate further.
> > 
> > Similar to GCC PR100955.
> 
> Nope, it's not PR100955.  PR100955 is about AArch64 but syscall is
> compiled to almost perfect assemble code on AArch64.

I was wrong.  AArch64 has a assembly syscall.

> It looks like caused by the lack of [TARGET_SETUP_INCOMING_VARARGS][1]
> in GCC config/loongarch.  I'll try to add it...

LoongArch has a TARGET_SETUP_INCOMING_VARARGS but it does not use the
information from stdarg pass.  I can fix it, but even with the fix GCC
would still save 7 registers (now GCC trunk saves 9 registers, the fix
would make some improvement but no much).

And the issue seems not trivial to fix.  On x86_64, all of GCC, Clang,
and MSVC will save some registers if va_arg is used.  I've not found any
compiler which can avoid saving the va_arg GARs unnecessarily yet:

https://godbolt.org/z/n1YqWq9c9

Now to me it seems a bad idea to use va_arg in syscall.c.
  
Adhemerval Zanella March 23, 2023, 6:40 p.m. UTC | #9
On 23/03/23 15:26, Xi Ruoyao wrote:
> On Fri, 2023-03-24 at 01:34 +0800, Xi Ruoyao via Libc-alpha wrote:
>> On Thu, 2023-03-23 at 21:43 +0800, Xi Ruoyao wrote:
>>> On Thu, 2023-03-23 at 21:34 +0800, Xi Ruoyao wrote:
>>>> On Thu, 2023-03-23 at 14:12 +0100, Andreas Schwab wrote:
>>>>> On Mär 23 2023, caiyinyu wrote:
>>>>>
>>>>>> Without this patch(objdump -d libc.so...):
>>>>>>
>>>>>> 00000000000dd45c <syscall>:
>>>>>>    dd45c:       02fec063        addi.d          $sp, $sp, -
>>>>>> 80(0xfb0)
>>>>>>    dd460:       02c0606c        addi.d          $t0, $sp, 24(0x18)
>>>>>>    dd464:       29c06065        st.d            $a1, $sp, 24(0x18)
>>>>>>    dd468:       29c08066        st.d            $a2, $sp, 32(0x20)
>>>>>>    dd46c:       29c0a067        st.d            $a3, $sp, 40(0x28)
>>>>>>    dd470:       29c0c068        st.d            $a4, $sp, 48(0x30)
>>>>>>    dd474:       29c0e069        st.d            $a5, $sp, 56(0x38)
>>>>>>    dd478:       29c1206b        st.d            $a7, $sp, 72(0x48)
>>>>>>    dd47c:       29c1006a        st.d            $a6, $sp, 64(0x40)
>>>>>
>>>>> If the argument registers are call-clobbbered, why does the compiler
>>>>> need to save them?
>>>>
>>>> It seems triggered by va_start.  If I don't use "..." and replace it
>>>> with "a0, a1, a2, ..., a5", and remove va_start ... va_end, the
>>>> compiled
>>>> code won't save registers.
>>>>
>>>> I'll try to investigate further.
>>>
>>> Similar to GCC PR100955.
>>
>> Nope, it's not PR100955.  PR100955 is about AArch64 but syscall is
>> compiled to almost perfect assemble code on AArch64.
> 
> I was wrong.  AArch64 has a assembly syscall.
> 
>> It looks like caused by the lack of [TARGET_SETUP_INCOMING_VARARGS][1]
>> in GCC config/loongarch.  I'll try to add it...
> 
> LoongArch has a TARGET_SETUP_INCOMING_VARARGS but it does not use the
> information from stdarg pass.  I can fix it, but even with the fix GCC
> would still save 7 registers (now GCC trunk saves 9 registers, the fix
> would make some improvement but no much).
> 
> And the issue seems not trivial to fix.  On x86_64, all of GCC, Clang,
> and MSVC will save some registers if va_arg is used.  I've not found any
> compiler which can avoid saving the va_arg GARs unnecessarily yet:
> 
> https://godbolt.org/z/n1YqWq9c9
> 
> Now to me it seems a bad idea to use va_arg in syscall.c.
> 

I think it was the natural way to express kernel communication mechanism
that indeed takes variadic arguments.   And since it is older than Linux
(man-pages stated it was from 4BSD), it also mean that you don't bind a
maximum limit or arguments (although on Linux and BSD does have a pratical
limit).

We can maybe add a implementation that uses named args (which extra
boilerplate to architectures that accepts 7 arguments instead of usual
6); and just enable it if a per-architecture flag is set meaning that
for that specific ABI the variadic is essentially the same as named
functions calls.  Something like:

long int
syscall (long int number, 
#if __ASSUME_SYSCALL_NAMED_WORKS
         long int a0, long int a1, long int a2,
         long int a3, long int a4, long int a5
#else
         ...
#endif
        )
{
#ifndef __ASSUME_SYSCALL_NAMED_WORKS
  va_list args;
  va_start (args, number);
  long int a0 = va_arg (args, long int);
  long int a1 = va_arg (args, long int);
  long int a2 = va_arg (args, long int);
  long int a3 = va_arg (args, long int);
  long int a4 = va_arg (args, long int);
  long int a5 = va_arg (args, long int);
  va_end (args);
#endif
  long int r = INTERNAL_SYSCALL_NCS_CALL (number, a0, a1, a2, a3, a4, a5);
  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (r)))
    {
      __set_errno (-r);
      return -1;
    }
  return r;
}

It might need some more hacks to hide the syscall prototype.
  
Xi Ruoyao March 23, 2023, 7:08 p.m. UTC | #10
On Thu, 2023-03-23 at 15:40 -0300, Adhemerval Zanella Netto wrote:
> 
> On 23/03/23 15:26, Xi Ruoyao wrote:
> > 
> > LoongArch has a TARGET_SETUP_INCOMING_VARARGS but it does not use the
> > information from stdarg pass.  I can fix it, but even with the fix GCC
> > would still save 7 registers (now GCC trunk saves 9 registers, the fix
> > would make some improvement but no much).
> > 
> > And the issue seems not trivial to fix.  On x86_64, all of GCC, Clang,
> > and MSVC will save some registers if va_arg is used.  I've not found any
> > compiler which can avoid saving the va_arg GARs unnecessarily yet:
> > 
> > https://godbolt.org/z/n1YqWq9c9
> > 
> > Now to me it seems a bad idea to use va_arg in syscall.c.
> > 
> 
> I think it was the natural way to express kernel communication mechanism
> that indeed takes variadic arguments.   And since it is older than Linux
> (man-pages stated it was from 4BSD), it also mean that you don't bind a
> maximum limit or arguments (although on Linux and BSD does have a pratical
> limit).

Well, my statement is only for Linux and a modern architecture with
enough GARs.

> We can maybe add a implementation that uses named args (which extra
> boilerplate to architectures that accepts 7 arguments instead of usual
> 6); and just enable it if a per-architecture flag is set meaning that
> for that specific ABI the variadic is essentially the same as named
> functions calls.  Something like:
> 
> long int
> syscall (long int number, 
> #if __ASSUME_SYSCALL_NAMED_WORKS
>          long int a0, long int a1, long int a2,
>          long int a3, long int a4, long int a5
> #else
>          ...
> #endif

/* snip */

> It might need some more hacks to hide the syscall prototype.

I've already tried something similar on my box (I named the macro
"SYSCALL_VA_ARG_NOT_NEEDED"); it looks like we don't need to hide the
syscall prototype:  unistd.h is not in misc/syscall.o.d.

The disassemble of misc/syscall.o looks perfect when I defined the macro
in sysdep/unix/sysv/linux/loongarch/sysdep.h:

0000000000000000 <syscall>:
   0:	0015008b 	move        	$a7, $a0
   4:	001500a4 	move        	$a0, $a1
   8:	001500c5 	move        	$a1, $a2
   c:	001500e6 	move        	$a2, $a3
  10:	00150107 	move        	$a3, $a4
  14:	00150128 	move        	$a4, $a5
  18:	00150149 	move        	$a5, $a6
  1c:	002b0000 	syscall     	0x0
  20:	15ffffec 	lu12i.w     	$t0, -1(0xfffff)
  24:	68000984 	bltu        	$t0, $a0, 8(0x8)	# 2c <syscall+0x2c>
  28:	4c000020 	jirl        	$zero, $ra, 0
  2c:	1a00000c 	pcalau12i   	$t0, 0
  30:	00111004 	sub.w       	$a0, $zero, $a0
  34:	28c0018c 	ld.d        	$t0, $t0, 0
  38:	38180984 	stx.w       	$a0, $t0, $tp
  3c:	02bffc04 	addi.w      	$a0, $zero, -1(0xfff)
  40:	4c000020 	jirl        	$zero, $ra, 0
  

Patch

diff --git a/sysdeps/unix/sysv/linux/loongarch/syscall.S b/sysdeps/unix/sysv/linux/loongarch/syscall.S
new file mode 100644
index 0000000000..de2d5e9fb8
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/loongarch/syscall.S
@@ -0,0 +1,48 @@ 
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* syscall (int nr, ...)
+
+   LoongArch system calls take between 0 and 7 arguments. On entry here nr
+   is in a0 and any other system call arguments are in register a1..a7.
+
+   For kernel entry we need to move the system call nr to a7 then
+   load the remaining arguments to register.  */
+
+ ENTRY (syscall)
+        move	t0, a7
+        move	a7, a0		/* Syscall number -> a0.  */
+        move	a0, a1		/* shift arg1 - arg6.  */
+        move	a1, a2
+        move	a2, a3
+        move	a3, a4
+        move	a4, a5
+        move	a5, a6
+        move	a6, t0		/* arg7 is saved in t0.  */
+        syscall 0		/* Do the system call.  */
+
+	lu12i.w	t0, -1
+        bltu	t0, a0, L (error)
+        ret                     /* Return to caller.  */
+
+L (error):
+        b	__syscall_error
+
+PSEUDO_END (syscall)