@@ -31,19 +31,16 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_add_n)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
lw $10,0($5)
lw $11,0($6)
addiu $7,$7,-1
and $9,$7,4-1 /* number of limbs in first loop */
- beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop */
move $2,$0
+ beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop */
subu $7,$7,$9
@@ -61,11 +58,10 @@ L(Loop0): addiu $9,$9,-1
addiu $6,$6,4
move $10,$12
move $11,$13
- bne $9,$0,L(Loop0)
addiu $4,$4,4
+ bne $9,$0,L(Loop0)
L(L0): beq $7,$0,L(end)
- nop
L(Loop): addiu $7,$7,-4
@@ -108,14 +104,14 @@ L(Loop): addiu $7,$7,-4
addiu $5,$5,16
addiu $6,$6,16
- bne $7,$0,L(Loop)
addiu $4,$4,16
+ bne $7,$0,L(Loop)
L(end): addu $11,$11,$2
sltu $8,$11,$2
addu $11,$10,$11
sltu $2,$11,$10
sw $11,0($4)
- j $31
or $2,$2,$8
+ jr $31
END (__mpn_add_n)
@@ -31,12 +31,9 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_addmul_1)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
/* warm up phase 0 */
lw $8,0($5)
@@ -50,12 +47,12 @@ ENTRY (__mpn_addmul_1)
#endif
addiu $6,$6,-1
- beq $6,$0,L(LC0)
move $2,$0 /* zero cy2 */
+ beq $6,$0,L(LC0)
addiu $6,$6,-1
- beq $6,$0,L(LC1)
lw $8,0($5) /* load new s1 limb as early as possible */
+ beq $6,$0,L(LC1)
L(Loop): lw $10,0($4)
#if __mips_isa_rev < 6
@@ -81,8 +78,8 @@ L(Loop): lw $10,0($4)
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
- bne $6,$0,L(Loop) /* should be "bnel" */
addu $2,$9,$2 /* add high product limb and carry from addition */
+ bne $6,$0,L(Loop) /* should be "bnel" */
/* cool down phase 1 */
L(LC1): lw $10,0($4)
@@ -123,6 +120,6 @@ L(LC0): lw $10,0($4)
sltu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
- j $31
addu $2,$9,$2 /* add high product limb and carry from addition */
+ jr $31
END (__mpn_addmul_1)
@@ -127,16 +127,13 @@ elf_machine_load_address (void)
{
ElfW(Addr) addr;
#ifndef __mips16
- asm (" .set noreorder\n"
- " " STRINGXP (PTR_LA) " %0, 0f\n"
+ asm (" " STRINGXP (PTR_LA) " %0, 0f\n"
# if !defined __mips_isa_rev || __mips_isa_rev < 6
" bltzal $0, 0f\n"
- " nop\n"
+# else
+ " bal 0f\n"
+# endif
"0: " STRINGXP (PTR_SUBU) " %0, $31, %0\n"
-# else
- "0: addiupc $31, 0\n"
- " " STRINGXP (PTR_SUBU) " %0, $31, %0\n"
-# endif
" .set reorder\n"
: "=r" (addr)
: /* No inputs */
@@ -237,7 +234,9 @@ do { \
and not just plain _start. */
#ifndef __mips16
-# if !defined __mips_isa_rev || __mips_isa_rev < 6
+/* Although microMIPS R6 has an ADDIUPC instruction, it must be 4-byte aligned
+ for the address calculation to be valid. */
+# if !defined __mips_isa_rev || __mips_isa_rev < 6 || defined __mips_micromips
# define LCOFF STRINGXP(.Lcof2)
# define LOAD_31 STRINGXP(bltzal $8) "," STRINGXP(.Lcof2)
# else
@@ -301,7 +301,6 @@ asm ("\n\
.ent _dl_runtime_resolve\n\
_dl_runtime_resolve:\n\
.frame $29, " STRINGXP(ELF_DL_FRAME_SIZE) ", $31\n\
- .set noreorder\n\
# Save GP.\n\
1: move $3, $28\n\
# Save arguments and sp value in stack.\n\
@@ -311,7 +310,6 @@ _dl_runtime_resolve:\n\
# Compute GP.\n\
2: " STRINGXP(SETUP_GP) "\n\
" STRINGXV(SETUP_GP64 (0, _dl_runtime_resolve)) "\n\
- .set reorder\n\
# Save slot call pc.\n\
move $2, $31\n\
" IFABIO32(STRINGXP(CPRESTORE(32))) "\n\
@@ -358,7 +356,6 @@ asm ("\n\
.ent _dl_runtime_pltresolve\n\
_dl_runtime_pltresolve:\n\
.frame $29, " STRINGXP(ELF_DL_PLT_FRAME_SIZE) ", $31\n\
- .set noreorder\n\
# Save arguments and sp value in stack.\n\
1: " STRINGXP(PTR_SUBIU) " $29, " STRINGXP(ELF_DL_PLT_FRAME_SIZE) "\n\
" IFABIO32(STRINGXP(PTR_L) " $13, " STRINGXP(PTRSIZE) "($28)") "\n\
@@ -368,7 +365,6 @@ _dl_runtime_pltresolve:\n\
# Compute GP.\n\
2: " STRINGXP(SETUP_GP) "\n\
" STRINGXV(SETUP_GP64 (0, _dl_runtime_pltresolve)) "\n\
- .set reorder\n\
" IFABIO32(STRINGXP(CPRESTORE(32))) "\n\
" ELF_DL_PLT_SAVE_ARG_REGS "\
move $4, $13\n\
@@ -30,12 +30,9 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_lshift)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
sll $2,$6,2
addu $5,$5,$2 /* make r5 point at end of src */
lw $10,-4($5) /* load first limb */
@@ -43,8 +40,8 @@ ENTRY (__mpn_lshift)
addu $4,$4,$2 /* make r4 point at end of res */
addiu $6,$6,-1
and $9,$6,4-1 /* number of limbs in first loop */
- beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop */
srl $2,$10,$13 /* compute function result */
+ beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop */
subu $6,$6,$9
@@ -56,11 +53,10 @@ L(Loop0): lw $3,-8($5)
srl $12,$3,$13
move $10,$3
or $8,$11,$12
- bne $9,$0,L(Loop0)
sw $8,0($4)
+ bne $9,$0,L(Loop0)
L(L0): beq $6,$0,L(Lend)
- nop
L(Loop): lw $3,-8($5)
addiu $4,$4,-16
@@ -88,10 +84,10 @@ L(Loop): lw $3,-8($5)
addiu $5,$5,-16
or $8,$14,$9
- bgtz $6,L(Loop)
sw $8,0($4)
+ bgtz $6,L(Loop)
L(Lend): sll $8,$10,$7
- j $31
sw $8,-4($4)
+ jr $31
END (__mpn_lshift)
@@ -34,6 +34,42 @@ static void __attribute_used__ __mcount (u_long frompc, u_long selfpc)
# define CPRESTORE
#endif
+#if __mips_isa_rev > 5 && defined (__mips_micromips)
+#define MCOUNT asm(\
+ ".globl _mcount;\n\t" \
+ ".align 2;\n\t" \
+ ".set push;\n\t" \
+ ".set nomips16;\n\t" \
+ ".type _mcount,@function;\n\t" \
+ ".ent _mcount\n\t" \
+ "_mcount:\n\t" \
+ ".frame $sp,44,$31\n\t" \
+ ".set noat;\n\t" \
+ CPLOAD \
+ "subu $29,$29,48;\n\t" \
+ CPRESTORE \
+ "sw $4,24($29);\n\t" \
+ "sw $5,28($29);\n\t" \
+ "sw $6,32($29);\n\t" \
+ "sw $7,36($29);\n\t" \
+ "sw $2,40($29);\n\t" \
+ "sw $1,16($29);\n\t" \
+ "sw $31,20($29);\n\t" \
+ "move $5,$31;\n\t" \
+ "move $4,$1;\n\t" \
+ "balc __mcount;\n\t" \
+ "lw $4,24($29);\n\t" \
+ "lw $5,28($29);\n\t" \
+ "lw $6,32($29);\n\t" \
+ "lw $7,36($29);\n\t" \
+ "lw $2,40($29);\n\t" \
+ "lw $1,20($29);\n\t" \
+ "lw $31,16($29);\n\t" \
+ "addu $29,$29,56;\n\t" \
+ "jrc $1;\n\t" \
+ ".end _mcount;\n\t" \
+ ".set pop");
+#else
#define MCOUNT asm(\
".globl _mcount;\n\t" \
".align 2;\n\t" \
@@ -71,6 +107,7 @@ static void __attribute_used__ __mcount (u_long frompc, u_long selfpc)
"move $31,$1;\n\t" \
".end _mcount;\n\t" \
".set pop");
+#endif
#else
@@ -97,6 +134,50 @@ static void __attribute_used__ __mcount (u_long frompc, u_long selfpc)
# error "Unknown ABI"
#endif
+#if __mips_isa_rev > 5 && defined (__mips_micromips)
+#define MCOUNT asm(\
+ ".globl _mcount;\n\t" \
+ ".align 3;\n\t" \
+ ".set push;\n\t" \
+ ".set nomips16;\n\t" \
+ ".type _mcount,@function;\n\t" \
+ ".ent _mcount\n\t" \
+ "_mcount:\n\t" \
+ ".frame $sp,88,$31\n\t" \
+ ".set noat;\n\t" \
+ PTR_SUBU_STRING " $29,$29,96;\n\t" \
+ CPSETUP \
+ "sd $4,24($29);\n\t" \
+ "sd $5,32($29);\n\t" \
+ "sd $6,40($29);\n\t" \
+ "sd $7,48($29);\n\t" \
+ "sd $8,56($29);\n\t" \
+ "sd $9,64($29);\n\t" \
+ "sd $10,72($29);\n\t" \
+ "sd $11,80($29);\n\t" \
+ "sd $2,16($29);\n\t" \
+ "sd $1,0($29);\n\t" \
+ "sd $31,8($29);\n\t" \
+ "move $5,$31;\n\t" \
+ "move $4,$1;\n\t" \
+ "balc __mcount;\n\t" \
+ "ld $4,24($29);\n\t" \
+ "ld $5,32($29);\n\t" \
+ "ld $6,40($29);\n\t" \
+ "ld $7,48($29);\n\t" \
+ "ld $8,56($29);\n\t" \
+ "ld $9,64($29);\n\t" \
+ "ld $10,72($29);\n\t" \
+ "ld $11,80($29);\n\t" \
+ "ld $2,16($29);\n\t" \
+ "ld $1,8($29);\n\t" \
+ "ld $31,0($29);\n\t" \
+ CPRETURN \
+ PTR_ADDU_STRING " $29,$29,96;\n\t" \
+ "jrc $1;\n\t" \
+ ".end _mcount;\n\t" \
+ ".set pop");
+#else
#define MCOUNT asm(\
".globl _mcount;\n\t" \
".align 3;\n\t" \
@@ -142,5 +223,6 @@ static void __attribute_used__ __mcount (u_long frompc, u_long selfpc)
"move $31,$1;\n\t" \
".end _mcount;\n\t" \
".set pop");
+#endif
#endif
@@ -86,6 +86,12 @@
# endif
#endif
+#if __mips_isa_rev > 5 && defined (__mips_micromips)
+# define PTR_BC bc16
+#else
+# define PTR_BC bc
+#endif
+
/*
* Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
* prefetches appear to offer a slight performance advantage.
@@ -272,7 +278,6 @@ LEAF(MEMCPY_NAME, 0)
LEAF(MEMCPY_NAME)
#endif
.set nomips16
- .set noreorder
/*
* Below we handle the case where memcpy is called with overlapping src and dst.
* Although memcpy is not required to handle this case, some parts of Android
@@ -284,10 +289,9 @@ LEAF(MEMCPY_NAME)
xor t1,t0,t2
PTR_SUBU t0,t1,t2
sltu t2,t0,a2
- beq t2,zero,L(memcpy)
la t9,memmove
+ beq t2,zero,L(memcpy)
jr t9
- nop
L(memcpy):
#endif
/*
@@ -295,12 +299,12 @@ L(memcpy):
* size, copy dst pointer to v0 for the return value.
*/
slti t2,a2,(2 * NSIZE)
- bne t2,zero,L(lasts)
#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
move v0,zero
#else
move v0,a0
#endif
+ bne t2,zero,L(lasts)
#ifndef R6_CODE
@@ -312,12 +316,12 @@ L(memcpy):
*/
xor t8,a1,a0
andi t8,t8,(NSIZE-1) /* t8 is a0/a1 word-displacement */
- bne t8,zero,L(unaligned)
PTR_SUBU a3, zero, a0
+ bne t8,zero,L(unaligned)
andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
+ PTR_SUBU a2,a2,a3 /* a2 is the remaining bytes count */
beq a3,zero,L(aligned) /* if a3=0, it is already aligned */
- PTR_SUBU a2,a2,a3 /* a2 is the remaining bytes count */
C_LDHI t8,0(a1)
PTR_ADDU a1,a1,a3
@@ -332,18 +336,24 @@ L(memcpy):
* align instruction.
*/
andi t8,a0,7
+#ifdef __mips_micromips
+ auipc t9,%pcrel_hi(L(atable))
+ addiu t9,t9,%pcrel_lo(L(atable)+4)
+ PTR_LSA t9,t8,t9,1
+#else
lapc t9,L(atable)
PTR_LSA t9,t8,t9,2
+#endif
jrc t9
L(atable):
- bc L(lb0)
- bc L(lb7)
- bc L(lb6)
- bc L(lb5)
- bc L(lb4)
- bc L(lb3)
- bc L(lb2)
- bc L(lb1)
+ PTR_BC L(lb0)
+ PTR_BC L(lb7)
+ PTR_BC L(lb6)
+ PTR_BC L(lb5)
+ PTR_BC L(lb4)
+ PTR_BC L(lb3)
+ PTR_BC L(lb2)
+ PTR_BC L(lb1)
L(lb7):
lb a3, 6(a1)
sb a3, 6(a0)
@@ -374,20 +384,26 @@ L(lb1):
L(lb0):
andi t8,a1,(NSIZE-1)
+#ifdef __mips_micromips
+ auipc t9,%pcrel_hi(L(jtable))
+ addiu t9,t9,%pcrel_lo(L(jtable)+4)
+ PTR_LSA t9,t8,t9,1
+#else
lapc t9,L(jtable)
PTR_LSA t9,t8,t9,2
+#endif
jrc t9
L(jtable):
- bc L(aligned)
- bc L(r6_unaligned1)
- bc L(r6_unaligned2)
- bc L(r6_unaligned3)
-# ifdef USE_DOUBLE
- bc L(r6_unaligned4)
- bc L(r6_unaligned5)
- bc L(r6_unaligned6)
- bc L(r6_unaligned7)
-# endif
+ PTR_BC L(aligned)
+ PTR_BC L(r6_unaligned1)
+ PTR_BC L(r6_unaligned2)
+ PTR_BC L(r6_unaligned3)
+# ifdef USE_DOUBLE
+ PTR_BC L(r6_unaligned4)
+ PTR_BC L(r6_unaligned5)
+ PTR_BC L(r6_unaligned6)
+ PTR_BC L(r6_unaligned7)
+# endif
#endif /* R6_CODE */
L(aligned):
@@ -401,8 +417,8 @@ L(aligned):
*/
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
- beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
+ beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
/* When in the loop we may prefetch with the 'prepare to store' hint,
@@ -428,7 +444,6 @@ L(aligned):
# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
sltu v1,t9,a0
bgtz v1,L(skip_set)
- nop
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
L(skip_set):
# else
@@ -444,11 +459,16 @@ L(skip_set):
#endif
L(loop16w):
C_LD t0,UNIT(0)(a1)
+/* We need to separate out the C_LD instruction here so that it will work
+   both when it is used by itself and when it is used with the branch
+   instruction. */
#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
+ C_LD t1,UNIT(1)(a1)
bgtz v1,L(skip_pref)
-#endif
+#else
C_LD t1,UNIT(1)(a1)
+#endif
#ifdef R6_CODE
PREFETCH_FOR_STORE (2, a0)
#else
@@ -502,8 +522,8 @@ L(skip_pref):
C_ST REG6,UNIT(14)(a0)
C_ST REG7,UNIT(15)(a0)
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
- bne a0,a3,L(loop16w)
PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
+ bne a0,a3,L(loop16w)
move a2,t8
/* Here we have src and dest word-aligned but less than 64-bytes or
@@ -517,7 +537,6 @@ L(chkw):
andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
/* The t8 is the reminder count past 32-bytes */
beq a2,t8,L(chk1w) /* When a2=t8, no 32-byte chunk */
- nop
C_LD t0,UNIT(0)(a1)
C_LD t1,UNIT(1)(a1)
C_LD REG2,UNIT(2)(a1)
@@ -546,8 +565,8 @@ L(chkw):
*/
L(chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
- beq a2,t8,L(lastw)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
+ beq a2,t8,L(lastw)
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
/* copying in words (4-byte or 8-byte chunks) */
@@ -555,8 +574,8 @@ L(wordCopy_loop):
C_LD REG3,UNIT(0)(a1)
PTR_ADDIU a0,a0,UNIT(1)
PTR_ADDIU a1,a1,UNIT(1)
- bne a0,a3,L(wordCopy_loop)
C_ST REG3,UNIT(-1)(a0)
+ bne a0,a3,L(wordCopy_loop)
/* If we have been copying double words, see if we can copy a single word
before doing byte copies. We can have, at most, one word to copy. */
@@ -574,17 +593,16 @@ L(lastw):
/* Copy the last 8 (or 16) bytes */
L(lastb):
- blez a2,L(leave)
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
+ blez a2,L(leave)
L(lastbloop):
lb v1,0(a1)
PTR_ADDIU a0,a0,1
PTR_ADDIU a1,a1,1
- bne a0,a3,L(lastbloop)
sb v1,-1(a0)
+ bne a0,a3,L(lastbloop)
L(leave):
- j ra
- nop
+ jr ra
/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
whether or not USE_DOUBLE is defined. Instead of just doing byte
@@ -625,8 +643,8 @@ L(wcopy_loop):
L(unaligned):
andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
+ PTR_SUBU a2,a2,a3 /* a2 is the remaining bytes count */
beqz a3,L(ua_chk16w) /* if a3=0, it is already aligned */
- PTR_SUBU a2,a2,a3 /* a2 is the remaining bytes count */
C_LDHI v1,UNIT(0)(a1)
C_LDLO v1,UNITM1(1)(a1)
@@ -644,8 +662,8 @@ L(unaligned):
L(ua_chk16w):
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
- beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
+ beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
@@ -664,7 +682,6 @@ L(ua_chk16w):
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0
bgtz v1,L(ua_skip_set)
- nop
PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
L(ua_skip_set):
# else
@@ -676,11 +693,16 @@ L(ua_loop16w):
C_LDHI t0,UNIT(0)(a1)
C_LDHI t1,UNIT(1)(a1)
C_LDHI REG2,UNIT(2)(a1)
+/* We need to separate out the C_LDHI instruction here so that it will work
+   both when it is used by itself and when it is used with the branch
+   instruction. */
# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0
+ C_LDHI REG3,UNIT(3)(a1)
bgtz v1,L(ua_skip_pref)
-# endif
+# else
C_LDHI REG3,UNIT(3)(a1)
+# endif
PREFETCH_FOR_STORE (4, a0)
PREFETCH_FOR_STORE (5, a0)
L(ua_skip_pref):
@@ -731,8 +753,8 @@ L(ua_skip_pref):
C_ST REG6,UNIT(14)(a0)
C_ST REG7,UNIT(15)(a0)
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
- bne a0,a3,L(ua_loop16w)
PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
+ bne a0,a3,L(ua_loop16w)
move a2,t8
/* Here we have src and dest word-aligned but less than 64-bytes or
@@ -745,7 +767,6 @@ L(ua_chkw):
andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. */
/* t8 is the reminder count past 32-bytes */
beq a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
- nop
C_LDHI t0,UNIT(0)(a1)
C_LDHI t1,UNIT(1)(a1)
C_LDHI REG2,UNIT(2)(a1)
@@ -778,8 +799,8 @@ L(ua_chkw):
*/
L(ua_chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
- beq a2,t8,L(ua_smallCopy)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
+ beq a2,t8,L(ua_smallCopy)
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
/* copying in words (4-byte or 8-byte chunks) */
@@ -788,22 +809,21 @@ L(ua_wordCopy_loop):
C_LDLO v1,UNITM1(1)(a1)
PTR_ADDIU a0,a0,UNIT(1)
PTR_ADDIU a1,a1,UNIT(1)
- bne a0,a3,L(ua_wordCopy_loop)
C_ST v1,UNIT(-1)(a0)
+ bne a0,a3,L(ua_wordCopy_loop)
/* Copy the last 8 (or 16) bytes */
L(ua_smallCopy):
- beqz a2,L(leave)
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
+ beqz a2,L(leave)
L(ua_smallCopy_loop):
lb v1,0(a1)
PTR_ADDIU a0,a0,1
PTR_ADDIU a1,a1,1
- bne a0,a3,L(ua_smallCopy_loop)
sb v1,-1(a0)
+ bne a0,a3,L(ua_smallCopy_loop)
- j ra
- nop
+ jr ra
#else /* R6_CODE */
@@ -816,9 +836,9 @@ L(ua_smallCopy_loop):
# endif
# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
andi REG7, a2, (NSIZE-1);/* REG7 is # of bytes to by bytes. */ \
- beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \
PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \
/* (d)word chunks. */ \
+ beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \
move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \
/* after word loop is finished. */ \
PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \
@@ -831,10 +851,9 @@ L(r6_ua_wordcopy##BYTEOFFSET): \
PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \
PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
move t0, t1; /* Move second part of source to first. */ \
- bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
C_ST REG3, UNIT(-1)(a0); \
+ bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
j L(lastb); \
- nop
/* We are generating R6 code, the destination is 4 byte aligned and
the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
@@ -859,7 +878,6 @@ L(r6_unaligned7):
#endif /* R6_CODE */
.set at
- .set reorder
END(MEMCPY_NAME)
#ifndef ANDROID_CHANGES
# ifdef _LIBC
@@ -82,6 +82,12 @@
# endif
#endif
+#if __mips_isa_rev > 5 && defined (__mips_micromips)
+# define PTR_BC bc16
+#else
+# define PTR_BC bc
+#endif
+
/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
or PREFETCH_STORE_STREAMED offers a large performance advantage
but PREPAREFORSTORE has some special restrictions to consider.
@@ -205,17 +211,16 @@ LEAF(MEMSET_NAME)
#endif
.set nomips16
- .set noreorder
-/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of
+/* If the size is less than 4*NSIZE (16 or 32), go to L(lastb). Regardless of
size, copy dst pointer to v0 for the return value. */
- slti t2,a2,(2 * NSIZE)
- bne t2,zero,L(lastb)
+ slti t2,a2,(4 * NSIZE)
move v0,a0
+ bne t2,zero,L(lastb)
/* If memset value is not zero, we copy it to all the bytes in a 32 or 64
bit word. */
- beq a1,zero,L(set0) /* If memset value is zero no smear */
PTR_SUBU a3,zero,a0
+ beq a1,zero,L(set0) /* If memset value is zero no smear */
nop
/* smear byte into 32 or 64 bit word */
@@ -251,26 +256,30 @@ LEAF(MEMSET_NAME)
L(set0):
#ifndef R6_CODE
andi t2,a3,(NSIZE-1) /* word-unaligned address? */
- beq t2,zero,L(aligned) /* t2 is the unalignment count */
PTR_SUBU a2,a2,t2
+ beq t2,zero,L(aligned) /* t2 is the unalignment count */
C_STHI a1,0(a0)
PTR_ADDU a0,a0,t2
#else /* R6_CODE */
- andi t2,a0,(NSIZE-1)
+ andi t2,a0,7
+# ifdef __mips_micromips
+ auipc t9,%pcrel_hi(L(atable))
+ addiu t9,t9,%pcrel_lo(L(atable)+4)
+ PTR_LSA t9,t2,t9,1
+# else
lapc t9,L(atable)
PTR_LSA t9,t2,t9,2
+# endif
jrc t9
L(atable):
- bc L(aligned)
-# ifdef USE_DOUBLE
- bc L(lb7)
- bc L(lb6)
- bc L(lb5)
- bc L(lb4)
-# endif
- bc L(lb3)
- bc L(lb2)
- bc L(lb1)
+ PTR_BC L(aligned)
+ PTR_BC L(lb7)
+ PTR_BC L(lb6)
+ PTR_BC L(lb5)
+ PTR_BC L(lb4)
+ PTR_BC L(lb3)
+ PTR_BC L(lb2)
+ PTR_BC L(lb1)
L(lb7):
sb a1,6(a0)
L(lb6):
@@ -300,8 +309,8 @@ L(aligned):
left to store or we would have jumped to L(lastb) earlier in the code. */
#ifdef DOUBLE_ALIGN
andi t2,a3,4
- beq t2,zero,L(double_aligned)
PTR_SUBU a2,a2,t2
+ beq t2,zero,L(double_aligned)
sw a1,0(a0)
PTR_ADDU a0,a0,t2
L(double_aligned):
@@ -313,8 +322,8 @@ L(double_aligned):
chunks have been copied. We will loop, incrementing a0 until it equals
a3. */
andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
- beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */
+ beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
/* When in the loop we may prefetch with the 'prepare to store' hint,
@@ -339,7 +348,6 @@ L(loop16w):
&& (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
bgtz v1,L(skip_pref)
- nop
#endif
#ifdef R6_CODE
PREFETCH_FOR_STORE (2, a0)
@@ -366,7 +374,6 @@ L(skip_pref):
C_ST a1,UNIT(15)(a0)
PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
bne a0,a3,L(loop16w)
- nop
move a2,t8
/* Here we have dest word-aligned but less than 64-bytes or 128 bytes to go.
@@ -376,7 +383,6 @@ L(chkw):
andi t8,a2,NSIZEMASK /* is there a 32-byte/64-byte chunk. */
/* the t8 is the reminder count past 32-bytes */
beq a2,t8,L(chk1w)/* when a2==t8, no 32-byte chunk */
- nop
C_ST a1,UNIT(0)(a0)
C_ST a1,UNIT(1)(a0)
C_ST a1,UNIT(2)(a0)
@@ -394,30 +400,28 @@ L(chkw):
been copied. We will loop, incrementing a0 until a0 equals a3. */
L(chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
- beq a2,t8,L(lastb)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
+ beq a2,t8,L(lastb)
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
/* copying in words (4-byte or 8 byte chunks) */
L(wordCopy_loop):
PTR_ADDIU a0,a0,UNIT(1)
- bne a0,a3,L(wordCopy_loop)
C_ST a1,UNIT(-1)(a0)
+ bne a0,a3,L(wordCopy_loop)
/* Copy the last 8 (or 16) bytes */
L(lastb):
- blez a2,L(leave)
PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
+ blez a2,L(leave)
L(lastbloop):
PTR_ADDIU a0,a0,1
- bne a0,a3,L(lastbloop)
sb a1,-1(a0)
+ bne a0,a3,L(lastbloop)
L(leave):
- j ra
- nop
+ jr ra
.set at
- .set reorder
END(MEMSET_NAME)
#ifndef ANDROID_CHANGES
# ifdef _LIBC
@@ -40,18 +40,10 @@
.section .init,"ax",@progbits
lw $31,28($sp)
- .set noreorder
- .set nomacro
- j $31
addiu $sp,$sp,32
- .set macro
- .set reorder
+ jr $31
.section .fini,"ax",@progbits
lw $31,28($sp)
- .set noreorder
- .set nomacro
- j $31
addiu $sp,$sp,32
- .set macro
- .set reorder
+ jr $31
@@ -87,7 +87,7 @@ __longjmp (__jmp_buf env_arg, int val_arg)
else
asm volatile ("move $2, %0" : : "r" (val));
- asm volatile ("j $31");
+ asm volatile ("jr $31");
/* Avoid `volatile function does return' warnings. */
for (;;);
@@ -37,16 +37,13 @@ ENTRY (__mpn_add_n)
#ifdef __PIC__
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
ld $10,0($5)
ld $11,0($6)
daddiu $7,$7,-1
and $9,$7,4-1 # number of limbs in first loop
- beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
move $2,$0
+ beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
dsubu $7,$7,$9
@@ -64,11 +61,10 @@ L(Loop0): daddiu $9,$9,-1
daddiu $6,$6,8
move $10,$12
move $11,$13
- bne $9,$0,L(Loop0)
daddiu $4,$4,8
+ bne $9,$0,L(Loop0)
L(L0): beq $7,$0,L(Lend)
- nop
L(Loop): daddiu $7,$7,-4
@@ -111,15 +107,15 @@ L(Loop): daddiu $7,$7,-4
daddiu $5,$5,32
daddiu $6,$6,32
- bne $7,$0,L(Loop)
daddiu $4,$4,32
+ bne $7,$0,L(Loop)
L(Lend): daddu $11,$11,$2
sltu $8,$11,$2
daddu $11,$10,$11
sltu $2,$11,$10
sd $11,0($4)
- j $31
or $2,$2,$8
+ jr $31
END (__mpn_add_n)
@@ -36,9 +36,6 @@ ENTRY (__mpn_addmul_1)
#ifdef PIC
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
# warm up phase 0
ld $8,0($5)
@@ -52,12 +49,12 @@ ENTRY (__mpn_addmul_1)
#endif
daddiu $6,$6,-1
- beq $6,$0,L(LC0)
move $2,$0 # zero cy2
+ beq $6,$0,L(LC0)
daddiu $6,$6,-1
- beq $6,$0,L(LC1)
ld $8,0($5) # load new s1 limb as early as possible
+ beq $6,$0,L(LC1)
L(Loop): ld $10,0($4)
#if __mips_isa_rev < 6
@@ -83,8 +80,8 @@ L(Loop): ld $10,0($4)
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
- bne $6,$0,L(Loop)
daddu $2,$9,$2 # add high product limb and carry from addition
+ bne $6,$0,L(Loop)
# cool down phase 1
L(LC1): ld $10,0($4)
@@ -125,7 +122,7 @@ L(LC0): ld $10,0($4)
sltu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
- j $31
daddu $2,$9,$2 # add high product limb and carry from addition
+ jr $31
END (__mpn_addmul_1)
@@ -36,9 +36,6 @@ ENTRY (__mpn_lshift)
#ifdef __PIC__
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
dsll $2,$6,3
daddu $5,$5,$2 # make r5 point at end of src
ld $10,-8($5) # load first limb
@@ -46,8 +43,8 @@ ENTRY (__mpn_lshift)
daddu $4,$4,$2 # make r4 point at end of res
daddiu $6,$6,-1
and $9,$6,4-1 # number of limbs in first loop
- beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
dsrl $2,$10,$13 # compute function result
+ beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
dsubu $6,$6,$9
@@ -59,11 +56,10 @@ L(Loop0): ld $3,-16($5)
dsrl $12,$3,$13
move $10,$3
or $8,$11,$12
- bne $9,$0,L(Loop0)
sd $8,0($4)
+ bne $9,$0,L(Loop0)
L(L0): beq $6,$0,L(Lend)
- nop
L(Loop): ld $3,-16($5)
daddiu $4,$4,-32
@@ -91,10 +87,10 @@ L(Loop): ld $3,-16($5)
daddiu $5,$5,-32
or $8,$14,$9
- bgtz $6,L(Loop)
sd $8,0($4)
+ bgtz $6,L(Loop)
L(Lend): dsll $8,$10,$7
- j $31
sd $8,-8($4)
+ jr $31
END (__mpn_lshift)
@@ -37,9 +37,6 @@ ENTRY (__mpn_mul_1)
#ifdef __PIC__
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
# warm up phase 0
ld $8,0($5)
@@ -53,12 +50,12 @@ ENTRY (__mpn_mul_1)
#endif
daddiu $6,$6,-1
- beq $6,$0,L(LC0)
move $2,$0 # zero cy2
+ beq $6,$0,L(LC0)
daddiu $6,$6,-1
- beq $6,$0,L(LC1)
ld $8,0($5) # load new s1 limb as early as possible
+ beq $6,$0,L(LC1)
#if __mips_isa_rev < 6
L(Loop): mflo $10
@@ -80,8 +77,8 @@ L(Loop): move $10,$11
sltu $2,$10,$2 # carry from previous addition -> $2
sd $10,0($4)
daddiu $4,$4,8
- bne $6,$0,L(Loop)
daddu $2,$9,$2 # add high product limb and carry from addition
+ bne $6,$0,L(Loop)
# cool down phase 1
#if __mips_isa_rev < 6
@@ -114,7 +111,7 @@ L(LC0): move $10,$11
daddu $10,$10,$2
sltu $2,$10,$2
sd $10,0($4)
- j $31
daddu $2,$9,$2 # add high product limb and carry from addition
+ jr $31
END (__mpn_mul_1)
@@ -41,19 +41,11 @@
.section .init,"ax",@progbits
ld $31,8($sp)
ld $28,0($sp)
- .set noreorder
- .set nomacro
- j $31
addiu $sp,$sp,16
- .set macro
- .set reorder
+ jr $31
.section .fini,"ax",@progbits
ld $31,8($sp)
ld $28,0($sp)
- .set noreorder
- .set nomacro
- j $31
addiu $sp,$sp,16
- .set macro
- .set reorder
+ jr $31
@@ -41,19 +41,11 @@
.section .init,"ax",@progbits
ld $31,8($sp)
ld $28,0($sp)
- .set noreorder
- .set nomacro
- j $31
daddiu $sp,$sp,16
- .set macro
- .set reorder
+ jr $31
.section .fini,"ax",@progbits
ld $31,8($sp)
ld $28,0($sp)
- .set noreorder
- .set nomacro
- j $31
daddiu $sp,$sp,16
- .set macro
- .set reorder
+ jr $31
@@ -36,15 +36,12 @@ ENTRY (__mpn_rshift)
#ifdef __PIC__
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
ld $10,0($5) # load first limb
dsubu $13,$0,$7
daddiu $6,$6,-1
and $9,$6,4-1 # number of limbs in first loop
- beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
dsll $2,$10,$13 # compute function result
+ beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
dsubu $6,$6,$9
@@ -56,11 +53,10 @@ L(Loop0): ld $3,8($5)
dsll $12,$3,$13
move $10,$3
or $8,$11,$12
- bne $9,$0,L(Loop0)
sd $8,-8($4)
+ bne $9,$0,L(Loop0)
L(L0): beq $6,$0,L(Lend)
- nop
L(Loop): ld $3,8($5)
daddiu $4,$4,32
@@ -88,10 +84,10 @@ L(Loop): ld $3,8($5)
daddiu $5,$5,32
or $8,$14,$9
- bgtz $6,L(Loop)
sd $8,-8($4)
+ bgtz $6,L(Loop)
L(Lend): dsrl $8,$10,$7
- j $31
sd $8,0($4)
+ jr $31
END (__mpn_rshift)
@@ -37,16 +37,13 @@ ENTRY (__mpn_sub_n)
#ifdef __PIC__
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
ld $10,0($5)
ld $11,0($6)
daddiu $7,$7,-1
and $9,$7,4-1 # number of limbs in first loop
- beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
move $2,$0
+ beq $9,$0,L(L0) # if multiple of 4 limbs, skip first loop
dsubu $7,$7,$9
@@ -64,11 +61,10 @@ L(Loop0): daddiu $9,$9,-1
daddiu $6,$6,8
move $10,$12
move $11,$13
- bne $9,$0,L(Loop0)
daddiu $4,$4,8
+ bne $9,$0,L(Loop0)
L(L0): beq $7,$0,L(Lend)
- nop
L(Loop): daddiu $7,$7,-4
@@ -111,15 +107,15 @@ L(Loop): daddiu $7,$7,-4
daddiu $5,$5,32
daddiu $6,$6,32
- bne $7,$0,L(Loop)
daddiu $4,$4,32
+ bne $7,$0,L(Loop)
L(Lend): daddu $11,$11,$2
sltu $8,$11,$2
dsubu $11,$10,$11
sltu $2,$10,$11
sd $11,0($4)
- j $31
or $2,$2,$8
+ jr $31
END (__mpn_sub_n)
@@ -37,9 +37,6 @@ ENTRY (__mpn_submul_1)
#ifdef __PIC__
SETUP_GP /* ??? unused */
#endif
- .set noreorder
- .set nomacro
-
# warm up phase 0
ld $8,0($5)
@@ -53,12 +50,12 @@ ENTRY (__mpn_submul_1)
#endif
daddiu $6,$6,-1
- beq $6,$0,L(LC0)
move $2,$0 # zero cy2
+ beq $6,$0,L(LC0)
daddiu $6,$6,-1
- beq $6,$0,L(LC1)
ld $8,0($5) # load new s1 limb as early as possible
+ beq $6,$0,L(LC1)
L(Loop): ld $10,0($4)
#if __mips_isa_rev < 6
@@ -84,8 +81,8 @@ L(Loop): ld $10,0($4)
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
- bne $6,$0,L(Loop)
daddu $2,$9,$2 # add high product limb and carry from addition
+ bne $6,$0,L(Loop)
# cool down phase 1
L(LC1): ld $10,0($4)
@@ -126,7 +123,7 @@ L(LC0): ld $10,0($4)
sgtu $10,$3,$10
daddu $2,$2,$10
sd $3,0($4)
- j $31
daddu $2,$9,$2 # add high product limb and carry from addition
+ jr $31
END (__mpn_submul_1)
@@ -31,12 +31,9 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_mul_1)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
/* warm up phase 0 */
lw $8,0($5)
@@ -50,12 +47,12 @@ ENTRY (__mpn_mul_1)
#endif
addiu $6,$6,-1
- beq $6,$0,L(LC0)
move $2,$0 /* zero cy2 */
+ beq $6,$0,L(LC0)
addiu $6,$6,-1
- beq $6,$0,L(LC1)
lw $8,0($5) /* load new s1 limb as early as possible */
+ beq $6,$0,L(LC1)
#if __mips_isa_rev < 6
@@ -78,8 +75,8 @@ L(Loop): move $10,$11
sltu $2,$10,$2 /* carry from previous addition -> $2 */
sw $10,0($4)
addiu $4,$4,4
- bne $6,$0,L(Loop) /* should be "bnel" */
addu $2,$9,$2 /* add high product limb and carry from addition */
+ bne $6,$0,L(Loop) /* should be "bnel" */
/* cool down phase 1 */
#if __mips_isa_rev < 6
@@ -112,6 +109,6 @@ L(LC0): move $10,$11
addu $10,$10,$2
sltu $2,$10,$2
sw $10,0($4)
- j $31
addu $2,$9,$2 /* add high product limb and carry from addition */
+ jr $31
END (__mpn_mul_1)
@@ -30,18 +30,15 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_rshift)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
lw $10,0($5) /* load first limb */
subu $13,$0,$7
addiu $6,$6,-1
and $9,$6,4-1 /* number of limbs in first loop */
+ sll $2,$10,$13 /* compute function result */
beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop*/
- sll $2,$10,$13 /* compute function result */
subu $6,$6,$9
@@ -53,11 +50,10 @@ L(Loop0): lw $3,4($5)
sll $12,$3,$13
move $10,$3
or $8,$11,$12
+ sw $8,-4($4)
bne $9,$0,L(Loop0)
- sw $8,-4($4)
L(L0): beq $6,$0,L(Lend)
- nop
L(Loop): lw $3,4($5)
addiu $4,$4,16
@@ -85,10 +81,10 @@ L(Loop): lw $3,4($5)
addiu $5,$5,16
or $8,$14,$9
+ sw $8,-4($4)
bgtz $6,L(Loop)
- sw $8,-4($4)
L(Lend): srl $8,$10,$7
- j $31
sw $8,0($4)
+ jr $31
END (__mpn_rshift)
@@ -31,19 +31,16 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_sub_n)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
lw $10,0($5)
lw $11,0($6)
addiu $7,$7,-1
and $9,$7,4-1 /* number of limbs in first loop */
- beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop */
move $2,$0
+ beq $9,$0,L(L0) /* if multiple of 4 limbs, skip first loop */
subu $7,$7,$9
@@ -61,11 +58,10 @@ L(Loop0): addiu $9,$9,-1
addiu $6,$6,4
move $10,$12
move $11,$13
- bne $9,$0,L(Loop0)
addiu $4,$4,4
+ bne $9,$0,L(Loop0)
L(L0): beq $7,$0,L(Lend)
- nop
L(Loop): addiu $7,$7,-4
@@ -108,14 +104,14 @@ L(Loop): addiu $7,$7,-4
addiu $5,$5,16
addiu $6,$6,16
- bne $7,$0,L(Loop)
addiu $4,$4,16
+ bne $7,$0,L(Loop)
L(Lend): addu $11,$11,$2
sltu $8,$11,$2
subu $11,$10,$11
sltu $2,$10,$11
sw $11,0($4)
- j $31
or $2,$2,$8
+ jr $31
END (__mpn_sub_n)
@@ -31,12 +31,9 @@ along with the GNU MP Library. If not, see
.option pic2
#endif
ENTRY (__mpn_submul_1)
- .set noreorder
#ifdef __PIC__
.cpload t9
#endif
- .set nomacro
-
/* warm up phase 0 */
lw $8,0($5)
@@ -50,12 +47,12 @@ ENTRY (__mpn_submul_1)
#endif
addiu $6,$6,-1
- beq $6,$0,L(LC0)
move $2,$0 /* zero cy2 */
+ beq $6,$0,L(LC0)
addiu $6,$6,-1
- beq $6,$0,L(LC1)
lw $8,0($5) /* load new s1 limb as early as possible */
+ beq $6,$0,L(LC1)
L(Loop): lw $10,0($4)
#if __mips_isa_rev < 6
@@ -81,8 +78,8 @@ L(Loop): lw $10,0($4)
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
- bne $6,$0,L(Loop) /* should be "bnel" */
addu $2,$9,$2 /* add high product limb and carry from addition */
+ bne $6,$0,L(Loop) /* should be "bnel" */
/* cool down phase 1 */
L(LC1): lw $10,0($4)
@@ -123,6 +120,6 @@ L(LC0): lw $10,0($4)
sgtu $10,$3,$10
addu $2,$2,$10
sw $3,0($4)
- j $31
addu $2,$9,$2 /* add high product limb and carry from addition */
+ jr $31
END (__mpn_submul_1)
@@ -71,23 +71,21 @@
.set reorder
/* Set gp when not at 1st instruction */
# define SETUP_GPX(r) \
- .set noreorder; \
move r, $31; /* Save old ra. */ \
bal 10f; /* Find addr of cpload. */ \
- nop; \
10: \
+ .set noreorder; \
.cpload $31; \
- move $31, r; \
- .set reorder
+ .set reorder; \
+ move $31, r;
# define SETUP_GPX_L(r, l) \
- .set noreorder; \
move r, $31; /* Save old ra. */ \
bal l; /* Find addr of cpload. */ \
- nop; \
l: \
+ .set noreorder; \
.cpload $31; \
- move $31, r; \
- .set reorder
+ .set reorder; \
+ move $31, r;
# define SAVE_GP(x) \
.cprestore x /* Save gp trigger t9/jalr conversion. */
# define SETUP_GP64(a, b)
@@ -108,20 +106,14 @@ l: \
.cpsetup $25, gpoffset, proc
# define SETUP_GPX64(cp_reg, ra_save) \
move ra_save, $31; /* Save old ra. */ \
- .set noreorder; \
bal 10f; /* Find addr of .cpsetup. */ \
- nop; \
10: \
- .set reorder; \
.cpsetup $31, cp_reg, 10b; \
move $31, ra_save
# define SETUP_GPX64_L(cp_reg, ra_save, l) \
move ra_save, $31; /* Save old ra. */ \
- .set noreorder; \
bal l; /* Find addr of .cpsetup. */ \
- nop; \
l: \
- .set reorder; \
.cpsetup $31, cp_reg, l; \
move $31, ra_save
# define RESTORE_GP64 \
@@ -38,18 +38,14 @@
L(syse1):
#else
#define PSEUDO(name, syscall_name, args) \
- .set noreorder; \
.set nomips16; \
.align 2; \
cfi_startproc; \
99: j __syscall_error; \
- nop; \
cfi_endproc; \
ENTRY(name) \
- .set noreorder; \
li v0, SYS_ify(syscall_name); \
syscall; \
- .set reorder; \
bne a3, zero, 99b; \
L(syse1):
#endif
@@ -45,18 +45,14 @@
L(syse1):
#else
#define PSEUDO(name, syscall_name, args) \
- .set noreorder; \
.align 2; \
.set nomips16; \
cfi_startproc; \
99: j __syscall_error; \
- nop; \
cfi_endproc; \
ENTRY(name) \
- .set noreorder; \
li v0, SYS_ify(syscall_name); \
syscall; \
- .set reorder; \
bne a3, zero, 99b; \
L(syse1):
#endif
@@ -48,7 +48,6 @@
.align 2; \
ENTRY(name) \
.set nomips16; \
- .set noreorder; \
li v0, SYS_ify(syscall_name); \
syscall
@@ -61,7 +60,6 @@
.align 2; \
ENTRY(name) \
.set nomips16; \
- .set noreorder; \
li v0, SYS_ify(syscall_name); \
syscall
@@ -140,10 +140,8 @@ union __mips_syscall_return
register long int __v0 asm ("$2"); \
register long int __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set reorder" \
: "=r" (__v0), "=r" (__a3) \
: input \
: __SYSCALL_CLOBBERS); \
@@ -164,10 +162,8 @@ union __mips_syscall_return
register long int __a0 asm ("$4") = _arg1; \
register long int __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set reorder" \
: "=r" (__v0), "=r" (__a3) \
: input, "r" (__a0) \
: __SYSCALL_CLOBBERS); \
@@ -190,10 +186,8 @@ union __mips_syscall_return
register long int __a1 asm ("$5") = _arg2; \
register long int __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "=r" (__a3) \
: input, "r" (__a0), "r" (__a1) \
: __SYSCALL_CLOBBERS); \
@@ -219,10 +213,8 @@ union __mips_syscall_return
register long int __a2 asm ("$6") = _arg3; \
register long int __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "=r" (__a3) \
: input, "r" (__a0), "r" (__a1), "r" (__a2) \
: __SYSCALL_CLOBBERS); \
@@ -249,10 +241,8 @@ union __mips_syscall_return
register long int __a2 asm ("$6") = _arg3; \
register long int __a3 asm ("$7") = _arg4; \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "+r" (__a3) \
: input, "r" (__a0), "r" (__a1), "r" (__a2) \
: __SYSCALL_CLOBBERS); \
@@ -95,10 +95,8 @@
register __syscall_arg_t __v0 asm ("$2"); \
register __syscall_arg_t __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set reorder" \
: "=r" (__v0), "=r" (__a3) \
: input \
: __SYSCALL_CLOBBERS); \
@@ -119,10 +117,8 @@
register __syscall_arg_t __a0 asm ("$4") = _arg1; \
register __syscall_arg_t __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set reorder" \
: "=r" (__v0), "=r" (__a3) \
: input, "r" (__a0) \
: __SYSCALL_CLOBBERS); \
@@ -145,10 +141,8 @@
register __syscall_arg_t __a1 asm ("$5") = _arg2; \
register __syscall_arg_t __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "=r" (__a3) \
: input, "r" (__a0), "r" (__a1) \
: __SYSCALL_CLOBBERS); \
@@ -173,10 +167,8 @@
register __syscall_arg_t __a2 asm ("$6") = _arg3; \
register __syscall_arg_t __a3 asm ("$7"); \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "=r" (__a3) \
: input, "r" (__a0), "r" (__a1), "r" (__a2) \
: __SYSCALL_CLOBBERS); \
@@ -203,10 +195,8 @@
register __syscall_arg_t __a2 asm ("$6") = _arg3; \
register __syscall_arg_t __a3 asm ("$7") = _arg4; \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "+r" (__a3) \
: input, "r" (__a0), "r" (__a1), "r" (__a2) \
: __SYSCALL_CLOBBERS); \
@@ -235,10 +225,8 @@
register __syscall_arg_t __a3 asm ("$7") = _arg4; \
register __syscall_arg_t __a4 asm ("$8") = _arg5; \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "+r" (__a3) \
: input, "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a4) \
: __SYSCALL_CLOBBERS); \
@@ -269,10 +257,8 @@
register __syscall_arg_t __a4 asm ("$8") = _arg5; \
register __syscall_arg_t __a5 asm ("$9") = _arg6; \
__asm__ volatile ( \
- ".set\tnoreorder\n\t" \
v0_init \
"syscall\n\t" \
- ".set\treorder" \
: "=r" (__v0), "+r" (__a3) \
: input, "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a4), \
"r" (__a5) \