@@ -798,7 +798,7 @@ check_PROGRAMS =
@HAVE_LIBC_MACHINE_M88K_TRUE@am__append_95 = libc/machine/m88k/setjmp.S
@HAVE_LIBC_MACHINE_MEP_TRUE@am__append_96 = libc/machine/mep/setjmp.S
@HAVE_LIBC_MACHINE_MICROBLAZE_TRUE@am__append_97 = libc/machine/microblaze/strlen.c libc/machine/microblaze/strcmp.c libc/machine/microblaze/strcpy.c libc/machine/microblaze/setjmp.S libc/machine/microblaze/longjmp.S
-@HAVE_LIBC_MACHINE_MIPS_TRUE@am__append_98 = libc/machine/mips/setjmp.S libc/machine/mips/strlen.c libc/machine/mips/strcmp.S libc/machine/mips/strncpy.c libc/machine/mips/memset.S libc/machine/mips/memcpy.S
+@HAVE_LIBC_MACHINE_MIPS_TRUE@am__append_98 = libc/machine/mips/setjmp.S libc/machine/mips/strlen.c libc/machine/mips/strcmp.S libc/machine/mips/strncpy.c libc/machine/mips/memset.c libc/machine/mips/memcpy.c
@HAVE_LIBC_MACHINE_MN10200_TRUE@am__append_99 = libc/machine/mn10200/setjmp.S
@HAVE_LIBC_MACHINE_MN10300_TRUE@am__append_100 = \
@HAVE_LIBC_MACHINE_MN10300_TRUE@ libc/machine/mn10300/setjmp.S libc/machine/mn10300/memchr.S libc/machine/mn10300/memcmp.S libc/machine/mn10300/memcpy.S libc/machine/mn10300/memset.S libc/machine/mn10300/strchr.S \
@@ -18846,34 +18846,6 @@ libc/machine/mips/libc_a-strcmp.obj: libc/machine/mips/strcmp.S
@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o libc/machine/mips/libc_a-strcmp.obj `if test -f 'libc/machine/mips/strcmp.S'; then $(CYGPATH_W) 'libc/machine/mips/strcmp.S'; else $(CYGPATH_W) '$(srcdir)/libc/machine/mips/strcmp.S'; fi`
-libc/machine/mips/libc_a-memset.o: libc/machine/mips/memset.S
-@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -MT libc/machine/mips/libc_a-memset.o -MD -MP -MF libc/machine/mips/$(DEPDIR)/libc_a-memset.Tpo -c -o libc/machine/mips/libc_a-memset.o `test -f 'libc/machine/mips/memset.S' || echo '$(srcdir)/'`libc/machine/mips/memset.S
-@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) libc/machine/mips/$(DEPDIR)/libc_a-memset.Tpo libc/machine/mips/$(DEPDIR)/libc_a-memset.Po
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='libc/machine/mips/memset.S' object='libc/machine/mips/libc_a-memset.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o libc/machine/mips/libc_a-memset.o `test -f 'libc/machine/mips/memset.S' || echo '$(srcdir)/'`libc/machine/mips/memset.S
-
-libc/machine/mips/libc_a-memset.obj: libc/machine/mips/memset.S
-@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -MT libc/machine/mips/libc_a-memset.obj -MD -MP -MF libc/machine/mips/$(DEPDIR)/libc_a-memset.Tpo -c -o libc/machine/mips/libc_a-memset.obj `if test -f 'libc/machine/mips/memset.S'; then $(CYGPATH_W) 'libc/machine/mips/memset.S'; else $(CYGPATH_W) '$(srcdir)/libc/machine/mips/memset.S'; fi`
-@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) libc/machine/mips/$(DEPDIR)/libc_a-memset.Tpo libc/machine/mips/$(DEPDIR)/libc_a-memset.Po
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='libc/machine/mips/memset.S' object='libc/machine/mips/libc_a-memset.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o libc/machine/mips/libc_a-memset.obj `if test -f 'libc/machine/mips/memset.S'; then $(CYGPATH_W) 'libc/machine/mips/memset.S'; else $(CYGPATH_W) '$(srcdir)/libc/machine/mips/memset.S'; fi`
-
-libc/machine/mips/libc_a-memcpy.o: libc/machine/mips/memcpy.S
-@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -MT libc/machine/mips/libc_a-memcpy.o -MD -MP -MF libc/machine/mips/$(DEPDIR)/libc_a-memcpy.Tpo -c -o libc/machine/mips/libc_a-memcpy.o `test -f 'libc/machine/mips/memcpy.S' || echo '$(srcdir)/'`libc/machine/mips/memcpy.S
-@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) libc/machine/mips/$(DEPDIR)/libc_a-memcpy.Tpo libc/machine/mips/$(DEPDIR)/libc_a-memcpy.Po
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='libc/machine/mips/memcpy.S' object='libc/machine/mips/libc_a-memcpy.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o libc/machine/mips/libc_a-memcpy.o `test -f 'libc/machine/mips/memcpy.S' || echo '$(srcdir)/'`libc/machine/mips/memcpy.S
-
-libc/machine/mips/libc_a-memcpy.obj: libc/machine/mips/memcpy.S
-@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -MT libc/machine/mips/libc_a-memcpy.obj -MD -MP -MF libc/machine/mips/$(DEPDIR)/libc_a-memcpy.Tpo -c -o libc/machine/mips/libc_a-memcpy.obj `if test -f 'libc/machine/mips/memcpy.S'; then $(CYGPATH_W) 'libc/machine/mips/memcpy.S'; else $(CYGPATH_W) '$(srcdir)/libc/machine/mips/memcpy.S'; fi`
-@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) libc/machine/mips/$(DEPDIR)/libc_a-memcpy.Tpo libc/machine/mips/$(DEPDIR)/libc_a-memcpy.Po
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='libc/machine/mips/memcpy.S' object='libc/machine/mips/libc_a-memcpy.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -c -o libc/machine/mips/libc_a-memcpy.obj `if test -f 'libc/machine/mips/memcpy.S'; then $(CYGPATH_W) 'libc/machine/mips/memcpy.S'; else $(CYGPATH_W) '$(srcdir)/libc/machine/mips/memcpy.S'; fi`
-
libc/machine/mn10200/libc_a-setjmp.o: libc/machine/mn10200/setjmp.S
@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libc_a_CPPFLAGS) $(CPPFLAGS) $(libc_a_CCASFLAGS) $(CCASFLAGS) -MT libc/machine/mn10200/libc_a-setjmp.o -MD -MP -MF libc/machine/mn10200/$(DEPDIR)/libc_a-setjmp.Tpo -c -o libc/machine/mn10200/libc_a-setjmp.o `test -f 'libc/machine/mn10200/setjmp.S' || echo '$(srcdir)/'`libc/machine/mn10200/setjmp.S
@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) libc/machine/mn10200/$(DEPDIR)/libc_a-setjmp.Tpo libc/machine/mn10200/$(DEPDIR)/libc_a-setjmp.Po
new file mode 100644
@@ -0,0 +1,405 @@
+/*
+ * Copyright (C) 2018 MIPS Tech, LLC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Typical observed latency in cycles in fetching from DRAM. */
+#define LATENCY_CYCLES 63
+
+/* Pre-fetch performance is subject to accurate prefetch ahead,
+ which in turn depends on both the cache-line size and the amount
+ of look-ahead. Since cache-line size is not nominally fixed in
+   a typical library built for multiple platforms, we make conservative
+ assumptions in the default case. This code will typically operate
+ on such conservative assumptions, but if compiled with the correct
+ -mtune=xx options, will perform even better on those specific
+ platforms. */
+#if defined(_MIPS_TUNE_OCTEON2) || defined(_MIPS_TUNE_OCTEON3)
+ #define CACHE_LINE 128
+ #define BLOCK_CYCLES 30
+ #undef LATENCY_CYCLES
+ #define LATENCY_CYCLES 150
+#elif defined(_MIPS_TUNE_I6400) || defined(_MIPS_TUNE_I6500)
+ #define CACHE_LINE 64
+ #define BLOCK_CYCLES 16
+#elif defined(_MIPS_TUNE_P6600)
+ #define CACHE_LINE 32
+ #define BLOCK_CYCLES 12
+#elif defined(_MIPS_TUNE_INTERAPTIV) || defined(_MIPS_TUNE_INTERAPTIV_MR2)
+ #define CACHE_LINE 32
+ #define BLOCK_CYCLES 30
+#else
+ #define CACHE_LINE 32
+ #define BLOCK_CYCLES 11
+#endif
+
+/* Pre-fetch look ahead = ceil (latency / block-cycles) */
+#define PREF_AHEAD (LATENCY_CYCLES / BLOCK_CYCLES \
+ + ((LATENCY_CYCLES % BLOCK_CYCLES) == 0 ? 0 : 1))
+
+/* Unroll-factor, controls how many words at a time in the core loop. */
+#define BLOCK (CACHE_LINE == 128 ? 16 : 8)
+
+#define __overloadable
+#ifndef UNALIGNED_INSTR_SUPPORT
+/* Does the target have unaligned lw/ld/ualw/uald instructions? */
+ #define UNALIGNED_INSTR_SUPPORT 0
+ #if (__mips_isa_rev < 6 && !__mips1)
+ #undef UNALIGNED_INSTR_SUPPORT
+ #define UNALIGNED_INSTR_SUPPORT 1
+ #endif
+#endif
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses? */
+ #define HW_UNALIGNED_SUPPORT 0
+ #if __mips_isa_rev >= 6
+ #undef HW_UNALIGNED_SUPPORT
+ #define HW_UNALIGNED_SUPPORT 1
+ #endif
+#endif
+#define ENABLE_PREFETCH 1
+#if ENABLE_PREFETCH
+ #define PREFETCH(addr) __builtin_prefetch (addr, 0, 0)
+#else
+ #define PREFETCH(addr)
+#endif
+
+#include <string.h>
+
+#ifdef __mips64
+typedef unsigned long long reg_t;
+typedef struct
+{
+ reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#else
+typedef unsigned long reg_t;
+typedef struct
+{
+ reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#endif
+
+#define CACHE_LINES_PER_BLOCK ((BLOCK * sizeof (reg_t) > CACHE_LINE) ? \
+ (BLOCK * sizeof (reg_t) / CACHE_LINE) \
+ : 1)
+
+typedef union
+{
+ reg_t v;
+ bits_t b;
+} bitfields_t;
+
+#define DO_BYTE(a, i) \
+ a[i] = bw.b.B##i; \
+ len--; \
+ if(!len) return ret; \
+
+/* This code is called when aligning a pointer, there are remaining bytes
+ after doing word compares, or architecture does not have some form
+ of unaligned support. */
+static inline void * __attribute__ ((always_inline))
+do_bytes (void *a, const void *b, unsigned long len, void *ret)
+{
+ unsigned char *x = (unsigned char *) a;
+ unsigned char *y = (unsigned char *) b;
+ unsigned long i;
+ /* 'len' might be zero here, so preloading the first two values
+ before the loop may access unallocated memory. */
+ for (i = 0; i < len; i++)
+ {
+ *x = *y;
+ x++;
+ y++;
+ }
+ return ret;
+}
+
+/* This code is called to copy only the remaining bytes within a word or doubleword.  */
+static inline void * __attribute__ ((always_inline))
+do_bytes_remaining (void *a, const void *b, unsigned long len, void *ret)
+{
+ unsigned char *x = (unsigned char *) a;
+ bitfields_t bw;
+ if(len > 0)
+ {
+ bw.v = *(reg_t *)b;
+ DO_BYTE(x, 0);
+ DO_BYTE(x, 1);
+ DO_BYTE(x, 2);
+#ifdef __mips64
+ DO_BYTE(x, 3);
+ DO_BYTE(x, 4);
+ DO_BYTE(x, 5);
+ DO_BYTE(x, 6);
+#endif
+ }
+ return ret;
+}
+
+static inline void * __attribute__ ((always_inline))
+do_words_remaining (reg_t *a, const reg_t *b, unsigned long words,
+ unsigned long bytes, void *ret)
+{
+ /* Use a set-back so that load/stores have incremented addresses in
+ order to promote bonding. */
+ int off = (BLOCK - words);
+ a -= off;
+ b -= off;
+ switch (off)
+ {
+ case 1: a[1] = b[1];
+ case 2: a[2] = b[2];
+ case 3: a[3] = b[3];
+ case 4: a[4] = b[4];
+ case 5: a[5] = b[5];
+ case 6: a[6] = b[6];
+ case 7: a[7] = b[7];
+#if BLOCK==16
+ case 8: a[8] = b[8];
+ case 9: a[9] = b[9];
+ case 10: a[10] = b[10];
+ case 11: a[11] = b[11];
+ case 12: a[12] = b[12];
+ case 13: a[13] = b[13];
+ case 14: a[14] = b[14];
+ case 15: a[15] = b[15];
+#endif
+ }
+ return do_bytes_remaining (a + BLOCK, b + BLOCK, bytes, ret);
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* For MIPS GCC, there are no unaligned builtins - so this struct forces
+ the compiler to treat the pointer access as unaligned. */
+struct ulw
+{
+ reg_t uli;
+} __attribute__ ((packed));
+static inline void * __attribute__ ((always_inline))
+do_uwords_remaining (struct ulw *a, const reg_t *b, unsigned long words,
+ unsigned long bytes, void *ret)
+{
+ /* Use a set-back so that load/stores have incremented addresses in
+ order to promote bonding. */
+ int off = (BLOCK - words);
+ a -= off;
+ b -= off;
+ switch (off)
+ {
+ case 1: a[1].uli = b[1];
+ case 2: a[2].uli = b[2];
+ case 3: a[3].uli = b[3];
+ case 4: a[4].uli = b[4];
+ case 5: a[5].uli = b[5];
+ case 6: a[6].uli = b[6];
+ case 7: a[7].uli = b[7];
+#if BLOCK==16
+ case 8: a[8].uli = b[8];
+ case 9: a[9].uli = b[9];
+ case 10: a[10].uli = b[10];
+ case 11: a[11].uli = b[11];
+ case 12: a[12].uli = b[12];
+ case 13: a[13].uli = b[13];
+ case 14: a[14].uli = b[14];
+ case 15: a[15].uli = b[15];
+#endif
+ }
+ return do_bytes_remaining (a + BLOCK, b + BLOCK, bytes, ret);
+}
+
+/* The first pointer is not aligned while second pointer is. */
+static void *
+unaligned_words (struct ulw *a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ unsigned long i, words_by_block, words_by_1;
+ words_by_1 = words % BLOCK;
+ words_by_block = words / BLOCK;
+ for (; words_by_block > 0; words_by_block--)
+ {
+ if (words_by_block >= PREF_AHEAD - CACHE_LINES_PER_BLOCK)
+ for (i = 0; i < CACHE_LINES_PER_BLOCK; i++)
+ PREFETCH (b + (BLOCK / CACHE_LINES_PER_BLOCK) * (PREF_AHEAD + i));
+
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
+ a[0].uli = y0;
+ a[1].uli = y1;
+ a[2].uli = y2;
+ a[3].uli = y3;
+ a[4].uli = y4;
+ a[5].uli = y5;
+ a[6].uli = y6;
+ a[7].uli = y7;
+#if BLOCK==16
+ y0 = b[8], y1 = b[9], y2 = b[10], y3 = b[11];
+ y4 = b[12], y5 = b[13], y6 = b[14], y7 = b[15];
+ a[8].uli = y0;
+ a[9].uli = y1;
+ a[10].uli = y2;
+ a[11].uli = y3;
+ a[12].uli = y4;
+ a[13].uli = y5;
+ a[14].uli = y6;
+ a[15].uli = y7;
+#endif
+ a += BLOCK;
+ b += BLOCK;
+ }
+
+ /* Mop up any remaining bytes. */
+ return do_uwords_remaining (a, b, words_by_1, bytes, ret);
+}
+
+#else
+
+/* Target has neither HW unaligned support nor unaligned lw/ld/ualw/uald instructions.  */
+static void *
+unaligned_words (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ unsigned long i;
+ unsigned char *x;
+ for (i = 0; i < words; i++)
+ {
+ bitfields_t bw;
+ bw.v = *((reg_t*) b);
+ x = (unsigned char *) a;
+ x[0] = bw.b.B0;
+ x[1] = bw.b.B1;
+ x[2] = bw.b.B2;
+ x[3] = bw.b.B3;
+#if __mips64
+ x[4] = bw.b.B4;
+ x[5] = bw.b.B5;
+ x[6] = bw.b.B6;
+ x[7] = bw.b.B7;
+#endif
+ a += 1;
+ b += 1;
+ }
+ /* Mop up any remaining bytes. */
+ return do_bytes_remaining (a, b, bytes, ret);
+}
+
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* Both pointers are aligned, or the first is unaligned but HW supports unaligned accesses.  */
+static void *
+aligned_words (reg_t * a, const reg_t * b,
+ unsigned long words, unsigned long bytes, void *ret)
+{
+ unsigned long i, words_by_block, words_by_1;
+ words_by_1 = words % BLOCK;
+ words_by_block = words / BLOCK;
+ for (; words_by_block > 0; words_by_block--)
+ {
+ if(words_by_block >= PREF_AHEAD - CACHE_LINES_PER_BLOCK)
+ for (i = 0; i < CACHE_LINES_PER_BLOCK; i++)
+ PREFETCH (b + ((BLOCK / CACHE_LINES_PER_BLOCK) * (PREF_AHEAD + i)));
+
+ reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
+ reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
+ a[0] = x0;
+ a[1] = x1;
+ a[2] = x2;
+ a[3] = x3;
+ a[4] = x4;
+ a[5] = x5;
+ a[6] = x6;
+ a[7] = x7;
+#if BLOCK==16
+ x0 = b[8], x1 = b[9], x2 = b[10], x3 = b[11];
+ x4 = b[12], x5 = b[13], x6 = b[14], x7 = b[15];
+ a[8] = x0;
+ a[9] = x1;
+ a[10] = x2;
+ a[11] = x3;
+ a[12] = x4;
+ a[13] = x5;
+ a[14] = x6;
+ a[15] = x7;
+#endif
+ a += BLOCK;
+ b += BLOCK;
+ }
+
+ /* mop up any remaining bytes. */
+ return do_words_remaining (a, b, words_by_1, bytes, ret);
+}
+
+void *
+memcpy (void *a, const void *b, size_t len) __overloadable
+{
+ unsigned long bytes, words, i;
+ void *ret = a;
+ /* shouldn't hit that often. */
+ if (len <= 8)
+ return do_bytes (a, b, len, a);
+
+ /* Start pre-fetches ahead of time. */
+ if (len > CACHE_LINE * (PREF_AHEAD - 1))
+ for (i = 1; i < PREF_AHEAD - 1; i++)
+ PREFETCH ((char *)b + CACHE_LINE * i);
+ else
+ for (i = 1; i < len / CACHE_LINE; i++)
+ PREFETCH ((char *)b + CACHE_LINE * i);
+
+ /* Align the second pointer to word/dword alignment.
+ Note that the pointer is only 32-bits for o32/n32 ABIs. For
+ n32, loads are done as 64-bit while address remains 32-bit. */
+ bytes = ((unsigned long) b) % (sizeof (reg_t));
+
+ if (bytes)
+ {
+ bytes = (sizeof (reg_t)) - bytes;
+ if (bytes > len)
+ bytes = len;
+ do_bytes (a, b, bytes, ret);
+ if (len == bytes)
+ return ret;
+ len -= bytes;
+ a = (void *) (((unsigned char *) a) + bytes);
+ b = (const void *) (((unsigned char *) b) + bytes);
+ }
+
+ /* Second pointer now aligned. */
+ words = len / sizeof (reg_t);
+ bytes = len % sizeof (reg_t);
+
+#if HW_UNALIGNED_SUPPORT
+ /* treat possible unaligned first pointer as aligned. */
+ return aligned_words (a, b, words, bytes, ret);
+#else
+ if (((unsigned long) a) % sizeof (reg_t) == 0)
+ return aligned_words (a, b, words, bytes, ret);
+ /* need to use unaligned instructions on first pointer. */
+ return unaligned_words (a, b, words, bytes, ret);
+#endif
+}
new file mode 100644
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2018 MIPS Tech, LLC
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <string.h>
+
+#if _MIPS_SIM == _ABIO32
+#define SIZEOF_reg_t 4
+typedef unsigned long reg_t;
+#else
+#define SIZEOF_reg_t 8
+typedef unsigned long long reg_t;
+#endif
+
+typedef struct bits8
+{
+ reg_t B0:8, B1:8, B2:8, B3:8;
+#if SIZEOF_reg_t == 8
+ reg_t B4:8, B5:8, B6:8, B7:8;
+#endif
+} bits8_t;
+typedef struct bits16
+{
+ reg_t B0:16, B1:16;
+#if SIZEOF_reg_t == 8
+ reg_t B2:16, B3:16;
+#endif
+} bits16_t;
+typedef struct bits32
+{
+ reg_t B0:32;
+#if SIZEOF_reg_t == 8
+ reg_t B1:32;
+#endif
+} bits32_t;
+
+/* This union assumes that small structures can be in registers. If
+ not, then memory accesses will be done - not optimal, but ok. */
+typedef union
+{
+ reg_t v;
+ bits8_t b8;
+ bits16_t b16;
+ bits32_t b32;
+} bitfields_t;
+
+/* This code is called when aligning a pointer or there are remaining bytes
+ after doing word sets. */
+static inline void * __attribute__ ((always_inline))
+do_bytes (void *a, void *retval, unsigned char fill, const unsigned long len)
+{
+ unsigned char *x = ((unsigned char *) a);
+ unsigned long i;
+
+ for (i = 0; i < len; i++)
+ *x++ = fill;
+
+ return retval;
+}
+
+/* Pointer is aligned. */
+static void *
+do_aligned_words (reg_t * a, void * retval, reg_t fill,
+ unsigned long words, unsigned long bytes)
+{
+ unsigned long i, words_by_1, words_by_16;
+
+ words_by_1 = words % 16;
+ words_by_16 = words / 16;
+
+ /*
+ * Note: prefetching the store memory is not beneficial on most
+ * cores since the ls/st unit has store buffers that will be filled
+ * before the cache line is actually needed.
+ *
+ * Also, using prepare-for-store cache op is problematic since we
+ * don't know the implementation-defined cache line length and we
+ * don't want to touch unintended memory.
+ */
+ for (i = 0; i < words_by_16; i++)
+ {
+ a[0] = fill;
+ a[1] = fill;
+ a[2] = fill;
+ a[3] = fill;
+ a[4] = fill;
+ a[5] = fill;
+ a[6] = fill;
+ a[7] = fill;
+ a[8] = fill;
+ a[9] = fill;
+ a[10] = fill;
+ a[11] = fill;
+ a[12] = fill;
+ a[13] = fill;
+ a[14] = fill;
+ a[15] = fill;
+ a += 16;
+ }
+
+ /* do remaining words. */
+ for (i = 0; i < words_by_1; i++)
+ *a++ = fill;
+
+ /* mop up any remaining bytes. */
+ return do_bytes (a, retval, fill, bytes);
+}
+
+void *
+memset (void *a, int ifill, size_t len)
+{
+ unsigned long bytes, words;
+ bitfields_t fill;
+ void *retval = (void *) a;
+
+ /* shouldn't hit that often. */
+ if (len < 16)
+ return do_bytes (a, retval, ifill, len);
+
+ /* Align the pointer to word/dword alignment.
+ Note that the pointer is only 32-bits for o32/n32 ABIs. For
+ n32, loads are done as 64-bit while address remains 32-bit. */
+ bytes = ((unsigned long) a) % (sizeof (reg_t) * 2);
+ if (bytes)
+ {
+ bytes = (sizeof (reg_t) * 2 - bytes);
+ if (bytes > len)
+ bytes = len;
+ do_bytes (a, retval, ifill, bytes);
+ if (len == bytes)
+ return retval;
+ len -= bytes;
+ a = (void *) (((unsigned char *) a) + bytes);
+ }
+
+ /* Create correct fill value for reg_t sized variable. */
+ if (ifill != 0)
+ {
+ fill.b8.B0 = (unsigned char) ifill;
+ fill.b8.B1 = fill.b8.B0;
+ fill.b16.B1 = fill.b16.B0;
+#if SIZEOF_reg_t == 8
+ fill.b32.B1 = fill.b32.B0;
+#endif
+ }
+ else
+ fill.v = 0;
+
+ words = len / sizeof (reg_t);
+ bytes = len % sizeof (reg_t);
+ return do_aligned_words (a, retval, fill.v, words, bytes);
+}