diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 41161f5b4..2377ec10b 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,11 @@ +2013-01-10 Marcus Shawcroft + + * libc/machine/aarch64/Makefile.am (lib_a_SOURCES): + Add memmove.S and memmove-stub.c. + * libc/machine/aarch64/Makefile.in: Regenerated. + * libc/machine/aarch64/memmove-stub.c: New file. + * libc/machine/aarch64/memmove.S: New file. + 2013-01-10 Marcus Shawcroft * libc/machine/aarch64/Makefile.am (lib_a_SOURCES): Re-ordered. diff --git a/newlib/libc/machine/aarch64/Makefile.am b/newlib/libc/machine/aarch64/Makefile.am index ab0206e0b..5623bbd76 100644 --- a/newlib/libc/machine/aarch64/Makefile.am +++ b/newlib/libc/machine/aarch64/Makefile.am @@ -11,6 +11,8 @@ noinst_LIBRARIES = lib.a lib_a_SOURCES = lib_a_SOURCES += memcpy-stub.c lib_a_SOURCES += memcpy.S +lib_a_SOURCES += memmove-stub.c +lib_a_SOURCES += memmove.S lib_a_SOURCES += memset-stub.c lib_a_SOURCES += memset.S lib_a_SOURCES += setjmp.S diff --git a/newlib/libc/machine/aarch64/Makefile.in b/newlib/libc/machine/aarch64/Makefile.in index 6642f6700..c3d5882ff 100644 --- a/newlib/libc/machine/aarch64/Makefile.in +++ b/newlib/libc/machine/aarch64/Makefile.in @@ -70,6 +70,7 @@ ARFLAGS = cru lib_a_AR = $(AR) $(ARFLAGS) lib_a_LIBADD = am_lib_a_OBJECTS = lib_a-memcpy-stub.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \ + lib_a-memmove-stub.$(OBJEXT) lib_a-memmove.$(OBJEXT) \ lib_a-memset-stub.$(OBJEXT) lib_a-memset.$(OBJEXT) \ lib_a-setjmp.$(OBJEXT) lib_a-strcmp-stub.$(OBJEXT) \ lib_a-strcmp.$(OBJEXT) lib_a-strncmp-stub.$(OBJEXT) \ @@ -199,8 +200,9 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -lib_a_SOURCES = memcpy-stub.c memcpy.S memset-stub.c memset.S setjmp.S \ - strcmp-stub.c strcmp.S strncmp-stub.c strncmp.S +lib_a_SOURCES = memcpy-stub.c memcpy.S memmove-stub.c memmove.S \ + memset-stub.c memset.S setjmp.S strcmp-stub.c strcmp.S \ + strncmp-stub.c strncmp.S lib_a_CCASFLAGS = $(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) ACLOCAL_AMFLAGS = -I ../../.. -I ../../../.. @@ -269,6 +271,12 @@ lib_a-memcpy.o: memcpy.S lib_a-memcpy.obj: memcpy.S $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.S'; then $(CYGPATH_W) 'memcpy.S'; else $(CYGPATH_W) '$(srcdir)/memcpy.S'; fi` +lib_a-memmove.o: memmove.S + $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memmove.o `test -f 'memmove.S' || echo '$(srcdir)/'`memmove.S + +lib_a-memmove.obj: memmove.S + $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memmove.obj `if test -f 'memmove.S'; then $(CYGPATH_W) 'memmove.S'; else $(CYGPATH_W) '$(srcdir)/memmove.S'; fi` + lib_a-memset.o: memset.S $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memset.o `test -f 'memset.S' || echo '$(srcdir)/'`memset.S @@ -305,6 +313,12 @@ lib_a-memcpy-stub.o: memcpy-stub.c lib_a-memcpy-stub.obj: memcpy-stub.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcpy-stub.obj `if test -f 'memcpy-stub.c'; then $(CYGPATH_W) 'memcpy-stub.c'; else $(CYGPATH_W) '$(srcdir)/memcpy-stub.c'; fi` +lib_a-memmove-stub.o: memmove-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memmove-stub.o `test -f 'memmove-stub.c' || echo '$(srcdir)/'`memmove-stub.c + +lib_a-memmove-stub.obj: memmove-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memmove-stub.obj `if test -f 'memmove-stub.c'; then $(CYGPATH_W) 'memmove-stub.c'; else $(CYGPATH_W) '$(srcdir)/memmove-stub.c'; fi` + lib_a-memset-stub.o: memset-stub.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memset-stub.o `test -f 'memset-stub.c' || echo '$(srcdir)/'`memset-stub.c diff --git a/newlib/libc/machine/aarch64/memmove-stub.c b/newlib/libc/machine/aarch64/memmove-stub.c new file mode 100644 index 000000000..8fa4ab938 --- /dev/null +++ b/newlib/libc/machine/aarch64/memmove-stub.c @@ -0,0 +1,31 @@ +/* Copyright (c) 2013, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) +# include "../../string/memmove.c" +#else +/* See memmove.S */ +#endif diff --git a/newlib/libc/machine/aarch64/memmove.S b/newlib/libc/machine/aarch64/memmove.S new file mode 100644 index 000000000..8cb91f099 --- /dev/null +++ b/newlib/libc/machine/aarch64/memmove.S @@ -0,0 +1,329 @@ +/* Copyright (c) 2013, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) +/* See memmove-stub.c */ +#else + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Unaligned accesses + */ + + .macro def_fn f p2align=0 + .text + .p2align \p2align + .global \f + .type \f, %function +\f: + .endm + +/* Parameters and result. */ +#define dstin x0 +#define src x1 +#define count x2 +#define tmp1 x3 +#define tmp1w w3 +#define tmp2 x4 +#define tmp2w w4 +#define tmp3 x5 +#define tmp3w w5 +#define dst x6 + +#define A_l x7 +#define A_h x8 +#define B_l x9 +#define B_h x10 +#define C_l x11 +#define C_h x12 +#define D_l x13 +#define D_h x14 + +def_fn memmove, 6 + cmp dstin, src + b.lo .Ldownwards + add tmp1, src, count + cmp dstin, tmp1 + b.hs memcpy /* No overlap. */ + + /* Upwards move with potential overlap. + * Need to move from the tail backwards. SRC and DST point one + * byte beyond the remaining data to move. */ + add dst, dstin, count + add src, src, count + cmp count, #64 + b.ge .Lmov_not_short_up + + /* Deal with small moves quickly by dropping straight into the + * exit block. */ +.Ltail63up: + /* Move up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. */ + ands tmp1, count, #0x30 + b.eq .Ltail15up + sub dst, dst, tmp1 + sub src, src, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #32] + stp A_l, A_h, [dst, #32] +1: + ldp A_l, A_h, [src, #16] + stp A_l, A_h, [dst, #16] +2: + ldp A_l, A_h, [src] + stp A_l, A_h, [dst] +.Ltail15up: + /* Move up to 15 bytes of data. Does not assume additional data + * being moved. */ + tbz count, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz count, #2, 1f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +1: + tbz count, #1, 1f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +1: + tbz count, #0, 1f + ldrb tmp1w, [src, #-1] + strb tmp1w, [dst, #-1] +1: + ret + +.Lmov_not_short_up: + /* We don't much care about the alignment of DST, but we want SRC + * to be 128-bit (16 byte) aligned so that we don't cross cache line + * boundaries on both loads and stores. */ + ands tmp2, src, #15 /* Bytes to reach alignment. */ + b.eq 2f + sub count, count, tmp2 + /* Move enough data to reach alignment; unlike memcpy, we have to + * be aware of the overlap, which means we can't move data twice. */ + tbz tmp2, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz tmp2, #2, 1f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +1: + tbz tmp2, #1, 1f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +1: + tbz tmp2, #0, 1f + ldrb tmp1w, [src, #-1]! + strb tmp1w, [dst, #-1]! +1: + + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63up +2: + subs count, count, #128 + b.ge .Lmov_body_large_up + /* Less than 128 bytes to move, so handle 64 here and then jump + * to the tail. */ + ldp A_l, A_h, [src, #-64]! + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48] + stp A_l, A_h, [dst, #-64]! + stp B_l, B_h, [dst, #16] + stp C_l, C_h, [dst, #32] + stp D_l, D_h, [dst, #48] + tst count, #0x3f + b.ne .Ltail63up + ret + + /* Critical loop. Start at a new Icache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. */ + .p2align 6 +.Lmov_body_large_up: + /* There are at least 128 bytes to move. */ + ldp A_l, A_h, [src, #-16] + ldp B_l, B_h, [src, #-32] + ldp C_l, C_h, [src, #-48] + ldp D_l, D_h, [src, #-64]! +1: + stp A_l, A_h, [dst, #-16] + ldp A_l, A_h, [src, #-16] + stp B_l, B_h, [dst, #-32] + ldp B_l, B_h, [src, #-32] + stp C_l, C_h, [dst, #-48] + ldp C_l, C_h, [src, #-48] + stp D_l, D_h, [dst, #-64]! + ldp D_l, D_h, [src, #-64]! + subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #-16] + stp B_l, B_h, [dst, #-32] + stp C_l, C_h, [dst, #-48] + stp D_l, D_h, [dst, #-64]! + tst count, #0x3f + b.ne .Ltail63up + ret + + +.Ldownwards: + /* For a downwards move we can safely use memcpy provided that + * DST is more than 16 bytes away from SRC. */ + sub tmp1, src, #16 + cmp dstin, tmp1 + b.ls memcpy /* May overlap, but not critically. */ + + mov dst, dstin /* Preserve DSTIN for return value. */ + cmp count, #64 + b.ge .Lmov_not_short_down + + /* Deal with small moves quickly by dropping straight into the + * exit block. */ +.Ltail63down: + /* Move up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. */ + ands tmp1, count, #0x30 + b.eq .Ltail15down + add dst, dst, tmp1 + add src, src, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #-48] + stp A_l, A_h, [dst, #-48] +1: + ldp A_l, A_h, [src, #-32] + stp A_l, A_h, [dst, #-32] +2: + ldp A_l, A_h, [src, #-16] + stp A_l, A_h, [dst, #-16] +.Ltail15down: + /* Move up to 15 bytes of data. Does not assume additional data + being moved. */ + tbz count, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz count, #2, 1f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +1: + tbz count, #1, 1f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +1: + tbz count, #0, 1f + ldrb tmp1w, [src] + strb tmp1w, [dst] +1: + ret + +.Lmov_not_short_down: + /* We don't much care about the alignment of DST, but we want SRC + * to be 128-bit (16 byte) aligned so that we don't cross cache line + * boundaries on both loads and stores. */ + neg tmp2, src + ands tmp2, tmp2, #15 /* Bytes to reach alignment. */ + b.eq 2f + sub count, count, tmp2 + /* Move enough data to reach alignment; unlike memcpy, we have to + * be aware of the overlap, which means we can't move data twice. */ + tbz tmp2, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz tmp2, #2, 1f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +1: + tbz tmp2, #1, 1f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +1: + tbz tmp2, #0, 1f + ldrb tmp1w, [src], #1 + strb tmp1w, [dst], #1 +1: + + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63down +2: + subs count, count, #128 + b.ge .Lmov_body_large_down + /* Less than 128 bytes to move, so handle 64 here and then jump + * to the tail. */ + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48] + stp A_l, A_h, [dst] + stp B_l, B_h, [dst, #16] + stp C_l, C_h, [dst, #32] + stp D_l, D_h, [dst, #48] + tst count, #0x3f + add src, src, #64 + add dst, dst, #64 + b.ne .Ltail63down + ret + + /* Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. */ + .p2align 6 +.Lmov_body_large_down: + /* There are at least 128 bytes to move. */ + ldp A_l, A_h, [src, #0] + sub dst, dst, #16 /* Pre-bias. */ + ldp B_l, B_h, [src, #16] + ldp C_l, C_h, [src, #32] + ldp D_l, D_h, [src, #48]! /* src += 64 - Pre-bias. */ +1: + stp A_l, A_h, [dst, #16] + ldp A_l, A_h, [src, #16] + stp B_l, B_h, [dst, #32] + ldp B_l, B_h, [src, #32] + stp C_l, C_h, [dst, #48] + ldp C_l, C_h, [src, #48] + stp D_l, D_h, [dst, #64]! + ldp D_l, D_h, [src, #64]! + subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #16] + stp B_l, B_h, [dst, #32] + stp C_l, C_h, [dst, #48] + stp D_l, D_h, [dst, #64] + add src, src, #16 + add dst, dst, #64 + 16 + tst count, #0x3f + b.ne .Ltail63down + ret + .size memmove, . - memmove +#endif