From 2b7e0645c95a3b483231a61e671b829420ddca59 Mon Sep 17 00:00:00 2001 From: Jeff Johnston Date: Thu, 22 Jan 2009 00:02:35 +0000 Subject: [PATCH] 2009-01-21 Richard Earnshaw * libc/machine/arm/arm_asm.h: New file. * libc/machine/arm/strlen.c: New file. * libc/machine/arm/strcpy.c: New file. * libc/machine/arm/strcmp.c: New file. * libc/machine/arm/Makefile.am: Add new string routines. --- newlib/ChangeLog | 8 + newlib/libc/machine/arm/Makefile.am | 2 +- newlib/libc/machine/arm/Makefile.in | 27 +- newlib/libc/machine/arm/arm_asm.h | 81 ++++++ newlib/libc/machine/arm/strcmp.c | 404 ++++++++++++++++++++++++++++ newlib/libc/machine/arm/strcpy.c | 169 ++++++++++++ newlib/libc/machine/arm/strlen.c | 177 ++++++++++++ 7 files changed, 864 insertions(+), 4 deletions(-) create mode 100644 newlib/libc/machine/arm/arm_asm.h create mode 100644 newlib/libc/machine/arm/strcmp.c create mode 100644 newlib/libc/machine/arm/strcpy.c create mode 100644 newlib/libc/machine/arm/strlen.c diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 888553fc7..bc1eab0d1 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,11 @@ +2009-01-21 Richard Earnshaw + + * libc/machine/arm/arm_asm.h: New file. + * libc/machine/arm/strlen.c: New file. + * libc/machine/arm/strcpy.c: New file. + * libc/machine/arm/strcmp.c: New file. + * libc/machine/arm/Makefile.am: Add new string routines. + 2009-01-19 Neal H. 
Walfield * libc/include/stdint.h (INT64_C, UINT64_C, INTMAX_C, UINTMAX_C) diff --git a/newlib/libc/machine/arm/Makefile.am b/newlib/libc/machine/arm/Makefile.am index 29b0410f7..19639b5c2 100644 --- a/newlib/libc/machine/arm/Makefile.am +++ b/newlib/libc/machine/arm/Makefile.am @@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -lib_a_SOURCES = setjmp.S access.c +lib_a_SOURCES = setjmp.S access.c strlen.c strcmp.c strcpy.c lib_a_CCASFLAGS=$(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) diff --git a/newlib/libc/machine/arm/Makefile.in b/newlib/libc/machine/arm/Makefile.in index 81c873eaf..3cdfd13e5 100644 --- a/newlib/libc/machine/arm/Makefile.in +++ b/newlib/libc/machine/arm/Makefile.in @@ -40,7 +40,8 @@ DIST_COMMON = $(srcdir)/../../../../config.guess \ $(srcdir)/../../../../config.sub $(srcdir)/Makefile.in \ $(srcdir)/Makefile.am $(top_srcdir)/configure \ $(am__configure_deps) $(srcdir)/../../../../mkinstalldirs \ - $(srcdir)/../../../../compile + $(srcdir)/../../../../compile $(srcdir)/../../../../compile \ + $(srcdir)/../../../../compile $(srcdir)/../../../../compile subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../../../acinclude.m4 \ @@ -55,7 +56,9 @@ LIBRARIES = $(noinst_LIBRARIES) ARFLAGS = cru lib_a_AR = $(AR) $(ARFLAGS) lib_a_LIBADD = -am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-access.$(OBJEXT) +am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-access.$(OBJEXT) \ + lib_a-strlen.$(OBJEXT) lib_a-strcmp.$(OBJEXT) \ + lib_a-strcpy.$(OBJEXT) lib_a_OBJECTS = $(am_lib_a_OBJECTS) DEFAULT_INCLUDES = -I. -I$(srcdir) depcomp = @@ -180,7 +183,7 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -lib_a_SOURCES = setjmp.S access.c +lib_a_SOURCES = setjmp.S access.c strlen.c strcmp.c strcpy.c lib_a_CCASFLAGS = $(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) ACLOCAL_AMFLAGS = -I ../../.. -I ../../../.. 
@@ -259,6 +262,24 @@ lib_a-access.o: access.c lib_a-access.obj: access.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` + +lib_a-strlen.o: strlen.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.o `test -f 'strlen.c' || echo '$(srcdir)/'`strlen.c + +lib_a-strlen.obj: strlen.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.obj `if test -f 'strlen.c'; then $(CYGPATH_W) 'strlen.c'; else $(CYGPATH_W) '$(srcdir)/strlen.c'; fi` + +lib_a-strcmp.o: strcmp.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcmp.o `test -f 'strcmp.c' || echo '$(srcdir)/'`strcmp.c + +lib_a-strcmp.obj: strcmp.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcmp.obj `if test -f 'strcmp.c'; then $(CYGPATH_W) 'strcmp.c'; else $(CYGPATH_W) '$(srcdir)/strcmp.c'; fi` + +lib_a-strcpy.o: strcpy.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcpy.o `test -f 'strcpy.c' || echo '$(srcdir)/'`strcpy.c + +lib_a-strcpy.obj: strcpy.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcpy.obj `if test -f 'strcpy.c'; then $(CYGPATH_W) 'strcpy.c'; else $(CYGPATH_W) '$(srcdir)/strcpy.c'; fi` uninstall-info-am: ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) diff --git a/newlib/libc/machine/arm/arm_asm.h b/newlib/libc/machine/arm/arm_asm.h new file mode 100644 index 000000000..c6d3abc65 --- /dev/null +++ b/newlib/libc/machine/arm/arm_asm.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2009 ARM Ltd + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ARM_ASM__H +#define ARM_ASM__H /* must match the #ifndef: the asm macros below may only be emitted once */ + +/* First define some macros that keep everything else sane.
*/ +#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) +#define _ISA_ARM_7 +#endif + +#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6__) || \ + defined (__ARM_ARCH_6J__) || defined (__ARM_ARCH_6T2__) || \ + defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) || \ + defined (__ARM_ARCH_6Z__) +#define _ISA_ARM_6 +#endif + +#if defined (_ISA_ARM_6) || defined (__ARM_ARCH_5__) || \ + defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5TE__) || \ + defined (__ARM_ARCH_5TEJ__) +#define _ISA_ARM_5 +#endif + +#if defined (_ISA_ARM_5) || defined (__ARM_ARCH_4T__) +#define _ISA_ARM_4T +#endif + +#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7__) +#define _ISA_THUMB_2 +#endif + +#if defined (_ISA_THUMB_2) || defined (__ARM_ARCH_6M__) +#define _ISA_THUMB_1 +#endif + + +/* Now some macros for common instruction sequences. */ + +asm(".macro RETURN cond=\n\t" +#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1) + "bx\\cond lr\n\t" +#else + "mov\\cond pc, lr\n\t" +#endif + ".endm" + ); + +asm(".macro optpld base, offset=#0\n\t" +#if defined (_ISA_ARM_7) + "pld [\\base, \\offset]\n\t" +#endif + ".endm" + ); + +#endif /* ARM_ASM__H */ diff --git a/newlib/libc/machine/arm/strcmp.c b/newlib/libc/machine/arm/strcmp.c new file mode 100644 index 000000000..f7d39aa77 --- /dev/null +++ b/newlib/libc/machine/arm/strcmp.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2008 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arm_asm.h" +#include <_ansi.h> +#include <string.h> + +#ifdef __ARMEB__ +#define SHFT2LSB "lsl" +#define SHFT2MSB "lsr" +#define MSB "0x000000ff" +#define LSB "0xff000000" +#else +#define SHFT2LSB "lsr" +#define SHFT2MSB "lsl" +#define MSB "0xff000000" +#define LSB "0x000000ff" +#endif + +#ifdef __thumb2__ +#define magic1(REG) "#0x01010101" +#define magic2(REG) "#0x80808080" +#else +#define magic1(REG) #REG +#define magic2(REG) #REG ", lsl #7" +#endif + +int +__attribute__((naked)) strcmp (const char* s1, const char* s2) +{ + asm( +#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ + (defined (__thumb__) && !defined (__thumb2__))) + "optpld r0\n\t" + "optpld r1\n\t" + "eor r2, r0, r1\n\t" + "tst r2, #3\n\t" + /* Strings not at same byte offset from a word boundary. */ + "bne strcmp_unaligned\n\t" + "ands r2, r0, #3\n\t" + "bic r0, r0, #3\n\t" + "bic r1, r1, #3\n\t" + "ldr ip, [r0], #4\n\t" + "it eq\n\t" + "ldreq r3, [r1], #4\n\t" + "beq 1f\n\t" + /* Although s1 and s2 have identical initial alignment, they are + not currently word aligned.
Rather than comparing bytes, + make sure that any bytes fetched from before the addressed + bytes are forced to 0xff. Then they will always compare + equal. */ + "eor r2, r2, #3\n\t" + "lsl r2, r2, #3\n\t" + "mvn r3, #"MSB"\n\t" + SHFT2LSB" r2, r3, r2\n\t" + "ldr r3, [r1], #4\n\t" + "orr ip, ip, r2\n\t" + "orr r3, r3, r2\n" + "1:\n\t" +#ifndef __thumb2__ + /* Load the 'magic' constant 0x01010101. */ + "str r4, [sp, #-4]!\n\t" + "mov r4, #1\n\t" + "orr r4, r4, r4, lsl #8\n\t" + "orr r4, r4, r4, lsl #16\n" +#endif + ".p2align 2\n" + "4:\n\t" + "optpld r0, #8\n\t" + "optpld r1, #8\n\t" + "sub r2, ip, "magic1(r4)"\n\t" + "cmp ip, r3\n\t" + "itttt eq\n\t" + /* check for any zero bytes in first word: (w - 0x01010101) & ~w; XOR would also flag 0x80 bytes */ + "biceq r2, r2, ip\n\t" + "tsteq r2, "magic2(r4)"\n\t" + "ldreq ip, [r0], #4\n\t" + "ldreq r3, [r1], #4\n\t" + "beq 4b\n" + "2:\n\t" + /* There's a zero or a different byte in the word */ + SHFT2MSB" r0, ip, #24\n\t" + SHFT2LSB" ip, ip, #8\n\t" + "cmp r0, #1\n\t" + "it cs\n\t" + "cmpcs r0, r3, "SHFT2MSB" #24\n\t" + "it eq\n\t" + SHFT2LSB"eq r3, r3, #8\n\t" + "beq 2b\n\t" + SHFT2MSB" r3, r3, #24\n\t" SHFT2LSB" r0, r0, #1\n\t" "sub r0, r0, r3, "SHFT2LSB" #1\n\t" /* Bring s2's byte to the same position as r0's, then shift both one place with SHFT2LSB before subtracting: the sign stays correct even when the bytes differ by 128 or more. The result is a multiple of the byte difference, not the difference itself. */ +#ifndef __thumb2__ + "ldr r4, [sp], #4\n\t" +#endif + "RETURN" +#elif (defined (__thumb__) && !defined (__thumb2__)) + "1:\n\t" + "ldrb r2, [r0]\n\t" + "ldrb r3, [r1]\n\t" + "add r0, r0, #1\n\t" + "add r1, r1, #1\n\t" + "cmp r2, #0\n\t" + "beq 2f\n\t" + "cmp r2, r3\n\t" + "beq 1b\n\t" + "2:\n\t" + "sub r0, r2, r3\n\t" + "bx lr" +#else + "3:\n\t" + "ldrb r2, [r0], #1\n\t" + "ldrb r3, [r1], #1\n\t" + "cmp r2, #1\n\t" + "it cs\n\t" + "cmpcs r2, r3\n\t" + "beq 3b\n\t" + "sub r0, r2, r3\n\t" + "RETURN" +#endif + ); +} + +#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ + (defined (__thumb__) && !defined (__thumb2__))) +static int __attribute__((naked, used)) +strcmp_unaligned(const char* s1, const char* s2) +{ +#if 0 + /* The assembly code below is based on the following algorithm.
*/ +#ifdef __ARMEB__ +#define RSHIFT << +#define LSHIFT >> +#else +#define RSHIFT >> +#define LSHIFT << +#endif + +#define body(shift) \ + mask = 0xffffffffU RSHIFT shift; \ + w1 = *wp1++; \ + w2 = *wp2++; \ + do \ + { \ + t1 = w1 & mask; \ + if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \ + { \ + w2 RSHIFT= shift; \ + break; \ + } \ + if (__builtin_expect(((w1 - b1) ^ w1) & (b1 << 7), 0)) \ + { \ + if ((((w1 - b1) ^ w1) & (b1 << 7)) & mask) \ + w2 RSHIFT= shift; \ + else \ + { \ + w2 = *wp2; \ + t1 = w1 RSHIFT (32 - shift); \ + w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ + } \ + break; \ + } \ + w2 = *wp2++; \ + t1 ^= w1; \ + if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \ + { \ + t1 = w1 >> (32 - shift); \ + w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \ + break; \ + } \ + w1 = *wp1++; \ + } while (1) + + const unsigned* wp1; + const unsigned* wp2; + unsigned w1, w2; + unsigned mask; + unsigned shift; + unsigned b1 = 0x01010101; + char c1, c2; + unsigned t1; + + while (((unsigned) s1) & 3) + { + c1 = *s1++; + c2 = *s2++; + if (c1 == 0 || c1 != c2) + return c1 - (int)c2; + } + wp1 = (unsigned*) (((unsigned)s1) & ~3); + wp2 = (unsigned*) (((unsigned)s2) & ~3); + t1 = ((unsigned) s2) & 3; + if (t1 == 1) + { + body(8); + } + else if (t1 == 2) + { + body(16); + } + else + { + body (24); + } + + do + { +#ifdef __ARMEB__ + c1 = (char) t1 >> 24; + c2 = (char) w2 >> 24; +#else + c1 = (char) t1; + c2 = (char) w2; +#endif + t1 RSHIFT= 8; + w2 RSHIFT= 8; + } while (c1 != 0 && c1 == c2); + return c1 - c2; +#endif + + asm("wp1 .req r0\n\t" + "wp2 .req r1\n\t" + "b1 .req r2\n\t" + "w1 .req r4\n\t" + "w2 .req r5\n\t" + "t1 .req ip\n\t" + "@ r3 is scratch\n" + + /* First of all, compare bytes until wp1(sp1) is word-aligned. 
*/ + "1:\n\t" + "tst wp1, #3\n\t" + "beq 2f\n\t" + "ldrb r2, [wp1], #1\n\t" + "ldrb r3, [wp2], #1\n\t" + "cmp r2, #1\n\t" + "it cs\n\t" + "cmpcs r2, r3\n\t" + "beq 1b\n\t" + "sub r0, r2, r3\n\t" + "RETURN\n" + + "2:\n\t" + "str r5, [sp, #-4]!\n\t" + "str r4, [sp, #-4]!\n\t" + // "stmfd sp!, {r4, r5}\n\t" + "mov b1, #1\n\t" + "orr b1, b1, b1, lsl #8\n\t" + "orr b1, b1, b1, lsl #16\n\t" + + "and t1, wp2, #3\n\t" + "bic wp2, wp2, #3\n\t" + "ldr w1, [wp1], #4\n\t" + "ldr w2, [wp2], #4\n\t" + "cmp t1, #2\n\t" + "beq 2f\n\t" + "bhi 3f\n" + + /* Critical inner Loop: Block with 3 bytes initial overlap */ + ".p2align 2\n" + "1:\n\t" + "bic t1, w1, #"MSB"\n\t" + "cmp t1, w2, "SHFT2LSB" #8\n\t" + "sub r3, w1, b1\n\t" + "bic r3, r3, w1\n\t" /* zero-byte syndrome is (w1 - b1) & ~w1; XOR would also flag 0x80 bytes */ + "bne 4f\n\t" + "ands r3, r3, b1, lsl #7\n\t" + "it eq\n\t" + "ldreq w2, [wp2], #4\n\t" + "bne 5f\n\t" + "eor t1, t1, w1\n\t" + "cmp t1, w2, "SHFT2MSB" #24\n\t" + "bne 6f\n\t" + "ldr w1, [wp1], #4\n\t" + "b 1b\n" + "4:\n\t" + SHFT2LSB" w2, w2, #8\n\t" + "b 8f\n" + + "5:\n\t" + "bics r3, r3, #"MSB"\n\t" + "bne 7f\n\t" + "ldrb w2, [wp2]\n\t" + SHFT2LSB" t1, w1, #24\n\t" +#ifdef __ARMEB__ + SHFT2LSB" w2, w2, #24\n\t" +#endif + "b 8f\n" + + "6:\n\t" + SHFT2LSB" t1, w1, #24\n\t" + "and w2, w2, #"LSB"\n\t" + "b 8f\n" + + /* Critical inner Loop: Block with 2 bytes initial overlap */ + ".p2align 2\n" + "2:\n\t" + SHFT2MSB" t1, w1, #16\n\t" + "sub r3, w1, b1\n\t" + SHFT2LSB" t1, t1, #16\n\t" + "bic r3, r3, w1\n\t" + "cmp t1, w2, "SHFT2LSB" #16\n\t" + "bne 4f\n\t" + "ands r3, r3, b1, lsl #7\n\t" + "it eq\n\t" + "ldreq w2, [wp2], #4\n\t" + "bne 5f\n\t" + "eor t1, t1, w1\n\t" + "cmp t1, w2, "SHFT2MSB" #16\n\t" + "bne 6f\n\t" + "ldr w1, [wp1], #4\n\t" + "b 2b\n" + + "5:\n\t" + SHFT2MSB"s r3, r3, #16\n\t" + "bne 7f\n\t" + "ldrh w2, [wp2]\n\t" + SHFT2LSB" t1, w1, #16\n\t" +#ifdef __ARMEB__ + SHFT2LSB" w2, w2, #16\n\t" +#endif + "b 8f\n" + + "6:\n\t" + SHFT2MSB" w2, w2, #16\n\t" + SHFT2LSB" t1, w1, #16\n\t" + "4:\n\t" + SHFT2LSB" w2, w2, #16\n\t" + "b
 8f\n\t" + + /* Critical inner Loop: Block with 1 byte initial overlap */ + ".p2align 2\n" + "3:\n\t" + "and t1, w1, #"LSB"\n\t" + "cmp t1, w2, "SHFT2LSB" #24\n\t" + "sub r3, w1, b1\n\t" + "bic r3, r3, w1\n\t" + "bne 4f\n\t" + "ands r3, r3, b1, lsl #7\n\t" + "it eq\n\t" + "ldreq w2, [wp2], #4\n\t" + "bne 5f\n\t" + "eor t1, t1, w1\n\t" + "cmp t1, w2, "SHFT2MSB" #8\n\t" + "bne 6f\n\t" + "ldr w1, [wp1], #4\n\t" + "b 3b\n" + "4:\n\t" + SHFT2LSB" w2, w2, #24\n\t" + "b 8f\n" + "5:\n\t" + "tst w1, #"LSB"\n\t" /* test the overlapping byte of w1 itself: endian-neutral, unlike bit 7 of the syndrome */ + "beq 7f\n\t" + "ldr w2, [wp2], #4\n" + "6:\n\t" + SHFT2LSB" t1, w1, #8\n\t" + "bic w2, w2, #"MSB"\n\t" + "b 8f\n" + "7:\n\t" + "mov r0, #0\n\t" + // "ldmfd sp!, {r4, r5}\n\t" + "ldr r4, [sp], #4\n\t" + "ldr r5, [sp], #4\n\t" + "RETURN\n" + "8:\n\t" + "and r2, t1, #"LSB"\n\t" + "and r0, w2, #"LSB"\n\t" + "cmp r0, #1\n\t" + "it cs\n\t" + "cmpcs r0, r2\n\t" + "itt eq\n\t" + SHFT2LSB"eq t1, t1, #8\n\t" + SHFT2LSB"eq w2, w2, #8\n\t" + "beq 8b\n\t" + "sub r0, r2, r0\n\t" + // "ldmfd sp!, {r4, r5}\n\t" + "ldr r4, [sp], #4\n\t" + "ldr r5, [sp], #4\n\t" + "RETURN"); +} + +#endif diff --git a/newlib/libc/machine/arm/strcpy.c b/newlib/libc/machine/arm/strcpy.c new file mode 100644 index 000000000..f44204c5b --- /dev/null +++ b/newlib/libc/machine/arm/strcpy.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2008 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arm_asm.h" +#include <_ansi.h> +#include <string.h> + +#ifdef __thumb2__ +#define magic1(REG) "#0x01010101" +#define magic2(REG) "#0x80808080" +#else +#define magic1(REG) #REG +#define magic2(REG) #REG ", lsl #7" +#endif + +char* __attribute__((naked)) +strcpy (char* dst, const char* src) +{ + asm ( +#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ + (defined (__thumb__) && !defined (__thumb2__))) + "optpld r1\n\t" + "eor r2, r0, r1\n\t" + "mov ip, r0\n\t" + "tst r2, #3\n\t" + "bne 4f\n\t" + "tst r1, #3\n\t" + "bne 3f\n" + "5:\n\t" +#ifndef __thumb2__ + "str r5, [sp, #-4]!\n\t" + "mov r5, #0x01\n\t" + "orr r5, r5, r5, lsl #8\n\t" + "orr r5, r5, r5, lsl #16\n\t" +#endif + + "str r4, [sp, #-4]!\n\t" + "tst r1, #4\n\t" + "ldr r3, [r1], #4\n\t" + "beq 2f\n\t" + "sub r2, r3, "magic1(r5)"\n\t" + "bics r2, r2, r3\n\t" /* zero-byte test is (w - 0x01010101) & ~w; XOR would also flag 0x80 bytes and send a word with no NUL into the byte loop at 1: */ + "tst r2, "magic2(r5)"\n\t" + "itt eq\n\t" + "streq r3, [ip], #4\n\t" + "ldreq r3, [r1], #4\n" + "bne 1f\n\t" + /* Inner loop. We now know that r1 is 64-bit aligned, so we + can safely fetch up to two words. This allows us to avoid + load stalls.
*/ + ".p2align 2\n" + "2:\n\t" + "optpld r1, #8\n\t" + "ldr r4, [r1], #4\n\t" + "sub r2, r3, "magic1(r5)"\n\t" + "bics r2, r2, r3\n\t" + "tst r2, "magic2(r5)"\n\t" + "sub r2, r4, "magic1(r5)"\n\t" + "bne 1f\n\t" + "str r3, [ip], #4\n\t" + "bics r2, r2, r4\n\t" + "tst r2, "magic2(r5)"\n\t" + "itt eq\n\t" + "ldreq r3, [r1], #4\n\t" + "streq r4, [ip], #4\n\t" + "beq 2b\n\t" + "mov r3, r4\n" + "1:\n\t" +#ifdef __ARMEB__ + "rors r3, r3, #24\n\t" +#endif + "strb r3, [ip], #1\n\t" + "tst r3, #0xff\n\t" +#ifdef __ARMEL__ + "ror r3, r3, #8\n\t" +#endif + "bne 1b\n\t" + "ldr r4, [sp], #4\n\t" +#ifndef __thumb2__ + "ldr r5, [sp], #4\n\t" +#endif + "RETURN\n" + + /* Strings have the same offset from word alignment, but it's + not zero. */ + "3:\n\t" + "tst r1, #1\n\t" + "beq 1f\n\t" + "ldrb r2, [r1], #1\n\t" + "strb r2, [ip], #1\n\t" + "cmp r2, #0\n\t" + "it eq\n" + "RETURN eq\n" + "1:\n\t" + "tst r1, #2\n\t" + "beq 5b\n\t" + "ldrh r2, [r1], #2\n\t" +#ifdef __ARMEB__ + "tst r2, #0xff00\n\t" + "iteet ne\n\t" + "strneh r2, [ip], #2\n\t" + "lsreq r2, r2, #8\n\t" + "streqb r2, [ip]\n\t" + "tstne r2, #0xff\n\t" +#else + "tst r2, #0xff\n\t" + "itet ne\n\t" + "strneh r2, [ip], #2\n\t" + "streqb r2, [ip]\n\t" + "tstne r2, #0xff00\n\t" +#endif + "bne 5b\n\t" + "RETURN\n" + + /* src and dst do not have a common word-alignment. Fall back to + byte copying.
*/ + "4:\n\t" + "ldrb r2, [r1], #1\n\t" + "strb r2, [ip], #1\n\t" + "cmp r2, #0\n\t" + "bne 4b\n\t" + "RETURN" + +#elif !defined (__thumb__) || defined (__thumb2__) + "mov r3, r0\n\t" + "1:\n\t" + "ldrb r2, [r1], #1\n\t" + "strb r2, [r3], #1\n\t" + "cmp r2, #0\n\t" + "bne 1b\n\t" + "RETURN" +#else + "mov r3, r0\n\t" + "1:\n\t" + "ldrb r2, [r1]\n\t" + "add r1, r1, #1\n\t" + "strb r2, [r3]\n\t" + "add r3, r3, #1\n\t" + "cmp r2, #0\n\t" + "bne 1b\n\t" + "RETURN" +#endif + ); +} diff --git a/newlib/libc/machine/arm/strlen.c b/newlib/libc/machine/arm/strlen.c new file mode 100644 index 000000000..6442c77ed --- /dev/null +++ b/newlib/libc/machine/arm/strlen.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2008 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arm_asm.h" +#include <_ansi.h> +#include <string.h> +#include <stddef.h> + +#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ + (defined (__thumb__) && !defined (__thumb2__)) + +size_t +strlen (const char* str) +{ + int scratch; +#if defined (__thumb__) && !defined (__thumb2__) + size_t len; + asm ("mov %0, #0\n" + "1:\n\t" + "ldrb %1, [%2, %0]\n\t" + "add %0, %0, #1\n\t" + "cmp %1, #0\n\t" + "bne 1b" + : "=&r" (len), "=&r" (scratch) : "r" (str) : "memory", "cc"); + return len - 1; +#else + const char* end; + asm ("1:\n\t" + "ldrb %1, [%0], #1\n\t" + "cmp %1, #0\n\t" + "bne 1b" + : "=&r" (end), "=&r" (scratch) : "0" (str) : "memory", "cc"); + return end - str - 1; +#endif +} +#else + +size_t __attribute__((naked)) +strlen (const char* str) +{ + asm ("len .req r0\n\t" + "data .req r3\n\t" + "addr .req r1\n\t" + + "optpld r0\n\t" + /* Word-align address */ + "bic addr, r0, #3\n\t" + /* Get adjustment for start ... */ + "ands len, r0, #3\n\t" + "neg len, len\n\t" + /* First word of data */ + "ldr data, [addr], #4\n\t" + /* Ensure bytes preceding start ... */ + "add ip, len, #4\n\t" + "mov ip, ip, asl #3\n\t" + "mvn r2, #0\n\t" + "it ne\n\t" + /* ...
are masked out */ +#ifdef __thumb__ +# ifdef __ARMEB__ + "lslne r2, ip\n\t" +# else + "lsrne r2, ip\n\t" +# endif + "it ne\n\torrne data, data, r2\n\t" /* only when str was misaligned: otherwise r2 is still all-ones and would hide the whole first word */ +#else +# ifdef __ARMEB__ + "orrne data, data, r2, lsl ip\n\t" +# else + "orrne data, data, r2, lsr ip\n\t" +# endif +#endif + /* Magic const 0x01010101 */ +#ifdef _ISA_ARM_7 + "movw ip, #0x101\n\t" +#else + "mov ip, #0x1\n\t" + "orr ip, ip, ip, lsl #8\n\t" +#endif + "orr ip, ip, ip, lsl #16\n" + + /* This is the main loop. We subtract one from each byte in the + word and mask with ~data: a byte's sign bit stays set only at or above a zero byte (plain XOR would also flag 0x80 bytes). */ + "1:\n\t" + /* test (data - 0x01010101) */ + "sub r2, data, ip\n\t" + /* ... & ~data */ + "bic r2, r2, data\n\t" + /* ... & 0x80808080 == 0? */ + "ands r2, r2, ip, lsl #7\n\t" +#ifdef _ISA_ARM_7 + /* yes, get more data... */ + "itt eq\n\t" + "ldreq data, [addr], #4\n\t" + /* and 4 more bytes */ + "addeq len, len, #4\n\t" + /* If we have PLD, then unroll the loop a bit. */ + "optpld addr, #8\n\t" + /* test (data - 0x01010101) */ + "ittt eq\n\t" + "subeq r2, data, ip\n\t" + /* ... & ~data */ + "biceq r2, r2, data\n\t" + /* ... & 0x80808080 == 0? */ + "andeqs r2, r2, ip, lsl #7\n\t" +#endif + "itt eq\n\t" + /* yes, get more data... */ + "ldreq data, [addr], #4\n\t" + /* and 4 more bytes */ + "addeq len, len, #4\n\t" + "beq 1b\n\t" +#ifdef __ARMEB__ + "tst data, #0xff000000\n\t" + "itttt ne\n\t" + "addne len, len, #1\n\t" + "tstne data, #0xff0000\n\t" + "addne len, len, #1\n\t" + "tstne data, #0xff00\n\t" + "it ne\n\t" + "addne len, len, #1\n\t" +#else +# ifdef _ISA_ARM_5 + /* R2 is the residual sign bits from the above test. All we + need to do now is establish the position of the first zero + byte...
*/ + /* Little-endian is harder, we need the number of trailing + zeros / 8 */ +# ifdef _ISA_ARM_7 + "rbit r2, r2\n\t" + "clz r2, r2\n\t" +# else + "rsb r1, r2, #0\n\t" + "and r2, r2, r1\n\t" + "clz r2, r2\n\t" + "rsb r2, r2, #31\n\t" +# endif + "add len, len, r2, lsr #3\n\t" +# else /* No CLZ instruction */ + "tst data, #0xff\n\t" + "itttt ne\n\t" + "addne len, len, #1\n\t" + "tstne data, #0xff00\n\t" + "addne len, len, #1\n\t" + "tstne data, #0xff0000\n\t" + "it ne\n\t" + "addne len, len, #1\n\t" +# endif +#endif + "RETURN"); +} +#endif