2009-01-21 Richard Earnshaw <rearnsha@arm.com>

* libc/machine/arm/arm_asm.h: New file.
        * libc/machine/arm/strlen.c: New file.
        * libc/machine/arm/strcpy.c: New file.
        * libc/machine/arm/strcmp.c: New file.
        * libc/machine/arm/Makefile.am: Add new string routines.
This commit is contained in:
Jeff Johnston 2009-01-22 00:02:35 +00:00
parent 7ffaa17c80
commit 2b7e0645c9
7 changed files with 864 additions and 4 deletions

View File

@ -1,3 +1,11 @@
2009-01-21 Richard Earnshaw <rearnsha@arm.com>
* libc/machine/arm/arm_asm.h: New file.
* libc/machine/arm/strlen.c: New file.
* libc/machine/arm/strcpy.c: New file.
* libc/machine/arm/strcmp.c: New file.
* libc/machine/arm/Makefile.am: Add new string routines.
2009-01-19 Neal H. Walfield <neal@gnu.org>
* libc/include/stdint.h (INT64_C, UINT64_C, INTMAX_C, UINTMAX_C)

View File

@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)
noinst_LIBRARIES = lib.a
lib_a_SOURCES = setjmp.S access.c
lib_a_SOURCES = setjmp.S access.c strlen.c strcmp.c strcpy.c
lib_a_CCASFLAGS=$(AM_CCASFLAGS)
lib_a_CFLAGS = $(AM_CFLAGS)

View File

@ -40,7 +40,8 @@ DIST_COMMON = $(srcdir)/../../../../config.guess \
$(srcdir)/../../../../config.sub $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/configure \
$(am__configure_deps) $(srcdir)/../../../../mkinstalldirs \
$(srcdir)/../../../../compile
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../../../acinclude.m4 \
@ -55,7 +56,9 @@ LIBRARIES = $(noinst_LIBRARIES)
ARFLAGS = cru
lib_a_AR = $(AR) $(ARFLAGS)
lib_a_LIBADD =
am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-access.$(OBJEXT)
am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-access.$(OBJEXT) \
lib_a-strlen.$(OBJEXT) lib_a-strcmp.$(OBJEXT) \
lib_a-strcpy.$(OBJEXT)
lib_a_OBJECTS = $(am_lib_a_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir)
depcomp =
@ -180,7 +183,7 @@ AUTOMAKE_OPTIONS = cygnus
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
AM_CCASFLAGS = $(INCLUDES)
noinst_LIBRARIES = lib.a
lib_a_SOURCES = setjmp.S access.c
lib_a_SOURCES = setjmp.S access.c strlen.c strcmp.c strcpy.c
lib_a_CCASFLAGS = $(AM_CCASFLAGS)
lib_a_CFLAGS = $(AM_CFLAGS)
ACLOCAL_AMFLAGS = -I ../../.. -I ../../../..
@ -259,6 +262,24 @@ lib_a-access.o: access.c
lib_a-access.obj: access.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
lib_a-strlen.o: strlen.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.o `test -f 'strlen.c' || echo '$(srcdir)/'`strlen.c
lib_a-strlen.obj: strlen.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.obj `if test -f 'strlen.c'; then $(CYGPATH_W) 'strlen.c'; else $(CYGPATH_W) '$(srcdir)/strlen.c'; fi`
lib_a-strcmp.o: strcmp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcmp.o `test -f 'strcmp.c' || echo '$(srcdir)/'`strcmp.c
lib_a-strcmp.obj: strcmp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcmp.obj `if test -f 'strcmp.c'; then $(CYGPATH_W) 'strcmp.c'; else $(CYGPATH_W) '$(srcdir)/strcmp.c'; fi`
lib_a-strcpy.o: strcpy.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcpy.o `test -f 'strcpy.c' || echo '$(srcdir)/'`strcpy.c
lib_a-strcpy.obj: strcpy.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strcpy.obj `if test -f 'strcpy.c'; then $(CYGPATH_W) 'strcpy.c'; else $(CYGPATH_W) '$(srcdir)/strcpy.c'; fi`
uninstall-info-am:
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2009 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ARM_ASM__H
#define ARM_ASM__H

/* Shared helpers for the hand-written ARM string routines.

   Including this header has a side effect: the top-level asm() statements
   below emit assembler `.macro' definitions into the translation unit.
   The include guard therefore has to work -- if the header were expanded
   twice the macro definitions would be emitted twice.  (The guard macro
   defined here must be spelled exactly like the one tested above.)  */

/* First define some macros that keep everything else sane.

   Each _ISA_* macro is cumulative: it is defined when the compiler is
   targeting that architecture level or any later level listed below.  */
#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
#define _ISA_ARM_7
#endif

#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6__) || \
    defined (__ARM_ARCH_6J__) || defined (__ARM_ARCH_6T2__) || \
    defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) || \
    defined (__ARM_ARCH_6Z__)
#define _ISA_ARM_6
#endif

#if defined (_ISA_ARM_6) || defined (__ARM_ARCH_5__) || \
    defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5TE__) || \
    defined (__ARM_ARCH_5TEJ__)
#define _ISA_ARM_5
#endif

#if defined (_ISA_ARM_5) || defined (__ARM_ARCH_4T__)
#define _ISA_ARM_4T
#endif

#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7__)
#define _ISA_THUMB_2
#endif

#if defined (_ISA_THUMB_2) || defined (__ARM_ARCH_6M__)
#define _ISA_THUMB_1
#endif

/* Now some macros for common instruction sequences. */

/* RETURN [cond]: return to the caller, optionally under condition COND.
   Expands to `bx<cond> lr' when one of _ISA_ARM_4T / _ISA_THUMB_1 is
   defined, and to `mov<cond> pc, lr' otherwise.  */
asm(".macro RETURN cond=\n\t"
#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1)
    "bx\\cond lr\n\t"
#else
    "mov\\cond pc, lr\n\t"
#endif
    ".endm"
    );

/* optpld base [, offset]: optional preload.  Expands to
   `pld [base, offset]' (OFFSET defaults to #0) when _ISA_ARM_7 is
   defined, and to nothing at all otherwise.  */
asm(".macro optpld base, offset=#0\n\t"
#if defined (_ISA_ARM_7)
    "pld [\\base, \\offset]\n\t"
#endif
    ".endm"
    );

#endif /* ARM_ASM__H */

View File

@ -0,0 +1,404 @@
/*
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "arm_asm.h"
#include <_ansi.h>
#include <string.h>
/* Endian-dependent assembler fragments.  Every macro below expands to a
   string literal that is pasted into the asm() bodies of strcmp and
   strcmp_unaligned.

   In this file "LSB" names the byte of a loaded word that comes first in
   memory (i.e. first in string order) and "MSB" the byte that comes last:
     LSB / MSB          - immediate masks selecting that byte;
     SHFT2LSB / SHFT2MSB - shift mnemonic that moves data towards that
                           byte position.
   When __ARMEB__ is defined (big-endian) the first byte lives in bits
   24-31, so the masks and shift directions are swapped.  */
#ifdef __ARMEB__
#define SHFT2LSB "lsl"
#define SHFT2MSB "lsr"
#define MSB "0x000000ff"
#define LSB "0xff000000"
#else
#define SHFT2LSB "lsr"
#define SHFT2MSB "lsl"
#define MSB "0xff000000"
#define LSB "0x000000ff"
#endif
/* Operands for the "word contains a zero byte" test.
   With __thumb2__ the constants are written as immediates and REG is
   ignored.  Otherwise REG is stringified: magic1 is the register itself
   (which the caller must have loaded with 0x01010101) and magic2 is that
   register shifted left by 7, i.e. 0x80808080.  */
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif
/* Compare the NUL-terminated strings S1 and S2.

   Returns (in r0) the difference between the first pair of bytes that
   differ, both taken as unsigned values, or 0 when the strings are equal:
   negative when S1 sorts first, positive when S2 sorts first.

   The function is `naked': the whole body is assembly, S1 is read from r0
   and S2 from r1.  One of three implementations is selected at compile
   time:
     - word-at-a-time code (default); when the two pointers do not share
       the same offset within a word it tail-branches to strcmp_unaligned;
     - a Thumb-1 byte loop;
     - a compact byte loop when optimising for size.

   Zero-byte detection in the word loop uses
       (w - 0x01010101) & ~w & 0x80808080
   which is non-zero exactly when W contains a zero byte.  The `& ~w'
   (BIC) matters: XOR-ing with W instead also flags bytes equal to 0x80,
   which made two equal words containing 0x80 fall out of the loop and
   compare as "end of string".  */
int
__attribute__((naked)) strcmp (const char* s1, const char* s2)
{
  asm(
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
      (defined (__thumb__) && !defined (__thumb2__)))
      "optpld r0\n\t"
      "optpld r1\n\t"
      "eor r2, r0, r1\n\t"
      "tst r2, #3\n\t"
      /* Strings not at same byte offset from a word boundary. */
      "bne strcmp_unaligned\n\t"
      /* r2 = offset of s1 within its word; Z is set when already aligned
         and stays set until the `beq 1f' below.  */
      "ands r2, r0, #3\n\t"
      "bic r0, r0, #3\n\t"
      "bic r1, r1, #3\n\t"
      "ldr ip, [r0], #4\n\t"
      "it eq\n\t"
      "ldreq r3, [r1], #4\n\t"
      "beq 1f\n\t"
      /* Although s1 and s2 have identical initial alignment, they are
         not currently word aligned. Rather than comparing bytes,
         make sure that any bytes fetched from before the addressed
         bytes are forced to 0xff. Then they will always compare
         equal. */
      "eor r2, r2, #3\n\t"
      "lsl r2, r2, #3\n\t"
      "mvn r3, #"MSB"\n\t"
      SHFT2LSB" r2, r3, r2\n\t"
      "ldr r3, [r1], #4\n\t"
      "orr ip, ip, r2\n\t"
      "orr r3, r3, r2\n"
      "1:\n\t"
#ifndef __thumb2__
      /* Load the 'magic' constant 0x01010101. */
      "str r4, [sp, #-4]!\n\t"
      "mov r4, #1\n\t"
      "orr r4, r4, r4, lsl #8\n\t"
      "orr r4, r4, r4, lsl #16\n"
#endif
      ".p2align 2\n"
      "4:\n\t"
      "optpld r0, #8\n\t"
      "optpld r1, #8\n\t"
      "sub r2, ip, "magic1(r4)"\n\t"
      "cmp ip, r3\n\t"
      "itttt eq\n\t"
      /* check for any zero bytes in first word:
         (ip - 0x01010101) & ~ip & 0x80808080 */
      "biceq r2, r2, ip\n\t"
      "tsteq r2, "magic2(r4)"\n\t"
      "ldreq ip, [r0], #4\n\t"
      "ldreq r3, [r1], #4\n\t"
      "beq 4b\n"
      "2:\n\t"
      /* There's a zero or a different byte in the word */
      SHFT2MSB" r0, ip, #24\n\t"
      SHFT2LSB" ip, ip, #8\n\t"
      "cmp r0, #1\n\t"
      "it cs\n\t"
      "cmpcs r0, r3, "SHFT2MSB" #24\n\t"
      "it eq\n\t"
      SHFT2LSB"eq r3, r3, #8\n\t"
      "beq 2b\n\t"
      /* On a big-endian machine r0 holds the byte of interest in bits
         0-7; on a little-endian machine it is in bits 24-31.  In both
         cases the remaining bits of r0 are zero.  In r3 the byte of
         interest sits at the other end of the word and the remaining
         bits are not necessarily zero.  The result must be the signed
         difference of two unsigned bytes, so on little-endian the bytes
         have to be moved down to bits 0-7 before subtracting; subtracting
         them in bits 24-31 yields the wrong sign whenever they differ by
         0x80 or more.  */
#ifdef __ARMEB__
      "sub r0, r0, r3, lsr #24\n\t"
#else
      "and r3, r3, #255\n\t"
#ifdef __thumb2__
      "lsr r0, r0, #24\n\t"
      "sub r0, r0, r3\n\t"
#else
      /* r0 = (r0 >> 24) - r3 */
      "rsb r0, r3, r0, lsr #24\n\t"
#endif
#endif
#ifndef __thumb2__
      "ldr r4, [sp], #4\n\t"
#endif
      "RETURN"
#elif (defined (__thumb__) && !defined (__thumb2__))
      /* Thumb-1: plain byte loop without post-indexed addressing.  */
      "1:\n\t"
      "ldrb r2, [r0]\n\t"
      "ldrb r3, [r1]\n\t"
      "add r0, r0, #1\n\t"
      "add r1, r1, #1\n\t"
      "cmp r2, #0\n\t"
      "beq 2f\n\t"
      "cmp r2, r3\n\t"
      "beq 1b\n\t"
      "2:\n\t"
      "sub r0, r2, r3\n\t"
      "bx lr"
#else
      /* Size-optimised byte loop.  `cmp r2, #1' sets carry only when the
         byte from s1 is non-zero, so the second compare is skipped (and
         the loop left with NE) at the terminator.  */
      "3:\n\t"
      "ldrb r2, [r0], #1\n\t"
      "ldrb r3, [r1], #1\n\t"
      "cmp r2, #1\n\t"
      "it cs\n\t"
      "cmpcs r2, r3\n\t"
      "beq 3b\n\t"
      "sub r0, r2, r3\n\t"
      "RETURN"
#endif
      );
}
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
      (defined (__thumb__) && !defined (__thumb2__)))
/* Slow-path helper for strcmp: compares S1 (r0) and S2 (r1) when the two
   pointers have different offsets within a word.  Reached only by a
   branch from the assembly of strcmp, hence `used'; like strcmp it is
   `naked' and returns the byte difference in r0.

   S1 is first advanced byte-by-byte to a word boundary.  From then on
   whole words are loaded from both strings; each word of S1 is compared
   against the matching bytes of two consecutive (aligned) words of S2.
   There is one inner loop for each possible offset of S2 (1, 2 or 3).
   r4 and r5 are saved on the stack for the duration of the word loops.

   A zero byte in a word W of S1 is detected with
       (W - 0x01010101) & ~W & 0x80808080
   (SUB, BIC, ANDS).  Using XOR instead of `& ~W' would also flag bytes
   equal to 0x80 and make the routine report equality too early.

   NOTE(review): on big-endian targets the borrow of that subtraction
   travels from later string bytes towards earlier ones, so a 0x01 byte
   immediately followed by the terminator may be flagged as well.  The
   `5:' handlers of the 3-byte and 2-byte overlap loops still rely on the
   per-byte flags -- worth confirming on a big-endian target.  */
static int __attribute__((naked, used))
strcmp_unaligned(const char* s1, const char* s2)
{
#if 0
  /* The assembly code below is based on the following algorithm. */
#ifdef __ARMEB__
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif
#define body(shift) \
  mask = 0xffffffffU RSHIFT shift; \
  w1 = *wp1++; \
  w2 = *wp2++; \
  do \
    { \
      t1 = w1 & mask; \
      if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
        { \
          w2 RSHIFT= shift; \
          break; \
        } \
      if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
        { \
          if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
            w2 RSHIFT= shift; \
          else \
            { \
              w2 = *wp2; \
              t1 = w1 RSHIFT (32 - shift); \
              w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
            } \
          break; \
        } \
      w2 = *wp2++; \
      t1 ^= w1; \
      if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
        { \
          t1 = w1 >> (32 - shift); \
          w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
          break; \
        } \
      w1 = *wp1++; \
    } while (1)
  const unsigned* wp1;
  const unsigned* wp2;
  unsigned w1, w2;
  unsigned mask;
  unsigned shift;
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned t1;
  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
        return c1 - (int)c2;
    }
  wp1 = (unsigned*) (((unsigned)s1) & ~3);
  wp2 = (unsigned*) (((unsigned)s2) & ~3);
  t1 = ((unsigned) s2) & 3;
  if (t1 == 1)
    {
      body(8);
    }
  else if (t1 == 2)
    {
      body(16);
    }
  else
    {
      body (24);
    }
  do
    {
#ifdef __ARMEB__
      c1 = (char) t1 >> 24;
      c2 = (char) w2 >> 24;
#else
      c1 = (char) t1;
      c2 = (char) w2;
#endif
      t1 RSHIFT= 8;
      w2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif
  asm("wp1 .req r0\n\t"
      "wp2 .req r1\n\t"
      "b1 .req r2\n\t"
      "w1 .req r4\n\t"
      "w2 .req r5\n\t"
      "t1 .req ip\n\t"
      "@ r3 is scratch\n"
      /* First of all, compare bytes until wp1(sp1) is word-aligned. */
      "1:\n\t"
      "tst wp1, #3\n\t"
      "beq 2f\n\t"
      "ldrb r2, [wp1], #1\n\t"
      "ldrb r3, [wp2], #1\n\t"
      "cmp r2, #1\n\t"
      "it cs\n\t"
      "cmpcs r2, r3\n\t"
      "beq 1b\n\t"
      "sub r0, r2, r3\n\t"
      "RETURN\n"
      "2:\n\t"
      /* Save r4/r5 (w1/w2); r4 ends up at the lower address so the
         epilogues pop r4 first.  */
      "str r5, [sp, #-4]!\n\t"
      "str r4, [sp, #-4]!\n\t"
      /* b1 = 0x01010101 */
      "mov b1, #1\n\t"
      "orr b1, b1, b1, lsl #8\n\t"
      "orr b1, b1, b1, lsl #16\n\t"
      /* t1 = offset of s2 within its word; dispatch on it.  */
      "and t1, wp2, #3\n\t"
      "bic wp2, wp2, #3\n\t"
      "ldr w1, [wp1], #4\n\t"
      "ldr w2, [wp2], #4\n\t"
      "cmp t1, #2\n\t"
      "beq 2f\n\t"
      "bhi 3f\n"
      /* Critical inner Loop: Block with 3 bytes initial overlap */
      ".p2align 2\n"
      "1:\n\t"
      "bic t1, w1, #"MSB"\n\t"
      "cmp t1, w2, "SHFT2LSB" #8\n\t"
      /* r3 = (w1 - b1) & ~w1; flags from the cmp above are preserved.  */
      "sub r3, w1, b1\n\t"
      "bic r3, r3, w1\n\t"
      "bne 4f\n\t"
      "ands r3, r3, b1, lsl #7\n\t"
      "it eq\n\t"
      "ldreq w2, [wp2], #4\n\t"
      "bne 5f\n\t"
      /* t1 = the remaining (last) byte of w1.  */
      "eor t1, t1, w1\n\t"
      "cmp t1, w2, "SHFT2MSB" #24\n\t"
      "bne 6f\n\t"
      "ldr w1, [wp1], #4\n\t"
      "b 1b\n"
      "4:\n\t"
      SHFT2LSB" w2, w2, #8\n\t"
      "b 8f\n"
      "5:\n\t"
      /* Zero byte among the three bytes already found equal?  */
      "bics r3, r3, #"MSB"\n\t"
      "bne 7f\n\t"
      "ldrb w2, [wp2]\n\t"
      SHFT2LSB" t1, w1, #24\n\t"
#ifdef __ARMEB__
      SHFT2LSB" w2, w2, #24\n\t"
#endif
      "b 8f\n"
      "6:\n\t"
      SHFT2LSB" t1, w1, #24\n\t"
      "and w2, w2, #"LSB"\n\t"
      "b 8f\n"
      /* Critical inner Loop: Block with 2 bytes initial overlap */
      ".p2align 2\n"
      "2:\n\t"
      SHFT2MSB" t1, w1, #16\n\t"
      "sub r3, w1, b1\n\t"
      SHFT2LSB" t1, t1, #16\n\t"
      "bic r3, r3, w1\n\t"
      "cmp t1, w2, "SHFT2LSB" #16\n\t"
      "bne 4f\n\t"
      "ands r3, r3, b1, lsl #7\n\t"
      "it eq\n\t"
      "ldreq w2, [wp2], #4\n\t"
      "bne 5f\n\t"
      /* t1 = the remaining two bytes of w1.  */
      "eor t1, t1, w1\n\t"
      "cmp t1, w2, "SHFT2MSB" #16\n\t"
      "bne 6f\n\t"
      "ldr w1, [wp1], #4\n\t"
      "b 2b\n"
      "5:\n\t"
      /* Zero byte among the two bytes already found equal?  */
      SHFT2MSB"s r3, r3, #16\n\t"
      "bne 7f\n\t"
      "ldrh w2, [wp2]\n\t"
      SHFT2LSB" t1, w1, #16\n\t"
#ifdef __ARMEB__
      SHFT2LSB" w2, w2, #16\n\t"
#endif
      "b 8f\n"
      "6:\n\t"
      SHFT2MSB" w2, w2, #16\n\t"
      SHFT2LSB" t1, w1, #16\n\t"
      "4:\n\t"
      SHFT2LSB" w2, w2, #16\n\t"
      "b 8f\n\t"
      /* Critical inner Loop: Block with 1 byte initial overlap */
      ".p2align 2\n"
      "3:\n\t"
      "and t1, w1, #"LSB"\n\t"
      "cmp t1, w2, "SHFT2LSB" #24\n\t"
      "sub r3, w1, b1\n\t"
      "bic r3, r3, w1\n\t"
      "bne 4f\n\t"
      "ands r3, r3, b1, lsl #7\n\t"
      "it eq\n\t"
      "ldreq w2, [wp2], #4\n\t"
      "bne 5f\n\t"
      /* t1 = the remaining three bytes of w1.  */
      "eor t1, t1, w1\n\t"
      "cmp t1, w2, "SHFT2MSB" #8\n\t"
      "bne 6f\n\t"
      "ldr w1, [wp1], #4\n\t"
      "b 3b\n"
      "4:\n\t"
      SHFT2LSB" w2, w2, #24\n\t"
      "b 8f\n"
      "5:\n\t"
      /* The single byte already found equal is the first byte of w1.
         If it is the terminator the strings are equal.  Test the data
         itself rather than bit 7 of the flag word: bit 7 belongs to the
         first byte only on little-endian.  */
      "tst w1, #"LSB"\n\t"
      "beq 7f\n\t"
      "ldr w2, [wp2], #4\n"
      "6:\n\t"
      SHFT2LSB" t1, w1, #8\n\t"
      "bic w2, w2, #"MSB"\n\t"
      "b 8f\n"
      /* Strings are equal: return 0.  */
      "7:\n\t"
      "mov r0, #0\n\t"
      /* "ldmfd sp!, {r4, r5}\n\t" */
      "ldr r4, [sp], #4\n\t"
      "ldr r5, [sp], #4\n\t"
      "RETURN\n"
      /* Byte-by-byte comparison of the remaining data in t1 (from s1)
         and w2 (from s2), first byte in string order first.  */
      "8:\n\t"
      "and r2, t1, #"LSB"\n\t"
      "and r0, w2, #"LSB"\n\t"
      "cmp r0, #1\n\t"
      "it cs\n\t"
      "cmpcs r0, r2\n\t"
      "itt eq\n\t"
      SHFT2LSB"eq t1, t1, #8\n\t"
      SHFT2LSB"eq w2, w2, #8\n\t"
      "beq 8b\n\t"
      /* r2 and r0 hold the two bytes under the LSB mask.  On big-endian
         that mask is 0xff000000, so move both bytes down to bits 0-7
         before subtracting; otherwise the sign of the result is wrong
         when the bytes differ by 0x80 or more.  */
#ifdef __ARMEB__
      "lsr r2, r2, #24\n\t"
      "sub r0, r2, r0, lsr #24\n\t"
#else
      "sub r0, r2, r0\n\t"
#endif
      /* "ldmfd sp!, {r4, r5}\n\t" */
      "ldr r4, [sp], #4\n\t"
      "ldr r5, [sp], #4\n\t"
      "RETURN");
}
#endif

View File

@ -0,0 +1,169 @@
/*
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "arm_asm.h"
#include <_ansi.h>
#include <string.h>
/* Operands for the "word contains a zero byte" test, expanded to string
   literals that are pasted into the asm() body of strcpy.
   With __thumb2__ the constants are written as immediates and REG is
   ignored.  Otherwise REG is stringified: magic1 is the register itself
   (which the caller must have loaded with 0x01010101) and magic2 is that
   register shifted left by 7, i.e. 0x80808080.  */
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif
/* Copy the NUL-terminated string SRC, including the terminator, to DST
   and return DST.

   The function is `naked': the whole body is assembly, DST is read from
   r0 and SRC from r1.  r0 is never modified, so it is still DST on
   return; ip is used as the running destination pointer.

   Three implementations are selected at compile time:
     - word-at-a-time copy (default), used when both pointers share the
       same offset within a word, with a byte loop fallback otherwise;
     - a compact byte loop when optimising for size (ARM / Thumb-2);
     - a Thumb-1 byte loop.

   A zero byte in a word W is detected with
       (W - 0x01010101) & ~W & 0x80808080
   (SUB, BICS, TST).  The `& ~W' is essential: XOR-ing with W instead
   also flags bytes equal to 0x80, and a word without a real terminator
   must never reach the byte-wise tail at label 1, because that tail only
   stops once it has stored a zero byte taken from the word.  */
char* __attribute__((naked))
strcpy (char* dst, const char* src)
{
  asm (
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
      (defined (__thumb__) && !defined (__thumb2__)))
       "optpld r1\n\t"
       "eor r2, r0, r1\n\t"
       "mov ip, r0\n\t"
       "tst r2, #3\n\t"
       "bne 4f\n\t"
       "tst r1, #3\n\t"
       "bne 3f\n"
       "5:\n\t"
#ifndef __thumb2__
       /* r5 = 0x01010101 (Thumb-2 uses immediates instead).  */
       "str r5, [sp, #-4]!\n\t"
       "mov r5, #0x01\n\t"
       "orr r5, r5, r5, lsl #8\n\t"
       "orr r5, r5, r5, lsl #16\n\t"
#endif
       "str r4, [sp, #-4]!\n\t"
       /* If r1 is not 64-bit aligned, handle one word up front.  */
       "tst r1, #4\n\t"
       "ldr r3, [r1], #4\n\t"
       "beq 2f\n\t"
       "sub r2, r3, "magic1(r5)"\n\t"
       "bics r2, r2, r3\n\t"
       "tst r2, "magic2(r5)"\n\t"
       "itt eq\n\t"
       "streq r3, [ip], #4\n\t"
       "ldreq r3, [r1], #4\n\t"
       "bne 1f\n\t"
       /* Inner loop. We now know that r1 is 64-bit aligned, so we
          can safely fetch up to two words. This allows us to avoid
          load stalls. */
       ".p2align 2\n"
       "2:\n\t"
       "optpld r1, #8\n\t"
       "ldr r4, [r1], #4\n\t"
       "sub r2, r3, "magic1(r5)"\n\t"
       "bics r2, r2, r3\n\t"
       "tst r2, "magic2(r5)"\n\t"
       /* Start the test of the second word; flags are not touched.  */
       "sub r2, r4, "magic1(r5)"\n\t"
       "bne 1f\n\t"
       "str r3, [ip], #4\n\t"
       "bics r2, r2, r4\n\t"
       "tst r2, "magic2(r5)"\n\t"
       "itt eq\n\t"
       "ldreq r3, [r1], #4\n\t"
       "streq r4, [ip], #4\n\t"
       "beq 2b\n\t"
       "mov r3, r4\n"
       /* r3 holds a word containing the terminator: store its bytes one
          at a time, in memory order, up to and including the zero.  */
       "1:\n\t"
#ifdef __ARMEB__
       /* Rotate the next byte (held in the top bits) down to bits 0-7.  */
       "ror r3, r3, #24\n\t"
#endif
       "strb r3, [ip], #1\n\t"
       "tst r3, #0xff\n\t"
#ifdef __ARMEL__
       "ror r3, r3, #8\n\t"
#endif
       "bne 1b\n\t"
       "ldr r4, [sp], #4\n\t"
#ifndef __thumb2__
       "ldr r5, [sp], #4\n\t"
#endif
       "RETURN\n"
       /* Strings have the same offset from word alignment, but it's
          not zero. */
       "3:\n\t"
       "tst r1, #1\n\t"
       "beq 1f\n\t"
       "ldrb r2, [r1], #1\n\t"
       "strb r2, [ip], #1\n\t"
       "cmp r2, #0\n\t"
       "it eq\n"
       "RETURN eq\n"
       "1:\n\t"
       "tst r1, #2\n\t"
       "beq 5b\n\t"
       "ldrh r2, [r1], #2\n\t"
       /* Store the halfword if its first byte is non-zero, otherwise
          store just the terminator; leave NE set only when neither byte
          was zero so that copying continues at label 5.  */
#ifdef __ARMEB__
       "tst r2, #0xff00\n\t"
       "iteet ne\n\t"
       "strneh r2, [ip], #2\n\t"
       "lsreq r2, r2, #8\n\t"
       "streqb r2, [ip]\n\t"
       "tstne r2, #0xff\n\t"
#else
       "tst r2, #0xff\n\t"
       "itet ne\n\t"
       "strneh r2, [ip], #2\n\t"
       "streqb r2, [ip]\n\t"
       "tstne r2, #0xff00\n\t"
#endif
       "bne 5b\n\t"
       "RETURN\n"
       /* src and dst do not have a common word-alignment. Fall back to
          byte copying. */
       "4:\n\t"
       "ldrb r2, [r1], #1\n\t"
       "strb r2, [ip], #1\n\t"
       "cmp r2, #0\n\t"
       "bne 4b\n\t"
       "RETURN"
#elif !defined (__thumb__) || defined (__thumb2__)
       /* Size-optimised byte loop using post-indexed addressing.  */
       "mov r3, r0\n\t"
       "1:\n\t"
       "ldrb r2, [r1], #1\n\t"
       "strb r2, [r3], #1\n\t"
       "cmp r2, #0\n\t"
       "bne 1b\n\t"
       "RETURN"
#else
       /* Thumb-1 byte loop with explicit pointer increments.  */
       "mov r3, r0\n\t"
       "1:\n\t"
       "ldrb r2, [r1]\n\t"
       "add r1, r1, #1\n\t"
       "strb r2, [r3]\n\t"
       "add r3, r3, #1\n\t"
       "cmp r2, #0\n\t"
       "bne 1b\n\t"
       "RETURN"
#endif
       );
}

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "arm_asm.h"
#include <_ansi.h>
#include <string.h>
#include <limits.h>
/* strlen: return the number of bytes in STR before the terminating NUL.

   Two implementations are selected at compile time:
     - when optimising for size, or for Thumb-1, a small byte loop written
       as ordinary inline asm inside a normal C function;
     - otherwise a `naked' word-at-a-time routine (STR is read from r0 and
       the result is returned in r0).  */
#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
  (defined (__thumb__) && !defined (__thumb2__))
size_t
strlen (const char* str)
{
  int scratch;
#if defined (__thumb__) && !defined (__thumb2__)
  size_t len;
  /* len counts every byte loaded, including the terminator, hence the
     `- 1' on return.  */
  asm ("mov %0, #0\n"
       "1:\n\t"
       "ldrb %1, [%2, %0]\n\t"
       "add %0, %0, #1\n\t"
       "cmp %1, #0\n\t"
       "bne 1b"
       : "=&r" (len), "=&r" (scratch) : "r" (str) : "memory", "cc");
  return len - 1;
#else
  const char* end;
  /* end is post-incremented past the terminator, hence the `- 1'.  */
  asm ("1:\n\t"
       "ldrb %1, [%0], #1\n\t"
       "cmp %1, #0\n\t"
       "bne 1b"
       : "=&r" (end), "=&r" (scratch) : "0" (str) : "memory", "cc");
  return end - str - 1;
#endif
}
#else
size_t __attribute__((naked))
strlen (const char* str)
{
  asm ("len .req r0\n\t"
       "data .req r3\n\t"
       "addr .req r1\n\t"
       "optpld r0\n\t"
       /* Word-align address */
       "bic addr, r0, #3\n\t"
       /* Get adjustment for start ... (Z is set when STR is already
          word aligned and stays valid until the masking below).  */
       "ands len, r0, #3\n\t"
       "neg len, len\n\t"
       /* First word of data */
       "ldr data, [addr], #4\n\t"
       /* Ensure bytes preceding start ... */
       "add ip, len, #4\n\t"
       "mov ip, ip, asl #3\n\t"
       "mvn r2, #0\n\t"
       /* ... are masked out.  This must happen only for an unaligned
          start: both the shift and the ORR are conditional.  With an
          aligned start r2 is still all-ones here, and OR-ing it in
          unconditionally would wipe out the whole first word.  */
#ifdef __thumb__
       "itt ne\n\t"
# ifdef __ARMEB__
       "lslne r2, ip\n\t"
# else
       "lsrne r2, ip\n\t"
# endif
       "orrne data, data, r2\n\t"
#else
       "it ne\n\t"
# ifdef __ARMEB__
       "orrne data, data, r2, lsl ip\n\t"
# else
       "orrne data, data, r2, lsr ip\n\t"
# endif
#endif
       /* Magic const 0x01010101 */
#ifdef _ISA_ARM_7
       "movw ip, #0x101\n\t"
#else
       "mov ip, #0x1\n\t"
       "orr ip, ip, ip, lsl #8\n\t"
#endif
       "orr ip, ip, ip, lsl #16\n"
       /* This is the main loop. We subtract one from each byte in the
          word: the sign bit of a byte changes iff the byte was zero or
          0x80 -- the latter case is eliminated by and-ing the result
          with the one's complement of the data. */
       "1:\n\t"
       /* test (data - 0x01010101) */
       "sub r2, data, ip\n\t"
       /* ... & ~data */
       "bic r2, r2, data\n\t"
       /* ... & 0x80808080 == 0? */
       "ands r2, r2, ip, lsl #7\n\t"
#ifdef _ISA_ARM_7
       /* yes, get more data... */
       "itt eq\n\t"
       "ldreq data, [addr], #4\n\t"
       /* and 4 more bytes */
       "addeq len, len, #4\n\t"
       /* If we have PLD, then unroll the loop a bit. */
       "optpld addr, #8\n\t"
       /* test (data - 0x01010101) */
       "ittt eq\n\t"
       "subeq r2, data, ip\n\t"
       /* ... & ~data */
       "biceq r2, r2, data\n\t"
       /* ... & 0x80808080 == 0? */
       "andeqs r2, r2, ip, lsl #7\n\t"
#endif
       "itt eq\n\t"
       /* yes, get more data... */
       "ldreq data, [addr], #4\n\t"
       /* and 4 more bytes */
       "addeq len, len, #4\n\t"
       "beq 1b\n\t"
#ifdef __ARMEB__
       /* Big-endian: inspect the data bytes directly, first byte in the
          top bits.  */
       "tst data, #0xff000000\n\t"
       "itttt ne\n\t"
       "addne len, len, #1\n\t"
       "tstne data, #0xff0000\n\t"
       "addne len, len, #1\n\t"
       "tstne data, #0xff00\n\t"
       "it ne\n\t"
       "addne len, len, #1\n\t"
#else
# ifdef _ISA_ARM_5
       /* R2 is the residual sign bits from the above test. All we
          need to do now is establish the position of the first zero
          byte... */
       /* Little-endian is harder, we need the number of trailing
          zeros / 8 */
#  ifdef _ISA_ARM_7
       "rbit r2, r2\n\t"
       "clz r2, r2\n\t"
#  else
       /* Isolate the lowest set bit (r2 & -r2), then 31 - clz gives its
          index.  r1 (addr) is no longer needed and serves as scratch.  */
       "rsb r1, r2, #0\n\t"
       "and r2, r2, r1\n\t"
       "clz r2, r2\n\t"
       "rsb r2, r2, #31\n\t"
#  endif
       "add len, len, r2, lsr #3\n\t"
# else /* No CLZ instruction */
       "tst data, #0xff\n\t"
       "itttt ne\n\t"
       "addne len, len, #1\n\t"
       "tstne data, #0xff00\n\t"
       "addne len, len, #1\n\t"
       "tstne data, #0xff0000\n\t"
       "it ne\n\t"
       "addne len, len, #1\n\t"
# endif
#endif
       "RETURN");
}
#endif