Optimize strchr for x86.

* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned
searches aren't penalized.  Special-case searching for 0.
Eric Blake 2008-05-21 21:46:04 +00:00
parent 804c0cc6d0
commit 4962a9453a
2 changed files with 107 additions and 14 deletions
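At a glance, the rewritten strchr works a word at a time: scan bytes until the pointer is 4-byte aligned, then test four bytes per load for either a NUL or the target character, and finish byte-wise inside the word that hit. A rough C sketch of that shape (an illustration with invented names, not code from this commit):

#include <stddef.h>
#include <stdint.h>

/* Illustration only: the shape of the optimized routine, with invented
   names.  Not code from this commit. */
char *strchr_sketch(const char *s, int c_in)
{
    unsigned char c = (unsigned char) c_in;

    /* Byte-wise until s is 4-byte aligned (the unrolled prologue). */
    while ((uintptr_t) s & 3) {
        if (*s == (char) c)
            return (char *) s;
        if (*s == '\0')
            return NULL;
        s++;
    }

    /* Word-wise: mask is c broadcast into all four bytes. */
    uint32_t mask = c * 0x01010101u;
    for (;;) {
        /* Aligned 4-byte load; the assembly may read past the
           terminator, which is safe only because an aligned load
           cannot cross a page boundary (not strictly portable C). */
        uint32_t w = *(const uint32_t *) s;
        uint32_t x = w ^ mask;
        if (((w - 0x01010101u) & ~w & 0x80808080u) |
            ((x - 0x01010101u) & ~x & 0x80808080u))
            break;
        s += 4;
    }

    /* Finish byte-wise within the word that hit. */
    while (*s != '\0' && *s != (char) c)
        s++;
    return (*s == (char) c) ? (char *) s : NULL;
}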

newlib/ChangeLog

@@ -1,3 +1,9 @@
+2008-05-21  Eric Blake  <ebb9@byu.net>
+
+	Optimize strchr for x86.
+	* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned
+	searches aren't penalized.  Special-case searching for 0.
+
 2008-05-20  Nick Clifton  <nickc@redhat.com>
 
 	* libc/sys/sysnecv850/crt0.S (___dso_handle): Define (weak).
@@ -5,7 +11,7 @@
 2008-05-20  DJ Delorie  <dj@redhat.com>
 
 	* libc/sys/sysnecv850/isatty.c (_isatty): Renamed from isatty.
 
 2008-05-14  Jeff Johnston  <jjohnstn@redhat.com>
 
 	* libc/include/sys/reent.h: Change _REENT_INIT... macros to

newlib/libc/machine/i386/strchr.S

@@ -1,6 +1,6 @@
 /*
  * ====================================================
- * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
+ * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
  *
  * Permission to use, copy, modify, and distribute this
  * software is freely granted, provided that this notice
@@ -9,7 +9,7 @@
  */
 
 #include "i386mach.h"
 
 	.global SYM (strchr)
 	SOTYPE_FUNCTION(strchr)
 
@@ -21,14 +21,45 @@ SYM (strchr):
 	pushl ebx
 	xorl ebx,ebx
 	movl 8(ebp),edi
-	movb 12(ebp),bl
+	addb 12(ebp),bl
 
-#ifndef __OPTIMIZE_SIZE__
-/* check if string is aligned, if not do check one byte at a time */
+#ifndef __OPTIMIZE_SIZE__
+/* Special case strchr(p,0).  */
+	je L25
+
+/* Do byte-wise checks until string is aligned.  */
 	test $3,edi
-	jne L9
+	je L5
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L14
+	cmpb bl,cl
+	je L19
+	incl edi
+	test $3,edi
+	je L5
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L14
+	cmpb bl,cl
+	je L19
+	incl edi
+	test $3,edi
+	je L5
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L14
+	cmpb bl,cl
+	je L19
+	incl edi
 
 /* create 4 byte mask which is just the desired byte repeated 4 times */
+L5:
 	movl ebx,ecx
 	sall $8,ebx
 	subl $4,edi
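Two details in this hunk: because ebx was just zeroed, `addb 12(ebp),bl` both loads c into bl and sets ZF exactly when c is 0, so `je L25` reaches the new strchr(p,0) path with no extra compare; and the movl/sall/orl sequence after L5 broadcasts c into all four bytes of ebx. The broadcast in C (illustrative sketch, name invented):

#include <stdint.h>

/* Illustration of the mask built after L5: broadcast c into all four
   bytes of a word (equivalently c * 0x01010101). */
static uint32_t broadcast(uint8_t c)
{
    uint32_t m = c;
    m |= m << 8;    /* movl ebx,ecx; sall $8,ebx;  orl ecx,ebx */
    m |= m << 16;   /* movl ebx,ecx; sall $16,ebx; orl ecx,ebx */
    return m;
}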
@@ -49,15 +80,14 @@ L10:
 	testl $-2139062144,edx
 	jne L9
 
-	movl ebx,eax
-	xorl ecx,eax
-	leal -16843009(eax),edx
-	notl eax
-	andl eax,edx
+	xorl ebx,ecx
+	leal -16843009(ecx),edx
+	notl ecx
+	andl ecx,edx
 	testl $-2139062144,edx
 	je L10
 #endif /* not __OPTIMIZE_SIZE__ */
 
 /* loop while (*s && *s++ != c) */
 L9:
 	leal -1(edi),eax
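The constants read better in hex: -16843009 is -0x01010101 and -2139062144 is 0x80808080, the pair behind the classic SWAR zero-byte test (w - 0x01010101) & ~w & 0x80808080, which is nonzero exactly when some byte of w is zero. Applying it to w ^ mask finds bytes equal to c, and the replacement lines save one move by xoring the mask into ecx in place once w is no longer needed. As C helpers (illustrative sketch, names invented):

#include <stdint.h>

/* Nonzero iff some byte of w is 0; this is what the
   leal -16843009(...)/notl/andl/testl $-2139062144 lines compute. */
static uint32_t has_zero_byte(uint32_t w)
{
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

/* Nonzero iff some byte of w equals the byte broadcast in mask4;
   the rewritten four instructions compute has_zero_byte(w ^ mask4)
   directly in ecx. */
static uint32_t has_c_byte(uint32_t w, uint32_t mask4)
{
    return has_zero_byte(w ^ mask4);
}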
@@ -69,7 +99,7 @@ L15:
 	je L14
 	cmpb bl,dl
 	jne L15
 
 L14:
 /* if (*s == c) return address otherwise return NULL */
 	cmpb bl,(eax)
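This unchanged tail is the original byte-wise scan; in C it is roughly (sketch, name invented):

#include <stddef.h>

/* The L15/L14/L19 tail: scan while (*s && *s != c), then the final
   compare turns a NUL hit into NULL unless c itself is 0. */
static char *tail_scan(const char *s, char c)
{
    while (*s != '\0' && *s != c)
        s++;
    return (*s == c) ? (char *) s : NULL;
}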
@@ -83,3 +113,60 @@ L19:
 	leave
 	ret
+
+#ifndef __OPTIMIZE_SIZE__
+/* Special case strchr(p,0).  */
+#if 0
+	/* Hideous performance on modern machines.  */
+L25:
+	cld
+	movl $-1,ecx
+	xor eax,eax
+	repnz
+	scasb
+	leal -1(edi),eax
+	jmp L19
+#endif
+L25:
+/* Do byte-wise checks until string is aligned.  */
+	test $3,edi
+	je L26
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L19
+	incl edi
+	test $3,edi
+	je L26
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L19
+	incl edi
+	test $3,edi
+	je L26
+	movl edi,eax
+	movb (eax),cl
+	testb cl,cl
+	je L19
+	incl edi
+L26:
+	subl $4,edi
+
+/* loop performing 4 byte mask checking for desired 0 byte */
+	.p2align 4,,7
+L27:
+	addl $4,edi
+	movl (edi),ecx
+	leal -16843009(ecx),edx
+	movl ecx,eax
+	notl eax
+	andl eax,edx
+	testl $-2139062144,edx
+	je L27
+
+	jmp L9
+#endif /* !__OPTIMIZE_SIZE__ */
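The L25/L26/L27 path added here is strchr(p,0), i.e. finding the terminating NUL (the result equals p + strlen(p)), using the same alignment prologue and zero-byte test but with no character mask. The #if 0 block preserves the rejected one-instruction alternative: repnz scasb is microcoded and, as its comment says, hideously slow on modern processors. A C rendering of the kept path (illustrative sketch, name invented; the deliberate aligned over-read matches the assembly but is not strictly portable C):

#include <stdint.h>

/* Sketch of the L25/L26/L27 path: strchr(p,0) is p + strlen(p).
   The aligned over-read past the terminator mirrors the assembly and
   is safe there (an aligned 4-byte load never crosses a page). */
static char *find_nul(const char *s)
{
    while ((uintptr_t) s & 3) {        /* byte-wise until aligned (L25) */
        if (*s == '\0')
            return (char *) s;
        s++;
    }
    const uint32_t *w = (const uint32_t *) s;   /* L26/L27 word loop */
    while (!((*w - 0x01010101u) & ~*w & 0x80808080u))
        w++;
    s = (const char *) w;
    while (*s != '\0')                 /* pinpoint the byte (jmp L9) */
        s++;
    return (char *) s;
}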