Improve performance of MIPS memcpy.

* libc/machine/mips/memcpy.S (memcpy): Add word copies for small
	aligned data.
This commit is contained in:
Steve Ellcey 2015-11-02 13:30:19 -08:00
parent 7321662bb0
commit 5045532322
2 changed files with 48 additions and 2 deletions

View File

@ -1,3 +1,8 @@
2015-11-02 Steve Ellcey <sellcey@imgtec.com>
* libc/machine/mips/memcpy.S (memcpy): Add word copies for small
aligned data.
2015-10-30 Sebastian Huber <sebastian.huber@embedded-brains.de>
* libc/include/pthread.h: Include <sched.h> instead of <sys/sched.h>.

View File

@ -311,7 +311,7 @@ L(memcpy):
* size, copy dst pointer to v0 for the return value.
*/
slti t2,a2,(2 * NSIZE)
bne t2,zero,L(lastb)
bne t2,zero,L(lasts)
#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
move v0,zero
#else
@ -562,7 +562,7 @@ L(chkw):
*/
L(chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
beq a2,t8,L(lastb)
beq a2,t8,L(lastw)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
@ -574,6 +574,20 @@ L(wordCopy_loop):
bne a0,a3,L(wordCopy_loop)
C_ST REG3,UNIT(-1)(a0)
/* If we have been copying double words, see if we can copy a single word
   before doing byte copies.  We can have, at most, one word to copy.
   On entry a0 = dst, a1 = src and a2 = bytes still to copy.  When
   USE_DOUBLE is not defined this label falls straight through to the
   byte copy that follows.

   The instruction after the beq executes in the branch delay slot, i.e.
   whether or not the branch is taken.  It must therefore not be the lw:
   with a2 == 0 the source pointer is already one past the end of the
   buffer and loading from it could fault.  */
L(lastw):
#ifdef USE_DOUBLE
andi t8,a2,3 /* t8 is the remainder past 4 byte chunks. */
beq t8,a2,L(lastb) /* a2 is 0..3: no whole word left, byte copy only */
move a2,t8 /* delay slot; a no-op when the branch is taken (t8 == a2) */
lw REG3,0(a1) /* reached only when at least 4 bytes remain */
sw REG3,0(a0)
PTR_ADDIU a0,a0,4
PTR_ADDIU a1,a1,4
#endif
/* Copy the last 8 (or 16) bytes */
L(lastb):
blez a2,L(leave)
@ -588,6 +602,33 @@ L(leave):
j ra
nop
/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
   whether or not USE_DOUBLE is defined. Instead of just doing byte
   copies, check the alignment and size and use lw/sw if possible.
   Otherwise, do byte copies.

   Registers used here: a0 = dst, a1 = src, a2 = byte count,
   t8 = a2 modulo 4 (bytes left for the byte copy), t9 = scratch for the
   alignment tests, a3 = dst address at which the word loop stops.

   The instruction written after each branch is in its delay slot and is
   executed whether or not the branch is taken (this relies on the code
   being assembled without instruction reordering, as the final
   b/move pair implies).  */
L(lasts):
andi t8,a2,3 /* t8 = bytes left over after whole words */
beq t8,a2,L(lastb) /* a2 is 0..3: no whole word, go byte copy */
andi t9,a0,3 /* delay slot: dst offset within a word */
bne t9,zero,L(lastb) /* dst not word aligned: go byte copy */
andi t9,a1,3 /* delay slot: src offset within a word */
bne t9,zero,L(lastb) /* src not word aligned: go byte copy */
PTR_SUBU a3,a2,t8 /* delay slot: a3 = bytes to copy as whole words */
PTR_ADDU a3,a0,a3 /* a3 = dst address after the word loop */
L(wcopy_loop):
lw REG3,0(a1)
PTR_ADDIU a0,a0,4
PTR_ADDIU a1,a1,4
bne a0,a3,L(wcopy_loop)
sw REG3,-4(a0) /* delay slot: a0 was already advanced, hence -4 */
b L(lastb) /* finish the remaining t8 bytes one at a time */
move a2,t8 /* delay slot: a2 = bytes still to copy */
#ifndef R6_CODE
/*
* UNALIGNED case, got here with a3 = "negu a0"