78 lines
2.0 KiB
ArmAsm
78 lines
2.0 KiB
ArmAsm
.global _gint_image_p8_swapcolor
|
|
#include "image_macros.S"
|
|
|
|
/* P8 SWAPCOLOR, RAM version: by branchless xor selection.
|
|
|
|
The core action of this loop is to render full pixels while replacing any
|
|
occurrence of cmd.color_1 (x) with the value cmd.color_2 (y). Branching is
|
|
too slow, as is often the case, so instead we use the fact that both x and y are fixed to
|
|
use the identity
|
|
|
|
c ^ ((x ^ y) & -(c == x)) = (c == x ? y : c)
|
|
|
|
We materialize -(c == x) by subtracting a register from itself with subc
|
|
after the comparison (which is delightfully elegant), while (x ^ y) is pre-
|
|
computed. This way, the selection is performed in one [subc], one [and] and
|
|
one [xor] for a total of 3 EX slots. This is slower than NULL-cancelling
|
|
(which only takes 2 EX slots) but still better than symmetric alternatives.
|
|
|
|
Since we have a palette, we play a further trick by comparing against the index but
|
|
selecting against the palette entry, ie. we do
|
|
|
|
palette[c] ^ ((palette[x] ^ y) & -(c == x)) = (c == x ? y : palette[c])
|
|
|
|
which allows the computation to occur in parallel with the palette access
|
|
and does not require the replacement value to be located at a valid index.
|
|
|
|
r0: [temporary]
|
|
r7: cmd.color_1
|
|
r8: palette[cmd.color_1] ^ cmd.color_2 (ie. x ^ y)
|
|
r9: Palette
|
|
r10: Holds (x ^ y) & -(c == x) during selection */
|
|
|
|
/* GEN_SWAPCOLOR_LOOP: emits one complete P8 swapcolor rendering routine
   (setup, per-pixel loop, teardown).

   Macro parameters:
     HFLIP    Assembly-time flag; when non-zero the extra pointer adjustment
              guarded by the .if below is emitted before the loop.
     OUT_DIR  Immediate added to the output pointer r5 after each pixel is
              stored (the entry point passes 2 or -2).

   Registers as used by the visible code:
     r8   On entry, points to consecutive command fields, read in order as
          one 32-bit value then three 16-bit values; it is then overwritten
          with the precomputed xor value.
     r3   Input pointer, read one byte at a time with post-increment.
     r5   Output pointer, written one 16-bit word at a time.
     r2, r6  Only touched in the HFLIP adjustment; presumably the row width
          in pixels and an output stride consumed by END -- TODO confirm
          against image_macros.S.
     r10  Saved on the stack (r15) before the loop and restored after it.

   START, END and EPILOGUE are not defined in this file (they presumably
   come from image_macros.S); the numeric labels 2: and 3: are not
   referenced here and are presumably the loop bounds those macros expect
   -- TODO confirm. */
.macro GEN_SWAPCOLOR_LOOP HFLIP, OUT_DIR
|
|
mov.l @r8+, r9 /* r9 = cmd.palette; r8 advances by 4 */
|
|
mov.w @r8+, r0 /* cmd.edge_2 (don't care): loaded only to step r8 by 2 */
|
|
mov.w @r8+, r7 /* r7 = cmd.color_1 */
|
|
mov.l r10, @-r15 /* Push r10; it is popped again after END */
|
|
exts.b r7, r7 /* Sign-extend the low byte so r7 compares equal to the sign-extended bytes loaded by mov.b in the loop */
|
|
mov r7, r0 /* r0 = color_1, about to become a palette offset */
|
|
mov.w @r8, r8 /* r8 = cmd.color_2 (the command pointer is no longer needed) */
|
|
add r0, r0 /* r0 = 2 * color_1: byte offset of a 16-bit palette entry */
|
|
mov.w @(r0, r9), r0 /* r0 = palette[color_1]; the index is signed, so r9 presumably points at the entry for index 0 with entries on both sides -- TODO confirm */
|
|
xor r0, r8 /* r8 = palette[color_1] ^ color_2, the precomputed (x ^ y) */
|
|
|
|
.if \HFLIP
|
|
add #-2, r5 /* r5 -= 2 */
|
|
mov r2, r0
|
|
shll r0 /* r0 = 2 * r2 */
|
|
add r0, r5 /* Net effect: r5 += 2 * r2 - 2; presumably the last pixel of the output row -- TODO confirm */
|
|
shll r0 /* r0 = 4 * r2 */
|
|
add r0, r6 /* r6 += 4 * r2; presumably compensates the stride for walking the row backwards -- TODO confirm */
|
|
.endif
|
|
|
|
START
|
|
|
|
2: mov.b @r3+, r0 /* r0 = next input byte c (sign-extended); r3 advances by 1 */
|
|
cmp/eq r0, r7 /* T = (c == color_1) */
|
|
add r0, r0 /* r0 = 2 * c (palette byte offset); add leaves T untouched */
|
|
subc r10, r10 /* r10 = r10 - r10 - T = -T: all ones on a match, zero otherwise */
|
|
mov.w @(r0, r9), r0 /* r0 = palette[c] */
|
|
and r8, r10 /* r10 = (palette[color_1] ^ color_2) on a match, else 0 */
|
|
xor r10, r0 /* On a match palette[c] == palette[color_1], so r0 becomes color_2; otherwise r0 stays palette[c] */
|
|
mov.w r0, @r5 /* Store the resulting 16-bit pixel */
|
|
3: add #\OUT_DIR, r5 /* Step the output pointer by OUT_DIR bytes */
|
|
|
|
|
|
END
|
|
|
|
mov.l @r15+, r10 /* Pop the r10 value pushed during setup */
|
|
EPILOGUE
|
|
.endm
|
|
|
|
/* Entry point for the P8 swapcolor renderer. It tests bit 0 of r0 and runs
   one of two expansions of GEN_SWAPCOLOR_LOOP: the first with HFLIP=0 and
   an output step of +2, the second (label 9) with HFLIP=1 and a step of -2.
   r0 is not set in this file; presumably the caller stores a flag there
   whose bit 0 requests the horizontally flipped variant -- TODO confirm.
   The first expansion must not fall through into label 9, which relies on
   EPILOGUE (defined outside this file) returning -- TODO confirm. */
_gint_image_p8_swapcolor:
|
|
tst #1, r0 /* T = 1 when bit 0 of r0 is clear */
|
|
bf 9f /* T = 0 (bit 0 set): jump to the HFLIP variant at 9 */
|
|
|
|
GEN_SWAPCOLOR_LOOP 0, 2 /* Bit 0 clear: HFLIP off, output pointer steps by +2 */
|
|
9: GEN_SWAPCOLOR_LOOP 1, -2 /* Bit 0 set: HFLIP on, output pointer steps by -2 */
|