/* Azur/azur/src/gint/shaders/image_rgb16_swapcolor.S
   (117 lines, 1.8 KiB; labelled "ArmAsm" by the file viewer, although the
   instructions below are SuperH assembly) */

.global _azrp_image_shader_rgb16_swapcolor
#include "image_macros.S"
/* RGB16 SWAPCOLOR, Azur version: by branchless xor selection.
The xor selection is explained in gint's version of P8 SWAPCOLOR. This
version's selection is slightly simpler because we don't have to index the
palette to find the source color. We use a 2-unrolled 2-stage-pipeline loop
to optimize for CPU speed.
r7: Right edge pointer
r8: Right edge value
r9: cmd.color_1
r10: Holds (x ^ y) & -(c == x) during selection
r11: cmd.color_1 ^ cmd.color_2 (ie. x ^ y)
r12: Right edge stride
r13: [temporary] */
/* GEN_SWAPCOLOR_LOOP: emits one complete variant of the shader body, i.e.
   register setup, the pipelined pixel loop, register restore and EPILOGUE.

   Macro parameters:
     HFLIP    nonzero to assemble the extra setup block that moves the output
              pointer to the far end of the row and enlarges the row stride
     OUT_DIR  signed immediate added to r5 between the two stores of a pair
              (4 and -4 at the two expansions in this file)
     TMP1     register receiving the first pixel of each pair
     TMP2     register receiving the second pixel of each pair
     OFF1     offset argument handed to mov.wv for the TMP1 store
     OFF2     offset argument handed to mov.wv for the TMP2 store

   Registers used, as far as this macro shows:
     r2       shifted right by one on entry, then used to derive byte counts
     r3       input pointer, read with post-increment
     r4       adjusted by -2 (see the existing comment on that line)
     r5       output pointer, passed to mov.wv and stepped by OUT_DIR
     r6       added into r12; in the HFLIP variant also enlarged by 8 * r2
     r8       on entry a pointer from which edge_2, color_1 and color_2 are
              read; later overwritten with the saved right-edge pixel
     r10-r13  saved on the stack here and restored before EPILOGUE
     r0       scratch in the HFLIP setup; also one of TMP1/TMP2 at the
              expansions in this file

   START, END, EPILOGUE and mov.wv are not defined in this file.
   NOTE(review): presumably they come from image_macros.S, START/END form the
   row loop and use the numeric labels 2 and 3 below as the bounds of the
   inner pixel loop, and mov.wv stores a 16-bit value at (offset + pointer).
   Confirm there before changing label placement or instruction order.

   NOTE(review): the nop instructions look like padding that keeps the
   intended instruction pairing/alignment for the "optimize for CPU speed"
   goal stated in the file header; do not remove them without measuring. */
.macro GEN_SWAPCOLOR_LOOP HFLIP, OUT_DIR, TMP1, TMP2, OFF1, OFF2
/* --- Setup: load command fields, save callee registers, derive pointers.
   The pushes are interleaved with the arithmetic; push order is
   r11, r10, r12, r13 and the pops at the end mirror it. --- */
mov.w @r8+, r7 /* cmd.edge_2 */
shlr r2 /* r2 >>= 1; the loop below handles two pixels per iteration */
mov.l r11, @-r15 /* push r11 */
add #-2, r4 /* Input stride compensation for pipelining: each row reads one pixel more than it writes (1 load before label 2, 2 loads per iteration) */
mov.w @r8+, r9 /* cmd.color_1 */
shll r7 /* r7 = edge_2 * 2, i.e. a byte offset for 16-bit pixels */
mov.l r10, @-r15 /* push r10 */
add r5, r7 /* r7 = r5 + 2 * edge_2: right edge pointer (uses r5 before it is adjusted) */
mov.l r12, @-r15 /* push r12 */
add #-2, r5 /* Predecrement, see output logic */
mov.w @r8+, r11 /* cmd.color_2 */
mov r2, r12
mov.l r13, @-r15 /* push r13 */
shll2 r12 /* r12 = 4 * r2 */
add r6, r12 /* r12 = 4 * r2 + r6: right edge stride (uses r6 before the HFLIP adjustment) */
nop
xor r9, r11 /* r11 = color_1 ^ color_2 (x ^ y) */
nop
/* HFLIP only: r5 += 4 * r2 and r6 += 8 * r2, using r0 as scratch.
   NOTE(review): presumably this starts each row at its last pixel pair and
   makes the stride compensate for r5 walking backwards -- confirm with END. */
.if \HFLIP
mov r2, r0
shll2 r0 /* r0 = 4 * r2 */
add r0, r5
nop
shll r0 /* r0 = 8 * r2 */
nop
add r0, r6
nop
.endif
START
/* --- Pipeline fill: first pixel of the row and its comparison. --- */
mov.w @r3+, \TMP1
nop
mov.w @r7, r8 /* Save right edge */
nop
cmp/eq \TMP1, r9 /* T = (pixel == color_1); both were loaded with mov.w, so their sign extension matches */
nop
/* --- Steady state: between labels 2 and 3 each pass selects and stores two
   pixels and preloads/compares the first pixel of the next pair. --- */
2: subc r10, r10 /* r10 = r10 - r10 - T = -T: all ones if the pixel matched, else 0, whatever r10 held */
nop
and r11, r10 /* r10 = (x ^ y) & -(c == x) */
mov.w @r3+, \TMP2 /* load second pixel of the pair */
xor r10, \TMP1 /* matched: x ^ (x ^ y) = y; otherwise unchanged */
nop
mov.wv \TMP1 \OFF1 r5
cmp/eq \TMP2, r9 /* T for the second pixel */
add #\OUT_DIR, r5 /* step the output pointer between the two stores */
nop
subc r10, r10 /* r10 = -T for the second pixel */
nop
and r11, r10
mov.w @r3+, \TMP1 /* preload the first pixel of the next pair (the extra read compensated in r4) */
xor r10, \TMP2
nop
cmp/eq \TMP1, r9 /* T for the next pass, consumed by the subc at label 2 */
3: mov.wv \TMP2 \OFF2 r5
/* --- Per-row tail: put back the pixel saved from the right edge and move
   the edge pointer by the precomputed stride. --- */
mov.w r8, @r7 /* Restore right edge */
add r12, r7 /* advance the right edge pointer by the right edge stride */
END
/* --- Restore the registers pushed during setup, in reverse order. --- */
mov.l @r15+, r13
mov.l @r15+, r12
mov.l @r15+, r10
mov.l @r15+, r11
EPILOGUE
.endm
/* Shader entry point. Dispatches on bit 0 of r0: tst sets T when the bit is
   clear, so bf (branch if T is false) jumps to label 9 when the bit is set
   and execution falls through into the first expansion otherwise.
   NOTE(review): r0 bit 0 is presumably the horizontal-flip flag prepared by
   the caller, and EPILOGUE presumably returns so that the first expansion
   never runs into label 9 -- confirm in image_macros.S. */
_azrp_image_shader_rgb16_swapcolor:
tst #1, r0
bf 9f
/* Bit clear: HFLIP=0, output pointer stepped by +4 per pixel pair. r0 is free
   to serve as TMP1 because the flag has already been tested. */
GEN_SWAPCOLOR_LOOP 0, 4, r0, r13, 2, 0
/* Bit set: HFLIP=1, output pointer stepped by -4 per pixel pair, with the
   temporaries and store offsets swapped. In both expansions the register
   paired with offset 2 is r0. NOTE(review): presumably because the SuperH
   displacement store form of mov.w only accepts r0 as its source -- confirm
   against the definition of mov.wv. */
9: GEN_SWAPCOLOR_LOOP 1, -4, r13, r0, 0, 2