120 lines
2.0 KiB
ArmAsm
120 lines
2.0 KiB
ArmAsm
.global _azrp_image_shader_p4_normal
|
|
#include "image_macros.S"
|
|
|
|
/* P4 Opaque rendering, Azur version: trivial with loop transforms.

   This is a pretty direct loop with no difficult tricks involved; it expands
   on P8 by adding another edge pointer. The main change is the decoding logic,
   which now involves only a single byte load for every two pixels, but more
   arithmetic to extract the nibbles.

   All the loops in Azur's P4 functions are obvious EX chains, and thus any
   optimization would need to simplify the arithmetic to gain any half-cycles.

   r0:  [temporary]
   r7:  Right edge pointer
   r8:  Right edge value
   r9:  Palette
   r10: Left edge pointer
   r11: Left edge value
   r12: Edge stride
   r13: [temporary]
   r14: [temporary] */
|
|
|
|
/* GEN_NORMAL_LOOP: emits one complete variant of the P4 normal shader body:
   register setup, the nibble-decoding pixel loop, and register restore.

   Parameters:
     HFLIP    Non-zero assembles the extra r5/r6 adjustments inside the .if
              block below.
     OUT_DIR  Immediate added to the output pointer r5 once per input byte,
              i.e. once per two pixels.
     TMP1     Register holding the current input byte, shifted left by 1.
     TMP2     Register holding the shift count (-4) and the mask (0x1e).
     OFF1     Displacement from r5 (before OUT_DIR is added) at which the
              low-nibble pixel is stored.
     OFF2     Displacement from r5 (after OUT_DIR is added) at which the
              high-nibble pixel is stored.

   Registers used as inputs by the code below: r2, r3 (input bytes are read
   through @r3+), r4, r5 (pixels are written relative to it), r6, r8 (read
   with post-increment for the palette and edge_2 fields) and r10.
   NOTE(review): their exact meaning on entry is set up outside this view,
   presumably by image_macros.S and the caller -- confirm there.

   START, END and EPILOGUE are not defined in this view; presumably they come
   from image_macros.S, with START/END wrapping the per-row loop and labels
   2 and 3 marking the first and last instruction of the per-byte loop --
   TODO confirm.

   Instructions are laid out in pairs; the nop instructions appear to be
   fillers that keep this pairing. Do not reorder or remove anything without
   re-checking the timing.

   NOTE(review): the bare `|` lines are not comments in this file's syntax
   and look like extraction residue; they are left untouched here -- confirm
   against the upstream file. */
.macro GEN_NORMAL_LOOP HFLIP, OUT_DIR, TMP1, TMP2, OFF1, OFF2
|
|
shlr r2 /* r2 >>= 1; presumably pixels -> bytes (two pixels per byte), confirm */
|
|
nop
|
|
|
|
add r10, r10 /* r10 *= 2; becomes the left edge pointer once r5 is added below */
|
|
nop
|
|
|
|
mov.l @r8+, r9 /* cmd.palette */
|
|
mov r2, r0
|
|
|
|
mov.w @r8+, r7 /* cmd.edge_2 */
|
|
shll2 r0 /* r0 = 4 * r2: each loop iteration stores two 16-bit pixels */
|
|
|
|
mov.l r12, @-r15 /* Save r12; r15 is the stack pointer */
|
|
shll r7 /* r7 *= 2, same scaling as r10 above */
|
|
|
|
mov.l r11, @-r15 /* Save r11 */
|
|
add r5, r7 /* r7 = right edge pointer */
|
|
|
|
mov r0, r12
|
|
add r6, r12 /* r12 = r0 + r6 (edge stride); uses r6 before any HFLIP change */
|
|
|
|
mov.l r13, @-r15 /* Save r13 */
|
|
add r5, r10 /* r10 = left edge pointer */
|
|
|
|
mov.l r14, @-r15 /* Save r14 */
|
|
add #-4, r5 /* Bias r5 by -4; OFF1/OFF2 and OUT_DIR are chosen to absorb it */
|
|
|
|
/* The loop pre-loads the byte of the next iteration through @r3+, so one
   byte more than needed is consumed; r4 is reduced by one to make up for it. */
add #-1, r4 /* Input stride compensation for pipelining */
|
|
nop
|
|
|
|
.if \HFLIP /* Extra adjustments for the variant that walks r5 backwards */
|
|
add r0, r5 /* r5 += r0 */
|
|
nop
|
|
|
|
shll r0 /* r0 *= 2 */
|
|
nop
|
|
|
|
add r0, r6 /* r6 += r0, i.e. twice the amount just added to r5 */
|
|
nop
|
|
.endif
|
|
|
|
START
|
|
|
|
mov.b @r3+, \TMP1 /* Pre-load the first input byte */
|
|
mov #-4, \TMP2 /* Shift count for shld: negative means shift right by 4 */
|
|
|
|
mov.w @r7, r8 /* Save right edge */
|
|
nop
|
|
|
|
mov.w @r10, r11 /* Save left edge */
|
|
shll \TMP1 /* TMP1 = byte << 1, so nibbles index 2-byte palette entries */
|
|
|
|
2: mov \TMP1, r0
|
|
and #0x1e, r0 /* r0 = 2 * low nibble */
|
|
|
|
shld \TMP2, \TMP1 /* TMP1 >>= 4 (logical) */
|
|
mov #0x1e, \TMP2
|
|
|
|
mov.w @(r0,r9), r0 /* r0 = palette entry of the low nibble */
|
|
and \TMP2, \TMP1 /* TMP1 = 2 * high nibble; the mask also drops sign-extension bits */
|
|
|
|
mov.w r0, @(\OFF1,r5) /* Store the low-nibble pixel */
|
|
mov \TMP1, r0
|
|
|
|
mov.b @r3+, \TMP1 /* Pre-load the next input byte */
|
|
add #\OUT_DIR, r5 /* Move the output pointer by two pixels */
|
|
|
|
mov.w @(r0,r9), r0 /* r0 = palette entry of the high nibble */
|
|
mov #-4, \TMP2 /* Re-arm the shift count for the next iteration */
|
|
|
|
mov.w r0, @(\OFF2,r5) /* Store the high-nibble pixel */
|
|
3: shll \TMP1 /* Next byte << 1, same preparation as before label 2 */
|
|
|
|
mov.w r8, @r7 /* Restore right edge */
|
|
add r12, r7 /* Advance the right edge pointer by the edge stride */
|
|
|
|
mov.w r11, @r10 /* Restore left edge */
|
|
add r12, r10 /* Advance the left edge pointer by the edge stride */
|
|
|
|
END
|
|
|
|
/* Restore the registers pushed above, in reverse order. */
mov.l @r15+, r14
|
|
mov.l @r15+, r13
|
|
mov.l @r15+, r11
|
|
mov.l @r15+, r12
|
|
/* r10 is not pushed in this macro; presumably saved by a prologue outside
   this view -- TODO confirm. */
mov.l @r15+, r10
|
|
EPILOGUE
|
|
.endm
|
|
|
|
/* Entry point of the P4 normal shader. Tests bit 0 of r0 and runs one of the
   two expansions of GEN_NORMAL_LOOP:

     bit 0 clear: HFLIP=0, OUT_DIR=+4, OFF1=6, OFF2=0
                  (r5 moves forward; with the -4 bias applied in the macro,
                  the high-nibble pixel lands 2 bytes below the low-nibble
                  pixel)
     bit 0 set:   HFLIP=1, OUT_DIR=-4, OFF1=0, OFF2=6
                  (r5 moves backward; the low-nibble pixel lands 2 bytes below
                  the high-nibble pixel)

   NOTE(review): r0 is set up outside this view, presumably by the code that
   dispatches to this shader -- confirm what bit 0 encodes. */
_azrp_image_shader_p4_normal:
|
|
tst #1, r0 /* T = 1 when bit 0 of r0 is clear */
|
|
bf 9f /* Bit 0 set (T = 0): take the HFLIP expansion at label 9 */
|
|
|
|
/* Each expansion ends with EPILOGUE, which presumably returns, so this one
   should not fall through into label 9 -- confirm in image_macros.S. */
GEN_NORMAL_LOOP 0, 4, r13, r14, 6, 0
|
|
9: GEN_NORMAL_LOOP 1, -4, r13, r14, 0, 6
|