gint/src/image/image_linear.S

127 lines
2.2 KiB
ArmAsm

.global _image_linear_rgb16
.global _image_linear_p8
/* The loop nest for the rotation + scaling code, manually optimized.
r0, r1: (temporary), u
r2, r3: dx_u, dx_v
r4: input_pixels
r5: output_pixels
r6, r7: drow_u, drow_v
r8: line counter
r9: dst_w
r10: src_w << 16 (for bound checks)
r11: src_h << 16 (for bound checks)
r12: v
r13: (temporary)
r14: src_stride (for index access to input_pixels)
@r15: dst_stride - DEPTH * dst_w (gap between output rows, kept on the stack)
This loop maintains the value of (u,v) at each pixel by adding (dx_u, dx_v)
every pixel and (drow_u, drow_v) every row. For each position, it then
checks whether 0 <= u < src_w and 0 <= v < src_h as fixed-point; if
yes, the pixel at byte offset (int)v * src_stride + (int)u * DEPTH in
input_pixels is copied to the output; otherwise, the pixel is
skipped. */
/* GEN_LINEAR_LOOP MEM, DEPTH
   Expands to a complete function body (prologue, loop nest, epilogue, rts).
   MEM is the load/store mnemonic used for one pixel and DEPTH is the pixel
   size in bytes; the code below only special-cases DEPTH == 2.
   On entry: r4 = input_pixels, r5 = output_pixels, r6 = address of the map
   structure, whose 32-bit fields are read in the order shown below.
   r8-r14 are saved on the stack and restored before returning; r0-r7 are
   overwritten.
   The many nop instructions are presumably padding for instruction pairing
   and for the repeat-loop placement rules -- TODO confirm before moving or
   removing any of them. */
.macro GEN_LINEAR_LOOP MEM, DEPTH
/* Prologue: save every register from r8 to r14 that the loop overwrites. */
mov.l r8, @-r15
mov.l r9, @-r15
mov.l r10, @-r15
mov.l r11, @-r15
mov.l r12, @-r15
mov.l r13, @-r15
mov.l r14, @-r15
/* Load the map fields sequentially through r6 (post-increment). */
mov.l @r6+, r10 /* map.src_w */
mov.l @r6+, r11 /* map.src_h */
mov.l @r6+, r9 /* map.dst_w */
mov.l @r6+, r8 /* map.dst_h */
mov.l @r6+, r14 /* map.src_stride */
mov.l @r6+, r0 /* map.dst_stride */
mov.l @r6+, r1 /* map.u */
mov.l @r6+, r12 /* map.v */
mov.l @r6+, r2 /* map.dx_u */
mov.l @r6+, r3 /* map.dx_v */
/* The last two fields are read out of order: r7 first through a
   displacement, then r6 last because that load overwrites the map pointer. */
mov.l @(4, r6), r7 /* map.dy_v (replaced with drow_v) */
/* Shift the source dimensions left by 16 so that they compare directly
   against u and v, whose integer part sits in the upper 16 bits. */
shll16 r10
mov.l @r6, r6 /* map.dy_u (replaced with drow_u) */
shll16 r11
/* Compute the output stride as map.dst_stride - (DEPTH * map.dst_w) */
/* ldrs/ldre set the start (label 1) and end (label 2) addresses of the
   hardware repeat loop that handles one output row. dst_w is subtracted
   once here and a second time when DEPTH == 2. */
ldrs 1f
sub r9, r0
ldre 2f
.if \DEPTH == 2
sub r9, r0
.else
nop
.endif
/* Keep the row gap on the stack; it is reloaded at the end of each row. */
mov.l r0, @-r15
nop
/* Row loop entry: arm the repeat counter with dst_w for this row. */
4: ldrc r9
nop
/* Pixel loop. cmp/hs is an unsigned >= test, so T is set when u is at or
   past src_w << 16; a negative u reads as a very large unsigned value and
   is rejected by the same test (provided the limit itself is below 2^31). */
1: cmp/hs r10, r1
nop
bt 3f
/* Same bound check for v against src_h << 16. */
cmp/hs r11, r12
bt 3f
/* swap.w moves the integer part of v into the low half of r13; mulu.w then
   multiplies the low 16 bits of r13 and r14, giving (int)v * src_stride in
   MACL. Meanwhile r0 becomes (int)u. */
swap.w r12, r13
mov r1, r0
mulu.w r13, r14
shlr16 r0
sts macl, r13
/* Two-byte pixels: double (int)u to turn it into a byte offset. */
.if \DEPTH == 2
shll r0
nop
.endif
/* r0 = byte offset of the source pixel; copy it to the output position. */
add r13, r0
\MEM @(r0, r4), r13
\MEM r13, @r5
/* Reached for both copied and skipped pixels: advance the output pointer by
   one pixel and step (u, v) by (dx_u, dx_v). */
3: add #\DEPTH, r5
add r2, r1
nop
add r3, r12
/* Label 2 is the address given to ldre above (end of the repeat loop). */
2: nop
/* End of row: dt decrements the row counter and sets T when it reaches 0.
   The loads and adds that follow leave T untouched, so bf.s still tests the
   result of dt.
   NOTE(review): the counter is decremented before being tested, so
   dst_h = 0 would wrap around instead of exiting; presumably callers
   guarantee dst_h >= 1 -- confirm. */
dt r8
mov.l @r15, r0 /* Stride between lines, excluding content */
/* Apply the per-row correction (drow_u, drow_v) to (u, v). */
add r6, r1
nop
add r7, r12
nop
/* Loop to the next row while rows remain; the delay-slot add moves the
   output pointer over the row gap in either case. */
bf.s 4b
add r0, r5
/* Epilogue: drop the saved row gap, then restore r14 down to r8. The final
   restore runs in the delay slot of rts. */
mov.l @r15+, r0
mov.l @r15+, r14
mov.l @r15+, r13
mov.l @r15+, r12
mov.l @r15+, r11
mov.l @r15+, r10
mov.l @r15+, r9
rts
mov.l @r15+, r8
.endm
/* Exported entry point: the linear-map loop instantiated with mov.w and a
   pixel size of 2 bytes. The macro expansion ends with rts, so execution
   does not fall through into whatever follows. */
_image_linear_rgb16:
GEN_LINEAR_LOOP mov.w, 2
/* Exported entry point: the linear-map loop instantiated with mov.b and a
   pixel size of 1 byte. The macro expansion supplies the whole function
   body, including the final rts. */
_image_linear_p8:
GEN_LINEAR_LOOP mov.b, 1