/* NOTE(review): the #include directive below has no operand in the text under
   review; the header name seems to have been lost. It is presumably the
   configuration header that defines GINT_RENDER_RGB (likely gint/config.h).
   TODO: confirm and restore. */
#include
#if GINT_RENDER_RGB

.global _image_linear_rgb16
.global _image_linear_p8

/* The loop nest for the rotation + scaling code, manually optimized.

   Entry (as used by the code below):
     r4: input_pixels (base address of the source pixels)
     r5: output_pixels (address of the first destination pixel)
     r6: pointer to the map parameters, read as twelve consecutive 32-bit
         words: src_w, src_h, dst_w, dst_h, src_stride, dst_stride, u, v,
         dx_u, dx_v, dy_u, dy_v

   Register allocation inside the loops:
     r0:       (temporary) source index, then reloaded output stride
     r1:       u
     r2, r3:   dx_u, dx_v
     r4:       input_pixels
     r5:       output_pixels (moving write pointer)
     r6, r7:   drow_u, drow_v (loaded from the dy_u / dy_v fields)
     r8:       line counter (starts at dst_h)
     r9:       dst_w
     r10:      src_w << 16 (for bound checks)
     r11:      src_h << 16 (for bound checks)
     r12:      v
     r13:      (temporary)
     r14:      src_stride (for index access to input_pixels)
     @r15:     dst_stride - DEPTH * dst_w (top of stack while the loops run)

   r8-r14 are saved on entry and restored before returning.

   This loop maintains the value of (u,v) at each pixel by adding (dx_u, dx_v)
   every pixel and (drow_u, drow_v) every row. The row increment is added
   after the per-pixel increments of that row have already been applied, so
   the values stored in the dy_u / dy_v fields must account for that
   (presumably prepared by the caller; confirm against the C side).

   u and v are treated as 16.16 fixed-point. For each position, the loop
   checks whether 0 <= u < src_w and 0 <= v < src_h as fixed-point; if yes,
   the pixel at byte offset (int)v * src_stride + (int)u * DEPTH from
   input_pixels is copied to the output; otherwise, the output pixel is
   skipped (left untouched) and only the output pointer advances.

   Macro parameters:
     MEM:   load/store mnemonic for one pixel (mov.w or mov.b)
     DEPTH: pixel size in bytes (2 or 1)

   NOTE(review): the nop instructions look like scheduling/alignment padding
   for the hand-optimized loop; keep the instruction order as is unless the
   timing is re-measured. */

.macro GEN_LINEAR_LOOP MEM, DEPTH
	/* Save the registers this routine overwrites */
	mov.l	r8, @-r15
	mov.l	r9, @-r15
	mov.l	r10, @-r15
	mov.l	r11, @-r15
	mov.l	r12, @-r15
	mov.l	r13, @-r15
	mov.l	r14, @-r15

	/* Load the map parameters in field order through r6 */
	mov.l	@r6+, r10	/* map.src_w */
	mov.l	@r6+, r11	/* map.src_h */
	mov.l	@r6+, r9	/* map.dst_w */
	mov.l	@r6+, r8	/* map.dst_h */
	mov.l	@r6+, r14	/* map.src_stride */
	mov.l	@r6+, r0	/* map.dst_stride */
	mov.l	@r6+, r1	/* map.u */
	mov.l	@r6+, r12	/* map.v */
	mov.l	@r6+, r2	/* map.dx_u */
	mov.l	@r6+, r3	/* map.dx_v */

	/* r6 now points at the dy_u field; dy_v is read first because the
	   load of dy_u overwrites the pointer itself */
	mov.l	@(4, r6), r7	/* map.dy_v (replaced with drow_v) */
	shll16	r10		/* src_w as 16.16, comparable with u */
	mov.l	@r6, r6		/* map.dy_u (replaced with drow_u) */
	shll16	r11		/* src_h as 16.16, comparable with v */

	/* Compute the output stride as map.dst_stride - (DEPTH * map.dst_w).
	   Interleaved with it, ldrs/ldre set the start and end addresses of
	   the hardware repeat loop to labels 1 and 2. */
	ldrs	1f
	sub	r9, r0
	ldre	2f

	/* Second subtraction of dst_w only for 2-byte pixels; the nop keeps
	   the instruction count the same in the other case */
	.if \DEPTH == 2
	sub	r9, r0
	.else
	nop
	.endif

	/* Keep the output stride on the stack: r0 is reused as a temporary
	   inside the pixel loop */
	mov.l	r0, @-r15
	nop

	/* Row loop. ldrc arms the repeat counter with dst_w, so the range
	   between labels 1 and 2 runs once per output pixel of the row.
	   NOTE(review): dst_h is only tested after a full row has been
	   processed, and the behavior for dst_w == 0 depends on the
	   repeat-counter semantics; presumably callers never pass an empty
	   destination. TODO: confirm. */
4:	ldrc	r9
	nop

	/* Bound checks. cmp/hs is an unsigned comparison, so a negative u or
	   v (large when read as unsigned) is rejected by the same test as a
	   value past the upper bound. */
1:	cmp/hs	r10, r1		/* u >= src_w << 16 ? */
	nop
	bt	3f		/* out of bounds: skip this pixel */
	cmp/hs	r11, r12	/* v >= src_h << 16 ? */
	bt	3f		/* out of bounds: skip this pixel */

	/* r13 = (int)v * src_stride. swap.w brings the integer part of v into
	   the low 16 bits, which is what mulu.w multiplies.
	   NOTE(review): mulu.w is a 16x16 unsigned multiply, so src_stride is
	   assumed to fit in 16 bits; confirm against the caller. */
	swap.w	r12, r13
	mov	r1, r0
	mulu.w	r13, r14
	shlr16	r0		/* r0 = (int)u */
	sts	macl, r13

	/* Scale the column index to bytes for 2-byte pixels */
	.if \DEPTH == 2
	shll	r0
	nop
	.endif

	/* r0 = byte offset of the source pixel; copy one pixel through r13 */
	add	r13, r0
	\MEM	@(r0, r4), r13
	\MEM	r13, @r5

	/* Common tail for drawn and skipped pixels: advance the output
	   pointer by one pixel and step (u, v) by (dx_u, dx_v) */
3:	add	#\DEPTH, r5
	add	r2, r1
	nop
	add	r3, r12

	/* Label 2 is the repeat-end address given to ldre above */
2:	nop

	/* End of row: count down the remaining lines; dt sets T when r8
	   reaches zero, and the following instructions up to bf.s leave T
	   unchanged */
	dt	r8
	mov.l	@r15, r0	/* Stride between lines, excluding content */

	/* Step (u, v) by (drow_u, drow_v) for the next row */
	add	r6, r1
	nop
	add	r7, r12
	nop

	/* Loop while lines remain; the delay slot moves the output pointer to
	   the start of the next row (executed whether or not the branch is
	   taken) */
	bf.s	4b
	add	r0, r5

	/* Drop the stacked output stride, then restore the saved registers in
	   reverse order; the last restore sits in the delay slot of rts */
	mov.l	@r15+, r0
	mov.l	@r15+, r14
	mov.l	@r15+, r13
	mov.l	@r15+, r12
	mov.l	@r15+, r11
	mov.l	@r15+, r10
	mov.l	@r15+, r9
	rts
	mov.l	@r15+, r8
.endm

/* Variant for 2-byte pixels: copies pixels with mov.w, DEPTH = 2 */
_image_linear_rgb16:
	GEN_LINEAR_LOOP mov.w, 2

/* Variant for 1-byte pixels: copies pixels with mov.b, DEPTH = 1 */
_image_linear_p8:
	GEN_LINEAR_LOOP mov.b, 1

#endif