azrp: faster triangle shader in assembler (speedup ≈ ×2)
This commit is contained in:
parent
8badb9f9cc
commit
cc66d3a520
|
@ -1 +1,164 @@
|
|||
/* empty for now */
|
||||
/* Export the shader entry point defined below. */
.global _azrp_shader_triangle
|
||||
.align 4
|
||||
|
||||
/* Symbolic names for the registers that stay live across the pixel loop.
   r6 is both the incoming fragment pointer and the running pixel pointer;
   r7 is overwritten with the color loaded from the command. */
#define _frag r6
|
||||
#define _color r7
|
||||
/* Running values of the three coordinates tested per pixel. */
#define _u r8
|
||||
#define _v r9
|
||||
#define _w r10
|
||||
/* Amounts added to _u/_v/_w after each pixel (one step in x). */
#define _du_x r11
|
||||
#define _dv_x r12
|
||||
#define _dw_x r13
|
||||
|
||||
/* Register roles inside _azrp_shader_triangle.
   r0: (temporary)
|
||||
r1: y counter
|
||||
r2: (temporary)
|
||||
r3: x_max - x_min + 1
|
||||
r4: azrp_width * 2
|
||||
r5: cmd
|
||||
r6: frag
|
||||
r7: color
|
||||
r8-r10: u-w
|
||||
r11-r13: d*_x
|
||||
r14: #-31 (for NULL canceling) */
|
||||
/* _azrp_shader_triangle: render the rows of one triangle command into the
   current fragment, then update the command for the next fragment.

   Inputs, as used by the code below:
     r4  row width in pixels (doubled on entry to get a byte stride; pixels
         are written as 16-bit words)
     r5  pointer to the command
     r6  pointer to the fragment
   NOTE(review): the C counterpart in this commit takes (uniforms0, command0,
   fragment0); their mapping to r4/r5/r6 is assumed from the calling
   convention -- confirm.

   For each pixel of each row, the color is stored at the pixel address when
   none of u, v, w is negative; otherwise the store is redirected to address
   0 ("NULL canceling"). NOTE(review): this relies on a 16-bit store to
   address 0 being harmless on the target -- confirm.

   Modifies r0-r7, MACL and the repeat registers; r8-r14 are saved on the
   stack and restored before returning. The command is updated in place:
   longwords at offsets 12/16/20 receive the final u/v/w, byte 1 is zeroed,
   byte 2 is decreased by byte 3, and byte 3 becomes
   min(new byte 2, azrp_frag_height).

   Instructions are laid out in pairs, padded with nop; presumably to match
   the CPU's dual-issue rules -- keep the pairing when editing. */
_azrp_shader_triangle:
|
||||
mov.w @r5+, r0 /* y (low byte; the high byte is dropped by extu.b below) */
|
||||
shll r4 /* r4 = width * 2 = bytes per row */
|
||||
|
||||
mov.w @r5+, r1 /* height_frag (y counter), low byte of the word */
|
||||
nop
|
||||
|
||||
mov.w @r5+, r2 /* x_min */
|
||||
extu.b r0, r0 /* r0 = y */
|
||||
|
||||
mov.w @r5+, r3 /* x_max */
|
||||
extu.b r1, r1 /* r1 = number of rows to draw */
|
||||
|
||||
mov.w @r5+, _color /* color */
|
||||
mulu.w r4, r0 /* MACL = row stride * y */
|
||||
|
||||
add #2, r5 /* skip 2 more bytes; r5 is now 12 bytes into the command */
|
||||
nop
|
||||
|
||||
mov.l r8, @-r15 /* save r8-r14 (pushes are interleaved with setup) */
|
||||
sub r2, r3
|
||||
|
||||
mov.l r9, @-r15
|
||||
add #1, r3 /* r3 = x_max - x_min + 1 = pixels per row */
|
||||
|
||||
mov.l r10, @-r15
|
||||
sts macl, r0 /* r0 = byte offset of the first row */
|
||||
|
||||
mov.l r11, @-r15
|
||||
add r0, _frag
|
||||
|
||||
ldrs 2f /* repeat-loop start address = label 2 */
|
||||
add r2, _frag
|
||||
|
||||
ldre 3f /* repeat-loop end address = label 3 */
|
||||
add r2, _frag /* added twice: _frag += 2 * x_min (2 bytes per pixel) */
|
||||
|
||||
mov.l r12, @-r15
|
||||
sub r3, r4
|
||||
|
||||
mov.l r13, @-r15
|
||||
sub r3, r4 /* subtracted twice: r4 = row stride - 2 * pixels per row */
|
||||
|
||||
mov.l r14, @-r15
|
||||
mov #-31, r14 /* shift count for shad: arithmetic right shift by 31 */
|
||||
|
||||
mov.l @r5, _u /* initial u, v, w, then the three per-pixel steps */
|
||||
mov.l @(4, r5), _v
|
||||
mov.l @(8, r5), _w
|
||||
mov.l @(12, r5), _du_x
|
||||
mov.l @(16, r5), _dv_x
|
||||
mov.l @(20, r5), _dw_x
|
||||
|
||||
/* Loop on y: one iteration per row, r1 counts the rows left */
|
||||
1: ldrc r3 /* repeat count = pixels per row */
|
||||
nop
|
||||
|
||||
/* Loop on x: hardware repeat loop from label 2 to label 3 */
|
||||
2: mov _u, r0
|
||||
or _v, r0
|
||||
|
||||
or _w, r0 /* sign bit of r0 is set if any of u, v, w is negative */
|
||||
nop
|
||||
|
||||
shad r14, r0 /* r0 = -1 if any is negative, 0 otherwise */
|
||||
nop
|
||||
|
||||
not r0, r0 /* r0 = 0 if any is negative, all ones otherwise */
|
||||
nop
|
||||
|
||||
and _frag, r0 /* r0 = pixel address, or 0 when the pixel is skipped */
|
||||
nop
|
||||
|
||||
add #2, _frag /* advance to the next pixel */
|
||||
nop
|
||||
|
||||
add _du_x, _u
|
||||
mov.w _color, @r0 /* store the color (goes to address 0 when skipped) */
|
||||
|
||||
add _dv_x, _v
|
||||
nop
|
||||
|
||||
add _dw_x, _w
|
||||
3: nop
|
||||
|
||||
/* Sloooow: per-row bookkeeping, outside the repeat loop */
|
||||
add r4, _frag /* move to the first pixel of the next row */
|
||||
mov.l @(24, r5), r0 /* row step for u (presumably du_row on the C side) */
|
||||
|
||||
dt r1 /* decrement the row counter; T = 1 when it reaches 0 */
|
||||
mov.l @(28, r5), r2 /* row step for v */
|
||||
|
||||
add r0, _u
|
||||
mov.l @(32, r5), r0 /* row step for w */
|
||||
|
||||
add r2, _v
|
||||
nop
|
||||
|
||||
bf.s 1b /* next row while T = 0; the following add is the delay slot */
|
||||
add r0, _w
|
||||
|
||||
add #-12, r5 /* r5 back to the start of the command */
|
||||
mov #0, r0
|
||||
|
||||
/* 1 cycle lost */
|
||||
|
||||
mov.l _u, @(12, r5) /* write u, v, w back for the next fragment */
|
||||
mov.l _v, @(16, r5)
|
||||
mov.l _w, @(20, r5)
|
||||
mov.b r0, @(1, r5) /* byte 1 = 0 (matches "cmd->y = 0" on the C side) */
|
||||
|
||||
/* Slooow: update the two height bytes for the next fragment */
|
||||
mov.b @(3, r5), r0
|
||||
extu.b r0, r0
|
||||
mov r0, r2 /* r2 = byte 3 (rows handled in this fragment) */
|
||||
mov.b @(2, r5), r0
|
||||
extu.b r0, r0
|
||||
sub r2, r0 /* r0 = byte 2 - byte 3 (rows still to draw) */
|
||||
mov.b r0, @(2, r5)
|
||||
|
||||
mov.l .azrp_frag_height, r1 /* address of azrp_frag_height from the literal below */
|
||||
mov.l @r1, r1 /* r1 = azrp_frag_height */
|
||||
|
||||
cmp/ge r0, r1 /* T = (r1 >= r0), signed */
|
||||
bt 4f /* remaining rows fit in one fragment: keep r0 */
|
||||
mov r1, r0 /* otherwise clamp to azrp_frag_height */
|
||||
|
||||
4: mov.b r0, @(3, r5) /* byte 3 = min(remaining rows, azrp_frag_height) */
|
||||
|
||||
mov.l @r15+, r14 /* restore r14-r8, in reverse order of the pushes */
|
||||
mov.l @r15+, r13
|
||||
mov.l @r15+, r12
|
||||
mov.l @r15+, r11
|
||||
mov.l @r15+, r10
|
||||
mov.l @r15+, r9
|
||||
rts
|
||||
mov.l @r15+, r8 /* delay slot of rts */
|
||||
|
||||
/* Literal: address of the azrp_frag_height variable, loaded PC-relative above */
.balign 4
|
||||
.azrp_frag_height:
|
||||
.long _azrp_frag_height
|
||||
|
|
|
@ -29,25 +29,29 @@ struct command {
|
|||
uint8_t shader_id;
|
||||
/* Local y coordinate of the first line in the fragment */
|
||||
uint8_t y;
|
||||
/* Number of lines to render total, including this fragment */
|
||||
uint8_t height_total;
|
||||
/* Number of lines to render on the current fragment */
|
||||
uint8_t height_frag;
|
||||
/* Number of lines to render total, including this fragment */
|
||||
uint8_t height_total;
|
||||
/* Rectangle along the x coordinates (x_max included) */
|
||||
uint16_t x_min, x_max;
|
||||
/* Color */
|
||||
uint16_t color;
|
||||
uint16_t _;
|
||||
|
||||
/* Initial barycentric coordinates */
|
||||
int u0, v0, w0;
|
||||
/* Variation of each coordinate for a movement in x/y */
|
||||
/* Variation of each coordinate for a movement in x */
|
||||
int du_x, dv_x, dw_x;
|
||||
int du_y, dv_y, dw_y;
|
||||
/* Variation of each coordinate for a movement in y while canceling each row's
|
||||
movements in x */
|
||||
int du_row, dv_row, dw_row;
|
||||
};
|
||||
|
||||
//---
|
||||
|
||||
// TODO: Write in assembler
|
||||
void azrp_shader_triangle(void *uniforms0, void *command0, void *fragment0)
|
||||
void azrp_shader_triangle_2(void *uniforms0, void *command0, void *fragment0)
|
||||
{
|
||||
int width = (int)uniforms0;
|
||||
struct command *cmd = command0;
|
||||
|
@ -55,12 +59,11 @@ void azrp_shader_triangle(void *uniforms0, void *command0, void *fragment0)
|
|||
|
||||
frag += cmd->x_min + width * cmd->y;
|
||||
|
||||
int u, v, w;
|
||||
int u = cmd->u0;
|
||||
int v = cmd->v0;
|
||||
int w = cmd->w0;
|
||||
|
||||
for(int y = 0; y < cmd->height_frag; y++) {
|
||||
u = cmd->u0;
|
||||
v = cmd->v0;
|
||||
w = cmd->w0;
|
||||
|
||||
for(int x = cmd->x_min; x <= cmd->x_max; x++) {
|
||||
if((u | v | w) > 0) {
|
||||
|
@ -73,11 +76,15 @@ void azrp_shader_triangle(void *uniforms0, void *command0, void *fragment0)
|
|||
}
|
||||
|
||||
frag += width;
|
||||
cmd->u0 += cmd->du_y;
|
||||
cmd->v0 += cmd->dv_y;
|
||||
cmd->w0 += cmd->dw_y;
|
||||
u += cmd->du_row;
|
||||
v += cmd->dv_row;
|
||||
w += cmd->dw_row;
|
||||
}
|
||||
|
||||
cmd->u0 = u;
|
||||
cmd->v0 = v;
|
||||
cmd->w0 = w;
|
||||
|
||||
/* Prepare next fragment */
|
||||
cmd->y = 0;
|
||||
cmd->height_total -= cmd->height_frag;
|
||||
|
@ -100,6 +107,9 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
|
|||
int min_y = max(0, min(y1, min(y2, y3)));
|
||||
int max_y = min(azrp_height-1, max(y1, max(y2, y3)));
|
||||
|
||||
if(min_x >= max_x || min_y >= max_y)
|
||||
return;
|
||||
|
||||
/* TODO: Have a proper way to do optimized-division by azrp_frag_height
|
||||
TODO: Also account for first-fragment offset */
|
||||
int frag_first = min_y >> 4;
|
||||
|
@ -119,13 +129,18 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
|
|||
/* Vector products for barycentric coordinates */
|
||||
cmd.u0 = edge_start(x2, y2, x3, y3, min_x, min_y);
|
||||
cmd.du_x = y3 - y2;
|
||||
cmd.du_y = x2 - x3;
|
||||
int du_y = x2 - x3;
|
||||
cmd.v0 = edge_start(x3, y3, x1, y1, min_x, min_y);
|
||||
cmd.dv_x = y1 - y3;
|
||||
cmd.dv_y = x3 - x1;
|
||||
int dv_y = x3 - x1;
|
||||
cmd.w0 = edge_start(x1, y1, x2, y2, min_x, min_y);
|
||||
cmd.dw_x = y2 - y1;
|
||||
cmd.dw_y = x1 - x2;
|
||||
int dw_y = x1 - x2;
|
||||
|
||||
int columns = max_x - min_x + 1;
|
||||
cmd.du_row = du_y - columns * cmd.du_x;
|
||||
cmd.dv_row = dv_y - columns * cmd.dv_x;
|
||||
cmd.dw_row = dw_y - columns * cmd.dw_x;
|
||||
|
||||
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
|
||||
prof_leave(azrp_perf_cmdgen);
|
||||
|
|
Loading…
Reference in New Issue