diff --git a/azur/src/gint/shaders/triangle.S b/azur/src/gint/shaders/triangle.S
index 3c3684c..9718dab 100644
--- a/azur/src/gint/shaders/triangle.S
+++ b/azur/src/gint/shaders/triangle.S
@@ -1 +1,164 @@
-/* empty for now */
+.global _azrp_shader_triangle
+.align 4
+
+#define _frag r6
+#define _color r7
+#define _u r8
+#define _v r9
+#define _w r10
+#define _du_x r11
+#define _dv_x r12
+#define _dw_x r13
+
+/* Triangle shader: on each of height_frag rows, walks x_max - x_min + 1 pixels and stores color where none of u, v, w is negative. Registers: r0: (temporary)
+   r1: y counter (rows left in this fragment, counted down with dt)
+   r2: (temporary)
+   r3: x_max - x_min + 1 (pixels per row, used as the repeat count of the x loop)
+   r4: azrp_width * 2 (after the initial shll), later reduced by 2 * r3 to form the end-of-row skip
+   r5: cmd (post-incremented while reading the header, then kept at cmd + 12 during the loops)
+   r6: frag (address of the current pixel)
+   r7: color
+   r8-r10: u-w (current barycentric coordinates)
+   r11-r13: d*_x (per-pixel increments of u-w)
+   r14: #-31 (shift count that spreads the sign bit over the whole register, for NULL canceling) */
+_azrp_shader_triangle:
+	mov.w @r5+, r0 /* halfword at cmd + 0; its low byte is kept below as y (assumes big-endian field layout -- TODO confirm) */
+	shll r4 /* r4 <<= 1: row stride for 2-byte pixels */
+
+	mov.w @r5+, r1 /* height_frag (y counter) once reduced to its low byte below */
+	nop /* filler; instructions are written in pairs throughout (presumably for dual issue -- TODO confirm) */
+
+	mov.w @r5+, r2 /* x_min */
+	extu.b r0, r0 /* r0 = low byte of the first halfword (y) */
+
+	mov.w @r5+, r3 /* x_max */
+	extu.b r1, r1 /* r1 = low byte of the second halfword (height_frag) */
+
+	mov.w @r5+, _color /* color */
+	mulu.w r4, r0 /* MACL = (2 * width) * y */
+
+	add #2, r5 /* skip 2 more bytes (the padding field); r5 = cmd + 12 */
+	nop
+
+	mov.l r8, @-r15 /* push r8; r9-r14 are pushed below, interleaved with the setup */
+	sub r2, r3
+
+	mov.l r9, @-r15
+	add #1, r3 /* r3 = x_max - x_min + 1 */
+
+	mov.l r10, @-r15
+	sts macl, r0 /* r0 = 2 * width * y */
+
+	mov.l r11, @-r15
+	add r0, _frag /* frag += offset of row y */
+
+	ldrs 2f /* repeat-loop start address: label 2 */
+	add r2, _frag
+
+	ldre 3f /* repeat-loop end address: label 3 */
+	add r2, _frag /* x_min added twice: frag += 2 * x_min */
+
+	mov.l r12, @-r15
+	sub r3, r4
+
+	mov.l r13, @-r15
+	sub r3, r4 /* r3 subtracted twice: r4 = 2 * width - 2 * (pixels per row) */
+
+	mov.l r14, @-r15
+	mov #-31, r14
+
+	mov.l @r5, _u /* cmd + 12: u0 */
+	mov.l @(4, r5), _v /* cmd + 16: v0 */
+	mov.l @(8, r5), _w /* cmd + 20: w0 */
+	mov.l @(12, r5), _du_x /* cmd + 24: du_x */
+	mov.l @(16, r5), _dv_x /* cmd + 28: dv_x */
+	mov.l @(20, r5), _dw_x /* cmd + 32: dw_x */
+
+	/* Loop on y */
+1:	ldrc r3 /* repeat count for the x loop = pixels per row */
+	nop
+
+	/* Loop on x (hardware repeat between labels 2 and 3) */
+2:	mov _u, r0
+	or _v, r0
+
+	or _w, r0 /* sign bit of r0 is set iff u, v or w is negative */
+	nop
+
+	shad r14, r0 /* arithmetic shift right by 31: r0 = -1 if any is negative, else 0 */
+	nop
+
+	not r0, r0 /* r0 = all ones when inside, 0 when outside. NOTE(review): u|v|w == 0 counts as inside here, the C version tests > 0 */
+	nop
+
+	and _frag, r0 /* r0 = frag when inside, 0 (NULL) when outside */
+	nop
+
+	add #2, _frag /* advance to the next pixel */
+	nop
+
+	add _du_x, _u
+	mov.w _color, @r0 /* store the pixel; the store targets address 0 when outside (presumably harmless on the target -- TODO confirm) */
+
+	add _dv_x, _v
+	nop
+
+	add _dw_x, _w
+3:	nop
+
+	/* Sloooow: end of row; the three per-row deltas are reloaded from cmd on every row */
+	add r4, _frag /* skip the rest of the row: frag now points at x_min on the next row */
+	mov.l @(24, r5), r0 /* cmd + 36: du_row */
+
+	dt r1 /* r1 -= 1; T = (r1 == 0) */
+	mov.l @(28, r5), r2 /* cmd + 40: dv_row */
+
+	add r0, _u
+	mov.l @(32, r5), r0 /* cmd + 44: dw_row */
+
+	add r2, _v
+	nop
+
+	bf.s 1b /* next row while r1 != 0 */
+	add r0, _w /* (delay slot) */
+
+	add #-12, r5 /* r5 = cmd again */
+	mov #0, r0
+
+	/* 1 cycle lost */
+
+	mov.l _u, @(12, r5) /* cmd->u0 = u */
+	mov.l _v, @(16, r5) /* cmd->v0 = v */
+	mov.l _w, @(20, r5) /* cmd->w0 = w */
+	mov.b r0, @(1, r5) /* cmd->y = 0 */
+
+	/* Slooow: prepare the command for the next fragment */
+	mov.b @(3, r5), r0 /* height_frag */
+	extu.b r0, r0
+	mov r0, r2
+	mov.b @(2, r5), r0 /* height_total */
+	extu.b r0, r0
+	sub r2, r0
+	mov.b r0, @(2, r5) /* height_total -= height_frag */
+
+	mov.l .azrp_frag_height, r1 /* address of azrp_frag_height */
+	mov.l @r1, r1 /* its value */
+
+	cmp/ge r0, r1 /* T = (azrp_frag_height >= height_total), signed */
+	bt 4f
+	mov r1, r0 /* otherwise clamp to azrp_frag_height */
+
+4:	mov.b r0, @(3, r5) /* height_frag = min(height_total, azrp_frag_height) */
+
+	mov.l @r15+, r14 /* pop r14-r8, reverse of the push order */
+	mov.l @r15+, r13
+	mov.l @r15+, r12
+	mov.l @r15+, r11
+	mov.l @r15+, r10
+	mov.l @r15+, r9
+	rts
+	mov.l @r15+, r8 /* (delay slot) */
+
+.balign 4
+.azrp_frag_height:
+	.long _azrp_frag_height
diff --git a/azur/src/gint/shaders/triangle.c b/azur/src/gint/shaders/triangle.c
index 31b3ce0..7920f97 100644
--- a/azur/src/gint/shaders/triangle.c
+++ b/azur/src/gint/shaders/triangle.c
@@ -29,25 +29,29 @@ struct command {
     uint8_t shader_id;
     /* Local y coordinate of the first line in the fragment */
     uint8_t y;
+    /* Number of lines to render total, including this fragment */
+    uint8_t height_total;
     /* Number of lines to render on the current fragment */
     uint8_t height_frag;
-    /* Numebr of lines to render total, includnig this fragment */
-    uint8_t height_total;
     /* Rectangle along the x coordinates (x_max included) */
     uint16_t x_min, x_max;
     /* Color */
     uint16_t color;
+    uint16_t _; /* Explicit padding: the fields above take 10 bytes, this puts u0 at offset 12 */
+
     /* Initial barycentric coordinates */
     int u0, v0, w0;
-    /* Variation of each coordinate for a movement in x/y */
+    /* Variation of each coordinate for a movement in x */
     int du_x, dv_x, dw_x;
-    int du_y, dv_y, dw_y;
+    /* Variation of each coordinate for a movement in y while canceling the row's
+       movements in x */
+    int du_row, dv_row, dw_row;
 };
 
 //---
 
 // TODO: Write in assembler
-void azrp_shader_triangle(void *uniforms0, void *command0, void *fragment0)
+void azrp_shader_triangle_2(void *uniforms0, void *command0, void *fragment0)
 {
     int width = (int)uniforms0;
     struct command *cmd = command0;
@@ -55,12 +59,11 @@ void azrp_shader_triangle(void *uniforms0, void *command0, void *fragment0)
 
     frag += cmd->x_min + width * cmd->y;
 
-    int u, v, w;
+    int u = cmd->u0;
+    int v = cmd->v0;
+    int w = cmd->w0;
 
     for(int y = 0; y < cmd->height_frag; y++) {
-        u = cmd->u0;
-        v = cmd->v0;
-        w = cmd->w0;
 
         for(int x = cmd->x_min; x <= cmd->x_max; x++) {
             if((u | v | w) > 0) {
@@ -73,11 +76,15 @@ void azrp_shader_triangle(void *uniforms0, void *command0, void *fragment0)
         }
 
         frag += width;
-        cmd->u0 += cmd->du_y;
-        cmd->v0 += cmd->dv_y;
-        cmd->w0 += cmd->dw_y;
+        u += cmd->du_row; /* u-w are now carried across rows; the *_row deltas also undo the row's x steps */
+        v += cmd->dv_row;
+        w += cmd->dw_row;
     }
 
+    cmd->u0 = u; /* write the running coordinates back once, after the last row */
+    cmd->v0 = v;
+    cmd->w0 = w;
+
     /* Prepare next fragment */
     cmd->y = 0;
     cmd->height_total -= cmd->height_frag;
@@ -100,6 +107,9 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
     int min_y = max(0, min(y1, min(y2, y3)));
     int max_y = min(azrp_height-1, max(y1, max(y2, y3)));
 
+    if(min_x >= max_x || min_y >= max_y) /* bounding box is empty or a single column/row: queue nothing */
+        return; /* NOTE(review): prof_leave() is called at the end of this function; if a matching enter precedes this point, this return skips it -- confirm */
+
     /* TODO: Have a proper way to do optimized-division by azrp_frag_height
        TODO: Also account for first-fragment offset */
     int frag_first = min_y >> 4;
@@ -119,13 +129,18 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
     /* Vector products for barycentric coordinates */
     cmd.u0 = edge_start(x2, y2, x3, y3, min_x, min_y);
     cmd.du_x = y3 - y2;
-    cmd.du_y = x2 - x3;
+    int du_y = x2 - x3;
     cmd.v0 = edge_start(x3, y3, x1, y1, min_x, min_y);
     cmd.dv_x = y1 - y3;
-    cmd.dv_y = x3 - x1;
+    int dv_y = x3 - x1;
     cmd.w0 = edge_start(x1, y1, x2, y2, min_x, min_y);
     cmd.dw_x = y2 - y1;
-    cmd.dw_y = x1 - x2;
+    int dw_y = x1 - x2;
+
+    int columns = max_x - min_x + 1; /* width of the bounding box in pixels */
+    cmd.du_row = du_y - columns * cmd.du_x; /* one step in y minus `columns` steps in x */
+    cmd.dv_row = dv_y - columns * cmd.dv_x;
+    cmd.dw_row = dw_y - columns * cmd.dw_x;
 
     azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
     prof_leave(azrp_perf_cmdgen);