From 2e7c076e6015641164865394d7966881dc1216d1 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Wed, 31 May 2023 23:29:04 +0200 Subject: [PATCH] azrp: add rectangle shader (azrp_rect): flat, invert, darken, whiten Flat is optimized but invert, darken and whiten are not. --- azur/CMakeLists.txt | 5 +- azur/include/azur/gint/render.h | 33 +++- azur/src/gint/render.c | 18 ++ azur/src/gint/shaders/image.c | 6 +- azur/src/gint/shaders/image_rgb16_normal.S | 2 +- azur/src/gint/shaders/rect.S | 185 +++++++++++++++++++++ azur/src/gint/shaders/rect.c | 91 ++++++++++ azur/src/gint/shaders/triangle.c | 9 +- 8 files changed, 335 insertions(+), 14 deletions(-) create mode 100644 azur/src/gint/shaders/rect.S create mode 100644 azur/src/gint/shaders/rect.c diff --git a/azur/CMakeLists.txt b/azur/CMakeLists.txt index 8e4008d..edd64df 100644 --- a/azur/CMakeLists.txt +++ b/azur/CMakeLists.txt @@ -72,7 +72,10 @@ if(AZUR_GRAPHICS_GINT_CG) src/gint/shaders/image_p4_dye.c # Triangle shader src/gint/shaders/triangle.c - src/gint/shaders/triangle.S) + src/gint/shaders/triangle.S + # Rectangle shader + src/gint/shaders/rect.c + src/gint/shaders/rect.S) endif() add_library(azur STATIC ${SOURCES}) diff --git a/azur/include/azur/gint/render.h b/azur/include/azur/gint/render.h index e253b8c..34b7ec9 100644 --- a/azur/include/azur/gint/render.h +++ b/azur/include/azur/gint/render.h @@ -173,6 +173,21 @@ void azrp_config_scale(int scale); @offset Fragment offset along the y-axis (0 ... height of fragment-1). */ void azrp_config_frag_offset(int offset); +/* azrp_config_get_line(): Split a line number into fragment/offset + + Sets *fragment to the first fragment that covers line y and *offset to the + line number within that fragment. */ +void azrp_config_get_line(int y, int *fragment, int *offset); + +/* azrp_config_get_lines(): Split a line interval into fragments and offset + + Splits the interval [y; y+height) into fragment/offset pairs. 
+ - Sets *first_fragment to the fragment that covers line y; + - Sets *first_offset to the line number within that fragment; + - Sets *fragment_count to the number of fragments the interval will cover. */ +void azrp_config_get_lines(int y, int height, int *first_fragment, + int *first_offset, int *fragment_count); + //--- // Hooks //--- @@ -205,11 +220,24 @@ void azrp_image(int x, int y, bopti_image_t const *image); void azrp_subimage(int x, int y, bopti_image_t const *image, int left, int top, int width, int height, int flags); +/* See below for more detailed image functions. Dynamic effects are provided + with the same naming convention as gint. */ + /* azrp_triangle(): Render a flat triangle. Points can be in any order. */ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color); -/* See below for more detailed image functions. Dynamic effects are provided - with the same naming convention as gint. */ +/* azrp_rect(): Render a rectangle with a flat color or color transform. */ +void azrp_rect(int x1, int y1, int width, int height, int color_or_effect); + +/* Effects for azrp_rect(). */ +enum { + /* Invert colors in gamma space. */ + AZRP_RECT_INVERT = -1, + /* Darken by halving all components in gamma space. */ + AZRP_RECT_DARKEN = -2, + /* Whiten by halving the distance to white in gamma space. */ + AZRP_RECT_WHITEN = -3, +}; //--- // Performance indicators @@ -274,6 +302,7 @@ void azrp_set_uniforms(int shader_id, void *uniforms); data can be updated between fragments by the shader program. Returns true on success, false if the maximum amount of commands or command memory is exceeded. 
*/ +// TODO: azrp_queue_command: give access to command buffer in-place bool azrp_queue_command(void *command, size_t size, int fragment, int count); /* azrp_queue_image(): Split and queue a gint image command diff --git a/azur/src/gint/render.c b/azur/src/gint/render.c index d99abca..3e4f17a 100644 --- a/azur/src/gint/render.c +++ b/azur/src/gint/render.c @@ -227,6 +227,24 @@ static void default_settings(void) azrp_config_scale(1); } +void azrp_config_get_line(int y, int *fragment, int *offset) +{ + y += azrp_frag_offset; + + *fragment = y >> 4; + *offset = y & 15; +} + +void azrp_config_get_lines(int y, int height, int *first_fragment, + int *first_offset, int *fragment_count) +{ + y += azrp_frag_offset; + + *first_fragment = (y >> 4); + *first_offset = (y & 15); + *fragment_count = ((y + height - 1) >> 4) - *first_fragment + 1; +} + //--- // Hooks //--- diff --git a/azur/src/gint/shaders/image.c b/azur/src/gint/shaders/image.c index 825b5bf..e982377 100644 --- a/azur/src/gint/shaders/image.c +++ b/azur/src/gint/shaders/image.c @@ -17,12 +17,10 @@ void azrp_queue_image(struct gint_image_box *box, image_t const *img, else cmd->shader_id = AZRP_SHADER_IMAGE_P4; - /* This divides by azrp_frag_height */ - /* TODO: Have a proper way to do optimized-division by azrp_frag_height */ - int fragment_id = (azrp_scale == 1) ? 
(box->y >> 4) : (box->y >> 4); + int fragment_id, first_y; + azrp_config_get_line(box->y, &fragment_id, &first_y); /* These settings only apply to the first fragment */ - int first_y = (box->y + azrp_frag_offset) & (azrp_frag_height - 1); cmd->lines = min(box->h, azrp_frag_height - first_y); cmd->output = (void *)azrp_frag + (azrp_width * first_y + cmd->x) * 2; diff --git a/azur/src/gint/shaders/image_rgb16_normal.S b/azur/src/gint/shaders/image_rgb16_normal.S index 6baa0cb..f498578 100644 --- a/azur/src/gint/shaders/image_rgb16_normal.S +++ b/azur/src/gint/shaders/image_rgb16_normal.S @@ -10,7 +10,7 @@ In the simple case where there is no color effect and no HFLIP, the task of rendering a 16-bit opaque image boils down to a 2-dimensional memcpy. This - task can be optimized by moving longwords if the source and destination and + task can be optimized by moving longwords if the source and destination are co-4-aligned, with four variations depending on the width and initial position, identified by the following parameters: diff --git a/azur/src/gint/shaders/rect.S b/azur/src/gint/shaders/rect.S new file mode 100644 index 0000000..39514ff --- /dev/null +++ b/azur/src/gint/shaders/rect.S @@ -0,0 +1,185 @@ +.global _azrp_shader_rect +.global _azrp_shader_rect_loop_flat +.global _azrp_shader_rect_loop_invert +.global _azrp_shader_rect_loop_darken +.global _azrp_shader_rect_loop_whiten +.align 4 + +#define _height r1 +#define _edge_1 r2 +#define _edge_2 r3 +#define _stride r4 +#define _cmd r5 +#define _frag r6 +#define _wl r7 +#define _redstride r10 + +/* r0: (temporary) + r1: height counter + r2: (temporary) then fragment + edge_1 + r3: (temporary) then fragment + edge_2 + r4: stride (azrp_width * 2) + r5: cmd then color (can also be a temporary) + r6: fragment + r7: longwords to write on each line (wl) + r8: (temporary) then saved edge_1 + r9: (temporary) then saved edge_2 + r10: reduced stride (azrp_width * 2 - 4 * wl) */ + +_azrp_shader_rect: + mov.w @_cmd+, r3 /* 
shader_id || y */ + shll _stride + + mov.l r8, @-r15 + nop + + mov.b @_cmd+, r8 /* height_total */ + extu.b r3, r3 + + mov.b @_cmd+, _height /* height_frag */ + mulu.w r3, _stride + + mov.w @_cmd+, r2 /* xl */ + mov #-4, r0 + + sub _height, r8 + mov.b r8, @(r0, _cmd) /* update: height_total */ + + mov.l .azrp_frag_height, r0 /* ... inefficient ... */ + shll2 r2 + + sts macl, r3 + add r2, _frag + + mov.l @r0, r0 + mov #0, r2 + + mov.w @_cmd+, _wl /* wl */ + add r3, _frag + + mov.l r9, @-r15 + cmp/hs r0, r8 + + /* Next fragment height is currently r8 = remaining height + Set it to r0 = azrp_frag_height if r8 >= r0 */ + bf 1f + mov r0, r8 + +1: mov #-5, r0 + mov.b r8, @(r0, _cmd) /* update: height_frag */ + + mov #-7, r0 + mov.b r2, @(r0, _cmd) /* update: y */ + + mov.w @_cmd+, _edge_1 /* edge_1 */ + mov _wl, r8 + + mov.w @_cmd+, _edge_2 /* edge_2 */ + shll2 r8 + + mov.l @_cmd+, r0 /* loop */ + add _frag, _edge_1 + + mov.w @_cmd, r5 /* color */ + add _frag, _edge_2 + + mov.l r10, @-r15 + mov _stride, _redstride + + jmp @r0 + sub r8, _redstride + +.azrp_frag_height: + .long _azrp_frag_height + +.macro START + ldrs 2f + ldre 3f + +1: ldrc _wl + nop + + mov.w @_edge_1, r8 + nop + + mov.w @_edge_2, r9 + nop +.endm + +.macro END + dt _height + mov.w r8, @_edge_1 + + add _redstride, _frag + mov.w r9, @_edge_2 + + add _stride, _edge_1 + nop + + bf.s 1b + add _stride, _edge_2 + + mov.l @r15+, r10 + mov.l @r15+, r9 + rts + mov.l @r15+, r8 +.endm + +_azrp_shader_rect_loop_flat: + extu.w r5, r0 + shll16 r5 + or r0, r5 + lds r5, x0 + + START + mov _frag, r5 +2:3: movs.l x0, @r5+ + mov r5, _frag + END + +_azrp_shader_rect_loop_invert: + /* Inefficient: could go all the way down to 2 cycles/long with + pipelining, but we're stuck at 3 cycles/long with this naive + approach */ + // TODO: time it we might be able to just non-pipeline and read ahead + // also this _frag update is suspicious + START +2: mov.l @_frag, r0 + not r0, r0 + mov.l r0, @_frag +3: add #4, _frag + END + nop + 
+_azrp_shader_rect_loop_darken: + mov.l .darken_mask, r5 + nop + + /* Inefficient */ + START +2: mov.l @_frag, r0 + and r5, r0 + shlr r0 + mov.l r0, @_frag +3: add #4, _frag + nop + END + +_azrp_shader_rect_loop_whiten: + mov.l .darken_mask, r5 + nop + + /* Inefficient */ + START +2: mov.l @_frag, r0 + not r0, r0 + and r5, r0 + shlr r0 + not r0, r0 + mov.l r0, @_frag +3: add #4, _frag + nop + END + +.darken_mask: + .long 0xf7def7de diff --git a/azur/src/gint/shaders/rect.c b/azur/src/gint/shaders/rect.c new file mode 100644 index 0000000..f710a64 --- /dev/null +++ b/azur/src/gint/shaders/rect.c @@ -0,0 +1,91 @@ +#include <azur/gint/render.h> + +uint AZRP_SHADER_RECT = -1; + +static void configure(void) +{ + azrp_set_uniforms(AZRP_SHADER_RECT, (void *)azrp_width); +} + +__attribute__((constructor)) +static void register_shader(void) +{ + extern azrp_shader_t azrp_shader_rect; + AZRP_SHADER_RECT = azrp_register_shader(azrp_shader_rect, configure); + configure(); +} + +//--- + +struct command { + uint8_t shader_id; + /* Local y coordinate of the first line in the fragment */ + uint8_t y; + /* Number of lines to render total, including this fragment */ + uint8_t height_total; + /* Number of lines to render on the current fragment */ + uint8_t height_frag; + /* Rectangle along the x coordinates (in longwords) */ + uint16_t xl, wl; + /* Offset of left edge */ + int16_t edge_1; + /* Offset of right edge */ + int16_t edge_2; + /* Core loop (this is an internal label of the renderer) */ + void const *loop; + /* Color, when applicable */ + uint16_t color; +}; + +/* Core loops */ +extern void azrp_shader_rect_loop_flat(void); +extern void azrp_shader_rect_loop_invert(void); +extern void azrp_shader_rect_loop_darken(void); +extern void azrp_shader_rect_loop_whiten(void); + +static void (*loops[])(void) = { + azrp_shader_rect_loop_flat, + azrp_shader_rect_loop_invert, + azrp_shader_rect_loop_darken, + azrp_shader_rect_loop_whiten, +}; + +void azrp_rect(int x1, int y1, int width0, int height0, int 
color_or_effect) +{ + /* Clipping (x2 and y2 excluded) */ + int x2 = x1 + width0; + int y2 = y1 + height0; + if(x1 < 0) + x1 = 0; + if(y1 < 0) + y1 = 0; + if(x2 > azrp_width) + x2 = azrp_width; + if(y2 > azrp_height) + y2 = azrp_height; + if(x2 <= x1 || y2 <= y1) + return; + + prof_enter(azrp_perf_cmdgen); + + int frag_first, first_offset, frag_count; + azrp_config_get_lines(y1, y2 - y1, + &frag_first, &first_offset, &frag_count); + + struct command cmd; + cmd.shader_id = AZRP_SHADER_RECT; + cmd.y = first_offset; + cmd.height_total = y2 - y1; + cmd.height_frag = azrp_frag_height - first_offset; + if(cmd.height_total < cmd.height_frag) + cmd.height_frag = cmd.height_total; + cmd.xl = (x1 >> 1); + cmd.wl = ((x2 - 1) >> 1) - cmd.xl + 1; + cmd.edge_1 = (x1 & 1) ? 0 : -2; + cmd.edge_2 = 4 * cmd.wl + ((x2 & 1) ? -2 : 0); + cmd.loop = loops[color_or_effect >= 0 ? 0 : -color_or_effect]; + cmd.color = color_or_effect; + + azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count); + prof_leave(azrp_perf_cmdgen); +} diff --git a/azur/src/gint/shaders/triangle.c b/azur/src/gint/shaders/triangle.c index 6d1c2cb..2f533c5 100644 --- a/azur/src/gint/shaders/triangle.c +++ b/azur/src/gint/shaders/triangle.c @@ -69,12 +69,9 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color) return; } - /* TODO: Have a proper way to do optimized-division by azrp_frag_height - TODO: Also account for first-fragment offset */ - int frag_first = min_y >> 4; - int frag_last = max_y >> 4; - int frag_count = frag_last - frag_first + 1; - int first_offset = min_y & 15; + int frag_first, first_offset, frag_count; + azrp_config_get_lines(min_y, max_y - min_y + 1, + &frag_first, &first_offset, &frag_count); struct command cmd; cmd.shader_id = AZRP_SHADER_TRIANGLE;