From e124719de35e8e66efa22d758b3699682549d8d7 Mon Sep 17 00:00:00 2001 From: Lephe Date: Sat, 2 Apr 2022 18:55:15 +0100 Subject: [PATCH] azur: share command data between fragments (with updates) --- CMakeLists.txt | 2 +- azur/include/azur/gint/render.h | 24 ++++++++---- azur/src/gint/render.c | 23 ++++++++--- azur/src/gint/shaders/clear.c | 9 +---- azur/src/gint/shaders/image.S | 68 ++++++++++++++++++--------------- azur/src/gint/shaders/image.c | 18 ++++----- 6 files changed, 81 insertions(+), 63 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c0e7610..adad8e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ if("${FXSDK_PLATFORM_LONG}" STREQUAL fxCG50) endif() # General options -add_compile_options(-Wall -Wextra -O3 +add_compile_options(-Wall -Wextra -O2 -fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}/=) set(CMAKE_C_STANDARD 11) diff --git a/azur/include/azur/gint/render.h b/azur/include/azur/gint/render.h index 849c5c3..30f773a 100644 --- a/azur/include/azur/gint/render.h +++ b/azur/include/azur/gint/render.h @@ -243,19 +243,21 @@ void azrp_set_uniforms(int shader_id, void *uniforms); /* azrp_queue_command(): Add a new command to be rendered next frame - The command must be a structure starting with an 8-bit shader ID. Returns - true on success, false if the maximum amount of commands or command memory - is exceeded. */ -bool azrp_queue_command(void *command, size_t size, int fragment); + The command must be a structure starting with an 8-bit shader ID. The + command is added for all fragments in range [fragment..fragment+count); its + data can be updated between fragments by the shader program. Returns true on + success, false if the maximum amount of commands or command memory is + exceeded. */ +bool azrp_queue_command(void *command, size_t size, int fragment, int count); //--- // Internal shader definitions (for reference; no API guarantee) //--- struct azrp_shader_image_command { - /* Shader ID and fragment number */ uint8_t shader_id; - uint8_t fragment_id; + /* First edge-preserved pixel offset (P4 only) */ + int8_t edge1; /* Pixels per line */ int16_t columns; /* Address of the image structure */ @@ -266,8 +268,14 @@ struct azrp_shader_image_command { int16_t lines; /* Already offset by start row and column */ void const *input; - /* P4 modes only: */ - int16_t edge1, edge2; + + /* Info for structure update between fragments: */ + int16_t height; + int16_t row_stride; + int16_t x; + + /* Second edge-preserved pixel offset (P4 only) */ + int16_t edge2; }; AZUR_END_DECLS diff --git a/azur/src/gint/render.c b/azur/src/gint/render.c index 9f7c19f..736796e 100644 --- a/azur/src/gint/render.c +++ b/azur/src/gint/render.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -113,6 +114,15 @@ void azrp_render_fragments(void) prof_enter_norec(azrp_perf_shaders); shaders[data[0]](shader_uniforms[data[0]], data, azrp_frag); prof_leave_norec(azrp_perf_shaders); + + if(data[0] == AZRP_SHADER_IMAGE) { + struct azrp_shader_image_command *cmd = (void *)data; + cmd->height -= cmd->lines; + cmd->input += cmd->row_stride * cmd->lines; + cmd->lines = min(cmd->height, azrp_frag_height); + cmd->output = 2 * cmd->x; + } + cmd = commands_array[++i]; } @@ -209,9 +219,9 @@ void azrp_set_uniforms(int shader_id, void *uniforms) shader_uniforms[shader_id] = uniforms; } -bool azrp_queue_command(void *command, size_t size, int fragment) +bool azrp_queue_command(void *command, size_t size, int fragment, int count) { - if(commands_count >= AZRP_MAX_COMMANDS) + if(commands_count + count > AZRP_MAX_COMMANDS) return false; if(commands_length + size >= 8192) return false; @@ -222,10 +232,13 @@ bool azrp_queue_command(void *command, size_t size, int fragment) for(size_t i = 0; i < size; i++) dst[i] = src[i]; - commands_array[commands_count++] = - (fragment << 16) | commands_length; - commands_length += size; + do { + commands_array[commands_count++] = (fragment << 16) | commands_length; + fragment++; + } + while(--count > 0); + commands_length += size; return true; } diff --git a/azur/src/gint/shaders/clear.c b/azur/src/gint/shaders/clear.c index a2bd03f..aad1a20 100644 --- a/azur/src/gint/shaders/clear.c +++ b/azur/src/gint/shaders/clear.c @@ -18,9 +18,8 @@ void azrp_shader_clear_configure(void) //--- struct command { - /* Shader ID and fragment number */ uint8_t shader_id; - uint8_t fragment_id; + uint8_t _; /* Color */ uint16_t color; }; @@ -33,10 +32,6 @@ void azrp_clear(uint16_t color) cmd.shader_id = AZRP_SHADER_CLEAR; cmd.color = color; - for(int i = 0; i < azrp_frag_count; i++) { - cmd.fragment_id = i; - azrp_queue_command(&cmd, sizeof cmd, i); - } - + azrp_queue_command(&cmd, sizeof cmd, 0, azrp_frag_count); prof_leave(azrp_perf_cmdgen); } diff --git a/azur/src/gint/shaders/image.S b/azur/src/gint/shaders/image.S index 396b5d7..d7dc298 100644 --- a/azur/src/gint/shaders/image.S +++ b/azur/src/gint/shaders/image.S @@ -536,29 +536,32 @@ _P8_RGB565.palette_distance: The main loop achieves 5 cycles/pixel. */ .align 4 _P4_RGB565A: - shlr r9 - mov #-1, r0 - - mov.l r10, @-r15 - addc r0, r9 - - mov.l r11, @-r15 shlr r7 + mov.w @(6, r2), r0 /* command.edge2 */ mov.l r12, @-r15 + add #-15, r2 /* Go back to start of command */ + + mov #-1, r12 + shlr r9 + + mov.l r11, @-r15 + addc r12, r9 + + mov r0, r12 + add r12, r12 + + mov.l r10, @-r15 sub r7, r9 - mov.w @r2+, r11 /* command.edge1 */ + mov.b @r2, r11 /* command.edge1 */ add #2, r8 /* image.palette */ - mov.w @r2+, r12 /* command.edge2 */ + mov.l r13, @-r15 mov r5, r0 - mov.l r13, @-r15 - shll r11 - mov.l r14, @-r15 - shll r12 + shll r11 add #-4, r5 nop /* 4-alignment */ @@ -618,9 +621,9 @@ _P4_RGB565A: END_NORET() mov.l @r15+, r14 mov.l @r15+, r13 - mov.l @r15+, r12 - mov.l @r15+, r11 mov.l @r15+, r10 + mov.l @r15+, r11 + mov.l @r15+, r12 mov.l @r15+, r9 rts mov.l @r15+, r8 @@ -630,32 +633,35 @@ _P4_RGB565A: core loop runs in 3.5 cycles/pixel. */ .align 4 _P4_RGB565: - shlr r9 - mov #-1, r0 + shlr r7 + mov.w @(6, r2), r0 /* command.edge2 */ mov.l r10, @-r15 - addc r0, r9 - - mov.l r11, @-r15 - shlr r7 + add #-15, r2 /* Go back to start of command */ mov.l r12, @-r15 + shlr r9 + + add #2, r8 /* image.palette */ + mov #-1, r12 + + mov.l r11, @-r15 + addc r12, r9 + + mov r0, r12 + add r12, r12 + + mov.b @r2, r11 /* command.edge1 */ sub r7, r9 - mov.w @r2+, r11 /* command.edge1 */ - add #2, r8 /* image.palette */ - - mov.w @r2+, r12 /* command.edge2 */ - mov r5, r0 - mov.l r13, @-r15 - shll r11 + mov #0x1e, r2 mov.l r14, @-r15 - shll r12 + shll r11 + mov r5, r0 add #-4, r5 - mov #0x1e, r2 START() @@ -706,8 +712,8 @@ _P4_RGB565: END_NORET() mov.l @r15+, r14 mov.l @r15+, r13 - mov.l @r15+, r12 mov.l @r15+, r11 + mov.l @r15+, r12 mov.l @r15+, r10 mov.l @r15+, r9 rts diff --git a/azur/src/gint/shaders/image.c b/azur/src/gint/shaders/image.c index 37ee6de..0ef5294 100644 --- a/azur/src/gint/shaders/image.c +++ b/azur/src/gint/shaders/image.c @@ -45,7 +45,6 @@ void azrp_subimage(int x, int y, bopti_image_t const *image, cmd.image = image; int row_stride; - size_t cmd_size = sizeof cmd - 4; if(image->profile == P8_RGB565 || image->profile == P8_RGB565A) { row_stride = image->width; @@ -63,7 +62,6 @@ void azrp_subimage(int x, int y, bopti_image_t const *image, cmd.edge2 = width + odd_left; cmd.columns += odd_left + odd_right; x -= odd_left; - cmd_size += 4; } else { row_stride = image->width << 1; @@ -71,22 +69,20 @@ void azrp_subimage(int x, int y, bopti_image_t const *image, } /* This divides by azrp_frag_height */ - cmd.fragment_id = (azrp_scale == 1) ? (y >> 3) : (y >> 4); + int fragment_id = (azrp_scale == 1) ? (y >> 3) : (y >> 4); /* These settings only apply to the first fragment */ int first_y = (y + azrp_frag_offset) & (azrp_frag_height - 1); cmd.lines = azrp_frag_height - first_y; cmd.output = 2 * (azrp_width * first_y + x); - while(height > 0) { - azrp_queue_command(&cmd, cmd_size, cmd.fragment_id); + /* Settings for further updates */ + cmd.height = height; + cmd.row_stride = row_stride; + cmd.x = x; - height -= cmd.lines; - cmd.fragment_id++; - cmd.input += row_stride * cmd.lines; - cmd.lines = min(height, azrp_frag_height); - cmd.output = 2 * x; - } + int n = 1 + (height - cmd.lines + azrp_frag_height - 1) / azrp_frag_height; + azrp_queue_command(&cmd, sizeof cmd, fragment_id, n); prof_leave(azrp_perf_cmdgen); }