azur: share command data between fragments (with updates)

This commit is contained in:
Lephe 2022-04-02 18:55:15 +01:00 committed by Lephenixnoir
parent 911cc8e5ac
commit e124719de3
Signed by untrusted user: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
6 changed files with 81 additions and 63 deletions

View File

@ -10,7 +10,7 @@ if("${FXSDK_PLATFORM_LONG}" STREQUAL fxCG50)
endif()
# General options
add_compile_options(-Wall -Wextra -O3
add_compile_options(-Wall -Wextra -O2
-fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}/=)
set(CMAKE_C_STANDARD 11)

View File

@ -243,19 +243,21 @@ void azrp_set_uniforms(int shader_id, void *uniforms);
/* azrp_queue_command(): Add a new command to be rendered next frame
The command must be a structure starting with an 8-bit shader ID. Returns
true on success, false if the maximum amount of commands or command memory
is exceeded. */
bool azrp_queue_command(void *command, size_t size, int fragment);
The command must be a structure starting with an 8-bit shader ID. The
command is added for all fragments in range [fragment..fragment+count); its
data can be updated between fragments by the shader program. Returns true on
success, false if the maximum number of commands or the command memory size
would be exceeded. */
bool azrp_queue_command(void *command, size_t size, int fragment, int count);
//---
// Internal shader definitions (for reference; no API guarantee)
//---
struct azrp_shader_image_command {
/* Shader ID and fragment number */
uint8_t shader_id;
uint8_t fragment_id;
/* First edge-preserved pixel offset (P4 only) */
int8_t edge1;
/* Pixels per line */
int16_t columns;
/* Address of the image structure */
@ -266,8 +268,14 @@ struct azrp_shader_image_command {
int16_t lines;
/* Already offset by start row and column */
void const *input;
/* P4 modes only: */
int16_t edge1, edge2;
/* Info for structure update between fragments: */
int16_t height;
int16_t row_stride;
int16_t x;
/* Second edge-preserved pixel offset (P4 only) */
int16_t edge2;
};
AZUR_END_DECLS

View File

@ -2,6 +2,7 @@
#include <gint/drivers/r61524.h>
#include <gint/defs/attributes.h>
#include <gint/defs/util.h>
#include <string.h>
#include <stdlib.h>
@ -113,6 +114,15 @@ void azrp_render_fragments(void)
prof_enter_norec(azrp_perf_shaders);
shaders[data[0]](shader_uniforms[data[0]], data, azrp_frag);
prof_leave_norec(azrp_perf_shaders);
/* Image commands are shared by several fragments: once the shader has run
   for the current fragment, update the command data in place so that it
   describes the part of the image that falls in the next fragment. */
if(data[0] == AZRP_SHADER_IMAGE) {
/* NOTE(review): this local pointer appears to shadow the `cmd` variable
   that the enclosing loop assigns right after this block -- confirm that
   the shadowing is intended. */
struct azrp_shader_image_command *cmd = (void *)data;
/* Rows rendered in this fragment no longer count as remaining */
cmd->height -= cmd->lines;
/* Skip the input rows that were just rendered (arithmetic on the
   void const * input field) */
cmd->input += cmd->row_stride * cmd->lines;
/* The next fragment renders at most one full fragment of rows */
cmd->lines = min(cmd->height, azrp_frag_height);
/* Only the horizontal position remains in the output offset; no row
   offset is applied after the first fragment */
cmd->output = 2 * cmd->x;
}
cmd = commands_array[++i];
}
@ -209,9 +219,9 @@ void azrp_set_uniforms(int shader_id, void *uniforms)
shader_uniforms[shader_id] = uniforms;
}
bool azrp_queue_command(void *command, size_t size, int fragment)
bool azrp_queue_command(void *command, size_t size, int fragment, int count)
{
if(commands_count >= AZRP_MAX_COMMANDS)
if(commands_count + count > AZRP_MAX_COMMANDS)
return false;
if(commands_length + size >= 8192)
return false;
@ -222,10 +232,13 @@ bool azrp_queue_command(void *command, size_t size, int fragment)
for(size_t i = 0; i < size; i++)
dst[i] = src[i];
commands_array[commands_count++] =
(fragment << 16) | commands_length;
commands_length += size;
/* Add one entry per target fragment, all holding the same value of
   commands_length (presumably the offset of the command data that was just
   copied), so that consecutive fragments share a single copy of the command.
   Each entry stores the fragment number in its upper 16 bits.
   NOTE(review): as a do/while loop this always stores at least one entry,
   even if count <= 0, whereas the capacity check at the top of the function
   uses count as given -- confirm that callers always pass count >= 1. */
do {
commands_array[commands_count++] = (fragment << 16) | commands_length;
fragment++;
}
while(--count > 0);
commands_length += size;
return true;
}

View File

@ -18,9 +18,8 @@ void azrp_shader_clear_configure(void)
//---
struct command {
/* Shader ID and fragment number */
uint8_t shader_id;
uint8_t fragment_id;
uint8_t _;
/* Color */
uint16_t color;
};
@ -33,10 +32,6 @@ void azrp_clear(uint16_t color)
cmd.shader_id = AZRP_SHADER_CLEAR;
cmd.color = color;
for(int i = 0; i < azrp_frag_count; i++) {
cmd.fragment_id = i;
azrp_queue_command(&cmd, sizeof cmd, i);
}
azrp_queue_command(&cmd, sizeof cmd, 0, azrp_frag_count);
prof_leave(azrp_perf_cmdgen);
}

View File

@ -536,29 +536,32 @@ _P8_RGB565.palette_distance:
The main loop achieves 5 cycles/pixel. */
.align 4
_P4_RGB565A:
shlr r9
mov #-1, r0
mov.l r10, @-r15
addc r0, r9
mov.l r11, @-r15
shlr r7
mov.w @(6, r2), r0 /* command.edge2 */
mov.l r12, @-r15
add #-15, r2 /* Go back to start of command */
mov #-1, r12
shlr r9
mov.l r11, @-r15
addc r12, r9
mov r0, r12
add r12, r12
mov.l r10, @-r15
sub r7, r9
mov.w @r2+, r11 /* command.edge1 */
mov.b @r2, r11 /* command.edge1 */
add #2, r8 /* image.palette */
mov.w @r2+, r12 /* command.edge2 */
mov.l r13, @-r15
mov r5, r0
mov.l r13, @-r15
shll r11
mov.l r14, @-r15
shll r12
shll r11
add #-4, r5
nop /* 4-alignment */
@ -618,9 +621,9 @@ _P4_RGB565A:
END_NORET()
mov.l @r15+, r14
mov.l @r15+, r13
mov.l @r15+, r12
mov.l @r15+, r11
mov.l @r15+, r10
mov.l @r15+, r11
mov.l @r15+, r12
mov.l @r15+, r9
rts
mov.l @r15+, r8
@ -630,32 +633,35 @@ _P4_RGB565A:
core loop runs in 3.5 cycles/pixel. */
.align 4
_P4_RGB565:
shlr r9
mov #-1, r0
shlr r7
mov.w @(6, r2), r0 /* command.edge2 */
mov.l r10, @-r15
addc r0, r9
mov.l r11, @-r15
shlr r7
add #-15, r2 /* Go back to start of command */
mov.l r12, @-r15
shlr r9
add #2, r8 /* image.palette */
mov #-1, r12
mov.l r11, @-r15
addc r12, r9
mov r0, r12
add r12, r12
mov.b @r2, r11 /* command.edge1 */
sub r7, r9
mov.w @r2+, r11 /* command.edge1 */
add #2, r8 /* image.palette */
mov.w @r2+, r12 /* command.edge2 */
mov r5, r0
mov.l r13, @-r15
shll r11
mov #0x1e, r2
mov.l r14, @-r15
shll r12
shll r11
mov r5, r0
add #-4, r5
mov #0x1e, r2
START()
@ -706,8 +712,8 @@ _P4_RGB565:
END_NORET()
mov.l @r15+, r14
mov.l @r15+, r13
mov.l @r15+, r12
mov.l @r15+, r11
mov.l @r15+, r12
mov.l @r15+, r10
mov.l @r15+, r9
rts

View File

@ -45,7 +45,6 @@ void azrp_subimage(int x, int y, bopti_image_t const *image,
cmd.image = image;
int row_stride;
size_t cmd_size = sizeof cmd - 4;
if(image->profile == P8_RGB565 || image->profile == P8_RGB565A) {
row_stride = image->width;
@ -63,7 +62,6 @@ void azrp_subimage(int x, int y, bopti_image_t const *image,
cmd.edge2 = width + odd_left;
cmd.columns += odd_left + odd_right;
x -= odd_left;
cmd_size += 4;
}
else {
row_stride = image->width << 1;
@ -71,22 +69,20 @@ void azrp_subimage(int x, int y, bopti_image_t const *image,
}
/* This divides by azrp_frag_height */
cmd.fragment_id = (azrp_scale == 1) ? (y >> 3) : (y >> 4);
int fragment_id = (azrp_scale == 1) ? (y >> 3) : (y >> 4);
/* These settings only apply to the first fragment */
int first_y = (y + azrp_frag_offset) & (azrp_frag_height - 1);
cmd.lines = azrp_frag_height - first_y;
cmd.output = 2 * (azrp_width * first_y + x);
while(height > 0) {
azrp_queue_command(&cmd, cmd_size, cmd.fragment_id);
/* Settings for further updates */
cmd.height = height;
cmd.row_stride = row_stride;
cmd.x = x;
height -= cmd.lines;
cmd.fragment_id++;
cmd.input += row_stride * cmd.lines;
cmd.lines = min(height, azrp_frag_height);
cmd.output = 2 * x;
}
/* Number of fragments to queue the command for: one for the first fragment
   (which renders cmd.lines rows), plus the rows left after it divided by the
   fragment height, rounded up. */
int n = 1 + (height - cmd.lines + azrp_frag_height - 1) / azrp_frag_height;
/* A single copy of the command is queued for all n fragments.
   NOTE(review): the boolean result of azrp_queue_command() is ignored here;
   confirm that silently dropping the image on failure is acceptable. */
azrp_queue_command(&cmd, sizeof cmd, fragment_id, n);
prof_leave(azrp_perf_cmdgen);
}