azrp: in-place command generation + variadic commands

This commit changes the command generation mechanism. Instead of
creating a command on the stack then calling azrp_queue_command() which
does a needless copy, the new function azrp_new_command() allocates
directly inside the command buffer, avoiding the copy.

This concept is pushed further with the introduction of finer-grained
functions azrp_alloc_command(), azrp_finalize_command() and
azrp_instantiate_command(), which split this process in three steps.
This allows generating commands of variadic size in-place in the
command buffer as well as using non-interval fragment sets.
This commit is contained in:
Lephenixnoir 2023-06-15 11:15:42 +02:00
parent 0cd9acc12f
commit f3a1e90788
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
9 changed files with 176 additions and 66 deletions

View File

@ -303,15 +303,71 @@ int azrp_register_shader(azrp_shader_t *program,
or even points to valid memory. */
void azrp_set_uniforms(int shader_id, void *uniforms);
/* azrp_queue_command(): Add a new command to be rendered next frame
/* azrp_new_command(): Create a new command to be rendered next frame
The command must be a structure starting with an 8-bit shader ID. The
command is added for all fragments in range [fragment..fragment+count); its
data can be updated between fragments by the shader program. Returns true on
success, false if the maximum amount of commands or command memory is
exceeded. */
// TODO: azrp_queue_command: give access to command buffer in-place
bool azrp_queue_command(void *command, size_t size, int fragment, int count);
This function reserves `size` bytes of space in the command buffer, and
returns the address of the region so the caller can fill in a command. The
command will run for fragments in the interval [fragment .. fragment+count).
The command's data must start with an 8-bit shader ID; anything else is up
to the shader. In particular the command's data *can* be updated by the
shader function to reflect progress in the rendering between each fragment.
Returns NULL if the maximum number of commands is reached or the command
buffer is exhausted. */
void *azrp_new_command(size_t size, int fragment, int count);
/* azrp_alloc_command(): Allocate a command in the command buffer
This function, when used together with azrp_finalize_command() and
azrp_instantiate_command(), provides finer control over the command
generation process compared to the simple azrp_new_command().
azrp's command buffer is a bump allocator. Each new command is allocated
directly after the previous one. With azrp_new_command(), the full size of
the command must be known when allocating. By contrast, azrp_alloc_command()
allows variable-sized commands to be generated: the caller can figure out
the final size as it fills in the command, and later commits it by calling
azrp_finalize_command() which advances the bump allocator.
No shader commands can be generated while an allocation is ongoing.
azrp_finalize_command() must be called to finish allocating before
azrp_new_command() or azrp_alloc_command() can be called again.
This function allocates memory for a command of size at least `size`. The
total amount of memory available beyond `size` is recorded in `*extra`.
`size` usually represents the fixed size in commands (eg. the natural size
of structures with flexible array members); it is rarely 0 as a 1-byte
shader ID is always required.
`count` is the number of fragments that the caller knows will be covered by
the command. If fewer than `count` entries are available in the internal
queue where these are held, azrp_alloc_command() will return NULL
immediately. This avoids generating a command that could not be instantiated
anyway. If the final set of affected fragments is unknown, use 0.
Returns a pointer to the buffer where the command should be filled. If
`size` or `count` is too large, returns NULL. In any case, `*extra` is set
to the command buffer space remaining after reserving `size` bytes. */
void *azrp_alloc_command(size_t size, int *extra, int count);
/* azrp_finalize_command(): Finalize an allocation by azrp_alloc_command()
This function finishes an allocation started by azrp_alloc_command() and
advances the bump allocator. `total_size` is the size of the command, ie.
the sum of the `size` parameter to azrp_alloc_command() and the amount of
extra data used (less than azrp_alloc_command()'s `*extra`). */
void azrp_finalize_command(void const *command, int total_size);
/* azrp_instantiate_command(): Queue a command for a range of fragments
This function fills in the command queue with instructions to render the
given command on fragments of the range [fragment .. fragment+count). Unlike
with azrp_new_command(), this function can be called multiple times for the
same command, if disjoint intervals are ever needed.
Returns true on success, false if the queue is out of space. */
bool azrp_instantiate_command(void const *command, int fragment, int count);
/* azrp_queue_image(): Split and queue a gint image command

View File

@ -1,5 +1,6 @@
#include <azur/azur.h>
#include <azur/log.h>
#include <azur/gint/render.h>
#include <gint/timer.h>
#include <gint/cpu.h>

View File

@ -31,6 +31,9 @@ _azrp_r61524_fragment_x2:
ldrc r5
nop
/* TODO: Possible optimization by swap.w + xtrct (3 LS -> 2 EX + 2 LS)
with pipelining. Might be display-bottlenecked. But this might save
time in the parallel USB/R61524 method regardless? */
1: mov.w @r4+, r0
nop
mov.w r0, @r2

View File

@ -145,6 +145,7 @@ void azrp_render_fragments(void)
azrp_r61524_fragment_x1(azrp_frag, 396 * azrp_frag_height);
else if(azrp_scale == 2)
azrp_r61524_fragment_x2(azrp_frag, azrp_width, azrp_frag_height);
// TODO: r61524 x3 output function
prof_leave_norec(azrp_perf_r61524);
if(++frag >= azrp_frag_count) break;
@ -286,29 +287,55 @@ void azrp_set_uniforms(int shader_id, void *uniforms)
shaders[shader_id].uniform = uniforms;
}
bool azrp_queue_command(void *command, size_t size, int fragment, int count)
/* azrp_alloc_command(): Start an in-place allocation in the command buffer
   Stores in `*extra` the number of bytes that would remain in the command
   buffer after reserving `size` bytes; this is written on failure as well.
   Returns the current end of the command data, or NULL if `size` does not fit
   or if `count` more queue entries would exceed AZRP_MAX_COMMANDS. The bump
   pointer itself is not moved here. */
void *azrp_alloc_command(size_t size, int *extra, int count)
{
    int const remaining = sizeof commands_data - commands_length - size;
    *extra = remaining;

    /* Not enough room for the fixed part of the command */
    if(remaining < 0)
        return NULL;
    /* Not enough queue entries to cover `count` fragments */
    if(commands_count + count > AZRP_MAX_COMMANDS)
        return NULL;

    return commands_data + commands_length;
}
/* azrp_finalize_command(): Commit an allocation made by azrp_alloc_command()
   Advances the end of the command data by `total_size` rounded up to a
   multiple of 4 bytes. If `total_size` is negative or the rounded size does
   not fit in the space left in the command buffer, nothing is changed.
   `command` is currently unused. */
void azrp_finalize_command(void const *command, int total_size)
{
    (void)command;

    /* Reject nonsensical sizes up front; this also keeps the rounding below
       from overflowing */
    if(total_size < 0 || total_size > (int)sizeof commands_data)
        return;

    /* Round up to the next multiple of 4. Sizes that are already multiples of
       4 must stay unchanged, otherwise a command that exactly fills the space
       reported by azrp_alloc_command() would be rejected by the check below
       and 4 bytes would be wasted after every aligned command. */
    total_size = (total_size + 3) & ~3;

    if(commands_length + total_size > (int)sizeof commands_data)
        return;

    commands_length += total_size;
}
bool azrp_instantiate_command(void const *command, int fragment, int count)
{
if(commands_count + count > AZRP_MAX_COMMANDS)
return false;
if(commands_length + size >= sizeof commands_data)
return false;
uint8_t *dst = commands_data + commands_length;
uint8_t *src = command;
for(size_t i = 0; i < size; i++)
dst[i] = src[i];
int offset = (uint8_t *)command - commands_data;
do {
commands_array[commands_count++] = (fragment << 16) | commands_length;
commands_array[commands_count++] = (fragment << 16) | offset;
fragment++;
}
while(--count > 0);
commands_length += size;
return true;
}
/* azrp_new_command(): Allocate, finalize and queue a fixed-size command
   Reserves `size` bytes through azrp_alloc_command(), commits them with
   azrp_finalize_command() and queues the command for the fragments
   [fragment .. fragment+count) with azrp_instantiate_command(). Returns the
   address the caller should fill in, or NULL if the allocation or the
   queuing step fails. */
void *azrp_new_command(size_t size, int fragment, int count)
{
    /* Required by azrp_alloc_command(); the value is not needed because the
       command has a fixed size */
    int extra;

    void *cmd = azrp_alloc_command(size, &extra, count);
    if(!cmd)
        return NULL;

    azrp_finalize_command(cmd, (int)size);

    /* Do not hand out a command that was not queued for any fragment: the
       caller would fill it in for nothing */
    if(!azrp_instantiate_command(cmd, fragment, count))
        return NULL;

    return cmd;
}
//---
// Performance indicators
//---

View File

@ -29,10 +29,13 @@ void azrp_clear(uint16_t color)
{
prof_enter(azrp_perf_cmdgen);
struct command cmd;
cmd.shader_id = AZRP_SHADER_CLEAR;
cmd.color = color;
struct command *cmd = azrp_new_command(sizeof *cmd, 0, azrp_frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
azrp_queue_command(&cmd, sizeof cmd, 0, azrp_frag_count);
cmd->shader_id = AZRP_SHADER_CLEAR;
cmd->color = color;
prof_leave(azrp_perf_cmdgen);
}

View File

@ -25,7 +25,12 @@ void azrp_queue_image(struct gint_image_box *box, image_t const *img,
cmd->output = (void *)azrp_frag + (azrp_width * first_y + cmd->x) * 2;
int n = 1 + (box->h - cmd->lines + azrp_frag_height-1) / azrp_frag_height;
azrp_queue_command(cmd, sizeof *cmd, fragment_id, n);
// TODO: Build image command in-place?
struct gint_image_cmd *cmd_inplace =
azrp_new_command(sizeof *cmd_inplace, fragment_id, n);
if(cmd_inplace)
*cmd_inplace = *cmd;
}
void azrp_subimage(int x, int y, image_t const *img,

View File

@ -72,20 +72,25 @@ void azrp_rect(int x1, int y1, int width0, int height0, int color_or_effect)
azrp_config_get_lines(y1, y2 - y1,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_RECT;
cmd.y = first_offset;
cmd.height_total = y2 - y1;
cmd.height_frag = azrp_frag_height - first_offset;
if(cmd.height_total < cmd.height_frag)
cmd.height_frag = cmd.height_total;
cmd.xl = (x1 >> 1);
cmd.wl = ((x2 - 1) >> 1) - cmd.xl + 1;
cmd.edge_1 = (x1 & 1) ? 0 : -2;
cmd.edge_2 = 4 * cmd.wl + ((x2 & 1) ? -2 : 0);
cmd.loop = loops[color_or_effect >= 0 ? 0 : -color_or_effect];
cmd.color = color_or_effect;
struct command *cmd =
azrp_new_command(sizeof *cmd, frag_first, frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
cmd->shader_id = AZRP_SHADER_RECT;
cmd->y = first_offset;
cmd->height_total = y2 - y1;
cmd->height_frag = azrp_frag_height - first_offset;
if(cmd->height_total < cmd->height_frag)
cmd->height_frag = cmd->height_total;
cmd->xl = (x1 >> 1);
cmd->wl = ((x2 - 1) >> 1) - cmd->xl + 1;
cmd->edge_1 = (x1 & 1) ? 0 : -2;
cmd->edge_2 = 4 * cmd->wl + ((x2 & 1) ? -2 : 0);
cmd->loop = loops[color_or_effect >= 0 ? 0 : -color_or_effect];
cmd->color = color_or_effect;
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
prof_leave(azrp_perf_cmdgen);
}

View File

@ -133,18 +133,23 @@ void azrp_text(int x, int y, font_t const *f, char const *str,
azrp_config_get_lines(y, f->data_height,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_TEXT;
cmd.x = x;
cmd.y = first_offset;
cmd.height = height;
cmd.top = top;
cmd.font = f;
cmd.str = str;
cmd.fg = fg;
cmd.size = size;
struct command *cmd =
azrp_new_command(sizeof *cmd, frag_first, frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
cmd->shader_id = AZRP_SHADER_TEXT;
cmd->x = x;
cmd->y = first_offset;
cmd->height = height;
cmd->top = top;
cmd->font = f;
cmd->str = str;
cmd->fg = fg;
cmd->size = size;
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
prof_leave(azrp_perf_cmdgen);
}

View File

@ -73,14 +73,20 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
azrp_config_get_lines(min_y, max_y - min_y + 1,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_TRIANGLE;
cmd.y = first_offset;
cmd.height_total = max_y - min_y + 1;
cmd.height_frag = min(cmd.height_total, azrp_frag_height - cmd.y);
cmd.x_min = min_x;
cmd.x_max = max_x;
cmd.color = color;
struct command *cmd =
azrp_new_command(sizeof *cmd, frag_first, frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
cmd->shader_id = AZRP_SHADER_TRIANGLE;
cmd->y = first_offset;
cmd->height_total = max_y - min_y + 1;
cmd->height_frag = min(cmd->height_total, azrp_frag_height - cmd->y);
cmd->x_min = min_x;
cmd->x_max = max_x;
cmd->color = color;
/* Swap points 1 and 2 if the order of points is not left-handed */
if(edge_start(x1, y1, x2, y2, x3, y3) < 0) {
@ -93,21 +99,20 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
}
/* Vector products for barycentric coordinates */
cmd.u0 = edge_start(x2, y2, x3, y3, min_x, min_y);
cmd.du_x = y3 - y2;
cmd->u0 = edge_start(x2, y2, x3, y3, min_x, min_y);
cmd->du_x = y3 - y2;
int du_y = x2 - x3;
cmd.v0 = edge_start(x3, y3, x1, y1, min_x, min_y);
cmd.dv_x = y1 - y3;
cmd->v0 = edge_start(x3, y3, x1, y1, min_x, min_y);
cmd->dv_x = y1 - y3;
int dv_y = x3 - x1;
cmd.w0 = edge_start(x1, y1, x2, y2, min_x, min_y);
cmd.dw_x = y2 - y1;
cmd->w0 = edge_start(x1, y1, x2, y2, min_x, min_y);
cmd->dw_x = y2 - y1;
int dw_y = x1 - x2;
int columns = max_x - min_x + 1;
cmd.du_row = du_y - columns * cmd.du_x;
cmd.dv_row = dv_y - columns * cmd.dv_x;
cmd.dw_row = dw_y - columns * cmd.dw_x;
cmd->du_row = du_y - columns * cmd->du_x;
cmd->dv_row = dv_y - columns * cmd->dv_x;
cmd->dw_row = dw_y - columns * cmd->dw_x;
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
prof_leave(azrp_perf_cmdgen);
}