Compare commits

...

3 Commits

Author SHA1 Message Date
Lephenixnoir 44f20d9b5c
azrp: add azrp_print() and azrp_print_opt() shader functions 2023-06-15 15:41:57 +02:00
Lephenixnoir a33b82f283
azrp: precompute list of glyphs in text shader
This was planned for later but kind of required to avoid having
commands that reference random strings whose lifetime is very much
questionable.
2023-06-15 15:09:59 +02:00
Lephenixnoir f3a1e90788
azrp: in-place command generation + variadic commands
This commit changes the command generation mechanism. Instead of
creating a command on the stack then calling azrp_queue_command() which
does a needless copy, the new function azrp_new_command() allocates
directly inside the command buffer, avoiding the copy.

This concept is pushed further with the introduction of finer-grained
functions azrp_alloc_command(), azrp_finalize_command() and
azrp_instantiate_command(), which split this process in three steps.
This allows generating commands of variadic size in-place in the
command buffer as well as using non-interval fragment sets.
2023-06-15 15:08:45 +02:00
9 changed files with 289 additions and 120 deletions

View File

@ -240,13 +240,19 @@ enum {
};
/* azrp_text(): Render a string of text, like dtext(). */
void azrp_text(int x, int y, font_t const *f, char const *str, int fg,
int size);
void azrp_text(int x, int y, int fg, char const *str);
/* azrp_text_opt(): Render text with options similar to dtext_opt(). */
void azrp_text_opt(int x, int y, font_t const *font, int fg, int halign,
int valign, char const *str, int size);
/* azrp_print(): Like azrp_text() but with printf-formatting. */
void azrp_print(int x, int y, int fg, char const *fmt, ...);
/* azrp_print_opt(): Like azrp_text_opt() but with printf-formatting. */
void azrp_print_opt(int x, int y, font_t const *font, int fg, int halign,
int valign, char const *fmt, ...);
//---
// Performance indicators
//
@ -303,15 +309,71 @@ int azrp_register_shader(azrp_shader_t *program,
or even points to valid memory. */
void azrp_set_uniforms(int shader_id, void *uniforms);
/* azrp_queue_command(): Add a new command to be rendered next frame
/* azrp_new_command(): Create a new command to be rendered next frame
The command must be a structure starting with an 8-bit shader ID. The
command is added for all fragments in range [fragment..fragment+count); its
data can be updated between fragments by the shader program. Returns true on
success, false if the maximum amount of commands or command memory is
exceeded. */
// TODO: azrp_queue_command: give access to command buffer in-place
bool azrp_queue_command(void *command, size_t size, int fragment, int count);
This function reserves `size` bytes of space in the command buffer, and
returns the address of the region so the caller can fill in a command. The
command will run for fragments in the interval [fragment .. fragment+count).
The command's data must start with an 8-bit shader ID; anything else is up
to the shader. In particular the command's data *can* be updated by the
shader function to reflect progress in the rendering between each fragment.
Returns NULL if the maximum number of commands is reached or the command
buffer is exhausted. */
void *azrp_new_command(size_t size, int fragment, int count);
/* azrp_alloc_command(): Allocate a command in the command buffer
This function, when used together with azrp_finalize_command() and
azrp_instantiate_command(), provides finer control over the command
generation process compared to the simple azrp_new_command().
azrp's command buffer is a bump allocator. Each new command is allocated
directly after the previous one. With azrp_new_command(), the full size of
the command must be known when allocating. By contrast, azrp_alloc_command()
allows variable-sized commands to be generated: the caller can figure out
the final size as it fills in the command, and later commits it by calling
azrp_finalize_command() which advances the bump allocator.
No shader commands can be generated while an allocation is ongoing.
azrp_finalize_command() must be called to finish allocating before
azrp_new_command() or azrp_alloc_command() can be called again.
This function allocates memory for a command of size at least `size`. The
total amount of memory available beyond `size` is recorded in `*extra`.
`size` usually represents the fixed size in commands (eg. the natural size
of structures with flexible array members); it is rarely 0 as a 1-byte
shader ID is always required.
`count` is the number of fragments that the caller knows will be covered by
the command. If fewer than `count` entries are available in the internal
queue where these are held, azrp_alloc_command() will return NULL
immediately. This avoids generating a command that could not be instantiated
anyway. If the final set of affected fragments is unknown, use 0.
Returns a pointer to the buffer where the command should be filled. If
`size` or `count` is too large, returns NULL. In any case, `*extra` is set
to the command buffer space remaining after reserving `size` bytes. */
void *azrp_alloc_command(size_t size, int *extra, int count);
/* azrp_finalize_command(): Finalize an allocation by azrp_alloc_command()
This function finishes an allocation started by azrp_alloc_command() and
advances the bump allocator. `total_size` is the size of the command, ie.
the sum of the `size` parameter to azrp_alloc_command() and the amount of
extra data used (less than azrp_alloc_command()'s `*extra`). */
void azrp_finalize_command(void const *command, int total_size);
/* azrp_instantiate_command(): Queue a command for a range of fragments
This function fills in the command queue with instructions to render the
given command on fragments of the range [fragment .. fragment+count). Unlike
with azrp_new_command(), this function can be called multiple times for the
same command, if disjoint intervals are ever needed.
Returns true on success, false if the queue is out of space. */
bool azrp_instantiate_command(void const *command, int fragment, int count);
/* azrp_queue_image(): Split and queue a gint image command

View File

@ -1,5 +1,6 @@
#include <azur/azur.h>
#include <azur/log.h>
#include <azur/gint/render.h>
#include <gint/timer.h>
#include <gint/cpu.h>

View File

@ -31,6 +31,9 @@ _azrp_r61524_fragment_x2:
ldrc r5
nop
/* TODO: Possible optimization by swap.w + xtrct (3 LS -> 2 EX + 2 LS)
with pipelining. Might be display-bottlenecked. But this might save
time in the parallel USB/R61524 method regardless? */
1: mov.w @r4+, r0
nop
mov.w r0, @r2

View File

@ -145,6 +145,7 @@ void azrp_render_fragments(void)
azrp_r61524_fragment_x1(azrp_frag, 396 * azrp_frag_height);
else if(azrp_scale == 2)
azrp_r61524_fragment_x2(azrp_frag, azrp_width, azrp_frag_height);
// TODO: r61524 x3 output function
prof_leave_norec(azrp_perf_r61524);
if(++frag >= azrp_frag_count) break;
@ -286,29 +287,55 @@ void azrp_set_uniforms(int shader_id, void *uniforms)
shaders[shader_id].uniform = uniforms;
}
bool azrp_queue_command(void *command, size_t size, int fragment, int count)
/* azrp_alloc_command(): Start an in-place command allocation
   Stores in `*extra` the number of bytes that would remain in commands_data
   after reserving `size` bytes at the current allocation offset; the value is
   written whether or not the call succeeds. Returns the address of the
   current end of the command buffer, or NULL if that remainder is negative or
   if `count` more queue entries would exceed AZRP_MAX_COMMANDS. The
   allocation offset itself is not modified here. */
void *azrp_alloc_command(size_t size, int *extra, int count)
{
    /* Same unsigned arithmetic as before; the result is narrowed to int so
       that an oversized request shows up as a negative remainder. */
    int remaining = sizeof commands_data - commands_length - size;
    *extra = remaining;

    if(remaining < 0)
        return NULL;
    if(commands_count + count > AZRP_MAX_COMMANDS)
        return NULL;

    return commands_data + commands_length;
}
/* azrp_finalize_command(): Commit an allocation to the command buffer
   Advances the allocation offset (commands_length) past a command of
   `total_size` bytes. `command` is not used. The call does nothing if
   `total_size` is negative or exceeds the space left in commands_data. */
void azrp_finalize_command(void const *command, int total_size)
{
    (void)command;

    int const available = (int)sizeof commands_data - commands_length;

    /* A negative size would rewind the allocator over earlier commands, and
       an oversized one would run past the buffer; ignore both. */
    if(total_size < 0 || total_size > available)
        return;

    /* Round up to a multiple of 4 so that the next command stays 4-aligned
       (assuming the buffer and the current offset already are). The previous
       `(total_size | 3) + 1` added 4 bytes to sizes that were already
       aligned, which wasted space and could make a command that exactly fit
       the buffer fail this function's bounds check and be silently left
       uncommitted. */
    int padded = (total_size + 3) & ~3;

    /* Padding must never cause a command that fits to be dropped: if only
       the padding overflows, the buffer is simply marked as full. */
    if(padded > available)
        padded = available;

    commands_length += padded;
}
bool azrp_instantiate_command(void const *command, int fragment, int count)
{
if(commands_count + count > AZRP_MAX_COMMANDS)
return false;
if(commands_length + size >= sizeof commands_data)
return false;
uint8_t *dst = commands_data + commands_length;
uint8_t *src = command;
for(size_t i = 0; i < size; i++)
dst[i] = src[i];
int offset = (uint8_t *)command - commands_data;
do {
commands_array[commands_count++] = (fragment << 16) | commands_length;
commands_array[commands_count++] = (fragment << 16) | offset;
fragment++;
}
while(--count > 0);
commands_length += size;
return true;
}
/* azrp_new_command(): Allocate, commit and queue a fixed-size command
   Chains azrp_alloc_command(), azrp_finalize_command() and
   azrp_instantiate_command() for a command of exactly `size` bytes covering
   fragments [fragment .. fragment+count). Returns the address where the
   caller should write the command, or NULL if the allocation step fails (in
   which case nothing is committed or queued). */
void *azrp_new_command(size_t size, int fragment, int count)
{
    /* The spare space reported by the allocator is not needed here since the
       command has a fixed size. */
    int spare;
    void *slot = azrp_alloc_command(size, &spare, count);

    if(slot != NULL) {
        azrp_finalize_command(slot, size);
        azrp_instantiate_command(slot, fragment, count);
    }
    return slot;
}
//---
// Performance indicators
//---

View File

@ -29,10 +29,13 @@ void azrp_clear(uint16_t color)
{
prof_enter(azrp_perf_cmdgen);
struct command cmd;
cmd.shader_id = AZRP_SHADER_CLEAR;
cmd.color = color;
struct command *cmd = azrp_new_command(sizeof *cmd, 0, azrp_frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
azrp_queue_command(&cmd, sizeof cmd, 0, azrp_frag_count);
cmd->shader_id = AZRP_SHADER_CLEAR;
cmd->color = color;
prof_leave(azrp_perf_cmdgen);
}

View File

@ -25,7 +25,12 @@ void azrp_queue_image(struct gint_image_box *box, image_t const *img,
cmd->output = (void *)azrp_frag + (azrp_width * first_y + cmd->x) * 2;
int n = 1 + (box->h - cmd->lines + azrp_frag_height-1) / azrp_frag_height;
azrp_queue_command(cmd, sizeof *cmd, fragment_id, n);
// TODO: Build image command in-place?
struct gint_image_cmd *cmd_inplace =
azrp_new_command(sizeof *cmd_inplace, fragment_id, n);
if(cmd_inplace)
*cmd_inplace = *cmd;
}
void azrp_subimage(int x, int y, image_t const *img,

View File

@ -72,20 +72,25 @@ void azrp_rect(int x1, int y1, int width0, int height0, int color_or_effect)
azrp_config_get_lines(y1, y2 - y1,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_RECT;
cmd.y = first_offset;
cmd.height_total = y2 - y1;
cmd.height_frag = azrp_frag_height - first_offset;
if(cmd.height_total < cmd.height_frag)
cmd.height_frag = cmd.height_total;
cmd.xl = (x1 >> 1);
cmd.wl = ((x2 - 1) >> 1) - cmd.xl + 1;
cmd.edge_1 = (x1 & 1) ? 0 : -2;
cmd.edge_2 = 4 * cmd.wl + ((x2 & 1) ? -2 : 0);
cmd.loop = loops[color_or_effect >= 0 ? 0 : -color_or_effect];
cmd.color = color_or_effect;
struct command *cmd =
azrp_new_command(sizeof *cmd, frag_first, frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
cmd->shader_id = AZRP_SHADER_RECT;
cmd->y = first_offset;
cmd->height_total = y2 - y1;
cmd->height_frag = azrp_frag_height - first_offset;
if(cmd->height_total < cmd->height_frag)
cmd->height_frag = cmd->height_total;
cmd->xl = (x1 >> 1);
cmd->wl = ((x2 - 1) >> 1) - cmd->xl + 1;
cmd->edge_1 = (x1 & 1) ? 0 : -2;
cmd->edge_2 = 4 * cmd->wl + ((x2 & 1) ? -2 : 0);
cmd->loop = loops[color_or_effect >= 0 ? 0 : -color_or_effect];
cmd->color = color_or_effect;
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
prof_leave(azrp_perf_cmdgen);
}

View File

@ -2,6 +2,7 @@
#include <gint/defs/util.h>
#include <gint/display.h>
#include <string.h>
#include <stdio.h>
uint8_t AZRP_SHADER_TEXT = -1;
@ -20,10 +21,7 @@ static void register_shader(void)
partial and full glyphs.
2. Optimize the heck out of the full-width case, which is almost every
single call.
3. Precompute the set of glyphs so the list can be reused when crossing
fragment boundaries, the shader can be written entirely in assembler, and
the command can possibly be reused?
4. Provide noclip toplevel functions, which I believe should provide a
3. Provide noclip toplevel functions, which I believe should provide a
nontrivial speed boost. */
void azrp_text_glyph(uint16_t *fragment, uint32_t const *data, int color,
@ -31,63 +29,55 @@ void azrp_text_glyph(uint16_t *fragment, uint32_t const *data, int color,
struct command {
uint8_t shader_id;
uint8_t _;
int16_t x, y;
uint8_t y;
int16_t x;
int16_t height, top;
uint16_t fg;
int16_t glyph_count;
font_t const *font;
char const *str;
int fg;
int size;
/* TODO
uint8_t first_left, first_dataw;
uint8_t last_left, last_dataw; */
/* TODO: We use two entries per glyph; offset and data width. Can we do
something that doesn't require both of these? */
uint16_t glyphs[];
};
void azrp_shader_text(void *uniforms0, void *cmd0, void *frag0)
{
(void)uniforms0;
struct command *cmd = cmd0;
int x = cmd->x;
int y = cmd->y;
font_t const *f = cmd->font;
int fg = cmd->fg;
int size = cmd->size;
/* Storage height, top position within glyph */
int height = min(cmd->height, azrp_frag_height - y);
int height = min(cmd->height, azrp_frag_height - cmd->y);
int top = cmd->top;
uint8_t const *str = (void *)cmd->str;
uint8_t const *str0 = str;
/* Raw glyph data */
uint32_t const *data = f->data;
uint16_t *frag = (uint16_t *)frag0 + azrp_width * cmd->y;
/* Update for next fragment */
cmd->height -= height;
cmd->top += height;
cmd->y = 0;
/* Move to top row */
uint16_t *frag = (uint16_t *)frag0 + azrp_width * y;
int glyph_count = cmd->glyph_count;
uint16_t *glyphs = cmd->glyphs;
/* Read each character from the input string */
while(x < azrp_window.right)
{
uint32_t code_point = dtext_utf8_next(&str);
if(!code_point || (size >= 0 && str - str0 > size)) break;
int glyph = dfont_glyph_index(f, code_point);
if(glyph < 0) continue;
int dataw = f->prop ? f->glyph_width[glyph] : f->width;
int index = dfont_glyph_offset(f, glyph);
do {
int dataw = *glyphs++;
int index = *glyphs++;
glyph_count -= 2;
/* Compute horizontal intersection between glyph and screen */
int width = dataw, left = 0;
if(x + dataw <= azrp_window.left)
{
x += dataw + f->char_spacing;
continue;
}
if(x < azrp_window.left) {
left = azrp_window.left - x;
width -= left;
@ -95,18 +85,28 @@ void azrp_shader_text(void *uniforms0, void *cmd0, void *frag0)
width = min(width, azrp_window.right - x);
/* Render glyph */
azrp_text_glyph(frag + x + left, data + index, fg, height, width,
azrp_text_glyph(frag + x + left, f->data + index, fg, height, width,
dataw - width, top * dataw + left);
x += dataw + f->char_spacing;
}
} while(glyph_count);
}
void azrp_text(int x, int y, font_t const *f, char const *str,
int fg, int size)
void azrp_text_opt(int x, int y, font_t const *f, int fg, int halign,
int valign, char const *str0, int size)
{
prof_enter(azrp_perf_cmdgen);
if(halign != DTEXT_LEFT || valign != DTEXT_TOP) {
int w, h;
dnsize(str0, size, f, &w, &h);
if(halign == DTEXT_RIGHT) x -= w - 1;
if(halign == DTEXT_CENTER) x -= (w >> 1);
if(valign == DTEXT_BOTTOM) y -= h - 1;
if(valign == DTEXT_MIDDLE) y -= (h >> 1);
}
/* Clipping */
if(x >= azrp_window.right || y >= azrp_window.bottom ||
y + f->data_height <= azrp_window.top) {
@ -133,33 +133,91 @@ void azrp_text(int x, int y, font_t const *f, char const *str,
azrp_config_get_lines(y, f->data_height,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_TEXT;
cmd.x = x;
cmd.y = first_offset;
cmd.height = height;
cmd.top = top;
cmd.font = f;
cmd.str = str;
cmd.fg = fg;
cmd.size = size;
int extra;
struct command *cmd = azrp_alloc_command(sizeof *cmd, &extra, frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
cmd->shader_id = AZRP_SHADER_TEXT;
cmd->x = x;
cmd->y = first_offset;
cmd->glyph_count = 0;
cmd->height = height;
cmd->top = top;
cmd->font = f;
cmd->fg = fg;
uint8_t const *str = (void *)str0;
uint8_t const *str_end = (size >= 0) ? str + size : (void *)-1;
/* Compute the list of glyphs to be rendered */
while(x < azrp_window.right) {
uint32_t code_point = dtext_utf8_next(&str);
if(!code_point || str > str_end) break;
int glyph = dfont_glyph_index(f, code_point);
if(glyph < 0) continue;
int dataw = f->prop ? f->glyph_width[glyph] : f->width;
int index = dfont_glyph_offset(f, glyph);
if((cmd->glyph_count + 1) * (int)sizeof *cmd->glyphs > extra) {
prof_leave(azrp_perf_cmdgen);
return;
}
/* Glyph is entirely left clipped: skip it */
if(x + dataw <= azrp_window.left) {
x += dataw + f->char_spacing;
cmd->x = x;
continue;
}
cmd->glyphs[cmd->glyph_count++] = dataw;
cmd->glyphs[cmd->glyph_count++] = index;
x += dataw + f->char_spacing;
}
if(cmd->glyph_count == 0) {
prof_leave(azrp_perf_cmdgen);
return;
}
azrp_finalize_command(cmd, sizeof *cmd + 2 * cmd->glyph_count);
azrp_instantiate_command(cmd, frag_first, frag_count);
prof_leave(azrp_perf_cmdgen);
}
void azrp_text_opt(int x, int y, font_t const *font, int fg, int halign,
int valign, char const *str, int size)
void azrp_text(int x, int y, int fg, char const *str)
{
if(halign != DTEXT_LEFT || valign != DTEXT_TOP) {
int w, h;
dnsize(str, size, font, &w, &h);
font_t const *font = dfont(NULL);
dfont(font);
if(halign == DTEXT_RIGHT) x -= w - 1;
if(halign == DTEXT_CENTER) x -= (w >> 1);
if(valign == DTEXT_BOTTOM) y -= h - 1;
if(valign == DTEXT_MIDDLE) y -= (h >> 1);
}
azrp_text(x, y, font, str, fg, size);
azrp_text_opt(x, y, font, fg, DTEXT_LEFT, DTEXT_TOP, str, -1);
}
/* azrp_print(): printf-style front-end to azrp_text()
   Formats `fmt` and its variadic arguments into a 128-byte stack buffer, then
   forwards the resulting string to azrp_text() at (x, y) with color `fg`.
   vsnprintf() bounds the write to the buffer size, so longer output is
   truncated. */
void azrp_print(int x, int y, int fg, char const *fmt, ...)
{
    char text[128];
    va_list ap;

    va_start(ap, fmt);
    vsnprintf(text, sizeof text, fmt, ap);
    va_end(ap);

    azrp_text(x, y, fg, text);
}
/* azrp_print_opt(): printf-style front-end to azrp_text_opt()
   Formats `fmt` and its variadic arguments into a 128-byte stack buffer, then
   forwards the resulting string to azrp_text_opt() with the given font,
   color and alignment, passing -1 as the size argument. vsnprintf() bounds
   the write to the buffer size, so longer output is truncated. */
void azrp_print_opt(int x, int y, font_t const *font, int fg, int halign,
    int valign, char const *fmt, ...)
{
    char text[128];
    va_list ap;

    va_start(ap, fmt);
    vsnprintf(text, sizeof text, fmt, ap);
    va_end(ap);

    azrp_text_opt(x, y, font, fg, halign, valign, text, -1);
}

View File

@ -73,14 +73,20 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
azrp_config_get_lines(min_y, max_y - min_y + 1,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_TRIANGLE;
cmd.y = first_offset;
cmd.height_total = max_y - min_y + 1;
cmd.height_frag = min(cmd.height_total, azrp_frag_height - cmd.y);
cmd.x_min = min_x;
cmd.x_max = max_x;
cmd.color = color;
struct command *cmd =
azrp_new_command(sizeof *cmd, frag_first, frag_count);
if(!cmd) {
prof_leave(azrp_perf_cmdgen);
return;
}
cmd->shader_id = AZRP_SHADER_TRIANGLE;
cmd->y = first_offset;
cmd->height_total = max_y - min_y + 1;
cmd->height_frag = min(cmd->height_total, azrp_frag_height - cmd->y);
cmd->x_min = min_x;
cmd->x_max = max_x;
cmd->color = color;
/* Swap points 1 and 2 if the order of points is not left-handed */
if(edge_start(x1, y1, x2, y2, x3, y3) < 0) {
@ -93,21 +99,20 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
}
/* Vector products for barycentric coordinates */
cmd.u0 = edge_start(x2, y2, x3, y3, min_x, min_y);
cmd.du_x = y3 - y2;
cmd->u0 = edge_start(x2, y2, x3, y3, min_x, min_y);
cmd->du_x = y3 - y2;
int du_y = x2 - x3;
cmd.v0 = edge_start(x3, y3, x1, y1, min_x, min_y);
cmd.dv_x = y1 - y3;
cmd->v0 = edge_start(x3, y3, x1, y1, min_x, min_y);
cmd->dv_x = y1 - y3;
int dv_y = x3 - x1;
cmd.w0 = edge_start(x1, y1, x2, y2, min_x, min_y);
cmd.dw_x = y2 - y1;
cmd->w0 = edge_start(x1, y1, x2, y2, min_x, min_y);
cmd->dw_x = y2 - y1;
int dw_y = x1 - x2;
int columns = max_x - min_x + 1;
cmd.du_row = du_y - columns * cmd.du_x;
cmd.dv_row = dv_y - columns * cmd.dv_x;
cmd.dw_row = dw_y - columns * cmd.dw_x;
cmd->du_row = du_y - columns * cmd->du_x;
cmd->dv_row = dv_y - columns * cmd->dv_x;
cmd->dw_row = dw_y - columns * cmd->dw_x;
azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
prof_leave(azrp_perf_cmdgen);
}