azrp: add rectangle shader (azrp_rect): flat, invert, darken, whiten

Flat is optimized but invert, darken and whiten are not.
This commit is contained in:
Lephenixnoir 2023-05-31 23:29:04 +02:00
parent fe4b339327
commit 2e7c076e60
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
8 changed files with 335 additions and 14 deletions

View File

@ -72,7 +72,10 @@ if(AZUR_GRAPHICS_GINT_CG)
src/gint/shaders/image_p4_dye.c
# Triangle shader
src/gint/shaders/triangle.c
src/gint/shaders/triangle.S)
src/gint/shaders/triangle.S
# Rectangle shader
src/gint/shaders/rect.c
src/gint/shaders/rect.S)
endif()
add_library(azur STATIC ${SOURCES})

View File

@ -173,6 +173,21 @@ void azrp_config_scale(int scale);
@offset Fragment offset along the y-axis (0 ... height of fragment-1). */
void azrp_config_frag_offset(int offset);
/* azrp_config_get_line(): Split a line number into fragment/offset
Sets *fragment to the first fragment that covers line y and *offset to the
line number within that fragment. The current fragment offset (see
azrp_config_frag_offset()) is taken into account.
@y         Line number to locate
@fragment  Set to the index of the fragment that covers line y
@offset    Set to the index of line y within that fragment */
void azrp_config_get_line(int y, int *fragment, int *offset);
/* azrp_config_get_lines(): Split a line interval into fragments and an offset
Splits the interval [y; y+height) into a first fragment, an offset within that
fragment, and a number of fragments. The current fragment offset (see
azrp_config_frag_offset()) is taken into account.
- Sets *first_fragment to the fragment that covers line y;
- Sets *first_offset to the line number within that fragment;
- Sets *fragment_count to the number of fragments the interval will cover,
  counted from *first_fragment up to the fragment covering line y+height-1. */
void azrp_config_get_lines(int y, int height, int *first_fragment,
int *first_offset, int *fragment_count);
//---
// Hooks
//---
@ -205,11 +220,24 @@ void azrp_image(int x, int y, bopti_image_t const *image);
void azrp_subimage(int x, int y, bopti_image_t const *image,
int left, int top, int width, int height, int flags);
/* See below for more detailed image functions. Dynamic effects are provided
with the same naming convention as gint. */
/* azrp_triangle(): Render a flat triangle. Points can be in any order. */
void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color);
/* See below for more detailed image functions. Dynamic effects are provided
with the same naming convention as gint. */
/* azrp_rect(): Render a rectangle with a flat color or color transform.
The rectangle covers [x1; x1+width) x [y1; y1+height) and is clipped to the
rendering area; nothing is rendered if the clipped rectangle is empty.
color_or_effect is either a non-negative color, for a flat fill, or one of the
negative AZRP_RECT_* effects, which transform the pixels already rendered. */
void azrp_rect(int x1, int y1, int width, int height, int color_or_effect);
/* Effects for azrp_rect(), passed in place of a color. Every effect is
   negative; non-negative values are interpreted as flat colors instead. */
enum {
    /* Invert colors in gamma space (every pixel is bitwise-complemented). */
    AZRP_RECT_INVERT = -1,

    /* Darken by halving all components in gamma space. */
    AZRP_RECT_DARKEN = -2,

    /* Whiten by halving the distance to white in gamma space (invert,
       halve all components, invert again). */
    AZRP_RECT_WHITEN = -3,
};
//---
// Performance indicators
@ -274,6 +302,7 @@ void azrp_set_uniforms(int shader_id, void *uniforms);
data can be updated between fragments by the shader program. Returns true on
success, false if the maximum amount of commands or command memory is
exceeded. */
// TODO: azrp_queue_command: give access to command buffer in-place
bool azrp_queue_command(void *command, size_t size, int fragment, int count);
/* azrp_queue_image(): Split and queue a gint image command

View File

@ -227,6 +227,24 @@ static void default_settings(void)
azrp_config_scale(1);
}
/* azrp_config_get_line(): Locate line y as a (fragment, offset) pair.
   The fragment offset is added to y first; the result is then split into a
   fragment index (upper bits) and a line index within that fragment (lower 4
   bits), i.e. fragments are taken to be 16 lines tall.
   NOTE(review): the 16-line height is hard-coded here - confirm that it always
   matches azrp_frag_height. */
void azrp_config_get_line(int y, int *fragment, int *offset)
{
    int const line = y + azrp_frag_offset;

    *fragment = line >> 4;
    *offset = line & 15;
}
/* azrp_config_get_lines(): Locate the line interval [y; y+height).
   After adding the fragment offset, the first line is split into a fragment
   index and a line index within that fragment (fragments are taken to be 16
   lines tall), and the fragments are counted up to and including the one that
   holds the last line of the interval.
   NOTE(review): the 16-line height is hard-coded here - confirm that it always
   matches azrp_frag_height. */
void azrp_config_get_lines(int y, int height, int *first_fragment,
    int *first_offset, int *fragment_count)
{
    int const top = y + azrp_frag_offset;
    int const bottom = top + height - 1;

    int const top_fragment = top >> 4;
    int const bottom_fragment = bottom >> 4;

    *first_fragment = top_fragment;
    *first_offset = top & 15;
    *fragment_count = bottom_fragment - top_fragment + 1;
}
//---
// Hooks
//---

View File

@ -17,12 +17,10 @@ void azrp_queue_image(struct gint_image_box *box, image_t const *img,
else
cmd->shader_id = AZRP_SHADER_IMAGE_P4;
/* This divides by azrp_frag_height */
/* TODO: Have a proper way to do optimized-division by azrp_frag_height */
int fragment_id = (azrp_scale == 1) ? (box->y >> 4) : (box->y >> 4);
int fragment_id, first_y;
azrp_config_get_line(box->y, &fragment_id, &first_y);
/* These settings only apply to the first fragment */
int first_y = (box->y + azrp_frag_offset) & (azrp_frag_height - 1);
cmd->lines = min(box->h, azrp_frag_height - first_y);
cmd->output = (void *)azrp_frag + (azrp_width * first_y + cmd->x) * 2;

View File

@ -10,7 +10,7 @@
In the simple case where there is no color effect and no HFLIP, the task of
rendering a 16-bit opaque image boils down to a 2-dimensional memcpy. This
task can be optimized by moving longwords if the source and destination and
task can be optimized by moving longwords if the source and destination are
co-4-aligned, with four variations depending on the width and initial
position, identified by the following parameters:

View File

@ -0,0 +1,185 @@
.global _azrp_shader_rect
.global _azrp_shader_rect_loop_flat
.global _azrp_shader_rect_loop_invert
.global _azrp_shader_rect_loop_darken
.global _azrp_shader_rect_loop_whiten
.align 4
/* Register aliases shared by the shader entry point and the core loops. Note
   that _edge_1 and _edge_2 alias r2 and r3, which the entry point first uses
   as temporaries, and that r5 is reused for the color once the command has
   been fully decoded. */
#define _height r1
#define _edge_1 r2
#define _edge_2 r3
#define _stride r4
#define _cmd r5
#define _frag r6
#define _wl r7
#define _redstride r10
/* Register allocation:
r0: (temporary)
r1: height counter (lines left to render in this fragment)
r2: (temporary) then fragment + edge_1 (address of the left edge pixel)
r3: (temporary) then fragment + edge_2 (address of the right edge pixel)
r4: stride (azrp_width * 2); holds the uniform, doubled on entry
r5: cmd then color (can also be a temporary)
r6: fragment (moved to the first longword of the rectangle's first line)
r7: longwords to write on each line (wl)
r8: (temporary) then saved edge_1 (pixel value preserved around each line)
r9: (temporary) then saved edge_2 (pixel value preserved around each line)
r10: reduced stride (azrp_width * 2 - 4 * wl)
r8, r9 and r10 are pushed by the entry point and popped by the END macro. */
/* _azrp_shader_rect: Entry point of the rectangle shader.
   Decodes the command at _cmd while post-incrementing through it, rewrites its
   y / height_total / height_frag fields in place so that the same command can
   be run again on the next fragment, computes the pointers used by the core
   loops, and finally jumps to the core loop stored in the command. r8, r9 and
   r10 are pushed here and restored by the END macro of the core loop.
   NOTE(review): on entry r4 is expected to hold the uniform (azrp_width), r5
   the command and r6 the fragment, as per the register map - confirm against
   the shader calling convention. */
_azrp_shader_rect:
mov.w @_cmd+, r3 /* shader_id || y */
/* stride = 2 * r4: bytes per line */
shll _stride
mov.l r8, @-r15
nop
/* mov.b sign-extends; the difference is stored back as a byte below and
   the later comparison is unsigned, so a sign-extended value is harmless */
mov.b @_cmd+, r8 /* height_total */
/* Keep only the low byte of the loaded word, i.e. y */
extu.b r3, r3
mov.b @_cmd+, _height /* height_frag */
/* macl = y * stride: byte offset of the first line within the fragment */
mulu.w r3, _stride
mov.w @_cmd+, r2 /* xl */
/* _cmd is now 6 bytes into the command, so -4 addresses height_total */
mov #-4, r0
/* r8 = lines remaining after this fragment */
sub _height, r8
mov.b r8, @(r0, _cmd) /* update: height_total */
mov.l .azrp_frag_height, r0 /* ... inefficient ... */
/* xl is in longwords; convert to bytes */
shll2 r2
sts macl, r3
/* _frag += 4 * xl (horizontal position) */
add r2, _frag
/* r0 = value of azrp_frag_height */
mov.l @r0, r0
/* r2 = 0 is the y value written back for the next fragment */
mov #0, r2
mov.w @_cmd+, _wl /* wl */
/* _frag += y * stride (vertical position) */
add r3, _frag
mov.l r9, @-r15
/* T = (r8 >= r0), unsigned */
cmp/hs r0, r8
/* Next fragment height is currently r8 = remaining height
Set it to r0 = azrp_frag_height if r8 >= r0 */
bf 1f
mov r0, r8
/* _cmd is now 8 bytes into the command: -5 addresses height_frag and -7
   addresses y */
1: mov #-5, r0
mov.b r8, @(r0, _cmd) /* update: height_frag */
mov #-7, r0
mov.b r2, @(r0, _cmd) /* update: y */
mov.w @_cmd+, _edge_1 /* edge_1 */
/* r8 = 4 * wl: bytes written on each line */
mov _wl, r8
mov.w @_cmd+, _edge_2 /* edge_2 */
shll2 r8
mov.l @_cmd+, r0 /* loop */
/* Turn the edge offsets into addresses on the first line */
add _frag, _edge_1
/* This overwrites _cmd (r5); the command is not accessed past this point */
mov.w @_cmd, r5 /* color */
add _frag, _edge_2
mov.l r10, @-r15
mov _stride, _redstride
/* Jump to the core loop; the sub executes in the delay slot and leaves
   _redstride = stride - 4 * wl */
jmp @r0
sub r8, _redstride
/* Literal: address of the azrp_frag_height variable */
.azrp_frag_height:
.long _azrp_frag_height
/* START: Prologue of a core loop.
   Sets the repeat start and end addresses to the local labels 2 and 3 that
   the core loop places around its body, then, at label 1 (the per-line entry
   point that END branches back to), loads the repeat count with _wl and saves
   the pixels at both edges of the line into r8 and r9 so that END can restore
   them after the line has been processed. */
.macro START
ldrs 2f
ldre 3f
1: ldrc _wl
nop
mov.w @_edge_1, r8
nop
mov.w @_edge_2, r9
nop
.endm
/* END: Epilogue of a core loop.
   Runs once per line after the repeated body: counts the line down, writes
   back the edge pixels saved by START, moves _frag (which the body advanced by
   4 * wl bytes) and both edge pointers to the next line, and branches back to
   label 1 of START while lines remain. Once all lines are done, pops the
   registers pushed by the entry point and returns. */
.macro END
/* T = 1 when the line counter reaches zero */
dt _height
mov.w r8, @_edge_1
add _redstride, _frag
mov.w r9, @_edge_2
add _stride, _edge_1
nop
/* Loop while T = 0; the add below executes in the delay slot */
bf.s 1b
add _stride, _edge_2
mov.l @r15+, r10
mov.l @r15+, r9
/* The last pop executes in the delay slot of rts */
rts
mov.l @r15+, r8
.endm
/* Core loop: flat fill.
   Duplicates the 16-bit color into both halves of a longword, loads it into
   the DSP register x0, then stores x0 to wl consecutive longwords on each
   line. */
_azrp_shader_rect_loop_flat:
/* r5 = (color << 16) | (color & 0xffff) */
extu.w r5, r0
shll16 r5
or r0, r5
lds r5, x0
START
/* The store goes through r5 rather than _frag, presumably because movs.l
   cannot use r6 as its address register; _frag is synchronized again after
   the repeated store */
mov _frag, r5
2:3: movs.l x0, @r5+
mov r5, _frag
END
/* Core loop: invert.
   Reads each longword (two pixels) of the line, complements every bit and
   writes it back. */
_azrp_shader_rect_loop_invert:
/* Inefficient: could go all the way down to 2 cycles/long with
pipelining, but we're stuck at 3 cycles/long with this naive
approach */
// TODO: Time it; we might be able to skip the pipelining and simply read
// ahead. Also, the update of _frag at the end of the loop body is suspicious.
START
2: mov.l @_frag, r0
not r0, r0
mov.l r0, @_frag
3: add #4, _frag
END
/* Never executed (END returns); presumably padding to keep the next loop
   aligned */
nop
/* Core loop: darken.
   Halves every color component of each longword (two pixels): the mask first
   clears the lowest bit of each component so that the one-bit right shift
   cannot carry a bit from one component into the next. */
_azrp_shader_rect_loop_darken:
/* r5 no longer needs to hold the color; reuse it for the mask */
mov.l .darken_mask, r5
nop
/* Inefficient */
START
2: mov.l @_frag, r0
and r5, r0
shlr r0
mov.l r0, @_frag
3: add #4, _frag
nop
END
/* Core loop: whiten.
   Same as the darken loop but applied to the complement of each longword,
   which is complemented back afterwards: this halves the distance of every
   color component to its maximum value. */
_azrp_shader_rect_loop_whiten:
/* r5 no longer needs to hold the color; reuse it for the mask */
mov.l .darken_mask, r5
nop
/* Inefficient */
START
2: mov.l @_frag, r0
not r0, r0
and r5, r0
shlr r0
not r0, r0
mov.l r0, @_frag
3: add #4, _frag
nop
END
/* Mask used by the darken and whiten loops before shifting right by one bit.
   In each 16-bit half, bits 0, 5 and 11 are cleared, i.e. the lowest bit of
   each field of a 5-6-5 layout (presumably RGB565 pixels), so that no bit
   crosses from one component into the next during the shift. */
.darken_mask:
.long 0xf7def7de

View File

@ -0,0 +1,91 @@
#include <azur/gint/render.h>
/* Identifier of the rectangle shader; holds -1 (converted to uint) until it is
   assigned by register_shader(). */
uint AZRP_SHADER_RECT = -1;

/* Sets the shader's uniform. The uniform is the value of azrp_width itself,
   carried in the pointer argument, not a pointer to data. */
static void configure(void)
{
    void *uniforms = (void *)azrp_width;
    azrp_set_uniforms(AZRP_SHADER_RECT, uniforms);
}
/* Constructor: registers the assembly shader together with its configuration
   function, records the identifier it is given, then sets the uniform a first
   time by calling configure() directly. */
__attribute__((constructor))
static void register_shader(void)
{
    /* Entry point of the shader, implemented in assembly */
    extern azrp_shader_t azrp_shader_rect;

    AZRP_SHADER_RECT = azrp_register_shader(azrp_shader_rect, configure);

    configure();
}
//---
/* Rectangle command. The assembly shader reads the fields in declaration
   order and writes y, height_total and height_frag back in place after each
   fragment, so the order and size of the fields must stay in sync with it. */
struct command {
    /* Shader identifier (AZRP_SHADER_RECT) */
    uint8_t shader_id;
    /* Local y coordinate of the first line in the fragment */
    uint8_t y;
    /* Number of lines to render total, including this fragment */
    uint8_t height_total;
    /* Number of lines to render on the current fragment */
    uint8_t height_frag;

    /* Left position of the rectangle along x, in longwords */
    uint16_t xl;
    /* Width of the rectangle along x, in longwords */
    uint16_t wl;

    /* Offset of left edge */
    int16_t edge_1;
    /* Offset of right edge */
    int16_t edge_2;

    /* Core loop (this is an internal label of the renderer) */
    void const *loop;

    /* Color, when applicable */
    uint16_t color;
};
/* Core loops. These are labels inside the assembly shader that the shader
   jumps to once the command is decoded; they are not meant to be called from
   C code. */
typedef void rect_loop_fn(void);

extern rect_loop_fn azrp_shader_rect_loop_flat;
extern rect_loop_fn azrp_shader_rect_loop_invert;
extern rect_loop_fn azrp_shader_rect_loop_darken;
extern rect_loop_fn azrp_shader_rect_loop_whiten;

/* Core loop table: entry 0 is the flat fill, entry -e is the loop for the
   effect of value e. */
static rect_loop_fn *loops[] = {
    [0] = azrp_shader_rect_loop_flat,
    [1] = azrp_shader_rect_loop_invert,
    [2] = azrp_shader_rect_loop_darken,
    [3] = azrp_shader_rect_loop_whiten,
};
/* azrp_rect(): Queue a rectangle with a flat color or a color transform.

   The rectangle [x1; x1+width0) x [y1; y1+height0) is clipped to the rendering
   area and queued as one command that covers every fragment it intersects.

   @x1 @y1           Top-left corner
   @width0 @height0  Dimensions, in pixels
   @color_or_effect  Non-negative: flat fill color (truncated to 16 bits).
                     Negative: effect, selecting entry -color_or_effect of
                     loops[].

   Nothing is queued if the clipped rectangle is empty or if color_or_effect
   is a negative value with no corresponding entry in loops[]. */
void azrp_rect(int x1, int y1, int width0, int height0, int color_or_effect)
{
    /* Reject unknown effects up front: they would otherwise index loops[]
       out of bounds and give the shader a garbage jump target. The bound is
       checked before negating so that INT_MIN is never negated. */
    int const effect_count = (int)(sizeof loops / sizeof loops[0]) - 1;
    if(color_or_effect < -effect_count)
        return;

    /* Clipping (x2 and y2 excluded) */
    int x2 = x1 + width0;
    int y2 = y1 + height0;

    if(x1 < 0)
        x1 = 0;
    if(y1 < 0)
        y1 = 0;
    if(x2 > azrp_width)
        x2 = azrp_width;
    if(y2 > azrp_height)
        y2 = azrp_height;

    if(x2 <= x1 || y2 <= y1)
        return;

    prof_enter(azrp_perf_cmdgen);

    int frag_first, first_offset, frag_count;
    azrp_config_get_lines(y1, y2 - y1,
        &frag_first, &first_offset, &frag_count);

    struct command cmd;
    cmd.shader_id = AZRP_SHADER_RECT;
    cmd.y = first_offset;
    /* NOTE(review): stored on 8 bits; assumes azrp_height <= 255 - confirm */
    cmd.height_total = y2 - y1;
    /* The first fragment gets the lines from first_offset to its end, or
       the whole rectangle if it is shorter than that */
    cmd.height_frag = azrp_frag_height - first_offset;
    if(cmd.height_total < cmd.height_frag)
        cmd.height_frag = cmd.height_total;

    /* The shader writes whole longwords (2 pixels): xl is the first
       longword that contains a pixel of the rectangle and wl the number of
       longwords up to the one that contains pixel x2-1 */
    cmd.xl = (x1 >> 1);
    cmd.wl = ((x2 - 1) >> 1) - cmd.xl + 1;

    /* Byte offsets, from the first longword of a line, of the two pixels
       that the shader saves before processing the line and restores
       afterwards. When x1 is odd (resp. x2 is odd) this is the pixel
       outside the rectangle that shares the first (resp. last) longword;
       otherwise it is the pixel right before (resp. right after) the
       longwords being written. */
    cmd.edge_1 = (x1 & 1) ? 0 : -2;
    cmd.edge_2 = 4 * cmd.wl + ((x2 & 1) ? -2 : 0);

    cmd.loop = loops[color_or_effect >= 0 ? 0 : -color_or_effect];
    cmd.color = color_or_effect;

    azrp_queue_command(&cmd, sizeof cmd, frag_first, frag_count);
    prof_leave(azrp_perf_cmdgen);
}

View File

@ -69,12 +69,9 @@ void azrp_triangle(int x1, int y1, int x2, int y2, int x3, int y3, int color)
return;
}
/* TODO: Have a proper way to do optimized-division by azrp_frag_height
TODO: Also account for first-fragment offset */
int frag_first = min_y >> 4;
int frag_last = max_y >> 4;
int frag_count = frag_last - frag_first + 1;
int first_offset = min_y & 15;
int frag_first, first_offset, frag_count;
azrp_config_get_lines(min_y, max_y - min_y + 1,
&frag_first, &first_offset, &frag_count);
struct command cmd;
cmd.shader_id = AZRP_SHADER_TRIANGLE;