From 33e6a44578da0a80a5d609e2d42c6633fd998131 Mon Sep 17 00:00:00 2001 From: Lephe Date: Fri, 20 Aug 2021 11:16:58 +0200 Subject: [PATCH] azur: add shader uniforms, clear shader, super-scaling --- azur/CMakeLists.txt | 3 + azur/include/azur/azur.h | 5 +- azur/include/azur/gint/render.h | 114 ++++++++++++++++++++++++++++---- azur/src/gint/render.c | 83 ++++++++++++++++++++--- azur/src/gint/shaders/clear.S | 21 ++++++ azur/src/gint/shaders/clear.c | 46 +++++++++++++ azur/src/gint/shaders/tex2d.S | 66 ++++++++---------- azur/src/gint/shaders/tex2d.c | 17 +++++ 8 files changed, 290 insertions(+), 65 deletions(-) create mode 100644 azur/src/gint/shaders/clear.S create mode 100644 azur/src/gint/shaders/clear.c create mode 100644 azur/src/gint/shaders/tex2d.c diff --git a/azur/CMakeLists.txt b/azur/CMakeLists.txt index fc298fe..04d740b 100644 --- a/azur/CMakeLists.txt +++ b/azur/CMakeLists.txt @@ -28,6 +28,9 @@ endif() if(AZUR_GRAPHICS_GINT_CG) list(APPEND SOURCES src/gint/render.c + src/gint/shaders/clear.c + src/gint/shaders/clear.S + src/gint/shaders/tex2d.c src/gint/shaders/tex2d.S) endif() diff --git a/azur/include/azur/azur.h b/azur/include/azur/azur.h index 0c8e94e..f990851 100644 --- a/azur/include/azur/azur.h +++ b/azur/include/azur/azur.h @@ -12,8 +12,9 @@ AZUR_BEGIN_DECLS success, non-zero on failure. Resources allocated by azur_init() are automatically destroyed by a destructor. - On GINT_CG, the window size is fixed to 396x224 and ignored. - TODO: On GINT_CG, accept super-resolution in azur_init(). */ + On GINT_CG, the window size is fixed to 396x224 and ignored at this stage. + The rendering system can still be configured for super-resolution with + azrp_config_scale() from <azur/gint/render.h>. */ int azur_init(char const *title, int window_width, int window_height); /* azur_main_loop(): Run the update/render loop. 
diff --git a/azur/include/azur/gint/render.h b/azur/include/azur/gint/render.h index ca58823..527d631 100644 --- a/azur/include/azur/gint/render.h +++ b/azur/include/azur/gint/render.h @@ -40,8 +40,9 @@ AZUR_BEGIN_DECLS /* arzp_shader_t: Type of shader functions * [uniforms] is a pointer to any data the shader might use as uniform. - * [command] is a structure of the shader's command type. */ -typedef void azrp_shader_t(void *uniforms, void *command); + * [command] is a structure of the shader's command type. + * [fragment] is a pointer to azrp_frag. */ +typedef void azrp_shader_t(void *uniforms, void *command, void *fragment); /* Video memory fragment used as rendering target (in XRAM). */ extern uint16_t azrp_frag[]; @@ -73,6 +74,10 @@ extern uint16_t azrp_frag[]; // save screenshots, or reuse commands, or add new commands and use the // already-sorted base, can use the low-level functions below which implement // steps 1, 3 and 4 individually. +// +// There are optional configuration calls that can be performed within step 1; +// the configuration is retained from frame to frame, so it may be enough to +// set it only once. //--- /* azrp_update(): Sort commands, render a frame, and starts another one */ @@ -87,19 +92,87 @@ void azrp_sort_commands(void); /* azrp_render_fragments(): Render and send fragments to the dislay (step 4) */ void azrp_render_fragments(void); +//--- +// Configuration calls +// +// The following configuration options can be changed during step 1, so either +// along with azrp_sort_commands(), or just after azrp_update(). +// +// Changing display settings usually requires updating the uniforms of shaders. +// See the details of each shader. +// +// Most settings are exposed as global variables. This is for read-only access; +// if you modify the variables directly you will get garbage. +//--- + +/* Current super-scaling factor. */ +extern int azrp_scale; +/* Width and height of display (based on scale). 
*/ +extern int azrp_width, azrp_height; +/* Number of fragments in a frame (affected by configuration). */ +extern int azrp_frag_count; +/* Offset of first fragment. */ +extern int azrp_frag_offset; + +/* azrp_config_scale(): Select the renderer's super-scaling factor + + This pipeline supports integer upscaling by factors of x1, x2 and x3. Unlike + the traditional VRAM approach, upscaling in this pipeline is fundamentally + faster on every level, since every bit of graphics data can be handled on an + actually smaller resolution, leaving the pixel duplication to the display + transfer. This is because efficient transfers to the display in this system + are performed by CPU, which is much more versatile than the DMA. + + The settings on each mode are as follows: + + * x1: Display resolution: 396x224 + Fragment size: 8 rows (6336 bytes) + Number of fragments: 28 (29 if an offset is used) + Total size of graphics data: 177.408 kB + + * x2: Display resolution: 198x112 + Fragment size: 16 rows (6336 bytes) + Number of fragments: 7 (8 if an offset is used) + Total size of graphics data: 44.352 kB + + * x3: Display resolution: 132x75 (last row only has 2/3 pixels) + Fragment size: 16 rows (4224 bytes) + Number of fragments: 5 (sometimes 6 if an offset is used) + Total size of graphics data: 19.800 kB + + As one would know when playing modern video games, super-resolution is one + of the most useful ways to increase performance. The reduced amount of + graphics data (4 or 9 times less than fullscreen) has a huge impact on the + rendering process. */ +void azrp_config_scale(int scale); + +/* azrp_config_frag_offset(): Offset fragments along the y-axis + + This call changes the alignment of fragments along the y-axis, so that the + first fragment starts somewhere above the screen. This tends to add one + additional fragment for the whole screen to be covered. + + The primary use of this feature is to align grid-based frames with + fragments. 
As a prototypical example, top-down games using a tileset spend + most of the display surface showing the tiled map, so it's pretty beneficial + to align fragments on map rows so that each fragment handles only one row, + which makes the shader simpler and faster, uses fewer commands, and even + simplifies memory access patterns a little bit. + + Another use is to align the x3 mode roughly to the center of the screen, to + emulate the 128x64 resolution of black-and-white models with 4 fragments. + + @offset Fragment offset along the y-axis (0 ... height of fragment-1). */ +void azrp_config_frag_offset(int offset); + //--- // Standard shaders //--- -enum { - /* Clears the entire output with a single color */ - AZRP_SHADER_CLEAR = 0, - /* Renders RGB565 textures/images */ - AZRP_SHADER_TEX2D, - - /* First user-attributable ID */ - AZRP_SHADER_USER, -}; + /* Clears the entire output with a single color */ +extern uint8_t AZRP_SHADER_CLEAR; + /* Renders RGB565 textures/images */ +extern uint8_t AZRP_SHADER_TEX2D; /* azrp_clear(): Clear output [ARZP_SHADER_CLEAR] */ void azrp_clear(uint16_t color); @@ -107,6 +180,12 @@ void azrp_clear(uint16_t color); /* azrp_image(): Queue image command [AZRP_SHADER_TEX2D] */ void azrp_image(int x, int y, uint16_t *pixels, int w, int h, int stride); +/* Functions to update uniforms for these shaders. You should call them when: + * AZRP_SHADER_CLEAR: Changing super-scaling settings. + * AZRP_SHADER_TEX2D: Changing super-scaling or fragment offsets. */ +void azrp_shader_clear_configure(void); +void azrp_shader_tex2d_configure(void); + //--- // Performance indicators // @@ -142,13 +221,20 @@ void azrp_perf_clear(void); /* azrp_register_shader(): Register a new command type and its shader program This function adds the specified shader program to the program array, and - returns the corresponding command type (which is AZRP_SHADER_USER plus some - value). Adding new shaders is useful for specialized rendering options (eg. 
- tiles with fixed size) or new graphical effects. + returns the corresponding command type. Adding new shaders is useful for + specialized rendering options (eg. tiles with fixed size) or new graphical + effects. If the maximum number shaders is exceeded, returns -1. */ int azrp_register_shader(azrp_shader_t *program); +/* azrp_set_uniforms(): Set a shader's uniforms pointer + + If the shader has less than 4 bytes of uniform data, an integer may be + passed as the address; there is no requirement that the pointer be aligned + or even point to valid memory. */ +void azrp_set_uniforms(int shader_id, void *uniforms); + /* azrp_queue_command(): Add a new command to be rendered next frame The command must be a structure starting with an 8-bit shader ID and an diff --git a/azur/src/gint/render.c b/azur/src/gint/render.c index 1ff5212..fd66e28 100644 --- a/azur/src/gint/render.c +++ b/azur/src/gint/render.c @@ -11,23 +11,25 @@ /* 8 rows of video memory, occupying 6338/8192 bytes of XRAM. */ GXRAM GALIGNED(32) uint16_t azrp_frag[DWIDTH * 8]; +/* Super-scaling factor, width and height of output. */ +int azrp_scale; +int azrp_width, azrp_height; +/* Offset of first fragment for alignment, and number of fragments. */ +int azrp_frag_offset; +int azrp_frag_count; + /* Number and total size of queued commands. */ GXRAM int commands_count = 0, commands_length = 0; /* Array of pointers to queued commands (stored as an offset into YRAM). */ GXRAM uint16_t commands_array[AZRP_MAX_COMMANDS]; -/* Default shader programs. */ -extern azrp_shader_t azrp_shader_tex2d; - -/* Array of shader programs. */ -GXRAM azrp_shader_t *shaders[AZRP_MAX_SHADERS] = { - [AZRP_SHADER_CLEAR] = NULL, /* TODO: Clear shader */ - [AZRP_SHADER_TEX2D] = &azrp_shader_tex2d, -}; +/* Array of shader programs and uniforms. */ +static azrp_shader_t *shaders[AZRP_MAX_SHADERS] = { NULL }; +static void *shader_uniforms[AZRP_MAX_SHADERS] = { NULL }; /* Next free index in the shader program array. 
*/ -GXRAM static uint16_t shaders_next = AZRP_SHADER_USER; +GXRAM static uint16_t shaders_next = 0; /* Performance counters. */ GXRAM prof_t azrp_perf_cmdgen; @@ -86,7 +88,7 @@ void azrp_render_fragments(void) if(cmd[1] == frag) { if(shaders[cmd[0]]) { prof_enter(azrp_perf_shaders); - shaders[cmd[0]](NULL, cmd); + shaders[cmd[0]](shader_uniforms[cmd[0]], cmd, azrp_frag); prof_leave(azrp_perf_shaders); } cmd = YRAM + commands_array[++i]; @@ -96,7 +98,7 @@ void azrp_render_fragments(void) xram_frame(azrp_frag, 396 * 8); prof_leave(azrp_perf_r61524); frag++; - if(frag == 28) break; + if(frag >= azrp_frag_count) break; } } @@ -110,6 +112,55 @@ void azrp_update(void) azrp_clear_commands(); } +//--- +// Configuration calls +//--- + +static void update_frag_count(void) +{ + if(azrp_scale == 1) + azrp_frag_count = 28 + (azrp_frag_offset > 0); /* 28 * 8 rows = 224 */ + else if(azrp_scale == 2) + azrp_frag_count = 7 + (azrp_frag_offset > 0); /* 7 * 16 rows = 112 */ + else if(azrp_scale == 3) + azrp_frag_count = 5 + (azrp_frag_offset > 5); /* 5 * 16 = 80 rows, 5 spare over 75 */ +} + +static void update_size(void) +{ + if(azrp_scale == 1) + azrp_width = 396, azrp_height = 224; /* Native 396x224 display */ + else if(azrp_scale == 2) + azrp_width = 198, azrp_height = 112; + else if(azrp_scale == 3) + azrp_width = 132, azrp_height = 75; +} + +void azrp_config_scale(int scale) +{ + if(scale < 1 || scale > 3) + return; + + azrp_scale = scale; + update_size(); + update_frag_count(); +} + +void azrp_config_frag_offset(int offset) +{ + if(offset < 0) + return; + + azrp_frag_offset = offset; + update_frag_count(); +} + +__attribute__((constructor)) +static void default_settings(void) +{ + azrp_config_scale(1); +} + //--- // Custom shaders //--- @@ -125,6 +176,16 @@ int azrp_register_shader(azrp_shader_t *program) return id; } +void azrp_set_uniforms(int shader_id, void *uniforms) +{ + if((unsigned int)shader_id >= AZRP_MAX_SHADERS) /* Also rejects negative IDs */ + return; + if(shaders[shader_id] == NULL) /* No shader registered under this ID */ + return; + + shader_uniforms[shader_id] = uniforms; +} + bool azrp_queue_command(void *command, size_t size) { 
if(commands_count >= AZRP_MAX_COMMANDS) diff --git a/azur/src/gint/shaders/clear.S b/azur/src/gint/shaders/clear.S new file mode 100644 index 0000000..a343152 --- /dev/null +++ b/azur/src/gint/shaders/clear.S @@ -0,0 +1,21 @@ +.global _azrp_shader_clear +.align 4 + +_azrp_shader_clear: + ldrs 1f + ldre 2f + ldrc r4 + + mov.w @(2,r5), r0 + shll2 r4 + + add r4, r6 + + swap.w r0, r1 + + xtrct r0, r1 + +1: 2: mov.l r1, @-r6 + + rts + nop diff --git a/azur/src/gint/shaders/clear.c b/azur/src/gint/shaders/clear.c new file mode 100644 index 0000000..4e0354b --- /dev/null +++ b/azur/src/gint/shaders/clear.c @@ -0,0 +1,46 @@ +#include <azur/gint/render.h> + +uint8_t AZRP_SHADER_CLEAR = -1; + +__attribute__((constructor)) +static void register_shader(void) +{ + extern azrp_shader_t azrp_shader_clear; + AZRP_SHADER_CLEAR = azrp_register_shader(azrp_shader_clear); +} + +void azrp_shader_clear_configure(void) +{ + int longs_in_fragment = 0; + + if(azrp_scale == 1) + longs_in_fragment = (396 * 2) * 8 / 4; /* 8 rows of 396 16-bit pixels */ + else if(azrp_scale == 2) + longs_in_fragment = (198 * 2) * 16 / 4; /* 16 rows of 198 16-bit pixels */ + else if(azrp_scale == 3) + longs_in_fragment = (132 * 2) * 16 / 4; /* 16 rows of 132 16-bit pixels */ + + azrp_set_uniforms(AZRP_SHADER_CLEAR, (void *)longs_in_fragment); /* Integer passed as the uniforms pointer */ +} + +//--- + +struct command { + /* Shader ID and fragment number */ + uint8_t shader_id; + uint8_t fragment_id; + /* Color */ + uint16_t color; +}; + +void azrp_clear(uint16_t color) +{ + struct command cmd; + cmd.shader_id = AZRP_SHADER_CLEAR; + cmd.color = color; + + for(int i = 0; i < azrp_frag_count; i++) { + cmd.fragment_id = i; + azrp_queue_command(&cmd, sizeof cmd); + } +} diff --git a/azur/src/gint/shaders/tex2d.S b/azur/src/gint/shaders/tex2d.S index ea014a3..6c65da1 100644 --- a/azur/src/gint/shaders/tex2d.S +++ b/azur/src/gint/shaders/tex2d.S @@ -1,45 +1,41 @@ .global _azrp_shader_tex2d .align 4 -/* TODO [scaling]: Pass the _792 constant and fragment address as uniform */ - /* Register assignment r0: (temporary) r1: Lines - r2: Columns + r2: Output r3: Input - r4: 
Output stride (initially uniform: azrp_width*2) r5: Command queue; (temporary) - r6: (temporary) - r7: Output stride + r6: (temporary) (initially azrp_frag) + r7: Columns r8: Input stride */ _azrp_shader_tex2d: - mov.w _792, r7 add #2, r5 - mov.w @r5+, r2 /* Columns */ + mov.w @r5+, r7 /* Columns */ mov.l r8, @-r15 - mov.w @r5+, r6 /* Input (1/2) */ - sub r2, r7 + mov.w @r5+, r0 /* Input (1/2) */ + sub r7, r4 /* Stride minus row bytes (1/2) */ mov.w @r5+, r3 /* Input (2/2) */ - sub r2, r7 + sub r7, r4 /* Stride minus row bytes (2/2) */ - mov.w @r5+, r4 /* Output offset */ + mov.w @r5+, r2 /* Output offset */ mov.w @r5+, r1 /* Lines */ shll16 r3 - xtrct r6, r3 - mov.l .fragment, r6 + xtrct r0, r3 mov.w @r5+, r8 /* Input stride */ mov #8, r0 /* Maximum width for naive method */ - add r6, r4 - cmp/ge r2, r0 + add r6, r2 /* Output = fragment + offset */ + cmp/ge r7, r0 bt.s .naive mov #2, r0 @@ -62,11 +58,11 @@ _azrp_shader_tex2d: ldrs 2f; \ ldre 3f; \ \ -1: ldrc r2; \ +1: ldrc r7; \ dt r1; \ #define TEX2D_END() \ - add r7, r4; \ + add r4, r2; \ bf.s 1b; \ add r8, r3; \ \ @@ -75,69 +71,63 @@ _azrp_shader_tex2d: .case_analysis: /* Use naive method for opposite source/destination parity */ - mov r4, r6 + mov r2, r6 xor r3, r6 tst r0, r6 bf .naive - shlr r2 + shlr r7 bt .wo .we: - tst r0, r4 + tst r0, r2 bf .we_do .we_de: TEX2D_START() 2: movs.l @r3+, x0 -3: movs.l x0, @r4+ +3: movs.l x0, @r2+ TEX2D_END() .we_do: - add #-1, r2 + add #-1, r7 TEX2D_START() movs.w @r3+, x0 - movs.w x0, @r4+ + movs.w x0, @r2+ 2: movs.l @r3+, x0 -3: movs.l x0, @r4+ +3: movs.l x0, @r2+ movs.w @r3+, x0 - movs.w x0, @r4+ + movs.w x0, @r2+ TEX2D_END() .wo: - tst r0, r4 + tst r0, r2 bf .wo_do .wo_de: TEX2D_START() 2: movs.l @r3+, x0 -3: movs.l x0, @r4+ +3: movs.l x0, @r2+ movs.w @r3+, x0 - movs.w x0, @r4+ + movs.w x0, @r2+ TEX2D_END() .wo_do: TEX2D_START() movs.w @r3+, x0 - movs.w x0, @r4+ + movs.w x0, @r2+ 2: movs.l @r3+, x0 -3: movs.l x0, @r4+ +3: movs.l x0, @r2+ TEX2D_END() /* Naive method for small widths and opposite source/destination parity */ .naive: TEX2D_START() 2: movs.w @r3+, x0 -3: 
movs.w x0, @r4+ +3: movs.w x0, @r2+ TEX2D_END() - -.align 4 -.fragment: - .long _azrp_frag -_792: - .word 792 diff --git a/azur/src/gint/shaders/tex2d.c b/azur/src/gint/shaders/tex2d.c new file mode 100644 index 0000000..6d410e0 --- /dev/null +++ b/azur/src/gint/shaders/tex2d.c @@ -0,0 +1,17 @@ +#include <azur/gint/render.h> + +uint8_t AZRP_SHADER_TEX2D = -1; + +__attribute__((constructor)) +static void register_shader(void) +{ + extern azrp_shader_t azrp_shader_tex2d; + AZRP_SHADER_TEX2D = azrp_register_shader(azrp_shader_tex2d); +} + +void azrp_shader_tex2d_configure(void) +{ + azrp_set_uniforms(AZRP_SHADER_TEX2D, (void *)(2 * azrp_width)); /* Output row size in bytes, passed as the pointer value */ +} + +//---