bopti: first fxcg50 version with r5g6b5 and r5g6b5a

This commit introduces bopti for fx-CG 50. Currently the only
interfaces are the bopti_render_{clip,noclip} functions, and the
only supported formats are r5g6b5 and r5g6b5a.

The algorithm for r5g6b5 is optimized to perform longword accesses
using movua.l, whereas the algorithm for r5g6b5a uses plain word
accesses because transparency checks feel more difficult than one
more loop iteration.

These algorithms are still slow for large surfaces and struggle to
sustain 25 FPS in full-screen, so possible improvements with the
DMA should definitely be tested before resorting to overclocking.
This commit is contained in:
lephe 2019-08-04 13:59:35 +02:00
parent 64dbe6021d
commit 2e17b77e56
4 changed files with 276 additions and 0 deletions

View File

@ -41,6 +41,34 @@ enum {
C_NONE = -1,
};
//---
// Image rendering (bopti)
//---
/* image_t: Image files encoded for bopti
This format is created by the fxSDK's [fxconv] tool from standard images.
The header fields below are followed by the palette (if any) and then the
pixel data, all stored in the trailing [data] array. */
typedef struct
{
/* Color profile (type of palette), could be extended into a bit field
later on */
uint16_t profile;
/* Color code assigned to transparent pixels. The opaque 16-bit profile
ignores it; the 16-bit alpha profile hands it to the renderer, which
skips every source pixel equal to this code. */
uint16_t alpha;
/* Full width and height, in pixels */
uint16_t width;
uint16_t height;
/* Color palette:
* 16-bit and 16-bit alpha: none
* 8-bit: 256 colors (total 512 bytes)
* 4-bit: 16 colors (total 32 bytes)
Then raw pixel data in row-major order. In the 16-bit profiles the
renderer reads one uint16_t per pixel, [width] entries per row. */
uint16_t data[];
} GPACKED(4) image_t;
//---
// Video RAM management
//---

25
src/render-cg/bopti-asm.h Normal file
View File

@ -0,0 +1,25 @@
//---
// gint:render-cg:bopti-asm - Assembler drawing routines for bopti
//---
#ifndef GINT_RENDERCG_BOPTIASM
#define GINT_RENDERCG_BOPTIASM
/* bopti_r5g6b5(): Full opaque 16-bit rendering
Copies a rectangle of 16-bit pixels from @data to @target, row by row.
@data Input data (2-aligned), pointing at the first pixel to render
@target Target in VRAM (2-aligned), pointing at the first pixel to write
@width Width of rendered image, in pixels (must be positive)
@height Height of rendered image, in pixels (must be positive)
@in_stride Bytes to skip in @data between the end of one rendered row and
the start of the next (twice the number of pixels not rendered)
@out_stride Bytes to skip in @target between the end of one rendered row
and the start of the next */
void bopti_r5g6b5(uint16_t const *data, uint16_t *target, int width,
int height, int in_stride, int out_stride);
/* bopti_r5g6b5a(): 16-bit rendering with alpha
Copies a rectangle of 16-bit pixels from @data to @target one pixel at a
time, leaving the target untouched wherever the source pixel equals @alpha.
@data @target @width @height @in_stride @out_stride
As in bopti_r5g6b5(); widths and heights are in pixels and must be
positive, strides are in bytes.
@alpha Color code that encodes transparency */
void bopti_r5g6b5a(uint16_t const *data, uint16_t *target, int width,
int height, int in_stride, int out_stride, uint16_t alpha);
#endif /* GINT_RENDERCG_BOPTIASM */

138
src/render-cg/bopti-asm.s Normal file
View File

@ -0,0 +1,138 @@
.global _bopti_r5g6b5
.global _bopti_r5g6b5a

# bopti_r5g6b5(): opaque copy of a width x height rectangle of 16-bit pixels
#
# Each row is copied in three steps:
#   1. the first pixel, as a word (this is the only access that reaches it
#      when the target is 2-aligned but not 4-aligned);
#   2. the last pixel, as a word (this is the only access that reaches it
#      when an odd number of pixels remains after the aligned longwords);
#   3. every 4-aligned target longword that lies entirely within the row,
#      read from the source with movua.l since the source may be unaligned.
# Steps 1 and 2 may overlap with step 3; they write the same values.
#
# The x loop only walks over (target & 2) + 4 * (number of longwords) bytes,
# which may be 2 bytes short of the row size; the difference is folded into
# the stride adjustments so that both pointers always advance by exactly
# 2 * width + stride bytes per row.
#
# Requires width >= 1 and height >= 1. The target rows must all share the
# same 4-byte alignment, i.e. 2 * width + out_stride is a multiple of 4.

# REGISTER ALLOCATION:
# r0: (tmp)
# r1: (tmp)
# r2: 2 * (width - 1), the byte offset of the last pixel of a row
# r3: target & 2
# ---
# r4: data
# r5: target
# r6: width; then, the number of longword operations per row
# r7: height
# ---
# r8: in_stride, plus the bytes of a row not covered by the x loop
# r9: out_stride, plus the bytes of a row not covered by the x loop
# r10: x counter
# ---
# @12: in_stride
# @16: out_stride
.align 4
_bopti_r5g6b5:
# Target alignment, either 0 (4-aligned) or 2 (2-aligned)
	mov.l	r8, @-r15
	mov	r5, r3
	mov.l	r9, @-r15
	mov	#2, r0
	mov.l	r10, @-r15
	and	r0, r3

# Byte offset of the last pixel, used to copy it separately
	mov	r6, r2
	add	#-1, r2
	shll	r2

# Row size in bytes
	mov	r6, r1
	shll	r1

# Number of longword operations per row: the aligned longwords that fit in
# the row once the leading word of a 2-aligned target has been skipped
	mov	r1, r6
	sub	r3, r6
	shlr2	r6

# Bytes of each row that the pointers do not walk over (0 or 2):
# 2 * width - (target & 2) - 4 * longwords
	mov	r6, r0
	shll2	r0
	sub	r3, r1
	sub	r0, r1

# Input and output strides, plus the bytes left over in each row
	mov.l	@(12, r15), r8
	mov.l	@(16, r15), r9
	add	r1, r8
	add	r1, r9

.r5g6b5_y:
# First pixel
	mov.w	@r4, r1
	mov	r2, r0
	mov.w	r1, @r5

# Last pixel; then align to 4-byte boundaries for target
	mov.w	@(r0, r4), r1
	add	r3, r4
	mov.w	r1, @(r0, r5)
	add	r3, r5

# Rows too narrow to hold an aligned longword skip the x loop entirely;
# dt decrements before testing, so entering it with 0 would wrap around
	tst	r6, r6
	bt.s	.r5g6b5_row_end
	mov	r6, r10

.r5g6b5_x:
# Copy longwords
	movua.l	@r4+, r0
	mov.l	r0, @r5
	dt	r10
	bf.s	.r5g6b5_x
	add	#4, r5

.r5g6b5_row_end:
# Move both pointers to the start of the next row
	add	r8, r4
	dt	r7
	bf.s	.r5g6b5_y
	add	r9, r5

# Restore callee-saved registers
	mov.l	@r15+, r10
	mov.l	@r15+, r9
	rts
	mov.l	@r15+, r8
# bopti_r5g6b5a(): copy a width x height rectangle of 16-bit pixels, leaving
# the target unchanged wherever the source pixel equals the alpha code
#
# Pixels are processed one word at a time. Requires width >= 1 and
# height >= 1, since both loops decrement their counter before testing it.

# REGISTER ALLOCATION:
# r0: (tmp) current pixel
# r1: in_stride
# r2: out_stride
# r3: x counter
# ---
# r4: data
# r5: target
# r6: width
# r7: height
# ---
# r8: alpha, sign-extended from 16 bits
# ---
# @4: in_stride
# @8: out_stride
# @12: alpha
.align 4
_bopti_r5g6b5a:
# Load alpha value
	mov.l	r8, @-r15
	mov.l	@(12, r15), r8

# mov.w sign-extends the pixels it loads to 32 bits; sign-extend alpha the
# same way so that codes with bit 15 set still compare equal
	exts.w	r8, r8

# Load input and output strides
	mov.l	@(4, r15), r1
	mov.l	@(8, r15), r2

.r5g6b5a_y:
	mov	r6, r3

.r5g6b5a_x:
# Copy the pixel unless it is the transparent color
	mov.w	@r4+, r0
	cmp/eq	r0, r8
	bt	.r5g6b5a_alpha
	mov.w	r0, @r5

.r5g6b5a_alpha:
# The target advances whether or not the pixel was written
	dt	r3
	bf.s	.r5g6b5a_x
	add	#2, r5

# Move both pointers to the start of the next row
	add	r1, r4
	dt	r7
	bf.s	.r5g6b5a_y
	add	r2, r5

# Restore the callee-saved register
	rts
	mov.l	@r15+, r8

85
src/render-cg/bopti.c Normal file
View File

@ -0,0 +1,85 @@
#define GINT_NEED_VRAM
#include <gint/defs/types.h>
#include <gint/display.h>
#include "bopti-asm.h"
/* struct box: Geometry of one rendering operation
   The field order matters: boxes are built with positional initializers in
   the order { x, y, w, h, left, top }. */
struct box
{
	/* Where the top-left corner of the sub-image lands on the target */
	int x;
	int y;
	/* Size of the rendered sub-image, in pixels */
	int w;
	int h;
	/* Top-left corner of the sub-image within the source image; the
	   sub-image spans [left, left+w) horizontally and [top, top+h)
	   vertically */
	int left;
	int top;
};
/* Pixel formats, as stored in the [profile] field of images */
enum
{
	/* 16-bit color, fully opaque */
	PX_R5G6B5 = 0,
	/* 16-bit color, with one color code standing for transparency */
	PX_R5G6B5A,
	/* Palette-based, 8 bits per pixel (not rendered yet) */
	PX_P8,
	/* Palette-based, 4 bits per pixel (not rendered yet) */
	PX_P4,
};
/* bopti_render(): Render a sub-image to VRAM without any clipping
   Computes the source and target pointers and strides for the box, then
   dispatches to the assembler routine matching the image's profile. The box
   must already lie within both the image and the 396x224 VRAM; nothing is
   checked here apart from the box being non-empty.

   @img  Image to render
   @box  Target position, rendered size, and source top-left corner */
void bopti_render(image_t const *img, struct box *box)
{
	int w = box->w;
	int h = box->h;

	/* The assembler routines decrement their row and pixel counters
	   before testing them, so an empty box would make them wrap around
	   instead of stopping. There is nothing to draw anyway. */
	if(w <= 0 || h <= 0) return;

	/* Offset in video RAM and output stride */
	uint16_t *target = vram + (396 * box->y + box->x);
	int out = 2 * (396 - w);

	int profile = img->profile;

	if(profile == PX_R5G6B5 || profile == PX_R5G6B5A)
	{
		/* First source pixel of the box; one uint16_t per pixel */
		uint16_t const *data = img->data;
		data += img->width * box->top + box->left;

		/* Input stride: bytes of each source row left unrendered */
		int in = 2 * (img->width - w);

		if(profile == PX_R5G6B5)
			bopti_r5g6b5(data, target, w, h, in, out);
		else
			bopti_r5g6b5a(data, target, w, h, in, out, img->alpha);
	}
	else if(profile == PX_P8)
	{
		#warning TODO: PX_P8
	}
	else if(profile == PX_P4)
	{
		#warning TODO: PX_P4
	}
}
/* bopti_render_clip(): Render a sub-image, clipped to the image and screen
   The requested region is first restricted to the bounds of the image, then
   to the 396x224 screen; whatever remains is rendered. Nothing is drawn if
   the region ends up empty or lies entirely off-screen.

   @x @y           Target position of the region's top-left corner
   @img            Image to render
   @left @top      Top-left corner of the region within the image
   @width @height  Size of the region, in pixels */
void bopti_render_clip(int x, int y, image_t const *img, int left, int top,
	int width, int height)
{
	int const img_w = img->width;
	int const img_h = img->height;

	/* Restrict the region to the image, horizontally... */
	if(left < 0)
	{
		width += left;
		x -= left;
		left = 0;
	}
	if(left + width > img_w)
	{
		width = img_w - left;
	}

	/* ... and vertically */
	if(top < 0)
	{
		height += top;
		y -= top;
		top = 0;
	}
	if(top + height > img_h)
	{
		height = img_h - top;
	}

	/* Give up if nothing is left, or if none of it shows on screen */
	if(width <= 0 || height <= 0) return;

	int on_screen = (x + width > 0) && (x < 396)
		&& (y + height > 0) && (y < 224);
	if(!on_screen) return;

	/* Cut off whatever sticks out of the screen on the left and right */
	if(x < 0)
	{
		left -= x;
		width += x;
		x = 0;
	}
	if(x + width > 396)
	{
		width = 396 - x;
	}

	/* Same for the top and bottom */
	if(y < 0)
	{
		top -= y;
		height += y;
		y = 0;
	}
	if(y + height > 224)
	{
		height = 224 - y;
	}

	/* Hand the clipped region over to the renderer */
	struct box clipped = {
		.x = x,
		.y = y,
		.w = width,
		.h = height,
		.left = left,
		.top = top,
	};
	bopti_render(img, &clipped);
}
/* bopti_render_noclip(): Render a sub-image without any clipping
   The parameters are forwarded to the renderer untouched; it is up to the
   caller to make sure the region fits in both the image and the screen.

   @x @y           Target position of the region's top-left corner
   @img            Image to render
   @left @top      Top-left corner of the region within the image
   @width @height  Size of the region, in pixels */
void bopti_render_noclip(int x, int y, image_t const *img, int left, int top,
	int width, int height)
{
	struct box region = {
		.x = x,
		.y = y,
		.w = width,
		.h = height,
		.left = left,
		.top = top,
	};

	bopti_render(img, &region);
}