From 2e17b77e5616e507e32a6167fe18297af6141175 Mon Sep 17 00:00:00 2001
From: lephe
Date: Sun, 4 Aug 2019 13:59:35 +0200
Subject: [PATCH] bopti: first fxcg50 version with r5g6b5 and r5g6b5a

This commit introduces bopti for fx-CG 50. Currently the only
interfaces are the bopti_render_{clip,noclip} functions, and the only
supported formats are r5g6b5 and r5g6b5a.

The algorithm for r5g6b5 is optimized to perform longword accesses
using movua.l, whereas the algorithm for r5g6b5a uses plain word
accesses because transparency checks feel more difficult than one more
loop iteration.

These algorithms are still slow for large surfaces and struggle to
sustain 25 FPS in full-screen, so possible improvements with the DMA
should definitely be tested before resorting to overclocking.
---
 include/gint/display-cg.h |  28 ++++++
 src/render-cg/bopti-asm.h |  25 ++++++
 src/render-cg/bopti-asm.s | 171 ++++++++++++++++++++++++++++++++++++++
 src/render-cg/bopti.c     | 108 ++++++++++++++++++++++++
 4 files changed, 332 insertions(+)
 create mode 100644 src/render-cg/bopti-asm.h
 create mode 100644 src/render-cg/bopti-asm.s
 create mode 100644 src/render-cg/bopti.c

diff --git a/include/gint/display-cg.h b/include/gint/display-cg.h
index d95a410..1fa04cb 100644
--- a/include/gint/display-cg.h
+++ b/include/gint/display-cg.h
@@ -41,6 +41,34 @@ enum {
 	C_NONE = -1,
 };
 
+//---
+//	Image rendering (bopti)
+//---
+
+/* image_t: Image files encoded for bopti
+   This format is created by the fxSDK's [fxconv] tool from standard images. */
+typedef struct
+{
+	/* Color profile (type of palette), could be extended into a bit field
+	   later on */
+	uint16_t profile;
+
+	/* Color code assigned to transparent pixels (unused in 16-bit) */
+	uint16_t alpha;
+
+	/* Full width and height, in pixels */
+	uint16_t width;
+	uint16_t height;
+
+	/* Color palette:
+	   * 16-bit and 16-bit alpha: none
+	   * 8-bit: 256 colors (total 512 bytes)
+	   * 4-bit: 16 colors (total 32 bytes)
+	   Then raw pixel data in row-major order. */
+	uint16_t data[];
+
+} GPACKED(4) image_t;
+
 //---
 //	Video RAM management
 //---
diff --git a/src/render-cg/bopti-asm.h b/src/render-cg/bopti-asm.h
new file mode 100644
index 0000000..04d3096
--- /dev/null
+++ b/src/render-cg/bopti-asm.h
@@ -0,0 +1,25 @@
+//---
+//	gint:render-cg:bopti-asm - Assembler drawing routines for bopti
+//---
+
+#ifndef GINT_RENDERCG_BOPTIASM
+#define GINT_RENDERCG_BOPTIASM
+
+/* bopti_r5g6b5(): Full opaque 16-bit rendering
+   @data        Input data (2-aligned)
+   @target      Target in VRAM (2-aligned)
+   @width       Width of rendered image, in pixels (at least 1)
+   @height      Height of rendered image, in pixels (at least 1)
+   @in_stride   Bytes to ignore between two rows of @data
+   @out_stride  Bytes to ignore between two rows of @target */
+void bopti_r5g6b5(uint16_t const *data, uint16_t *target, int width,
+	int height, int in_stride, int out_stride);
+
+/* bopti_r5g6b5a(): 16-bit rendering with alpha
+   @data @target @width @height @in_stride @out_stride
+     As in bopti_r5g6b5().
+   @alpha  Color code that encodes transparency */
+void bopti_r5g6b5a(uint16_t const *data, uint16_t *target, int width,
+	int height, int in_stride, int out_stride, uint16_t alpha);
+
+#endif /* GINT_RENDERCG_BOPTIASM */
diff --git a/src/render-cg/bopti-asm.s b/src/render-cg/bopti-asm.s
new file mode 100644
index 0000000..d633f4e
--- /dev/null
+++ b/src/render-cg/bopti-asm.s
@@ -0,0 +1,171 @@
+
+.global _bopti_r5g6b5
+.global _bopti_r5g6b5a
+
+# bopti_r5g6b5(): Full opaque 16-bit rendering
+#
+# Each row is copied in three steps: the first pixel and the last pixel with
+# word accesses, then the rest with longword stores to a 4-aligned target,
+# reading the possibly unaligned source with movua.l. Pixels covered twice
+# are simply written twice with the same value.
+#
+# The target alignment is computed once, so 2 * width + out_stride must be a
+# multiple of 4 (true for the 396-pixel VRAM rows used by bopti_render()).
+# Both width and height must be at least 1.
+#
+# REGISTER ALLOCATION:
+# r0: (tmp)
+# r1: (tmp)
+# r2: 2 * (width - 1), byte offset of the last pixel in a row
+# r3: target & 2, bytes to skip to reach a 4-aligned target
+# ---
+# r4: data
+# r5: target
+# r6: width; then, the number of longword operations
+# r7: height
+# ---
+# r8: in_stride, plus row bytes left over by the longword loop
+# r9: out_stride, plus row bytes left over by the longword loop
+# r10: x counter
+# ---
+# @12: in_stride
+# @16: out_stride
+
+.align 4
+
+_bopti_r5g6b5:
+	# Target alignment, either 0 (4-aligned) or 2 (2-aligned)
+	mov.l	r8, @-r15
+	mov	r5, r3
+	mov.l	r9, @-r15
+	mov	#2, r0
+	mov.l	r10, @-r15
+	and	r0, r3
+
+	# Byte offset of the last pixel, used to copy it with a word access
+	mov	r6, r2
+	add	#-1, r2
+	shll	r2
+
+	# Row bytes left once the target is aligned: 2 * width - (target & 2)
+	shll	r6
+	sub	r3, r6
+
+	# Trailing bytes that do not fill a longword, 0 or 2 (r0 is still 2)
+	mov	r6, r1
+	and	r0, r1
+
+	# Number of longword operations per row, 0 for the narrowest rows
+	shlr2	r6
+
+	# Input and output strides, plus the trailing bytes
+	mov.l	@(12, r15), r8
+	mov.l	@(16, r15), r9
+	add	r1, r8
+	add	r1, r9
+
+.r5g6b5_y:
+	# First pixel
+	mov.w	@r4, r1
+	mov	r2, r0
+	mov.w	r1, @r5
+
+	# Last pixel; align to 4-byte boundaries for target
+	mov.w	@(r0, r4), r1
+	add	r3, r4
+	mov.w	r1, @(r0, r5)
+	add	r3, r5
+
+	# Skip the longword loop if it has nothing to copy; dt would start
+	# from 0 and wrap around
+	tst	r6, r6
+	bt.s	.r5g6b5_next
+	mov	r6, r10
+
+.r5g6b5_x:
+	# Copy longwords
+	movua.l	@r4+, r0
+	mov.l	r0, @r5
+
+	dt	r10
+	bf.s	.r5g6b5_x
+	add	#4, r5
+
+.r5g6b5_next:
+	# Move both pointers to the start of the next row
+	add	r8, r4
+	dt	r7
+	bf.s	.r5g6b5_y
+	add	r9, r5
+
+# -
+
+	mov.l	@r15+, r10
+	mov.l	@r15+, r9
+	rts
+	mov.l	@r15+, r8
+
+# bopti_r5g6b5a(): 16-bit rendering with alpha
+#
+# Pixels are copied one word at a time; the target is left untouched wherever
+# the source pixel equals the alpha color. Both width and height must be at
+# least 1.
+#
+# REGISTER ALLOCATION:
+# r0: (tmp)
+# r1: in_stride
+# r2: out_stride
+# r3: x counter
+# ---
+# r4: data
+# r5: target
+# r6: width
+# r7: height
+# ---
+# r8: alpha, sign-extended from 16 bits
+# ---
+# @4: in_stride
+# @8: out_stride
+# @12: alpha
+
+.align 4
+
+_bopti_r5g6b5a:
+	# Load alpha value. mov.w sign-extends the pixels it loads, so alpha is
+	# sign-extended too; colors with bit 15 set can then compare equal
+	# whichever way the caller extended the argument
+	mov.l	r8, @-r15
+	mov.l	@(12, r15), r8
+	exts.w	r8, r8
+
+	# Load input and output strides
+	mov.l	@(4, r15), r1
+	mov.l	@(8, r15), r2
+
+.r5g6b5a_y:
+	mov	r6, r3
+
+.r5g6b5a_x:
+	# Copy the pixel unless it is transparent
+	mov.w	@r4+, r0
+	cmp/eq	r0, r8
+	bt	.r5g6b5a_alpha
+
+	mov.w	r0, @r5
+
+.r5g6b5a_alpha:
+	dt	r3
+	bf.s	.r5g6b5a_x
+	add	#2, r5
+
+# -
+
+	add	r1, r4
+	dt	r7
+	bf.s	.r5g6b5a_y
+	add	r2, r5
+
+# -
+
+	rts
+	mov.l	@r15+, r8
diff --git a/src/render-cg/bopti.c b/src/render-cg/bopti.c
new file mode 100644
index 0000000..0a27b8b
--- /dev/null
+++ b/src/render-cg/bopti.c
@@ -0,0 +1,108 @@
+#define GINT_NEED_VRAM
+#include <gint/defs/types.h>
+#include <gint/display.h>
+#include "bopti-asm.h"
+
+/* struct box: Rendering request, in pixels */
+struct box {
+	/* Target location of top-left corner */
+	int x, y;
+	/* Width and height of rendered sub-image */
+	int w, h;
+	/* Top-left corner of the rendered sub-image in the source image */
+	int left, top;
+};
+
+/* Color profiles, as stored in the [profile] field of image_t */
+enum {
+	PX_R5G6B5  = 0,
+	PX_R5G6B5A = 1,
+	PX_P8      = 2,
+	PX_P4      = 3,
+};
+
+/* bopti_render(): Render a sub-image to VRAM without any clipping
+   The box must lie within both the image and the 396x224 VRAM; nothing is
+   checked here apart from empty boxes. Only the 16-bit profiles are drawn,
+   other profiles are ignored for now.
+   @img  Source image
+   @box  Region to render and its location on-screen */
+void bopti_render(image_t const *img, struct box *box)
+{
+	/* The assembler loops count down with dt, which wraps around instead
+	   of stopping when it starts from 0 or less */
+	if(box->w <= 0 || box->h <= 0) return;
+
+	/* Offset in video RAM and output stride (VRAM rows are 396 pixels) */
+	uint16_t *target = vram + (396 * box->y + box->x);
+	int out = 2 * (396 - box->w);
+
+	int profile = img->profile;
+
+	if(profile == PX_R5G6B5 || profile == PX_R5G6B5A)
+	{
+		uint16_t const *data = img->data;
+		int w = box->w;
+		int h = box->h;
+
+		/* First source pixel of the box, in row-major order */
+		data += img->width * box->top + box->left;
+
+		/* Input stride */
+		int in = 2 * (img->width - w);
+
+		if(profile == PX_R5G6B5)
+			bopti_r5g6b5(data, target, w, h, in, out);
+		else
+			bopti_r5g6b5a(data, target, w, h, in, out, img->alpha);
+	}
+	else if(profile == PX_P8)
+	{
+		#warning TODO: PX_P8
+	}
+	else if(profile == PX_P4)
+	{
+		#warning TODO: PX_P4
+	}
+}
+
+/* bopti_render_clip(): Render a sub-image, clipped to the image and screen
+   The requested box is first restricted to the bounds of the image, then to
+   the 396x224 screen; nothing is drawn if the result is empty.
+   @x @y           Location of the sub-image's top-left corner on-screen
+   @img            Source image
+   @left @top      Top-left corner of the sub-image in the source image
+   @width @height  Size of the sub-image */
+void bopti_render_clip(int x, int y, image_t const *img, int left, int top,
+	int width, int height)
+{
+	/* Adjust the bounding box of the input image */
+	if(left < 0) width  += left, x -= left, left = 0;
+	if(top  < 0) height += top,  y -= top,  top  = 0;
+	if(left + width > img->width) width = img->width - left;
+	if(top + height > img->height) height = img->height - top;
+
+	/* Check whether the box intersects the screen */
+	if(width <= 0 || height <= 0) return;
+	if(x + width <= 0 || x >= 396 || y + height <= 0 || y >= 224) return;
+
+	/* Intersect with the bounding box on-screen */
+	if(y < 0) top -= y, height += y, y = 0;
+	if(y + height > 224) height = (224 - y);
+	if(x < 0) left -= x, width += x, x = 0;
+	if(x + width > 396) width = (396 - x);
+
+	/* Finish with the renderer */
+	struct box box = { x, y, width, height, left, top };
+	bopti_render(img, &box);
+}
+
+/* bopti_render_noclip(): Render a sub-image without clipping
+   Same parameters as bopti_render_clip(), but the box is passed to the
+   renderer as is, so it must already fit in the image and on the screen. */
+void bopti_render_noclip(int x, int y, image_t const *img, int left, int top,
+	int width, int height)
+{
+	struct box box = { x, y, width, height, left, top };
+	bopti_render(img, &box);
+}