From 1cf5bf514a0d1f0f157f5cb9ade3b4e8c582ba4f Mon Sep 17 00:00:00 2001 From: lephe Date: Sat, 27 Jul 2019 19:51:53 -0400 Subject: [PATCH] bopti: add gray support for all four profiles This change finally introduces gray image rendering with bopti. This is the final iteration of bopti v2 and certainly the fastest so far. All four profiles are supported, without change to the format. --- include/display/fx.h | 12 +- include/gint/display-fx.h | 6 +- include/gint/gray.h | 23 +++ src/gray/gimage.c | 40 +++++ src/render-fx/bopti-asm-gray.s | 287 +++++++++++++++++++++++++++++++++ src/render-fx/bopti-asm.h | 14 +- src/render-fx/bopti-asm.s | 19 +-- src/render-fx/bopti.c | 42 +++-- src/render-fx/dimage.c | 17 +- 9 files changed, 408 insertions(+), 52 deletions(-) create mode 100644 src/gray/gimage.c create mode 100644 src/render-fx/bopti-asm-gray.s diff --git a/include/display/fx.h b/include/display/fx.h index eb9e4cc..a65e4f0 100644 --- a/include/display/fx.h +++ b/include/display/fx.h @@ -27,9 +27,11 @@ void masks(int x1, int x2, uint32_t *masks); /* bopti_render_clip() - render a bopti image with clipping @x @y Location of the top-left corner @img Image encoded by [fxconv] - @left @top @w @h Bounding box to render */ + @left @top @w @h Bounding box to render + @v1 @v2 VRAMs + @bopti_asm Rendering function */ void bopti_render_clip(int x, int y, image_t const *img, int left, int top, - int w, int h); + int w, int h, uint32_t *v1, uint32_t *v2, void *bopti_asm); /* bopti_render_noclip() - render a bopti image without clipping This function is only ever slightly faster than bopti_render_clip(), @@ -39,8 +41,10 @@ void bopti_render_clip(int x, int y, image_t const *img, int left, int top, @x @y Location of the top-left corner @img Image encoded by [fxconv] - @left @top @w @h Bounding box to render */ + @left @top @w @h Bounding box to render + @v1 @v2 VRAMs + @bopti_asm Rendering function */ void bopti_render_noclip(int x, int y, image_t const *img, int left, int top, - int 
w, int h); + int w, int h, uint32_t *v1, uint32_t *v2, void *bopti_asm); #endif /* DISPLAY_FX */ diff --git a/include/gint/display-fx.h b/include/gint/display-fx.h index bf6d910..eb0a6b3 100644 --- a/include/gint/display-fx.h +++ b/include/gint/display-fx.h @@ -91,6 +91,9 @@ void dimage(int x, int y, image_t const *image); /* Option values for dsubimage() */ enum { + /* No option */ + DIMAGE_NONE = 0x00, + /* Disable clipping, ie. adjustments to the specified subrectangle and screen location such that any part that overflows from the image or the screen is ignored. Slightly faster. */ @@ -104,7 +107,8 @@ enum { @x @y Coordinates on screen of the rendered subrectangle @image Pointer to image encoded with [fxconv] @left @top Top-left coordinates of the subrectangle within [image] - @width @height Subrectangle dimensions */ + @width @height Subrectangle dimensions + @flags OR-combination of DIMAGE_* flags */ void dsubimage(int x, int y, image_t const *image, int left, int top, int width, int height, int flags); diff --git a/include/gint/gray.h b/include/gint/gray.h index 5e0278b..134d1e8 100644 --- a/include/gint/gray.h +++ b/include/gint/gray.h @@ -68,6 +68,7 @@ void gray_stop(void); LIGHT DARK BLINKING STRIPES COLORS -------------------------------------------------- + 869 1097 medium some excellent 869 1311 medium none good [default] 937 1425 medium none good -------------------------------------------------- @@ -143,6 +144,28 @@ void gline(int x1, int y1, int x2, int y2, color_t color); @bg Background, same colors as fg */ void gtext(int x, int y, const char *str, int fg, int bg); +//--- +// Image rendering +//--- + +/* gimage(): Render a full image + This function is exactly like dimage(), but it draws a gray image instead. 
+ + @x @y Coordinates of the top-left corner of the image + @image Pointer to gray image encoded with [fxconv] */ +void gimage(int x, int y, image_t const *image); + +/* gsubimage(): Render a section of an image + Like dsubimage() for gray images. Same options apply. + + @x @y Coordinates on screen of the rendered subrectangle + @image Pointer to image encoded with [fxconv] + @left @top Top-left coordinates of the subrectangle within [image] + @width @height Subrectangle dimensions + @flags OR-combination of DIMAGE_* flags */ +void gsubimage(int x, int y, image_t const *image, int left, int top, + int width, int height, int flags); + //--- // VRAM management //--- diff --git a/src/gray/gimage.c b/src/gray/gimage.c new file mode 100644 index 0000000..c56d936 --- /dev/null +++ b/src/gray/gimage.c @@ -0,0 +1,40 @@ +#include +#include +#include "../render-fx/bopti-asm.h" + +/* List of rendering functions */ +static void *bopti_asm[] = { + bopti_gasm_mono, + bopti_gasm_mono_alpha, + bopti_gasm_gray, + bopti_gasm_gray_alpha, +}; + +/* gimage(): Render a full image */ +void gimage(int x, int y, image_t const *img) +{ + uint32_t *light, *dark; + gvram(&light, &dark); + + bopti_render_clip(x, y, img, 0, 0, img->width, img->height, light, + dark, bopti_asm[img->profile]); +} + +/* gsubimage(): Render a section of an image */ +void gsubimage(int x, int y, image_t const *img, int left, int top, + int width, int height, int flags) +{ + uint32_t *light, *dark; + gvram(&light, &dark); + + if(flags & DIMAGE_NOCLIP) + { + bopti_render_noclip(x, y, img, left, top, width, height, + light, dark, bopti_asm[img->profile]); + } + else + { + bopti_render_clip(x, y, img, left, top, width, height, + light, dark, bopti_asm[img->profile]); + } +} diff --git a/src/render-fx/bopti-asm-gray.s b/src/render-fx/bopti-asm-gray.s new file mode 100644 index 0000000..19eac2a --- /dev/null +++ b/src/render-fx/bopti-asm-gray.s @@ -0,0 +1,287 @@ +.global _bopti_gasm_mono +.global _bopti_gasm_mono_alpha 
+.global _bopti_gasm_gray +.global _bopti_gasm_gray_alpha + +# REGISTER ALLOCATION +# r0: layer_1 (left) +# r1: layer_1 (right) +# r2: layer pointer and temp +# r3: mask pointer and temp +# -- +# r4: vram light (left) +# r5: vram light (right) +# r6: vram dark (left) +# r7: vram dark (right) +# -- +# @r15: layer pointer +# @(4,r15): mask pointer +# @(8,r15): -(x&31) +# @(12,r15): destination + +_bopti_gasm_mono: + # Read the data longword and update the layer address + mov.l @r15, r2 + mov.l @r2, r3 + mov.l @r3+, r0 + mov.l r3, @r2 + mov r0, r1 + + # Shift it + mov.l @(8, r15), r2 + shld r2, r0 + add #32, r2 + shld r2, r1 + + # Clear target VRAM and unwanted image data + mov.l @(4, r15), r3 + mov.l @r3, r2 + and r2, r0 + not r2, r2 + and r2, r4 + and r2, r6 + mov.l @(4, r3), r2 + and r2, r1 + not r2, r2 + and r2, r5 + and r2, r7 + + # Join everything and return + or r0, r4 + or r1, r5 + or r0, r6 + or r1, r7 + mov.l @(12, r15), r2 + mov.l r4, @r2 + mov.l r5, @(4, r2) + mov.l r6, @(8, r2) + rts + mov.l r7, @(12, r2) + +# REGISTER ALLOCATION +# r0: layer_1 (left) +# r1: layer_1 (right) +# r2: layer_2 (left) +# r3: layer_2 (right) +# -- +# r4: vram light (left) +# r5: vram light (right) +# r6: vram dark (left) +# r7: vram dark (right) +# -- +# r8: layer pointer and temp +# r9: mask pointer and temp +# -- +# @(8,r15): layer pointer +# @(12,r15): mask pointer +# @(16,r15): -(x&31) +# @(20,r15): destination + +_bopti_gasm_mono_alpha: + mov.l r8, @-r15 + mov.l r9, @-r15 + + # Read data longwords + mov.l @(8, r15), r8 + mov.l @r8, r9 + mov.l @r9+, r0 + mov.l @r9+, r2 + mov.l r9, @r8 + mov r0, r1 + mov r2, r3 + + # Shift all layer data + mov.l @(16, r15), r8 + shld r8, r0 + shld r8, r2 + add #32, r8 + shld r8, r1 + shld r8, r3 + + # Apply masks on image data + mov.l @(12, r15), r9 + mov.l @r9, r8 + and r8, r0 + and r8, r2 + mov.l @(4, r9), r8 + and r8, r1 + and r8, r3 + + # Render and leave + not r0, r0 + not r1, r1 + and r0, r4 + and r1, r5 + and r0, r6 + and r1, r7 + or 
r2, r4 + or r3, r5 + or r2, r6 + or r3, r7 + mov.l @(20, r15), r8 + mov.l r4, @r8 + mov.l r5, @(4, r8) + mov.l r6, @(8, r8) + mov.l r7, @(12, r8) + + mov.l @r15+, r9 + rts + mov.l @r15+, r8 + +# REGISTER ALLOCATION +# r0: layer_1 (left) +# r1: layer_1 (right) +# r2: layer_2 (left) +# r3: layer_2 (right) +# -- +# r4: vram light (left) +# r5: vram light (right) +# r6: vram dark (left) +# r7: vram dark (right) +# -- +# r8: layer pointer (also +- x&31) +# r9: mask pointer (also layer) +# -- +# @(8,r15): layer pointer +# @(12,r15): mask pointer +# @(16,r15): -(x&31) +# @(20,r15): destination + +_bopti_gasm_gray: + mov.l r8, @-r15 + mov.l r9, @-r15 + + # Read data longwords and update the layer address pointer + mov.l @(8, r15), r8 + mov.l @r8, r9 + mov.l @r9+, r0 + mov.l @r9+, r2 + mov.l r9, @r8 + mov r0, r1 + mov r2, r3 + + # Shift all layer data + mov.l @(16, r15), r8 + shld r8, r0 + shld r8, r2 + add #32, r8 + shld r8, r1 + shld r8, r3 + + # On the left side, clear the VRAM which is about to be rewritten using + # the left mask, and also clear unwanted image data + mov.l @(12, r15), r9 + mov.l @r9, r8 + and r8, r0 + and r8, r2 + not r8, r8 + and r8, r4 + and r8, r6 + + # Same on the right side + mov.l @(4, r9), r8 + and r8, r1 + and r8, r3 + not r8, r8 + and r8, r5 + and r8, r7 + + # Render these together and store the result + or r0, r4 + or r1, r5 + or r2, r6 + or r3, r7 + mov.l @(20, r15), r8 + mov.l r4, @r8 + mov.l r5, @(4, r8) + mov.l r6, @(8, r8) + mov.l r7, @(12, r8) + + mov.l @r15+, r9 + rts + mov.l @r15+, r8 + + +# REGISTER ALLOCATION +# r0: layer_1 (left) +# r1: layer_1 (right) +# r2: layer_2 (left) +# r3: layer_2 (right) +# -- +# r4: vram light (left) +# r5: vram light (right) +# r6: vram dark (left) +# r7: vram dark (right) +# -- +# r8: layer pointer (also +- x&31) +# r9: mask pointer (also layer) +# r10: layer_3 (left) +# r11: layer_3 (right) +# -- +# @(16,r15): layer pointer +# @(20,r15): mask pointer +# @(24,r15): -(x&31) +# @(28,r15): destination + 
+.align 4 +_bopti_gasm_gray_alpha: + mov.l r8, @-r15 + mov.l r9, @-r15 + mov.l r10, @-r15 + mov.l r11, @-r15 + + # Load layer data + mov.l @(16, r15), r8 + mov.l @r8, r9 + mov.l @r9+, r0 + mov.l @r9+, r2 + mov r0, r1 + mov.l @r9+, r10 + mov r2, r3 + mov.l r9, @r8 + mov r10, r11 + + # Shift layer data + mov.l @(24, r15), r8 + shld r8, r0 + shld r8, r2 + shld r8, r10 + add #32, r8 + shld r8, r1 + shld r8, r3 + shld r8, r11 + + # Clear unwanted layer bits + mov.l @(20, r15), r9 + mov.l @r9, r8 + and r8, r0 + and r8, r2 + and r8, r10 + mov.l @(4, r9), r8 + and r8, r1 + and r8, r3 + and r8, r11 + + # Blit everything + not r0, r0 + and r0, r4 + and r0, r6 + not r1, r1 + and r1, r5 + and r1, r7 + or r2, r4 + or r3, r5 + or r10, r6 + or r11, r7 + + # Store results and leave + mov.l @(28, r15), r8 + mov.l r4, @r8 + mov.l r5, @(4, r8) + mov.l r6, @(8, r8) + mov.l r7, @(12, r8) + mov.l @r15+, r11 + mov.l @r15+, r10 + mov.l @r15+, r9 + rts + mov.l @r15+, r8 diff --git a/src/render-fx/bopti-asm.h b/src/render-fx/bopti-asm.h index daa3820..1ee6268 100644 --- a/src/render-fx/bopti-asm.h +++ b/src/render-fx/bopti-asm.h @@ -20,9 +20,10 @@ typedef struct { } quadr_t; /* Signature of mono rendering functions */ -typedef pair_t asm_mono_t(pair_t p, void **layer, uint32_t *masks, int x); +typedef pair_t asm_mono_t(pair_t p, void **layer, uint32_t *masks, int x); /* Signature of gray rendering functions */ -typedef quadr_t asm_gray_t(quadr_t q, void **layer, uint32_t *masks, int x); +typedef void asm_gray_t(quadr_t q, void **layer, uint32_t *masks, int x, + quadr_t *ret); /* Each of the following rendering functions: 1. Takes VRAM data for two longword positions of the screen. 
@@ -36,9 +37,14 @@ typedef quadr_t asm_gray_t(quadr_t q, void **layer, uint32_t *masks, int x); extern asm_mono_t bopti_asm_mono; /* bopti_asm_mono_alpha(): Rendering function for the "mono alpha" profile */ extern asm_mono_t bopti_asm_mono_alpha; + +/* bopti_gasm_mono(): "mono" profile on gray VRAMs */ +extern asm_gray_t bopti_gasm_mono; +/* bopti_gasm_mono_alpha(): "mono alpha" profile on gray VRAMs */ +extern asm_gray_t bopti_gasm_mono_alpha; /* bopti_asm_gray(): Rendering function for the "gray" profile */ -extern asm_gray_t bopti_asm_gray; +extern asm_gray_t bopti_gasm_gray; /* bpoti_asm_gray_alpha(): Rendering function for the "gray alpha" profile */ -extern asm_gray_t bopti_asm_gray_alpha; +extern asm_gray_t bopti_gasm_gray_alpha; #endif /* GINT_RENDERFX_BOPTIASM */ diff --git a/src/render-fx/bopti-asm.s b/src/render-fx/bopti-asm.s index 6b5f1aa..3c1de79 100644 --- a/src/render-fx/bopti-asm.s +++ b/src/render-fx/bopti-asm.s @@ -1,8 +1,6 @@ .global _bopti_asm_mono .global _bopti_asm_mono_alpha -.global _bopti_asm_gray -.global _bopti_asm_gray_alpha # REGISTER ALLOCATION: # r0: layer (left) @@ -54,12 +52,11 @@ _bopti_asm_mono: # r4: vram (left) # r5: vram (right) # r6: layer pointer; f(x&31); mask (left); mask (right) -# r7: masks pointer +# r7: mask pointer # -- # @r15: -(x&31) _bopti_asm_mono_alpha: - # Read data longwords and update the layer address pointer mov.l @r6, r2 mov.l @r2+, r0 @@ -94,17 +91,3 @@ _bopti_asm_mono_alpha: or r2, r0 rts or r3, r1 - -# REGISTER ALLOCATION -# TODO: _bopti_asm_gray - -_bopti_asm_gray: - rts - nop - -# REGISTER ALLOCATION -# TODO: _bopti_asm_gray_alpha - -_bopti_asm_gray_alpha: - rts - nop diff --git a/src/render-fx/bopti.c b/src/render-fx/bopti.c index d19a5d8..aaf165a 100644 --- a/src/render-fx/bopti.c +++ b/src/render-fx/bopti.c @@ -1,4 +1,3 @@ -#define GINT_NEED_VRAM #include #include #include @@ -21,14 +20,6 @@ struct rbox int top, bottom; }; -/* List of rendering functions */ -void *bopti_asm[4] = { - bopti_asm_mono, 
/* asm_mono_t */ - bopti_asm_mono_alpha, /* asm_mono_t */ - bopti_asm_gray, /* asm_gray_t */ - bopti_asm_gray_alpha, /* asm_gray_t */ -}; - /* struct command: A rendering command Includes many computed parameters and handy information. Read-only. */ struct command @@ -55,6 +46,10 @@ struct command /* Ignored elements between two rendered grid columns */ int data_stride; + /* Whether the image should be drawn in gray mode (this may be 1 even + for images of the mono and mono_alpha profiles) */ + int gray; + /* Assembly function, prototype depends on image type */ union { void *asm_void; @@ -63,7 +58,7 @@ struct command }; }; -void bopti_grid(void **layer, int rows, int gray, struct command *c) +void bopti_grid(void **layer, int rows, struct command *c) { /* Pointers to vram data */ uint32_t *v1 = c->v1, *v2 = c->v2; @@ -77,7 +72,7 @@ void bopti_grid(void **layer, int rows, int gray, struct command *c) quadr_t q, qret = { 0 }; /* Monochrome version */ - if(!gray) while(rows--) + if(!c->gray) while(rows--) { p.r = pret.r = v1[offset & 0xff]; @@ -127,7 +122,7 @@ void bopti_grid(void **layer, int rows, int gray, struct command *c) q.l2 = (c->x) ? qret.r2 : q.r2; q.r2 = v2[(offset + 1) & 0xff]; - qret = c->asm_gray(q, layer, c->masks+col+col, -c->x); + c->asm_gray(q, layer, c->masks+col+col, -c->x, &qret); if(c->real_start + col) { @@ -149,7 +144,8 @@ void bopti_grid(void **layer, int rows, int gray, struct command *c) } } -void bopti_render(image_t const *img, struct rbox *rbox) +void bopti_render(image_t const *img, struct rbox *rbox, uint32_t *v1, + uint32_t *v2, void *bopti_asm) { /* Compute rendering masks */ uint32_t vm[4]; @@ -190,22 +186,23 @@ void bopti_render(image_t const *img, struct rbox *rbox) /* Compute and execute the command for this parameters */ struct command c = { .x = rbox->x & 31, - /* TODO: bopti: Support gray rendering */ - .v1 = vram, - .v2 = vram, + .v1 = v1, + .v2 = v2 ? 
v2 : v1, .offset = (rbox->y << 2) + (rbox->x >> 5), .columns = columns, .masks = masks + 2 * left_origin, .real_start = (left_origin > 0), .vram_stride = 4 - columns, .data_stride = (img_columns - columns) << 2, - .asm_void = bopti_asm[img->profile], + .gray = (v2 != NULL), + .asm_void = bopti_asm, }; - bopti_grid((void **)&layer, rbox->bottom - rbox->top, img->gray, &c); + bopti_grid((void **)&layer, rbox->bottom - rbox->top, &c); } void bopti_render_clip(int visual_x, int y, image_t const *img, int left, - int top, int width, int height) + int top, int width, int height, uint32_t *v1, uint32_t *v2, + void *bopti_asm) { /* Left pixel of leftmost column */ int x = visual_x - (left & 31); @@ -245,11 +242,12 @@ void bopti_render_clip(int visual_x, int y, image_t const *img, int left, /* Finish with the standard bopti renderer */ struct rbox rbox = { x, visual_x, y, width, left, right, top, bottom }; - bopti_render(img, &rbox); + bopti_render(img, &rbox, v1, v2, bopti_asm); } void bopti_render_noclip(int visual_x, int y, image_t const *img, int left, - int top, int width, int height) + int top, int width, int height, uint32_t *v1, uint32_t *v2, + void *bopti_asm) { /* End row (excluded) */ int bottom = top + height; @@ -264,5 +262,5 @@ void bopti_render_noclip(int visual_x, int y, image_t const *img, int left, /* Finish with the standard bopti renderer */ struct rbox rbox = { x, visual_x, y, width, left, right, top, bottom }; - bopti_render(img, &rbox); + bopti_render(img, &rbox, v1, v2, bopti_asm); } diff --git a/src/render-fx/dimage.c b/src/render-fx/dimage.c index 3de56cb..4dbab00 100644 --- a/src/render-fx/dimage.c +++ b/src/render-fx/dimage.c @@ -1,11 +1,20 @@ +#define GINT_NEED_VRAM #include #include +#include "bopti-asm.h" + +/* List of rendering functions */ +static void *bopti_asm[] = { + bopti_asm_mono, + bopti_asm_mono_alpha, +}; /* dimage() - render a full image */ void dimage(int x, int y, image_t const *img) { if(img->gray) return; - bopti_render_clip(x, 
y, img, 0, 0, img->width, img->height); + bopti_render_clip(x, y, img, 0, 0, img->width, img->height, vram, + NULL, bopti_asm[img->profile]); } /* dsubimage() - render a section of an image */ @@ -16,10 +25,12 @@ void dsubimage(int x, int y, image_t const *img, int left, int top, if(flags & DIMAGE_NOCLIP) { - bopti_render_noclip(x, y, img, left, top, width, height); + bopti_render_noclip(x, y, img, left, top, width, height, + vram, NULL, bopti_asm[img->profile]); } else { - bopti_render_clip(x, y, img, left, top, width, height); + bopti_render_clip(x, y, img, left, top, width, height, + vram, NULL, bopti_asm[img->profile]); } }