diff --git a/src/gray/gsubimage.c b/src/gray/gsubimage.c index 27cc6ad..9710e6f 100644 --- a/src/gray/gsubimage.c +++ b/src/gray/gsubimage.c @@ -2,14 +2,6 @@ #include "../render-fx/render-fx.h" #include "../render-fx/bopti-asm.h" -/* List of rendering functions */ -static void *bopti_asm[] = { - bopti_gasm_mono, - bopti_gasm_mono_alpha, - bopti_gasm_gray, - bopti_gasm_gray_alpha, -}; - /* gsubimage(): Render a section of an image */ void gsubimage(int x, int y, bopti_image_t const *img, int left, int top, int width, int height, int flags) @@ -20,11 +12,11 @@ void gsubimage(int x, int y, bopti_image_t const *img, int left, int top, if(flags & DIMAGE_NOCLIP) { bopti_render_noclip(x, y, img, left, top, width, height, - light, dark, bopti_asm[img->profile]); + light, dark); } else { bopti_render_clip(x, y, img, left, top, width, height, - light, dark, bopti_asm[img->profile]); + light, dark); } } diff --git a/src/render-fx/bopti-asm-gray-scsp.s b/src/render-fx/bopti-asm-gray-scsp.s new file mode 100644 index 0000000..1e567b6 --- /dev/null +++ b/src/render-fx/bopti-asm-gray-scsp.s @@ -0,0 +1,152 @@ + +.global _bopti_gasm_mono_scsp +.global _bopti_gasm_mono_alpha_scsp +.global _bopti_gasm_gray_scsp +.global _bopti_gasm_gray_alpha_scsp + +# REGISTER ALLOCATION: +# r0: OR layer +# r1: (temp) +# r2: light vram longword +# r3: dark vram longword +# -- +# r4: light pointer +# r5: layer pointer +# r6: mask +# r7: dark pointer +# -- +# @r15: -(x&31) + +_bopti_gasm_mono_scsp: + # Read layer longword and shift it + mov.l @r5, r0 + mov.l @r15, r1 + shld r1, r0 + + # Clear target VRAM and unwanted image data + and r6, r0 + mov.l @r4, r2 + not r6, r6 + mov.l @r7, r3 + and r6, r2 + and r6, r3 + + # Blit and return + or r0, r2 + or r0, r3 + mov.l r2, @r4 + rts + mov.l r3, @r7 + +# REGISTER ALLOCATION: +# r0: AND layer +# r1: (temp) +# r2: light vram longword +# r3: dark vram longword +# -- +# r4: light pointer +# r5: layer pointer, then OR layer +# r6: mask +# r7: dark pointer +# -- 
+# @r15: -(x&31) +_bopti_gasm_mono_alpha_scsp: + # Read layer longwords and shift them + mov.l @r5, r0 + mov.l @r15, r1 + mov.l @(4,r5), r5 + shld r1, r0 + shld r1, r5 + + # Clear any unwanted image data + and r6, r0 + mov.l @r4, r2 + and r6, r5 + mov.l @r7, r3 + + # Blit and return + not r0, r0 + and r0, r2 + and r0, r3 + or r5, r2 + or r5, r3 + mov.l r2, @r4 + rts + mov.l r3, @r7 + +# REGISTER ALLOCATION: +# r0: LIGHT layer +# r1: (temp) +# r2: light vram longword +# r3: dark vram longword +# -- +# r4: light pointer +# r5: layer pointer, then DARK layer +# r6: mask +# r7: dark pointer +# -- +# @r15: -(x&31) +_bopti_gasm_gray_scsp: + # Read layer longwords and shift them + mov.l @r5, r0 + mov.l @r15, r1 + mov.l @(4,r5), r5 + shld r1, r0 + shld r1, r5 + + # Clear target VRAM and unapplied image data + and r6, r0 + mov.l @r4, r2 + and r6, r5 + mov.l @r7, r3 + not r6, r6 + and r6, r2 + and r6, r3 + + # Blit and return + or r0, r2 + or r5, r3 + mov.l r2, @r4 + rts + mov.l r3, @r7 + +# REGISTER ALLOCATION: +# r0: AND layer +# r1: LIGHT layer +# r2: (temp), then light vram longword +# r3: dark vram longword +# -- +# r4: light pointer +# r5: layer pointer, then DARK layer +# r6: mask +# r7: dark pointer +# -- +# @r15: -(x&31) +_bopti_gasm_gray_alpha_scsp: + # Read layer longwords and shift them + mov.l @r5, r0 + mov.l @(4,r5), r1 + mov.l @(8,r5), r5 + mov.l @r15, r2 + shld r2, r0 + shld r2, r1 + shld r2, r5 + + # Clear unapplied image data + and r6, r0 + and r6, r1 + and r6, r5 + + # Blit the AND layer + mov.l @r4, r2 + not r0, r0 + mov.l @r7, r3 + and r0, r2 + and r0, r3 + + # Blit the LIGHT and DARK layers, and return + or r1, r2 + or r5, r3 + mov.l r2, @r4 + rts + mov.l r3, @r7 diff --git a/src/render-fx/bopti-asm-gray.s b/src/render-fx/bopti-asm-gray.s index 19eac2a..42fd204 100644 --- a/src/render-fx/bopti-asm-gray.s +++ b/src/render-fx/bopti-asm-gray.s @@ -1,3 +1,4 @@ + .global _bopti_gasm_mono .global _bopti_gasm_mono_alpha .global _bopti_gasm_gray diff --git 
a/src/render-fx/bopti-asm-mono-scsp.s b/src/render-fx/bopti-asm-mono-scsp.s new file mode 100644 index 0000000..0d123c1 --- /dev/null +++ b/src/render-fx/bopti-asm-mono-scsp.s @@ -0,0 +1,60 @@ + +.global _bopti_asm_mono_scsp +.global _bopti_asm_mono_alpha_scsp + +# REGISTER ALLOCATION: +# r0: layer +# r1: - +# r2: (temp) +# r3: vram longword +# -- +# r4: vram pointer +# r5: layer pointer +# r6: mask +# r7: -(x&31) + +_bopti_asm_mono_scsp: + # Read layer longword and shift it + mov.l @r5, r0 + shld r7, r0 + + # Clear the target VRAM and unwanted image data + mov.l @r4, r3 + and r6, r0 + not r6, r6 + and r6, r3 + + # Blit and return + or r0, r3 + rts + mov.l r3, @r4 + +# REGISTER ALLOCATION: +# r0: AND layer +# r1: OR layer +# r2: (temp) +# r3: vram longword +# -- +# r4: vram pointer +# r5: layer pointer +# r6: mask +# r7: -(x&31) + +_bopti_asm_mono_alpha_scsp: + # Read layer longwords and shift them + mov.l @r5, r0 + mov.l @(4,r5), r1 + shld r7, r0 + shld r7, r1 + + # Apply masks to clear layer data + and r6, r0 + and r6, r1 + + # Blit to VRAM + mov.l @r4, r3 + not r0, r0 + and r0, r3 + or r1, r3 + rts + mov.l r3, @r4 diff --git a/src/render-fx/bopti-asm.h b/src/render-fx/bopti-asm.h index 1ee6268..ef7ec64 100644 --- a/src/render-fx/bopti-asm.h +++ b/src/render-fx/bopti-asm.h @@ -5,6 +5,9 @@ #ifndef GINT_RENDERFX_BOPTIASM #define GINT_RENDERFX_BOPTIASM +#include +#include + /* pair_t: A pair of consecutive VRAM longwords */ typedef struct { uint32_t l; @@ -24,6 +27,20 @@ typedef pair_t asm_mono_t(pair_t p, void **layer, uint32_t *masks, int x); /* Signature of gray rendering functions */ typedef void asm_gray_t(quadr_t q, void **layer, uint32_t *masks, int x, quadr_t *ret); +/* Signature of mono single-column single-position rendering functions */ +typedef void asm_mono_scsp_t(uint32_t *vram, uint32_t const *layer, + uint32_t mask, int x); +/* Signature of gray single-column single-position rendering functions */ +typedef void asm_gray_scsp_t(uint32_t *v1, uint32_t 
const *layer, + uint32_t mask, uint32_t *v2, int x); + +/* Type of any rendering function */ +typedef union { + asm_mono_t *asm_mono; + asm_gray_t *asm_gray; + asm_mono_scsp_t *asm_mono_scsp; + asm_gray_scsp_t *asm_gray_scsp; +} bopti_asm_t; /* Each of the following rendering functions: 1. Takes VRAM data for two longword positions of the screen. @@ -40,11 +57,31 @@ extern asm_mono_t bopti_asm_mono_alpha; /* bopti_gasm_mono(): "mono" profile on gray VRAMs */ extern asm_gray_t bopti_gasm_mono; -/* bopti_gasm_mono_alpha(): "mono alpha" profile on gray VRAMs */ +/* bopti_gasm_mono_alpha(): "mono_alpha" profile on gray VRAMs */ extern asm_gray_t bopti_gasm_mono_alpha; /* bopti_asm_gray(): Rendering function for the "gray" profile */ extern asm_gray_t bopti_gasm_gray; -/* bpoti_asm_gray_alpha(): Rendering function for the "gray alpha" profile */ +/* bopti_gasm_gray_alpha(): Rendering function for the "gray_alpha" profile */ extern asm_gray_t bopti_gasm_gray_alpha; +/* Each of the following rendering functions: + 1. Takes VRAM data from one longword position of the screen. + 2. Reads data from one longword position of the image from layer. + 3. Shifts the image data and applies it to the VRAM position. + None update the layer pointer. 
*/ + +/* bopti_asm_mono_scsp(): SCSP "mono" profile */ +extern asm_mono_scsp_t bopti_asm_mono_scsp; +/* bopti_asm_mono_alpha_scsp(): SCSP "mono_alpha" profile */ +extern asm_mono_scsp_t bopti_asm_mono_alpha_scsp; + +/* bopti_gasm_mono_scsp(): SCSP "mono" profile on gray VRAMs */ +extern asm_gray_scsp_t bopti_gasm_mono_scsp; +/* bopti_gasm_mono_alpha_scsp(): SCSP "mono_alpha" profile on gray VRAMs */ +extern asm_gray_scsp_t bopti_gasm_mono_alpha_scsp; +/* bopti_gasm_gray_scsp(): SCSP "gray" profile */ +extern asm_gray_scsp_t bopti_gasm_gray_scsp; +/* bopti_gasm_gray_alpha_scsp(): SCSP "gray_alpha" profile */ +extern asm_gray_scsp_t bopti_gasm_gray_alpha_scsp; + #endif /* GINT_RENDERFX_BOPTIASM */ diff --git a/src/render-fx/bopti.c b/src/render-fx/bopti.c index 858a9f4..ca4bd60 100644 --- a/src/render-fx/bopti.c +++ b/src/render-fx/bopti.c @@ -17,7 +17,7 @@ struct rbox /* Horizontal bounds of the box in the image (included, in columns) */ int left, right; /* Vertical bounds of the box in the image (inc-excluded, in pixels) */ - int top, bottom; + int top, height; }; /* struct command: A rendering command @@ -51,11 +51,30 @@ struct command int gray; /* Assembly function, prototype depends on image type */ - union { - void *asm_void; - asm_mono_t *asm_mono; - asm_gray_t *asm_gray; - }; + bopti_asm_t f; +}; + + +/* List of rendering functions */ +static asm_mono_t *asm_mono[] = { + bopti_asm_mono, + bopti_asm_mono_alpha, +}; +static asm_gray_t *asm_gray[] = { + bopti_gasm_mono, + bopti_gasm_mono_alpha, + bopti_gasm_gray, + bopti_gasm_gray_alpha, +}; +static asm_mono_scsp_t *asm_mono_scsp[] = { + bopti_asm_mono_scsp, + bopti_asm_mono_alpha_scsp, +}; +static asm_gray_scsp_t *asm_gray_scsp[] = { + bopti_gasm_mono_scsp, + bopti_gasm_mono_alpha_scsp, + bopti_gasm_gray_scsp, + bopti_gasm_gray_alpha_scsp, }; void bopti_grid(void **layer, int rows, struct command *c) @@ -90,7 +109,7 @@ void bopti_grid(void **layer, int rows, struct command *c) /* The assembly routine blends a 
longword of data onto the pair and returns the resulting pair. */ - pret = c->asm_mono(p, layer, c->masks+col+col, -c->x); + pret = c->f.asm_mono(p, layer, c->masks+col+col,-c->x); /* Write back the result into VRAM, except for column -1 (occurs once every row, iff visual_x < 0) */ @@ -122,7 +141,7 @@ void bopti_grid(void **layer, int rows, struct command *c) q.l2 = (c->x) ? qret.r2 : q.r2; q.r2 = v2[(offset + 1) & 0xff]; - c->asm_gray(q, layer, c->masks+col+col, -c->x, &qret); + c->f.asm_gray(q, layer, c->masks+col+col, -c->x,&qret); if(c->real_start + col) { @@ -145,11 +164,16 @@ void bopti_grid(void **layer, int rows, struct command *c) } void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, - uint32_t *v2, void *bopti_asm) + uint32_t *v2) { + /* Rendering function */ + bopti_asm_t f; + if(v2) f.asm_gray = asm_gray[img->profile]; + else f.asm_mono = asm_mono[img->profile]; + /* Compute rendering masks */ uint32_t vm[4]; - masks(rbox->visual_x, rbox->x + rbox->width - 1, vm); + masks(rbox->visual_x, rbox->visual_x + rbox->width - 1, vm); /* Number of layers per profile */ int layer_count[] = { 1, 2, 2, 3 }; @@ -181,8 +205,7 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, /* Interwoven layer data. 
Skip left columns that are not rendered */ const uint32_t *layer = (void *)img->data; - layer += rbox->top * img_columns * layers; - layer += rbox->left * layer_count[img->profile]; + layer += (rbox->top * img_columns + rbox->left) * layers; /* Number of grid columns */ int columns = rbox->right - rbox->left + 1; @@ -199,73 +222,108 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, .vram_stride = 4 - columns, .data_stride = ((img_columns - columns) << 2) * layers, .gray = (v2 != NULL), - .asm_void = bopti_asm, + .f = f, }; - bopti_grid((void **)&layer, rbox->bottom - rbox->top, &c); + bopti_grid((void **)&layer, rbox->height, &c); } -void bopti_render_clip(int visual_x, int y, bopti_image_t const *img, int left, - int top, int width, int height, uint32_t *v1, uint32_t *v2, - void *bopti_asm) +/* Specialized, faster version for single-column single-position instances */ +void bopti_render_scsp(bopti_image_t const *img, struct rbox *rbox, + uint32_t *v1, uint32_t *v2) { - /* Left pixel of leftmost column */ - int x = visual_x - (left & 31); - width += (left & 31); - left &= ~31; + /* Rendering function */ + bopti_asm_t f; + if(v2) f.asm_gray_scsp = asm_gray_scsp[img->profile]; + else f.asm_mono_scsp = asm_mono_scsp[img->profile]; + /* Compute the only rendering mask. Avoid UB if width = 32, and shift in uint32_t because the mask may reach bit 31 (signed overflow is UB) */ + uint32_t mask = 0xffffffff; + if(rbox->width < 32) + { + int right = 32 - ((rbox->visual_x & 31) + rbox->width); + mask = (((uint32_t)1 << rbox->width) - 1) << right; + } + + /* Number of layers */ + int layer_count[] = { 1, 2, 2, 3 }; + int layers = layer_count[img->profile]; + + /* Number of columns in [img] */ + int img_columns = (img->width + 31) >> 5; + + /* Interwoven layer data. 
Skip left columns that are not rendered */ + const uint32_t *layer = (void *)img->data; + layer += (rbox->top * img_columns + rbox->left) * layers; + + /* Starting value of VRAM pointers */ + int offset = (rbox->y << 2) + (rbox->visual_x >> 5); + v1 += offset; + if(v2) v2 += offset; + + /* Number of rows */ + int rows = rbox->height; + /* Mask shift */ + int shift = -(rbox->x & 31); + if(rbox->x < 0) shift += 32; + + /* Render the grid immediately; mono version */ + if(!v2) while(rows--) + { + f.asm_mono_scsp(v1, layer, mask, shift); + layer += img_columns * layers; + v1 += 4; + } + /* Gray version */ + else while(rows--) + { + f.asm_gray_scsp(v1, layer, mask, v2, shift); + layer += img_columns * layers; + v1 += 4; + v2 += 4; + } +} + +void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, + int top, int width, int height, uint32_t *v1, uint32_t *v2) +{ /* Adjust the bounding box of the input image */ - if(left < 0) width += left, x -= left, left = 0; if(top < 0) height += top, y -= top, top = 0; if(left + width > img->width) width = img->width - left; if(top + height > img->height) height = img->height - top; - /* Check whether the box intersects the screen */ + /* Intersect with the bounding box on-screen */ + if(x < 0) width += x, left -= x, x = 0; + if(y < 0) height += y, top -= y, y = 0; + if(x + width > DWIDTH) width = DWIDTH - x; + if(y + height > DHEIGHT) height = DHEIGHT - y; + + /* Early finish for empty intersections */ if(width <= 0 || height <= 0) return; - if(x + width <= 0 || x > 127 || y + height <= 0 || y > 63) return; - /* Intersect with the bounding box on-screen. We only need to make sure - that x>=-31, not x>=0. Setting x=0 would discard the horizontal - alignment information (x & 31). 
*/ - - if(y < 0) top -= y, height += y, y = 0; - if(y + height > 64) height = (64 - y); - int bottom = top + height; - - if(x < -32) - { - int overflow = (x + 32) >> 5; - overflow = -overflow << 5; - left += overflow; - width -= overflow; - x += overflow; - } - if(x + width > 128) width = (128 - x); - int right = (left + width - 1) >> 5; - left >>= 5; - - /* Finish with the standard bopti renderer */ - struct rbox rbox = { x, visual_x, y, width, left, right, top, bottom }; - bopti_render(img, &rbox, v1, v2, bopti_asm); + /* Finish with the noclip variant */ + bopti_render_noclip(x, y, img, left, top, width, height, v1, v2); } void bopti_render_noclip(int visual_x, int y, bopti_image_t const *img, - int left, int top, int width, int height, uint32_t *v1, uint32_t *v2, - void *bopti_asm) + int left, int top, int width, int height, uint32_t *v1, uint32_t *v2) { - /* End row (excluded) */ - int bottom = top + height; - - /* Left pixel of leftmost column */ - int x = visual_x - (left & 31); - width += (left & 31); - left &= ~31; - - /* Start column and end column (included) */ - int right = (left + width - 1) >> 5; - left >>= 5; + /* Start column and end column (both included) */ + int cl = (left) >> 5; + int cr = (left + width - 1) >> 5; /* Finish with the standard bopti renderer */ - struct rbox rbox = { x, visual_x, y, width, left, right, top, bottom }; - bopti_render(img, &rbox, v1, v2, bopti_asm); + struct rbox rbox = { 0, visual_x, y, width, cl, cr, top, height }; + + if(cl == cr && (visual_x & 31) + width <= 32) + { + rbox.x = (visual_x & 31) - (left & 31); + bopti_render_scsp(img, &rbox, v1, v2); + } + else + { + /* x-coordinate of the first pixel of the first column */ + rbox.x = visual_x - (left & 31); + bopti_render(img, &rbox, v1, v2); + } } diff --git a/src/render-fx/dsubimage.c b/src/render-fx/dsubimage.c index 230d18f..1f21ea4 100644 --- a/src/render-fx/dsubimage.c +++ b/src/render-fx/dsubimage.c @@ -2,12 +2,6 @@ #include "render-fx.h" #include 
"bopti-asm.h" -/* List of rendering functions */ -static void *bopti_asm[] = { - bopti_asm_mono, - bopti_asm_mono_alpha, -}; - /* dsubimage(): Render a section of an image */ void dsubimage(int x, int y, bopti_image_t const *img, int left, int top, int width, int height, int flags) @@ -19,11 +13,11 @@ void dsubimage(int x, int y, bopti_image_t const *img, int left, int top, if(flags & DIMAGE_NOCLIP) { bopti_render_noclip(x, y, img, left, top, width, height, - gint_vram, NULL, bopti_asm[img->profile]); + gint_vram, NULL); } else { bopti_render_clip(x, y, img, left, top, width, height, - gint_vram, NULL, bopti_asm[img->profile]); + gint_vram, NULL); } } diff --git a/src/render-fx/render-fx.h b/src/render-fx/render-fx.h index 05ec92b..8647336 100644 --- a/src/render-fx/render-fx.h +++ b/src/render-fx/render-fx.h @@ -28,10 +28,9 @@ void masks(int x1, int x2, uint32_t *masks); @x @y Location of the top-left corner @img Image encoded by [fxconv] @left @top @w @h Bounding box to render - @v1 @v2 VRAMs - @bopti_asm Rendering function */ + @v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, - int top, int w, int h, uint32_t *v1, uint32_t *v2, void *bopti_asm); + int top, int w, int h, uint32_t *v1, uint32_t *v2); /* bopti_render_noclip(): Render a bopti image without clipping This function is only ever slightly faster than bopti_render_clip(), @@ -42,10 +41,9 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, @x @y Location of the top-left corner @img Image encoded by [fxconv] @left @top @w @h Bounding box to render - @v1 @v2 VRAMs - @bopti_asm Rendering function */ + @v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */ void bopti_render_noclip(int x, int y, bopti_image_t const *img, int left, - int top, int w, int h, uint32_t *v1, uint32_t *v2, void *bopti_asm); + int top, int w, int h, uint32_t *v1, uint32_t *v2); //--- // Alternate rendering modes