diff --git a/src/gray/gsubimage.c b/src/gray/gsubimage.c index 9710e6f..315bd37 100644 --- a/src/gray/gsubimage.c +++ b/src/gray/gsubimage.c @@ -2,21 +2,38 @@ #include "../render-fx/render-fx.h" #include "../render-fx/bopti-asm.h" +#pragma GCC optimize("O3") + /* gsubimage(): Render a section of an image */ -void gsubimage(int x, int y, bopti_image_t const *img, int left, int top, - int width, int height, int flags) +void gsubimage(bopti_image_t const *img, struct rbox *r, GUNUSED int flags) { uint32_t *light, *dark; dgray_getvram(&light, &dark); - if(flags & DIMAGE_NOCLIP) + /* Intersect the bounding box with both the source image and the VRAM, + except if DIMAGE_NOCLIP is provided */ + if(!(flags & DIMAGE_NOCLIP)) { - bopti_render_noclip(x, y, img, left, top, width, height, - light, dark); + /* Early finish for empty intersections */ + if(bopti_clip(img, r)) return; + } + + int left = r->left; + int width = r->width; + int visual_x = r->visual_x; + + r->left = left >> 5; + r->columns = ((left + width - 1) >> 5) - r->left + 1; + + if(r->columns == 1 && (visual_x & 31) + width <= 32) + { + r->x = (left & 31) - (visual_x & 31); + bopti_render_scsp(img, r, light, dark); } else { - bopti_render_clip(x, y, img, left, top, width, height, - light, dark); + /* x-coordinate of the first pixel of the first column */ + r->x = visual_x - (left & 31); + bopti_render(img, r, light, dark); } } diff --git a/src/render-fx/bopti-asm.h b/src/render-fx/bopti-asm.h index ef7ec64..a5f9dbb 100644 --- a/src/render-fx/bopti-asm.h +++ b/src/render-fx/bopti-asm.h @@ -36,6 +36,7 @@ typedef void asm_gray_scsp_t(uint32_t *v1, uint32_t const *layer, /* Type of any rendering function */ typedef union { + void *gen; asm_mono_t *asm_mono; asm_gray_t *asm_gray; asm_mono_scsp_t *asm_mono_scsp; @@ -84,4 +85,34 @@ extern asm_gray_scsp_t bopti_gasm_gray_scsp; /* bpoti_asm_gray_alpha_scsp(): SCSP "gray_alpha" profile */ extern asm_gray_scsp_t bopti_gasm_gray_alpha_scsp; +//--- +// Renderer's data structures +//--- + +/* struct rbox: A rendering box (target coordinates and source rectangle) + Meaning of fields vary during the rendering process! */ +struct rbox +{ + /* General renderer: + On-screen location of the leftmost pixel of the leftmost rendered + column (this particular pixel might not be drawn but is of + importance in the positioning process) + SCSP renderer: + Shift value used to align columns with positions */ + int x; + /* On-screen location of top-left corner; the (x,y) of dsubimage() */ + int visual_x, y; + /* Width of rendered sub-image */ + int width; + /* Before bopti_render{_scsp}(): + Left-coordinate of the source box (included, in pixels) + In bopti_render{_scsp}(): + Left-coordinate of the source box (included, in columns) */ + int left; + /* Number of columns used in the source box */ + int columns; + /* Vertical bounds of the box in the image (inc-excluded, in pixels) */ + int top, height; +}; + #endif /* GINT_RENDERFX_BOPTIASM */ diff --git a/src/render-fx/bopti.c b/src/render-fx/bopti.c index ca4bd60..1946dac 100644 --- a/src/render-fx/bopti.c +++ b/src/render-fx/bopti.c @@ -3,22 +3,7 @@ #include "render-fx.h" #include "bopti-asm.h" -/* struct rbox: A rendering box (target coordinates and source rectangle) - Some of the data here is redundant, but makes things easier. */ -struct rbox -{ - /* Left pixel of the first column to be drawn, even if this column is - not drawn entirely */ - int x; - /* On-screen location of top-left corner */ - int visual_x, y; - /* Width of rendered sub-image */ - int width; - /* Horizontal bounds of the box in the image (included, in columns) */ - int left, right; - /* Vertical bounds of the box in the image (inc-excluded, in pixels) */ - int top, height; -}; +#pragma GCC optimize("O3") /* struct command: A rendering command Includes many computed parameters and handy information. Read-only. */ @@ -176,7 +161,7 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, masks(rbox->visual_x, rbox->visual_x + rbox->width - 1, vm); /* Number of layers per profile */ - int layer_count[] = { 1, 2, 2, 3 }; + static const int layer_count[] = { 1, 2, 2, 3 }; int layers = layer_count[img->profile]; /* For each pair of consecutive VRAM elements involved, create a mask @@ -207,20 +192,17 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, const uint32_t *layer = (void *)img->data; layer += (rbox->top * img_columns + rbox->left) * layers; - /* Number of grid columns */ - int columns = rbox->right - rbox->left + 1; - /* Compute and execute the command for this parameters */ struct command c = { .x = rbox->x & 31, .v1 = v1, .v2 = v2 ? v2 : v1, .offset = (rbox->y << 2) + (rbox->x >> 5), - .columns = columns, + .columns = rbox->columns, .masks = masks + 2 * left_origin, .real_start = (left_origin > 0), - .vram_stride = 4 - columns, - .data_stride = ((img_columns - columns) << 2) * layers, + .vram_stride = 4 - rbox->columns, + .data_stride = ((img_columns - rbox->columns) << 2) * layers, .gray = (v2 != NULL), .f = f, }; @@ -231,61 +213,62 @@ void bopti_render(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, void bopti_render_scsp(bopti_image_t const *img, struct rbox *rbox, uint32_t *v1, uint32_t *v2) { - /* Rendering function */ - bopti_asm_t f; - if(v2) f.asm_gray_scsp = asm_gray_scsp[img->profile]; - else f.asm_mono_scsp = asm_mono_scsp[img->profile]; - - /* Compute the only rendering mask. Avoid UB if width = 32 */ - uint32_t mask = 0xffffffff; - if(rbox->width < 32) - { - int right = 32 - ((rbox->visual_x & 31) + rbox->width); - mask = ((1 << rbox->width) - 1) << right; - } + /* Compute the only rendering mask */ + uint32_t mask = + (0xffffffff << (32 - rbox->width)) >> (rbox->visual_x & 31); /* Number of layers */ - int layer_count[] = { 1, 2, 2, 3 }; - int layers = layer_count[img->profile]; + int layers = img->profile - (img->profile >> 1) + 1; - /* Number of columns in [img] */ - int img_columns = (img->width + 31) >> 5; + /* Number of longwords to skip between rows of [img] */ + int img_stride = ((img->width + 31) >> 5) * layers; /* Interwoven layer data. Skip left columns that are not rendered */ const uint32_t *layer = (void *)img->data; - layer += (rbox->top * img_columns + rbox->left) * layers; + layer += (rbox->top * img_stride) + (rbox->left * layers); /* Starting value of VRAM pointers */ int offset = (rbox->y << 2) + (rbox->visual_x >> 5); v1 += offset; - if(v2) v2 += offset; /* Number of rows */ int rows = rbox->height; - /* Mask shift */ - int shift = -(rbox->x & 31); - if(rbox->x < 0) shift += 32; /* Render the grid immediately; mono version */ - if(!v2) while(rows--) + if(!v2) { - f.asm_mono_scsp(v1, layer, mask, shift); - layer += img_columns * layers; - v1 += 4; + asm_mono_scsp_t *f = asm_mono_scsp[img->profile]; + while(rows--) + { + f(v1, layer, mask, rbox->x); + layer += img_stride; + v1 += 4; + } } /* Gray version */ - else while(rows--) + else { - f.asm_gray_scsp(v1, layer, mask, v2, shift); - layer += img_columns * layers; - v1 += 4; - v2 += 4; + asm_gray_scsp_t *f = asm_gray_scsp[img->profile]; + v2 += offset; + + while(rows--) + { + f(v1, layer, mask, v2, rbox->x); + layer += img_stride; + v1 += 4; + v2 += 4; + } } } -void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, - int top, int width, int height, uint32_t *v1, uint32_t *v2) +int bopti_clip(bopti_image_t const *img, struct rbox *r) { + /* This load/save is not elegant but it makes GCC use register-only + operations, which is what we need for efficiency */ + int x = r->visual_x, y = r->y; + int left = r->left, top = r->top; + int width = r->width, height = r->height; + /* Adjust the bounding box of the input image */ if(left < 0) width += left, x -= left, left = 0; if(top < 0) height += top, y -= top, top = 0; @@ -298,32 +281,34 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, if(x + width > DWIDTH) width = DWIDTH - x; if(y + height > DHEIGHT) height = DHEIGHT - y; - /* Early finish for empty intersections */ - if(width <= 0 || height <= 0) return; + r->visual_x = x; + r->y = y; + r->left = left; + r->top = top; + r->width = width; + r->height = height; - /* Finish with the noclip variant */ - bopti_render_noclip(x, y, img, left, top, width, height, v1, v2); + /* Return non-zero if the result is empty */ + return (width <= 0 || height <= 0); } -void bopti_render_noclip(int visual_x, int y, bopti_image_t const *img, - int left, int top, int width, int height, uint32_t *v1, uint32_t *v2) +void bopti_render_noclip(bopti_image_t const *img, struct rbox *r, + uint32_t *v1, uint32_t *v2) { + int left = r->left; + /* Start column and end column (both included) */ - int cl = (left) >> 5; - int cr = (left + width - 1) >> 5; + r->left >>= 5; - /* Finish with the standard bopti renderer */ - struct rbox rbox = { 0, visual_x, y, width, cl, cr, top, height }; - - if(cl == cr && (visual_x & 31) + width <= 32) + if(r->columns == 1 && (r->visual_x & 31) + r->width <= 32) { - rbox.x = (visual_x & 31) - (left & 31); - bopti_render_scsp(img, &rbox, v1, v2); + r->x = (left & 31) - (r->visual_x & 31); + bopti_render_scsp(img, r, v1, v2); } else { /* x-coordinate of the first pixel of the first column */ - rbox.x = visual_x - (left & 31); - bopti_render(img, &rbox, v1, v2); + r->x = r->visual_x - (left & 31); + bopti_render(img, r, v1, v2); } } diff --git a/src/render-fx/dsubimage.c b/src/render-fx/dsubimage.c index 1f21ea4..0063ad8 100644 --- a/src/render-fx/dsubimage.c +++ b/src/render-fx/dsubimage.c @@ -2,22 +2,43 @@ #include "render-fx.h" #include "bopti-asm.h" +#pragma GCC optimize("O3") + /* dsubimage(): Render a section of an image */ void dsubimage(int x, int y, bopti_image_t const *img, int left, int top, int width, int height, int flags) { - DMODE_OVERRIDE(dsubimage, x, y, img, left, top, width, height, flags); + struct rbox r = { + 0, x, y, width, left, 0, top, height + }; + DMODE_OVERRIDE(dsubimage, img, &r, flags); if(img->gray) return; - if(flags & DIMAGE_NOCLIP) + /* Intersect the bounding box with both the source image and the VRAM, + except if DIMAGE_NOCLIP is provided */ + if(!(flags & DIMAGE_NOCLIP)) { - bopti_render_noclip(x, y, img, left, top, width, height, - gint_vram, NULL); + /* Early finish for empty intersections */ + if(bopti_clip(img, &r)) return; + } + + left = r.left; + width = r.width; + int visual_x = r.visual_x; + + r.left = left >> 5; + r.columns = ((left + width - 1) >> 5) - r.left + 1; + + if(r.columns == 1 && (visual_x & 31) + width <= 32) + { + r.x = (left & 31) - (visual_x & 31); + bopti_render_scsp(img, &r, gint_vram, NULL); } else { - bopti_render_clip(x, y, img, left, top, width, height, - gint_vram, NULL); + /* x-coordinate of the first pixel of the first column */ + r.x = visual_x - (left & 31); + bopti_render(img, &r, gint_vram, NULL); } } diff --git a/src/render-fx/render-fx.h b/src/render-fx/render-fx.h index 8647336..b32eebe 100644 --- a/src/render-fx/render-fx.h +++ b/src/render-fx/render-fx.h @@ -7,6 +7,7 @@ #include #include +#include "bopti-asm.h" /* masks(): Compute the vram masks for a given rectangle @@ -24,13 +25,10 @@ @masks Stores the result of the function (four uint32_t values) */ void masks(int x1, int x2, uint32_t *masks); -/* bopti_render_clip(): Render a bopti image with clipping - @x @y Location of the top-left corner - @img Image encoded by [fxconv] - @left @top @w @h Bounding box to render - @v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */ -void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, - int top, int w, int h, uint32_t *v1, uint32_t *v2); +/* bopti_clip(): Clip a bounding box to image and VRAM + @img Image encoded by [fxconv] + @rbox Rendering box */ +int bopti_clip(bopti_image_t const *img, struct rbox *rbox); /* bopti_render_noclip(): Render a bopti image without clipping This function is only ever slightly faster than bopti_render_clip(), @@ -42,8 +40,8 @@ void bopti_render_clip(int x, int y, bopti_image_t const *img, int left, @img Image encoded by [fxconv] @left @top @w @h Bounding box to render @v1 @v2 VRAMs (gray rendering is used if v2 != NULL) */ -void bopti_render_noclip(int x, int y, bopti_image_t const *img, int left, - int top, int w, int h, uint32_t *v1, uint32_t *v2); +void bopti_render_noclip(bopti_image_t const *img, struct rbox *rbox, + uint32_t *v1, uint32_t *v2); //--- // Alternate rendering modes @@ -72,8 +70,7 @@ struct rendering_mode (int x, int y, int fg, int bg, int halign, int valign, char const *str); void (*dsubimage) - (int x, int y, bopti_image_t const *image, int left, int top, - int width, int height, int flags); + (bopti_image_t const *image, struct rbox *r, int flags); }; /* The alternate rendering mode pointer (initially NULL)*/ @@ -89,9 +86,7 @@ void gint_gvline(int y1, int y2, int x, int color); void gtext_opt (int x, int y, int fg, int bg, int halign, int valign, char const *str); -void gsubimage - (int x, int y, bopti_image_t const *image, int left, int top, - int width, int height, int flags); +void gsubimage(bopti_image_t const *image, struct rbox *r, int flags); /* Short macro to call the alternate rendering function when available */ #define DMODE_OVERRIDE(func, ...) \