From 31470451967469ca4ec144d6586e8c93ae47d532 Mon Sep 17 00:00:00 2001 From: Lephe Date: Fri, 15 Nov 2019 13:31:44 +0100 Subject: [PATCH] bopti-cg: add p8, p4, and fix alignment issues on r5g6b5 --- TODO | 1 + src/render-cg/bopti-asm.h | 18 ++++ src/render-cg/bopti-asm.s | 187 ++++++++++++++++++++++++++++++++++++-- src/render-cg/bopti.c | 34 +++++-- 4 files changed, 226 insertions(+), 14 deletions(-) diff --git a/TODO b/TODO index 82dc7ed..6278761 100644 --- a/TODO +++ b/TODO @@ -10,6 +10,7 @@ Tests to run. * topti: all charsets Complementary elements on existing code. +* make fx9860g projects work out of the box on fxcg50 * topti: support unicode fonts * gray: find good values for more models than the Graph 35+E II * dma: maybe relax the 4-byte size constraint for dma_memset() diff --git a/src/render-cg/bopti-asm.h b/src/render-cg/bopti-asm.h index 71cbbde..159856b 100644 --- a/src/render-cg/bopti-asm.h +++ b/src/render-cg/bopti-asm.h @@ -22,6 +22,24 @@ void bopti_r5g6b5(uint16_t const *data, uint16_t *target, int width, void bopti_r5g6b5a(uint16_t const *data, uint16_t *target, int width, int height, int in_stride, int out_stride, uint16_t alpha); +/* bopti_p8(): 8-bit palette rendering with alpha + @data @target @width @height @in_stride @out_stride + As in bopti_r5g6b5(). + @palette Pointer to image palette (256 colors) + @alpha Palette index for transparency, if >= 256 no alpha is used */ +void bopti_p8(uint8_t const *data, uint16_t *target, int width, int height, + int in_stride, int out_stride, uint16_t const *palette, int alpha); + +/* bopti_p4(): 4-bit palette rendering with alpha + @data @target @width @height @in_stride @out_stride + As in bopti_r5g6b5(). + @palette Pointer to image palette (16 colors) + @alpha Palette index for transparency, if >= 16 no alpha is used + @offset Initial offset within [data], in number of pixels */ +void bopti_p4(uint8_t const *data, uint16_t *target, int width, int height, + int in_stride, int out_stride, uint16_t const *palette, int alpha, + int offset); + /* bopti_render_clip(): Render with clipping Same parameters as dsubimage(), except for flags. */ void bopti_render_clip(int x, int y, image_t const *img, int left, int top, diff --git a/src/render-cg/bopti-asm.s b/src/render-cg/bopti-asm.s index d633f4e..84149f8 100644 --- a/src/render-cg/bopti-asm.s +++ b/src/render-cg/bopti-asm.s @@ -1,6 +1,8 @@ .global _bopti_r5g6b5 .global _bopti_r5g6b5a +.global _bopti_p8 +.global _bopti_p4 # REGISTER ALLOCATION: # r0: (tmp) @@ -36,14 +38,20 @@ _bopti_r5g6b5: add #-1, r2 shll r2 - # Number of longword operations per row - shlr r6 - - # Input and output strides, minus aligment + # Input and output strides. Add ending alignment because there is no + # corresponding increment in the y-loop. mov.l @(12, r15), r8 mov.l @(16, r15), r9 - sub r3, r8 - sub r3, r9 + mov r5, r0 + shll r6 + add r6 ,r0 + and #2, r0 + add r0, r8 + add r0, r9 + + # Number of longword operations per row + sub r3, r6 + shlr2 r6 .r5g6b5_y: # First longword @@ -136,3 +144,170 @@ _bopti_r5g6b5a: rts mov.l @r15+, r8 + +# REGISTER ALLOCATION: +# r0: (tmp) +# r1: in_stride +# r2: out_stride +# r3: x counter +# --- +# r4: data +# r5: target +# r6: width +# r7: height +# --- +# r8: palette +# r9: alpha +# --- +# @8: in_stride +# @12: out_stride +# @16: palette +# @20: alpha + +.align 4 + +_bopti_p8: + # Load palette and in/out strides + mov.l r8, @-r15 + mov.l r9, @-r15 + mov.l @(16, r15), r8 + mov.l @(8, r15), r1 + mov.l @(12, r15), r2 + + # Load alpha value + mov.l @(20, r15), r9 + +.p8_y: + mov r6, r3 + +.p8_x: + mov.b @r4+, r0 + extu.b r0, r0 + cmp/eq r0, r9 + bt .p8_alpha + + # Pass pixel through palette + shll r0 + mov.w @(r0, r8), r0 + mov.w r0, @r5 + +.p8_alpha: + dt r3 + bf.s .p8_x + add #2, r5 + +# - + + add r1, r4 + dt r7 + bf.s .p8_y + add r2, r5 + +# - + + mov.l @r15+, r9 + rts + mov.l @r15+, r8 + +# REGISTER ALLOCATION: +# r0: (tmp) +# r1: in_stride (in pixels) +# r2: out_stride +# r3: x counter +# --- +# r4: data +# r5: target +# r6: width +# r7: height +# --- +# r8: palette +# r9: number of pixels of offset into data (r4) +# r10: alpha +# --- +# @12: in_stride +# @16: out_stride +# @20: palette +# @24: alpha +# @28: initial offset (in pixels) + +.align 4 + +_bopti_p4: + mov.l r8, @-r15 + mov.l r9, @-r15 + mov.l r10, @-r15 + + # Load palette and in/out strides + mov.l @(20, r15), r8 + mov.l @(12, r15), r1 + mov.l @(16, r15), r2 + + # Load initial offset + mov.l @(28, r15), r9 + + # Load alpha value + mov.l @(24, r15), r10 + shll r10 + +.p4_y: + mov r6, r3 + +.p4_x: + + # Load 4 bits from offset r9 (in pixels) within image data (r4). Note + # that [shlr] puts bit 0 of the shifted register in T. + mov r9, r0 + add #1, r9 + shlr r0 + bt.s .p4_x_unaligned + mov.b @(r0, r4), r0 + +.p4_x_aligned: + + # Load 4 bits from the higher half of @r4 and use them to index the + # palette. Since the palette has two-byte entries, we need the color + # bits to be in position 000xxxx0. + shlr2 r0 + shlr r0 + and #0x1e, r0 + + cmp/eq r0, r10 + bt .p4_alpha + + # Pass pixel through palette + mov.w @(r0, r8), r0 + bra .p4_alpha + mov.w r0, @r5 + +.p4_x_unaligned: + + # Load 4 bits from the lower half of @r4 into position 000xxxx0. + shll r0 + and #0x1e, r0 + + cmp/eq r0, r10 + bt .p4_alpha + + # Pass pixel through palette + mov.w @(r0, r8), r0 + mov.w r0, @r5 + +.p4_alpha: + + dt r3 + bf.s .p4_x + add #2, r5 + +.p4_y_end: + + add r1, r9 + dt r7 + bf.s .p4_y + add r2, r5 + +# - + + mov.l @r15+, r10 + mov.l @r15+, r9 + rts + mov.l @r15+, r8 diff --git a/src/render-cg/bopti.c b/src/render-cg/bopti.c index aa294d6..cfde180 100644 --- a/src/render-cg/bopti.c +++ b/src/render-cg/bopti.c @@ -25,18 +25,18 @@ void bopti_render(image_t const *img, struct box *box) int out = 2 * (396 - box->w); int profile = img->profile; + void const *data = img->data; + + int w = box->w; + int h = box->h; if(profile == PX_R5G6B5 || profile == PX_R5G6B5A) { - uint16_t const *data = img->data; - int w = box->w; - int h = box->h; - - data += img->width * box->top + box->left; - /* Input stride */ int in = 2 * (img->width - w); + data += 2 * (img->width * box->top + box->left); + if(profile == PX_R5G6B5) bopti_r5g6b5(data, target, w, h, in, out); else @@ -44,11 +44,29 @@ void bopti_render(image_t const *img, struct box *box) } else if(profile == PX_P8) { - #warning TODO: PX_P8 + /* Palette has 0x100 entries of 2 bytes each */ + uint16_t const *palette = data; + data += 512; + + int in = img->width - w; + data += img->width * box->top + box->left; + + bopti_p8(data, target, w, h, in, out, palette, img->alpha); } else if(profile == PX_P4) { - #warning TODO: PX_P4 + /* Palette has 0x10 entries of 2 bytes each */ + uint16_t const *palette = data; + data += 32; + + /* Due to nibble alignment being a hassle, in this function the + input stride is expressed in pixels. */ + int in = img->width - w; + /* Also we don't move pointers, we just use pixel offsets. */ + int offset = img->width * box->top + box->left; + + bopti_p4(data, target, w, h, in, out, palette, img->alpha, + offset); } }