bopti-cg: add p8, p4, and fix alignment issues on r5g6b5

This commit is contained in:
Lephe 2019-11-15 13:31:44 +01:00
parent 3baa7612ee
commit 3147045196
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
4 changed files with 226 additions and 14 deletions

1
TODO
View File

@ -10,6 +10,7 @@ Tests to run.
* topti: all charsets
Complementary elements on existing code.
* make fx9860g projects work out of the box on fxcg50
* topti: support unicode fonts
* gray: find good values for more models than the Graph 35+E II
* dma: maybe relax the 4-byte size constraint for dma_memset()

View File

@ -22,6 +22,24 @@ void bopti_r5g6b5(uint16_t const *data, uint16_t *target, int width,
void bopti_r5g6b5a(uint16_t const *data, uint16_t *target, int width,
int height, int in_stride, int out_stride, uint16_t alpha);
/* bopti_p8(): 8-bit palette rendering with alpha
Reads one byte per pixel from @data, uses it as an index into @palette, and
writes the resulting 16-bit color to @target. Pixels whose index is equal to
@alpha are skipped, leaving the target untouched.
@data @target @width @height @in_stride @out_stride
As in bopti_r5g6b5(). Both strides are byte counts, added to @data and
@target respectively at the end of each row.
@palette Pointer to image palette (256 colors)
@alpha Palette index for transparency, if >= 256 no alpha is used */
void bopti_p8(uint8_t const *data, uint16_t *target, int width, int height,
int in_stride, int out_stride, uint16_t const *palette, int alpha);
/* bopti_p4(): 4-bit palette rendering with alpha
Reads two pixels per byte from @data (the high nibble holds the pixel at even
offsets, the low nibble the pixel at odd offsets), uses each nibble as an
index into @palette, and writes the resulting 16-bit color to @target. Pixels
whose index is equal to @alpha are skipped, leaving the target untouched.
@data @target @width @height @out_stride
As in bopti_r5g6b5(), except that @data is never advanced: the position
in the image is tracked by a pixel offset starting at @offset.
@in_stride Number of pixels (not bytes) to skip at the end of each row
@palette Pointer to image palette (16 colors)
@alpha Palette index for transparency, if >= 16 no alpha is used
@offset Initial offset within [data], in number of pixels */
void bopti_p4(uint8_t const *data, uint16_t *target, int width, int height,
int in_stride, int out_stride, uint16_t const *palette, int alpha,
int offset);
/* bopti_render_clip(): Render with clipping
Same parameters as dsubimage(), except for flags. */
void bopti_render_clip(int x, int y, image_t const *img, int left, int top,

View File

@ -1,6 +1,8 @@
.global _bopti_r5g6b5
.global _bopti_r5g6b5a
.global _bopti_p8
.global _bopti_p4
# REGISTER ALLOCATION:
# r0: (tmp)
@ -36,14 +38,20 @@ _bopti_r5g6b5:
add #-1, r2
shll r2
# Number of longword operations per row
shlr r6
# Input and output strides, minus alignment
# Input and output strides. Add ending alignment because there is no
# corresponding increment in the y-loop.
mov.l @(12, r15), r8
mov.l @(16, r15), r9
sub r3, r8
sub r3, r9
mov r5, r0
shll r6
add r6 ,r0
and #2, r0
add r0, r8
add r0, r9
# Number of longword operations per row
sub r3, r6
shlr2 r6
.r5g6b5_y:
# First longword
@ -136,3 +144,170 @@ _bopti_r5g6b5a:
rts
mov.l @r15+, r8
# bopti_p8(): draw a rectangle of 8-bit palette indices as 16-bit colors
# ---
# For each of the [height] rows, [width] bytes are read from [data]. Each byte
# is an index into [palette] (2-byte entries). The color found there is stored
# to [target], unless the index is equal to [alpha], in which case the target
# pixel is left untouched. At the end of each row, [in_stride] is added to the
# data pointer and [out_stride] is added to the target pointer.
# ---
# Nothing is drawn if [width] or [height] is zero or negative.
# ---
# REGISTER ALLOCATION:
# r0: (tmp)
# r1: in_stride
# r2: out_stride
# r3: x counter
# ---
# r4: data
# r5: target
# r6: width
# r7: height
# ---
# r8: palette
# r9: alpha
# ---
# Stack arguments, as seen after r8 and r9 have been pushed:
# @8: in_stride
# @12: out_stride
# @16: palette
# @20: alpha
	.align	4
_bopti_p8:
	mov.l	r8, @-r15
	mov.l	r9, @-r15

	# Draw nothing unless both dimensions are positive. The loops below
	# count down with [dt], which only stops when the counter reaches
	# exactly zero; starting from 0 or less, the counter would wrap around
	# instead of stopping right away.
	cmp/pl	r6
	bf	.p8_end
	cmp/pl	r7
	bf	.p8_end

	# Load palette and in/out strides
	mov.l	@(16, r15), r8
	mov.l	@(8, r15), r1
	mov.l	@(12, r15), r2
	# Load alpha value
	mov.l	@(20, r15), r9

.p8_y:
	mov	r6, r3

.p8_x:
	# Read the next palette index as an unsigned byte
	mov.b	@r4+, r0
	extu.b	r0, r0
	# Leave the target pixel alone if this is the transparent index
	cmp/eq	r0, r9
	bt	.p8_alpha

	# Pass pixel through palette; entries are 2 bytes wide, so the index
	# is doubled to get a byte offset
	shll	r0
	mov.w	@(r0, r8), r0
	mov.w	r0, @r5

.p8_alpha:
	# Next pixel. The target pointer advances in the delay slot, which is
	# executed whether or not the branch is taken.
	dt	r3
	bf.s	.p8_x
	add	#2, r5

	# End of row: apply the input stride, then the output stride in the
	# delay slot
	add	r1, r4
	dt	r7
	bf.s	.p8_y
	add	r2, r5

.p8_end:
	# Restore saved registers; r8 is restored in the delay slot of [rts]
	mov.l	@r15+, r9
	rts
	mov.l	@r15+, r8
# bopti_p4(): draw a rectangle of 4-bit palette indices as 16-bit colors
# ---
# The image data holds two pixels per byte. The data pointer itself is never
# moved; instead a pixel offset is maintained, and pixel number [n] is taken
# from the byte at [data + n/2]: the higher 4 bits when [n] is even, the lower
# 4 bits when [n] is odd. Each 4-bit value is an index into [palette] (2-byte
# entries). The color found there is stored to [target], unless the index is
# equal to [alpha], in which case the target pixel is left untouched. At the
# end of each row, [in_stride] (a number of pixels) is added to the pixel
# offset and [out_stride] is added to the target pointer.
# ---
# Nothing is drawn if [width] or [height] is zero or negative.
# ---
# REGISTER ALLOCATION:
# r0: (tmp)
# r1: in_stride (in pixels)
# r2: out_stride
# r3: x counter
# ---
# r4: data
# r5: target
# r6: width
# r7: height
# ---
# r8: palette
# r9: number of pixels of offset into data (r4)
# r10: alpha (doubled, to compare against palette byte offsets)
# ---
# Stack arguments, as seen after r8, r9 and r10 have been pushed:
# @12: in_stride
# @16: out_stride
# @20: palette
# @24: alpha
# @28: initial offset (in pixels)
	.align	4
_bopti_p4:
	mov.l	r8, @-r15
	mov.l	r9, @-r15
	mov.l	r10, @-r15

	# Draw nothing unless both dimensions are positive. The loops below
	# count down with [dt], which only stops when the counter reaches
	# exactly zero; starting from 0 or less, the counter would wrap around
	# instead of stopping right away.
	cmp/pl	r6
	bf	.p4_end
	cmp/pl	r7
	bf	.p4_end

	# Load palette and in/out strides
	mov.l	@(20, r15), r8
	mov.l	@(12, r15), r1
	mov.l	@(16, r15), r2
	# Load initial offset
	mov.l	@(28, r15), r9
	# Load alpha value. It is doubled because pixel values are compared
	# after being turned into palette byte offsets (position 000xxxx0).
	mov.l	@(24, r15), r10
	shll	r10

.p4_y:
	mov	r6, r3

.p4_x:
	# Load the byte holding pixel r9 from the image data (r4), then move
	# on to the next pixel. Note that [shlr] puts bit 0 of the shifted
	# register in T, so T tells whether the pixel offset was odd. The load
	# sits in the delay slot and is executed on both paths.
	mov	r9, r0
	add	#1, r9
	shlr	r0
	bt.s	.p4_x_unaligned
	mov.b	@(r0, r4), r0

.p4_x_aligned:
	# Even offset: use the higher half of the loaded byte to index the
	# palette. Since the palette has two-byte entries, we need the color
	# bits to be in position 000xxxx0, hence a right shift by 3.
	shlr2	r0
	shlr	r0
	and	#0x1e, r0
	cmp/eq	r0, r10
	bt	.p4_alpha

	# Pass pixel through palette; the store is in the delay slot
	mov.w	@(r0, r8), r0
	bra	.p4_alpha
	mov.w	r0, @r5

.p4_x_unaligned:
	# Odd offset: move the lower half of the loaded byte into position
	# 000xxxx0.
	shll	r0
	and	#0x1e, r0
	cmp/eq	r0, r10
	bt	.p4_alpha

	# Pass pixel through palette
	mov.w	@(r0, r8), r0
	mov.w	r0, @r5

.p4_alpha:
	# Next pixel. The target pointer advances in the delay slot, which is
	# executed whether or not the branch is taken.
	dt	r3
	bf.s	.p4_x
	add	#2, r5

.p4_y_end:
	# End of row: apply the input stride to the pixel offset, then the
	# output stride to the target pointer in the delay slot
	add	r1, r9
	dt	r7
	bf.s	.p4_y
	add	r2, r5

.p4_end:
	# Restore saved registers; r8 is restored in the delay slot of [rts]
	mov.l	@r15+, r10
	mov.l	@r15+, r9
	rts
	mov.l	@r15+, r8

View File

@ -25,18 +25,18 @@ void bopti_render(image_t const *img, struct box *box)
int out = 2 * (396 - box->w);
int profile = img->profile;
void const *data = img->data;
int w = box->w;
int h = box->h;
if(profile == PX_R5G6B5 || profile == PX_R5G6B5A)
{
uint16_t const *data = img->data;
int w = box->w;
int h = box->h;
data += img->width * box->top + box->left;
/* Input stride */
int in = 2 * (img->width - w);
data += 2 * (img->width * box->top + box->left);
if(profile == PX_R5G6B5)
bopti_r5g6b5(data, target, w, h, in, out);
else
@ -44,11 +44,29 @@ void bopti_render(image_t const *img, struct box *box)
}
else if(profile == PX_P8)
{
#warning TODO: PX_P8
/* Palette has 0x100 entries of 2 bytes each */
uint16_t const *palette = data;
data += 512;
int in = img->width - w;
data += img->width * box->top + box->left;
bopti_p8(data, target, w, h, in, out, palette, img->alpha);
}
else if(profile == PX_P4)
{
#warning TODO: PX_P4
/* Palette has 0x10 entries of 2 bytes each */
uint16_t const *palette = data;
data += 32;
/* Due to nibble alignment being a hassle, in this function the
input stride is expressed in pixels. */
int in = img->width - w;
/* Also we don't move pointers, we just use pixel offsets. */
int offset = img->width * box->top + box->left;
bopti_p4(data, target, w, h, in, out, palette, img->alpha,
offset);
}
}