azur: implement support for P4_RGB565A (P4)
This commit is contained in:
parent
ddff9f6d6b
commit
c16b1a85c6
|
@ -268,6 +268,8 @@ struct azrp_shader_tex2d_command {
|
|||
int16_t lines;
|
||||
/* Already offset by start row and column */
|
||||
void const *input;
|
||||
/* P4 modes only: */
|
||||
int16_t edge1, edge2;
|
||||
};
|
||||
|
||||
AZUR_END_DECLS
|
||||
|
|
|
@ -26,37 +26,36 @@ _azrp_shader_tex2d:
|
|||
mov.w @r2+, r5 /* command.output (offset) */
|
||||
sub r7, r4
|
||||
|
||||
mov.w @r2+, r1 /* command.lines */
|
||||
mov.w @r8+, r9 /* image.profile */
|
||||
sub r7, r4
|
||||
|
||||
mov.w @r8+, r0 /* image.profile */
|
||||
mov.w @r2+, r1 /* command.lines */
|
||||
add r6, r5
|
||||
|
||||
mov.l @r2+, r3 /* command.input (pointer) */
|
||||
shll2 r9
|
||||
|
||||
mova .formats, r0
|
||||
|
||||
mov.w @r8+, r6 /* image.alpha */
|
||||
|
||||
mov.l @(r0,r9), r0
|
||||
|
||||
mov.w @r8+, r9 /* image.width */
|
||||
|
||||
mov.l @r2+, r3 /* command.input (pointer) */
|
||||
mov r0, r2
|
||||
|
||||
mova .formats, r0
|
||||
shll2 r2
|
||||
|
||||
/* Stall cycle */
|
||||
|
||||
mov.l @(r0, r2), r0
|
||||
|
||||
jmp @r0
|
||||
/* Stall for r9 */
|
||||
sub r7, r9
|
||||
|
||||
.align 4
|
||||
.formats:
|
||||
.long _RGB565
|
||||
.long _RGB565A
|
||||
.long _NOP
|
||||
.long _P4
|
||||
.long _NOP /* P8 */
|
||||
.long _P4_RGB565A /* =P4 */
|
||||
.long _P8_RGB565
|
||||
.long _P8_RGB565A
|
||||
.long _P4_RGB565
|
||||
|
||||
/* [Loop macros]
|
||||
|
||||
|
@ -414,15 +413,131 @@ _P8_RGB565.palette_distance:
|
|||
/* Distance between image pointer and palette array base */
|
||||
.word 260
|
||||
|
||||
/* [Rendering strategy for the P4 format] */
|
||||
_P4:
|
||||
/* [Rendering strategy for the P4_RGB565A format]
|
||||
|
||||
This is the most complex format. Most of the remarks that apply to
|
||||
P8_RGB565A also apply here, except that there are fewer opportunities to save
|
||||
computation because nibbles must be extracted anyway.
|
||||
|
||||
The P4_RGB565A format is simply bopti's P4, but an additional variation
|
||||
P4_RGB565 is specified to save on transparency handling, which is very
|
||||
expensive.
|
||||
|
||||
The special nature of the nibble packing means the simplest loop form writes
|
||||
2 pixels from a 2-aligned source image position in a single iteration. Other
|
||||
structures don't even come close: selecting nibbles individually is folly,
|
||||
while not interweaving is inefficient. So the whole point of this routine is
|
||||
to forcibly align the subimage on a byte-aligned grid and never break that grid.
|
||||
|
||||
The command builder for P4 does this alignment before submitting the
|
||||
command. Obviously the transform can cause one extra pixel to be overwritten
|
||||
on each side of every line. The command is thus extended with two edge
|
||||
offsets indicating pixels to preserve at each end. When overwrites occur,
|
||||
the edge offsets point to the overwritten pixels so they can be restored.
|
||||
Otherwise, they point to the next pixels and the restores are no-ops. See
|
||||
the strategy used for managing interweaving in P8 formats for details.
|
||||
|
||||
TODO: Asymptotic performance */
|
||||
.align 4
|
||||
_P4_RGB565A:
|
||||
mov.l r10, @-r15
|
||||
shlr r9
|
||||
|
||||
mov.l r11, @-r15
|
||||
add #-1, r9 /* Input stride compensation for openness */
|
||||
|
||||
mov.l r12, @-r15
|
||||
add #2, r8 /* image.palette */
|
||||
|
||||
mov.w @r2+, r11 /* command.edge1 */
|
||||
shlr r7
|
||||
|
||||
mov.w @r2+, r12 /* command.edge2 */
|
||||
mov r5, r10
|
||||
|
||||
mov.l r13, @-r15
|
||||
shll r11
|
||||
|
||||
mov.l r14, @-r15
|
||||
shll r12
|
||||
|
||||
TEX2D_START()
|
||||
|
||||
mov r10, r0
|
||||
mov.b @r3+, r6
|
||||
|
||||
/* Stall for r0 */
|
||||
|
||||
mov.w @(r0,r11), r13
|
||||
|
||||
mov.w @(r0,r12), r14
|
||||
|
||||
/* Main loop with 2 pixels sharing a single byte */
|
||||
|
||||
2: /* Stall for r6 */
|
||||
|
||||
shll r6
|
||||
|
||||
mov r6, r0
|
||||
and #0x1e, r0
|
||||
|
||||
tst r0, r0
|
||||
|
||||
bt 4f
|
||||
mov.w @(r0,r8), r0
|
||||
|
||||
mov.w r0, @(2,r5)
|
||||
4: shlr2 r6
|
||||
|
||||
shlr2 r6
|
||||
|
||||
mov r6, r0
|
||||
and #0x1e, r0
|
||||
|
||||
tst r0, r0
|
||||
|
||||
bt 5f
|
||||
mov.w @(r0,r8), r0
|
||||
|
||||
mov.w r0, @r5
|
||||
|
||||
5: mov.b @r3+, r6
|
||||
3: add #4, r5
|
||||
|
||||
mov r10, r0
|
||||
add r7, r10
|
||||
|
||||
/* Stall for r0 */
|
||||
|
||||
mov.w r13, @(r0,r11)
|
||||
add r7, r10
|
||||
|
||||
mov.w r14, @(r0,r12)
|
||||
add r4, r10
|
||||
|
||||
add r7, r10
|
||||
add r7, r10
|
||||
|
||||
TEX2D_END_NORET()
|
||||
mov.l @r15+, r14
|
||||
mov.l @r15+, r13
|
||||
mov.l @r15+, r12
|
||||
mov.l @r15+, r11
|
||||
mov.l @r15+, r10
|
||||
mov.l @r15+, r9
|
||||
rts
|
||||
mov.l @r15+, r8
|
||||
|
||||
/* [Rendering strategy for the P4_RGB565 format]
|
||||
Same as P4_RGB565A without transparency checks (fairly straightforward). */
|
||||
.align 4
|
||||
_P4_RGB565:
|
||||
TEX2D_START()
|
||||
2:
|
||||
3: nop
|
||||
TEX2D_END()
|
||||
|
||||
/* [Unsupported formats]
|
||||
|
||||
P8 is unsupported, use P8_RGB565 and P8_RGB565A. */
|
||||
_NOP:
|
||||
mov.l @r15+, r9
|
||||
|
|
|
@ -18,11 +18,12 @@ void azrp_shader_tex2d_configure(void)
|
|||
//---
|
||||
|
||||
/* Profile IDs */
|
||||
#define PX_RGB565 0
|
||||
#define PX_RGB565A 1
|
||||
#define PX_P4 3
|
||||
#define PX_P8_RGB565 4
|
||||
#define PX_P8_RGB565A 5
|
||||
#define RGB565 0
|
||||
#define RGB565A 1
|
||||
#define P4_RGB565A 3
|
||||
#define P8_RGB565 4
|
||||
#define P8_RGB565A 5
|
||||
#define P4_RGB565 6
|
||||
|
||||
void azrp_image(int x, int y, bopti_image_t const *image)
|
||||
{
|
||||
|
@ -45,14 +46,24 @@ void azrp_subimage(int x, int y, bopti_image_t const *image,
|
|||
|
||||
int input_multiplier = 1;
|
||||
void const *data = image->data;
|
||||
size_t cmd_size = sizeof cmd - 4;
|
||||
|
||||
if(image->profile == PX_P8_RGB565 || image->profile == PX_P8_RGB565A) {
|
||||
if(image->profile == P8_RGB565 || image->profile == P8_RGB565A) {
|
||||
input_multiplier = 0;
|
||||
data += (image->data[0] * 2) + 2;
|
||||
}
|
||||
else if(image->profile == PX_P4) {
|
||||
else if(image->profile == P4_RGB565 || image->profile == P4_RGB565A) {
|
||||
input_multiplier = -1;
|
||||
data += 32;
|
||||
|
||||
int odd_left = left & 1;
|
||||
int odd_right = (left + width) & 1;
|
||||
|
||||
cmd.edge1 = -1 + odd_left;
|
||||
cmd.edge2 = width + odd_left;
|
||||
cmd.columns += odd_left + odd_right;
|
||||
x -= odd_left;
|
||||
cmd_size += 4;
|
||||
}
|
||||
|
||||
/* This divides by azrp_frag_height */
|
||||
|
@ -61,7 +72,8 @@ void azrp_subimage(int x, int y, bopti_image_t const *image,
|
|||
while(height > 0) {
|
||||
cmd.lines = min(height, azrp_frag_height - (y & (azrp_frag_height-1)));
|
||||
|
||||
int input_offset = (image->width * top + left) << input_multiplier;
|
||||
int input_offset = image->width * top + left;
|
||||
input_offset = (input_offset << (input_multiplier + 1)) >> 1;
|
||||
cmd.input = data + input_offset;
|
||||
cmd.output = 2 * (azrp_width * (y & (azrp_frag_height-1)) + x);
|
||||
|
||||
|
@ -69,7 +81,7 @@ void azrp_subimage(int x, int y, bopti_image_t const *image,
|
|||
top += cmd.lines;
|
||||
height -= cmd.lines;
|
||||
|
||||
azrp_queue_command(&cmd, sizeof cmd);
|
||||
azrp_queue_command(&cmd, cmd_size);
|
||||
cmd.fragment_id++;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue