clip floor, avoiding inner loop checks - ASM now slower
I'm guessing the function is so short that not inlining it and having to pass parameters is breaking the deal. I expect this to revert back when we switch to Azur, handle entire fragments at once, and use a denser branch-free texture-lookup inner loop.
This commit is contained in:
parent
15f65e724c
commit
45396c599e
|
@ -2,7 +2,7 @@
|
|||
# toolchain file and module path of the fxSDK
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
project(MyAddin LANGUAGES C CXX)
|
||||
project(MyAddin LANGUAGES C CXX ASM)
|
||||
|
||||
include(GenerateG1A)
|
||||
include(GenerateG3A)
|
||||
|
@ -15,6 +15,7 @@ set(CMAKE_CXX_STANDARD 20)
|
|||
|
||||
set(SOURCES
|
||||
src/azuray.cc
|
||||
src/azuray.S
|
||||
src/main.cc)
|
||||
set(ASSETS
|
||||
assets/map1.txt)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#########
|
||||
## ######
|
||||
# # #
|
||||
###
|
||||
#
|
||||
# #
|
||||
## ##
|
||||
#########
|
||||
###### ##
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
.global _azuray_floor_line
|
||||
|
||||
# Parameters:
|
||||
# vram, floor_x, floor_z, ortho_dir.x
|
||||
# ortho_dir.z, xlen, 0, color1, color2
|
||||
# Register allocation:
|
||||
# r0: (temp)
|
||||
# r1: (temp)
|
||||
# r2: color1
|
||||
# r3: color2
|
||||
# r4: vram
|
||||
# r5: floor_x
|
||||
# r6: floor_z
|
||||
# r7: ortho_dir.x
|
||||
# r8: ortho_dir.z
|
||||
# r9: xlen
|
||||
# r10: _
|
||||
# r11: -16 (for num -> int conversion)
|
||||
# r12: (unused; xlen in r9 is decremented by dt as the loop counter)
|
||||
_azuray_floor_line:
|
||||
mov.l r8, @-r15
|
||||
nop
|
||||
|
||||
mov.l r9, @-r15
|
||||
nop
|
||||
|
||||
mov.l r10, @-r15
|
||||
nop
|
||||
|
||||
mov.l @(12, r15), r8
|
||||
nop
|
||||
|
||||
mov.l @(16, r15), r9
|
||||
nop
|
||||
|
||||
mov.l @(20, r15), r10
|
||||
nop
|
||||
|
||||
mov.l @(24, r15), r2
|
||||
nop
|
||||
|
||||
mov.l @(28, r15), r3
|
||||
nop
|
||||
|
||||
mov.l r11, @-r15
|
||||
mov #-16, r11
|
||||
|
||||
# ---
|
||||
|
||||
1: mov r5, r0
|
||||
xor r6, r0
|
||||
|
||||
mov r2, r1
|
||||
shad r11, r0
|
||||
|
||||
shlr r0
|
||||
nop
|
||||
|
||||
bf.s 3f
|
||||
add r7, r5
|
||||
|
||||
mov r3, r1
|
||||
nop
|
||||
|
||||
3: mov.w r1, @r4
|
||||
dt r9
|
||||
|
||||
/* Sneaky vram++ */
|
||||
mov.w @r4+, r0
|
||||
nop
|
||||
|
||||
bf.s 1b
|
||||
add r8, r6
|
||||
|
||||
# ---
|
||||
|
||||
bra 4f
|
||||
nop
|
||||
|
||||
# --- alternative loop ending ---
|
||||
|
||||
2: dt r9
|
||||
nop
|
||||
|
||||
/* Sneaky vram++ */
|
||||
add r7, r5
|
||||
mov.w @r4+, r0
|
||||
|
||||
bf.s 1b
|
||||
add r8, r6
|
||||
|
||||
# ---
|
||||
|
||||
4: mov.l @r15+, r11
|
||||
mov.l @r15+, r10
|
||||
mov.l @r15+, r9
|
||||
rts
|
||||
mov.l @r15+, r8
|
||||
|
||||
/*
|
||||
for(int x = 0; x < DWIDTH; x++) {
|
||||
int wx = floor_x.ifloor();
|
||||
int wz = floor_z.ifloor();
|
||||
|
||||
if((uint)wx < MI.width() && (uint)wz < MI.height()) {
|
||||
*vram = (wx ^ wz) & 1 ? color2 : color1;
|
||||
}
|
||||
|
||||
floor_x += ortho_dir.x;
|
||||
floor_z += ortho_dir.z;
|
||||
vram++;
|
||||
}
|
||||
*/
|
|
@ -45,6 +45,9 @@ bool raycast(MapInterface &MI, vec3 start, vec3 end, vec3 *collision,
|
|||
int current_x = num_ifloor_along(x, u.x);
|
||||
int current_z = num_ifloor_along(z, u.z);
|
||||
|
||||
if((uint)current_x > MI.width() || (uint)current_z > MI.height())
|
||||
break;
|
||||
|
||||
/* Distance to the next horizontal, and vertical line */
|
||||
num dist_z = (u.z >= 0) ? num(1) - z.frac() : -num_frac_roundup(z);
|
||||
num dist_x = (u.x >= 0) ? num(1) - x.frac() : -num_frac_roundup(x);
|
||||
|
@ -173,24 +176,69 @@ void render_floor(MapInterface &MI, vec3 pos, vec3 dir, num depth)
|
|||
screen position for one line, because depth is constant. */
|
||||
vec3 ortho_dir(dir.z, 0, -dir.x);
|
||||
/* Apply depth factor */
|
||||
num depthFactor = dist / num(DWIDTH / 2 * depth);
|
||||
ortho_dir *= depthFactor;
|
||||
ortho_dir *= dist / num(DWIDTH / 2 * depth);
|
||||
|
||||
vec3 floorPos = floor + (-DWIDTH/2) * ortho_dir;
|
||||
num floor_x = floor.x + (-DWIDTH/2) * ortho_dir.x;
|
||||
num floor_z = floor.z + (-DWIDTH/2) * ortho_dir.z;
|
||||
|
||||
num floor_x_end = floor.x + (DWIDTH/2) * ortho_dir.x;
|
||||
num floor_z_end = floor.z + (DWIDTH/2) * ortho_dir.z;
|
||||
|
||||
uint16_t *vram = gint_vram + y * DWIDTH;
|
||||
int color1 = C_RGB(15, 12, 4);
|
||||
int color2 = C_RGB(12, 8, 2);
|
||||
|
||||
for(int x = 0; x < DWIDTH; x++) {
|
||||
int wx = floorPos.x.ifloor();
|
||||
int wz = floorPos.z.ifloor();
|
||||
/* Determine the intersection between display and map for this line */
|
||||
int xmin = 0;
|
||||
int xmax = DWIDTH-1;
|
||||
|
||||
if((uint)wx < MI.width() && (uint)wz < MI.height()) {
|
||||
*vram = (wx ^ wz) & 1 ? color2 : color1;
|
||||
}
|
||||
if(ortho_dir.x > 0) {
|
||||
if(floor_x < 0)
|
||||
xmin = max((i32)xmin, -floor_x.v / ortho_dir.x.v + 1);
|
||||
num Mx = floor_x_end - num((int)MI.width());
|
||||
if(Mx > 0)
|
||||
xmax = min((i32)xmax, DWIDTH - 1 - Mx.v / ortho_dir.x.v);
|
||||
}
|
||||
else {
|
||||
if(floor_x_end < 0)
|
||||
xmax = min((i32)xmax, DWIDTH - 1 - floor_x_end.v / ortho_dir.x.v + 1);
|
||||
num mx = num((int)MI.width()) - floor_x;
|
||||
if(mx < 0)
|
||||
xmin = max((i32)xmin, mx.v / ortho_dir.x.v);
|
||||
}
|
||||
|
||||
floorPos += ortho_dir;
|
||||
if(ortho_dir.z > 0) {
|
||||
if(floor_z < 0)
|
||||
xmin = max((i32)xmin, -floor_z.v / ortho_dir.z.v + 1);
|
||||
num Mz = floor_z_end - num((int)MI.height());
|
||||
if(Mz > 0)
|
||||
xmax = min((i32)xmax, DWIDTH - 1 - Mz.v / ortho_dir.z.v);
|
||||
}
|
||||
else {
|
||||
if(floor_z_end < 0)
|
||||
xmax = min((i32)xmax, DWIDTH - 1 - floor_z_end.v / ortho_dir.z.v + 1);
|
||||
num mz = num((int)MI.height()) - floor_z;
|
||||
if(mz < 0)
|
||||
xmin = max((i32)xmin, mz.v / ortho_dir.z.v);
|
||||
}
|
||||
|
||||
if(xmin) {
|
||||
vram += xmin;
|
||||
floor_x += xmin * ortho_dir.x;
|
||||
floor_z += xmin * ortho_dir.z;
|
||||
}
|
||||
/* if(xmin <= xmax)
|
||||
azuray_floor_line(vram, floor_x, floor_z, ortho_dir.x, ortho_dir.z,
|
||||
xmax-xmin+1, 0, color1, color2);
|
||||
*/
|
||||
for(int x = xmin; x <= xmax; x++) {
|
||||
int wx = floor_x.ifloor();
|
||||
int wz = floor_z.ifloor();
|
||||
|
||||
*vram = (wx ^ wz) & 1 ? color2 : color1;
|
||||
|
||||
floor_x += ortho_dir.x;
|
||||
floor_z += ortho_dir.z;
|
||||
vram++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,3 +29,11 @@ void render(MapInterface &MI, vec3 pos, vec3 dir, num depth);
|
|||
void render_floor(MapInterface &MI, vec3 pos, vec3 dir, num depth);
|
||||
|
||||
} /* namespace azuray */
|
||||
|
||||
extern "C" {
|
||||
|
||||
void azuray_floor_line(uint16_t *vram, num floor_x, num floor_z,
|
||||
num ortho_dir_x, num ortho_dir_z, int xlen, int, int color1,
|
||||
int color2);
|
||||
|
||||
} /* extern "C" */
|
||||
|
|
Loading…
Reference in New Issue