ASM implementation of the 3D track (slower for now)
This commit is contained in:
parent
900837e36b
commit
189dc9b102
|
@ -2,7 +2,7 @@
|
|||
# toolchain file and module path of the fxSDK
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
project(MarioKart)
|
||||
project(MarioKart LANGUAGES C ASM)
|
||||
|
||||
set(SOURCES
|
||||
data-headers/track.c
|
||||
|
@ -25,6 +25,7 @@ set(SOURCES
|
|||
if (DEFINED FXSDK_PLATFORM_LONG)
|
||||
set(SOURCES ${SOURCES}
|
||||
src/platforms/gint.c
|
||||
# src/tilemap.S
|
||||
)
|
||||
|
||||
include(GenerateG1A)
|
||||
|
@ -115,7 +116,7 @@ if (DEFINED FXSDK_PLATFORM_LONG)
|
|||
fxconv_declare_assets(${ASSETS} ${ASSETS_fx} ${ASSETS_cg} WITH_METADATA)
|
||||
|
||||
add_executable(mariokart ${SOURCES} ${ASSETS} ${ASSETS_${FXSDK_PLATFORM}})
|
||||
target_compile_options(mariokart PRIVATE -Wall -Wextra -Werror-implicit-function-declaration -Ofast -flto -g)
|
||||
target_compile_options(mariokart PRIVATE -DUSE_ASM -Wall -Wextra -Werror-implicit-function-declaration -Ofast -flto -g)
|
||||
target_link_libraries(mariokart Gint::Gint)
|
||||
target_link_libraries(mariokart LibProf::LibProf)
|
||||
|
||||
|
|
207
src/3d.c
207
src/3d.c
|
@ -12,6 +12,10 @@
|
|||
#include "platforms/gint.h"
|
||||
// #include "./3d-bg-dat.h"
|
||||
|
||||
// #ifdef PROFILING_ENABLED
|
||||
// #include "libprof.h"
|
||||
// #endif
|
||||
|
||||
// Screen row at which the renderer switches from the full-resolution loop to
// the low-resolution area. The whole expansion is parenthesised so the macro
// behaves as a single value inside larger expressions (e.g. 2 * lowResCutoff).
#define lowResCutoff ((LCD_HEIGHT_PX * 3 / 4) - 30)
|
||||
|
||||
inline void setPixel(int x, int y, color_t color) {
|
||||
|
@ -20,61 +24,61 @@ inline void setPixel(int x, int y, color_t color) {
|
|||
|
||||
int hFovModifier = 1 << 12;
|
||||
|
||||
void normalFov() {
|
||||
int angleCos = fpcos(angle);
|
||||
int angleSin = fpsin(angle);
|
||||
for (unsigned short y = horizon + 2; y < /*LCD_HEIGHT_PX*/lowResCutoff; y++) {
|
||||
int dist = newLut[y - horizon];
|
||||
int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
int x2 = wx >> 6;
|
||||
int y2 = dist;
|
||||
// void __attribute__ ((noinline, hot)) normalFov() {
|
||||
// int angleCos = fpcos(angle);
|
||||
// int angleSin = fpsin(angle);
|
||||
// for (unsigned short y = horizon + 2; y < /*LCD_HEIGHT_PX*/lowResCutoff; y++) {
|
||||
// int dist = newLut[y - horizon];
|
||||
// int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
// for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
// int x2 = wx >> 6;
|
||||
// int y2 = dist;
|
||||
|
||||
// Rotate by angle
|
||||
int newX = ((x2 * angleCos) >> 15) + ((y2 * angleSin) >> 15);
|
||||
int newY = ((y2 * angleCos) >> 15) - ((x2 * angleSin) >> 15);
|
||||
// // Rotate by angle
|
||||
// int newX = ((x2 * angleCos) >> 15) + ((y2 * angleSin) >> 15);
|
||||
// int newY = ((y2 * angleCos) >> 15) - ((x2 * angleSin) >> 15);
|
||||
|
||||
color_t col = samplePixel(newX >> 1, newY >> 1);
|
||||
// if (col == 0) {
|
||||
// color_t* img_data = (color_t*) data_3d_bg + 4;
|
||||
// col = img_data[(LCD_WIDTH_PX * (y - (horizon + 2))) + (x << 1)];
|
||||
// }
|
||||
// setPixel(x * 2, y, col);
|
||||
// setPixel(x * 2 + 1, y, col);
|
||||
// Cast to an unsigned int array so two pixels are stored at once.
|
||||
((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
// color_t col = samplePixel(newX >> 1, newY >> 1);
|
||||
// // if (col == 0) {
|
||||
// // color_t* img_data = (color_t*) data_3d_bg + 4;
|
||||
// // col = img_data[(LCD_WIDTH_PX * (y - (horizon + 2))) + (x << 1)];
|
||||
// // }
|
||||
// // setPixel(x * 2, y, col);
|
||||
// // setPixel(x * 2 + 1, y, col);
|
||||
// // Cast to an unsigned int array so two pixels are stored at once.
|
||||
// ((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
|
||||
wx += dist;
|
||||
}
|
||||
}
|
||||
// Fill in the low resolution area at half vertical resolution.
|
||||
// TODO: remove duplicate code
|
||||
for (unsigned short y = lowResCutoff; y < LCD_HEIGHT_PX; y += 2) {
|
||||
int dist = newLut[y - horizon];
|
||||
int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
int x2 = wx >> 6;
|
||||
int y2 = dist;
|
||||
// wx += dist;
|
||||
// }
|
||||
// }
|
||||
// // Fill in the low resolution area at half vertical resolution.
|
||||
// // TODO: remove duplicate code
|
||||
// for (unsigned short y = lowResCutoff; y < LCD_HEIGHT_PX; y += 2) {
|
||||
// int dist = newLut[y - horizon];
|
||||
// int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
// for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
// int x2 = wx >> 6;
|
||||
// int y2 = dist;
|
||||
|
||||
// Rotate by angle
|
||||
int newX = ((x2 * angleCos) >> 15) + ((y2 * angleSin) >> 15);
|
||||
int newY = ((y2 * angleCos) >> 15) - ((x2 * angleSin) >> 15);
|
||||
// // Rotate by angle
|
||||
// int newX = ((x2 * angleCos) >> 15) + ((y2 * angleSin) >> 15);
|
||||
// int newY = ((y2 * angleCos) >> 15) - ((x2 * angleSin) >> 15);
|
||||
|
||||
color_t col = samplePixel(newX >> 1, newY >> 1);
|
||||
// if (col == 0) {
|
||||
// color_t* img_data = (color_t*) data_3d_bg + 4;
|
||||
// col = img_data[(LCD_WIDTH_PX * (y - (horizon + 2))) + (x << 1)];
|
||||
// }
|
||||
// setPixel(x * 2, y, col);
|
||||
// setPixel(x * 2 + 1, y, col);
|
||||
// Cast to an unsigned int array so two pixels are stored at once.
|
||||
((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
((unsigned int *)VRAM)[(y + 1) * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
// color_t col = samplePixel(newX >> 1, newY >> 1);
|
||||
// // if (col == 0) {
|
||||
// // color_t* img_data = (color_t*) data_3d_bg + 4;
|
||||
// // col = img_data[(LCD_WIDTH_PX * (y - (horizon + 2))) + (x << 1)];
|
||||
// // }
|
||||
// // setPixel(x * 2, y, col);
|
||||
// // setPixel(x * 2 + 1, y, col);
|
||||
// // Cast to an unsigned int array so two pixels are stored at once.
|
||||
// ((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
// ((unsigned int *)VRAM)[(y + 1) * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
|
||||
wx += dist;
|
||||
}
|
||||
}
|
||||
}
|
||||
// wx += dist;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// void normalFov() {
|
||||
// for (unsigned short y = horizon + 2; y < LCD_HEIGHT_PX; y++) {
|
||||
|
@ -205,36 +209,101 @@ void worldToScreenSpace(int worldX, int worldY, int* x, int* y, int* dist) {
|
|||
printf("x: %d\n", *x);
|
||||
}
|
||||
|
||||
void fullRes() {
|
||||
// Original
|
||||
// void __attribute__ ((noinline)) fullRes() {
|
||||
// int angleCos = fpcos(angle);
|
||||
// int angleSin = fpsin(angle);
|
||||
// for (unsigned short y = horizon + 2; y < LCD_HEIGHT_PX; y++) {
|
||||
// int dist = newLut[y - horizon];
|
||||
// int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
// for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
// int x2 = wx >> 6;
|
||||
// int y2 = dist;
|
||||
|
||||
// // Rotate by angle
|
||||
// int newX = ((x2 * angleCos) >> 15) + ((y2 * angleSin) >> 15);
|
||||
// int newY = ((y2 * angleCos) >> 15) - ((x2 * angleSin) >> 15);
|
||||
|
||||
// color_t col = samplePixel(newX >> 1, newY >> 1);
|
||||
// // if (col == 0) {
|
||||
// // color_t* img_data = (color_t*) data_3d_bg + 4;
|
||||
// // col = img_data[(LCD_WIDTH_PX * (y - (horizon + 2))) + (x << 1)];
|
||||
// // }
|
||||
// // setPixel(x * 2, y, col);
|
||||
// // setPixel(x * 2 + 1, y, col);
|
||||
// // Cast to an unsigned int array so two pixels are stored at once.
|
||||
// ((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
|
||||
// wx += dist;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// dx/dy version
|
||||
// void __attribute__ ((noinline)) fullRes() {
|
||||
// int angleCos = fpcos(angle);
|
||||
// int angleSin = fpsin(angle);
|
||||
// for (unsigned short y = horizon + 2; y < LCD_HEIGHT_PX; y++) {
|
||||
// int dist = newLut[y - horizon];
|
||||
// int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
|
||||
// int y2 = dist;
|
||||
|
||||
// int newX = (wx * (angleCos >> 6)) + (y2 * angleSin);
|
||||
// int newY = (y2 * angleCos) - (wx * (angleSin >> 6));
|
||||
|
||||
// int dx = (dist * (angleCos >> 6));
|
||||
// int dy = -(dist * (angleSin >> 6));
|
||||
|
||||
// for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
// color_t col = samplePixel(newX >> 16, newY >> 16);
|
||||
// ((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
|
||||
// newX += dx;
|
||||
// newY += dy;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// TODO: Put this in fast on-chip memory?
|
||||
unsigned int* vramLine;
|
||||
|
||||
#ifndef USE_ASM
|
||||
void draw3DLine(int x, int y, int dx, int dy/*, unsigned int* vramLine*/) {
|
||||
for (unsigned short x2 = 0; x2 < LCD_WIDTH_PX / 2; x2++) {
|
||||
color_t col = samplePixel(x >> 16, y >> 16);
|
||||
*vramLine = (col << 16 | col);
|
||||
vramLine++;
|
||||
|
||||
x += dx;
|
||||
y += dy;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void __attribute__ ((noinline)) fullRes() {
|
||||
int angleCos = fpcos(angle);
|
||||
int angleSin = fpsin(angle);
|
||||
for (unsigned short y = horizon + 2; y < LCD_HEIGHT_PX; y++) {
|
||||
int dist = newLut[y - horizon];
|
||||
int wx = -(LCD_WIDTH_PX / 2) / 2 * dist;
|
||||
for (unsigned short x = 0; x < LCD_WIDTH_PX / 2; x++) {
|
||||
int x2 = wx >> 6;
|
||||
int y2 = dist;
|
||||
|
||||
int y2 = dist;
|
||||
|
||||
// Rotate by angle
|
||||
int newX = ((x2 * angleCos) >> 15) + ((y2 * angleSin) >> 15);
|
||||
int newY = ((y2 * angleCos) >> 15) - ((x2 * angleSin) >> 15);
|
||||
int newX = (wx * (angleCos >> 6)) + (y2 * angleSin);
|
||||
int newY = (y2 * angleCos) - (wx * (angleSin >> 6));
|
||||
|
||||
color_t col = samplePixel(newX >> 1, newY >> 1);
|
||||
// if (col == 0) {
|
||||
// color_t* img_data = (color_t*) data_3d_bg + 4;
|
||||
// col = img_data[(LCD_WIDTH_PX * (y - (horizon + 2))) + (x << 1)];
|
||||
// }
|
||||
// setPixel(x * 2, y, col);
|
||||
// setPixel(x * 2 + 1, y, col);
|
||||
// Cast to an unsigned int array so two pixels are stored at once.
|
||||
((unsigned int *)VRAM)[y * (LCD_WIDTH_PX / 2) + x] = (col << 16 | col);
|
||||
int dx = (dist * (angleCos >> 6));
|
||||
int dy = -(dist * (angleSin >> 6));
|
||||
|
||||
wx += dist;
|
||||
}
|
||||
vramLine = (unsigned int *)VRAM + y * (LCD_WIDTH_PX / 2);
|
||||
draw3DLine(newX, newY, dx, dy/*, vramLine*/);
|
||||
}
|
||||
}
|
||||
|
||||
void changedFov() {
|
||||
#define normalFov fullRes
|
||||
|
||||
void __attribute__ ((noinline)) changedFov() {
|
||||
int angleCos = fpcos(angle);
|
||||
int angleSin = fpsin(angle);
|
||||
for (unsigned short y = horizon + 2; y < /*LCD_HEIGHT_PX*/lowResCutoff; y++) {
|
||||
|
@ -293,7 +362,7 @@ void changedFov() {
|
|||
}
|
||||
}
|
||||
|
||||
void draw3D(bool highQuality) {
|
||||
void __attribute__ ((noinline)) draw3D(bool highQuality) {
|
||||
if (hFovModifier == 1 << 12) {
|
||||
if (highQuality) {
|
||||
fullRes();
|
||||
|
|
2
src/3d.h
2
src/3d.h
|
@ -9,4 +9,6 @@ void worldToScreenSpace(int worldX, int worldY, int* x, int* y, int* dist);
|
|||
void draw3D(bool highQuality);
|
||||
extern int hFovModifier;
|
||||
|
||||
void draw3DLine(int x, int y, int dx, int dy/*, unsigned int* vramLine*/);
|
||||
|
||||
#endif // _3D_H_
|
||||
|
|
|
@ -0,0 +1,314 @@
|
|||
.global _getTileID
|
||||
.type _getTileID, @function
|
||||
|
||||
_getTileID: # (r4: xPos, r5: yPos) -> r0
|
||||
mov r4, r0
|
||||
or r5, r0
|
||||
shlr8 r0
|
||||
tst #0b11111000, r0
|
||||
# T is now 0 if we want to return 0
|
||||
bf .return0
|
||||
# otherwise, continue
|
||||
mov #-3, r1
|
||||
shad r1, r4
|
||||
shad r1, r5
|
||||
# multiply r5 by 256
|
||||
shll8 r5
|
||||
add r5, r4
|
||||
# read from the tilemap
|
||||
mov.l .tilemap, r0
|
||||
# now r0 = _tilemap symbol = address of the variable, which is an array
|
||||
# shll2 r4
|
||||
mov.b @(r0, r4), r0
|
||||
rts
|
||||
extu.b r0, r0
|
||||
.return0:
|
||||
rts
|
||||
mov #0, r0
|
||||
# .align 4
|
||||
# .tilemap:
|
||||
# .long _tilemap
|
||||
|
||||
# extern unsigned char tileset[256][64];
|
||||
# extern unsigned short palette[256];
|
||||
# extern unsigned char tilemap[256 * 256];
|
||||
#
|
||||
# unsigned short __attribute__ ((hot)) samplePixel(int xPos, int yPos) {
|
||||
# xPos += xOffset;
|
||||
# yPos += yOffset;
|
||||
#
|
||||
# // Divide by 4
|
||||
# xPos = xPos >> 2;
|
||||
# yPos = yPos >> 2;
|
||||
#
|
||||
# // Get the position of the pixel in the tile
|
||||
# int xPixelInTile = xPos & (8 - 1);
|
||||
# int yPixelInTile = yPos & (8 - 1);
|
||||
#
|
||||
# // Get the colour of the pixel in the tile
|
||||
# unsigned char tileID = getTileID(xPos, yPos);
|
||||
# unsigned char index = tileset[tileID][(yPixelInTile * 8) + xPixelInTile];
|
||||
# return palette[index];
|
||||
# }
|
||||
|
||||
.global _samplePixel
|
||||
.type _samplePixel, @function
|
||||
|
||||
_samplePixel: # (r4: xPos, r5: yPos) -> r0
|
||||
# TODO: When inlining this into the loop, we can make sure to only do these loads once if we have the registers available
|
||||
# add xOffset to xPos
|
||||
mov.l .xOffset, r1
|
||||
mov.w @r1, r1
|
||||
add r1, r4
|
||||
# add yOffset to yPos
|
||||
mov.l .yOffset, r1
|
||||
mov.w @r1, r1
|
||||
add r1, r5
|
||||
# divide by 4
|
||||
shlr2 r4
|
||||
shlr2 r5
|
||||
# get the position of the pixel in the tile
|
||||
# copy them into other registers so we can use the original ones for the tileID
|
||||
mov r4, r6
|
||||
mov #0b111, r0
|
||||
and r0, r6
|
||||
mov r5, r7
|
||||
and r0, r7
|
||||
# get the colour of the pixel in the tile
|
||||
# get the tile ID by calling getTileID
|
||||
# Inlined version of getTileID
|
||||
|
||||
|
||||
mov r4, r0
|
||||
or r5, r0
|
||||
shlr8 r0
|
||||
tst #0b11111000, r0
|
||||
# T is now 0 if we want to return 0
|
||||
bf .return0v2
|
||||
# otherwise, continue
|
||||
mov #-3, r1
|
||||
shad r1, r4
|
||||
shad r1, r5
|
||||
# multiply r5 by 256
|
||||
shll8 r5
|
||||
add r5, r4
|
||||
# read from the tilemap
|
||||
mov.l .tilemap, r0
|
||||
# now r0 = _tilemap symbol = address of the variable, which is an array
|
||||
# shll2 r4
|
||||
mov.b @(r0, r4), r0
|
||||
bra .end
|
||||
extu.b r0, r0
|
||||
.return0v2:
|
||||
# rts
|
||||
mov #0, r0
|
||||
.end:
|
||||
|
||||
|
||||
|
||||
# r0 now contains the tile ID
|
||||
# multiply r7 by 8
|
||||
# (Not in a branch delay slot: this runs after the .end label on both paths)
|
||||
shll2 r7
|
||||
add r7, r7
|
||||
# add r6 to r7
|
||||
add r6, r7
|
||||
# multiply r0 by 64
|
||||
shll8 r0
|
||||
shlr2 r0
|
||||
# add r0 to r7
|
||||
add r0, r7
|
||||
# read from the tileset
|
||||
mov.l .tileset, r0
|
||||
# now r0 = _tileset symbol = address of the variable, which is an array
|
||||
mov.b @(r0, r7), r0
|
||||
extu.b r0, r0
|
||||
# read from the palette
|
||||
mov.l .palette, r1
|
||||
# now r1 = _palette symbol = address of the variable, which is an array
|
||||
# multiply r0 by 2
|
||||
add r0, r0
|
||||
# read from the palette
|
||||
mov.w @(r0, r1), r0
|
||||
rts
|
||||
extu.w r0, r0
|
||||
|
||||
# void draw3DLine(int x, int y, int dx, int dy/*, unsigned int* vramLine*/) {
|
||||
# for (unsigned short x2 = 0; x2 < 198; x2++) {
|
||||
# color_t col = samplePixel(x >> 16, y >> 16);
|
||||
# *vramLine = (col << 16 | col);
|
||||
# vramLine++;
|
||||
#
|
||||
# x += dx;
|
||||
# y += dy;
|
||||
# }
|
||||
# }
|
||||
|
||||
.global _draw3DLine
|
||||
.type _draw3DLine, @function
|
||||
|
||||
_draw3DLine: # (r4: x, r5: y, r6: dx, r7: dy) -> r0
|
||||
# vramLine is stored in a global because I don't want to deal with the stack
|
||||
# Save registers
|
||||
mov.l r8,@-r15
|
||||
mov.l r9,@-r15
|
||||
mov.l r10,@-r15
|
||||
mov.l r11,@-r15
|
||||
mov.l r12,@-r15
|
||||
mov.l r13,@-r15
|
||||
mov.l r14,@-r15
|
||||
|
||||
# Copy dx and dy into r13 and r14 (otherwise they get overwritten)
|
||||
mov r6, r13
|
||||
mov r7, r14
|
||||
|
||||
# Load vramLine into r8
|
||||
mov.l .vramLine, r8
|
||||
mov.l @r8, r8
|
||||
# Use r9 as the loop counter
|
||||
mov #0, r9
|
||||
.loop:
|
||||
# TODO: Avoid the mov by making r10/r11 the shifted values, and modify the inlined samplePixel to use r10/r11 instead of r4/r5?
|
||||
# Set r4 to x >> 16
|
||||
mov r4, r10
|
||||
# shlr16 r4
|
||||
swap.w r4, r4
|
||||
exts.w r4, r4
|
||||
# Set r5 to y >> 16
|
||||
mov r5, r11
|
||||
# shlr16 r5
|
||||
swap.w r5, r5
|
||||
exts.w r5, r5
|
||||
# Inlined copy of samplePixel (r4: xPos, r5: yPos) -> r0; no call is made
|
||||
|
||||
|
||||
# TODO: When inlining this into the loop, we can make sure to only do these loads once if we have the registers available
|
||||
# add xOffset to xPos
|
||||
mov.l .xOffset, r1
|
||||
mov.w @r1, r1
|
||||
add r1, r4
|
||||
# add yOffset to yPos
|
||||
mov.l .yOffset, r1
|
||||
mov.w @r1, r1
|
||||
add r1, r5
|
||||
# divide by 4
|
||||
shlr2 r4
|
||||
shlr2 r5
|
||||
# get the position of the pixel in the tile
|
||||
# copy them into other registers so we can use the original ones for the tileID
|
||||
mov r4, r6
|
||||
mov #0b111, r0
|
||||
and r0, r6
|
||||
mov r5, r7
|
||||
and r0, r7
|
||||
# get the colour of the pixel in the tile
|
||||
# get the tile ID by calling getTileID
|
||||
# Inlined version of getTileID
|
||||
mov r4, r0
|
||||
or r5, r0
|
||||
shlr8 r0
|
||||
tst #0b11111000, r0
|
||||
# T is now 0 if we want to return 0
|
||||
bf .return0v3
|
||||
# otherwise, continue
|
||||
mov #-3, r1
|
||||
shad r1, r4
|
||||
shad r1, r5
|
||||
# multiply r5 by 256
|
||||
shll8 r5
|
||||
add r5, r4
|
||||
# read from the tilemap
|
||||
mov.l .tilemap, r0
|
||||
# now r0 = _tilemap symbol = address of the variable, which is an array
|
||||
# shll2 r4
|
||||
mov.b @(r0, r4), r0
|
||||
bra .endv2
|
||||
extu.b r0, r0
|
||||
.return0v3:
|
||||
# rts
|
||||
mov #0, r0
|
||||
.endv2:
|
||||
# r0 now contains the tile ID
|
||||
# multiply r7 by 8
|
||||
# (Not in a branch delay slot: this runs after the .endv2 label on both paths)
|
||||
shll2 r7
|
||||
add r7, r7
|
||||
# add r6 to r7
|
||||
add r6, r7
|
||||
# multiply r0 by 64
|
||||
shll8 r0
|
||||
shlr2 r0
|
||||
# add r0 to r7
|
||||
add r0, r7
|
||||
# read from the tileset
|
||||
mov.l .tileset, r0
|
||||
# now r0 = _tileset symbol = address of the variable, which is an array
|
||||
mov.b @(r0, r7), r0
|
||||
extu.b r0, r0
|
||||
# read from the palette
|
||||
mov.l .palette, r1
|
||||
# now r1 = _palette symbol = address of the variable, which is an array
|
||||
# multiply r0 by 2
|
||||
add r0, r0
|
||||
# read from the palette
|
||||
mov.w @(r0, r1), r0
|
||||
extu.w r0, r0
|
||||
# End of samplePixel
|
||||
|
||||
|
||||
|
||||
|
||||
# Set r12 to the result
|
||||
mov r0, r12
|
||||
# Shift r12 left by 16
|
||||
shll16 r12
|
||||
# OR r12 with r0
|
||||
or r0, r12
|
||||
# Store r12 in vramLine
|
||||
mov.l r12, @r8
|
||||
# Increment vramLine
|
||||
add #4, r8
|
||||
# Put the original values of r4 and r5 back
|
||||
mov r10, r4
|
||||
mov r11, r5
|
||||
# Increment x and y by dx and dy
|
||||
add r13, r4
|
||||
add r14, r5
|
||||
# Increment the loop counter
|
||||
add #1, r9
|
||||
# Check if we're done
|
||||
# Load 198 from .halfWidth
|
||||
mov.w .halfWidth, r10
|
||||
cmp/eq r9, r10
|
||||
bf .loop
|
||||
|
||||
# Restore registers
|
||||
mov.l @r15+,r14
|
||||
mov.l @r15+,r13
|
||||
mov.l @r15+,r12
|
||||
mov.l @r15+,r11
|
||||
mov.l @r15+,r10
|
||||
mov.l @r15+,r9
|
||||
rts
|
||||
mov.l @r15+,r8
|
||||
.align 2
|
||||
.halfWidth:
|
||||
.word 198
|
||||
.align 4
|
||||
.vramLine:
|
||||
.long _vramLine
|
||||
.align 4
|
||||
.xOffset:
|
||||
.long _xOffset
|
||||
.align 4
|
||||
.yOffset:
|
||||
.long _yOffset
|
||||
.align 4
|
||||
.tilemap:
|
||||
.long _tilemap
|
||||
.align 4
|
||||
.tileset:
|
||||
.long _tileset
|
||||
.align 4
|
||||
.palette:
|
||||
.long _palette
|
|
@ -9,18 +9,20 @@
|
|||
// Track width in pixels: 256 tiles of tileSize pixels each. Parenthesised so
// that expressions such as `n / trackImageWidth` divide by the whole product
// rather than by 256 and then multiplying by tileSize.
#define trackImageWidth (256 * tileSize)
|
||||
// Track height in pixels: 256 tiles of tileSize pixels each. Parenthesised so
// that expressions such as `n / trackImageHeight` divide by the whole product
// rather than by 256 and then multiplying by tileSize.
#define trackImageHeight (256 * tileSize)
|
||||
|
||||
unsigned char getTileID(short xPos, short yPos) {
|
||||
#ifndef USE_ASM
|
||||
unsigned char __attribute__ ((hot)) getTileID(int xPos, int yPos) {
|
||||
// __builtin_expect(xPos < 0 || xPos >= trackImageWidth || yPos < 0 || yPos >= trackImageHeight, 0);
|
||||
if((unsigned short) xPos >= trackImageWidth || (unsigned short) yPos >= trackImageHeight) {
|
||||
if ((unsigned short) xPos >= trackImageWidth || (unsigned short) yPos >= trackImageHeight) {
|
||||
return 0; // Grass
|
||||
} else {
|
||||
// Divide by 8
|
||||
int xPixel = xPos >> 3;
|
||||
int yPixel = yPos >> 3;
|
||||
|
||||
return /*track.*/tilemap[((yPixel * (trackImageWidth / tileSize)) + xPixel)];
|
||||
return /*track.*/tilemap[(yPixel * (trackImageWidth / tileSize)) + xPixel];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// unsigned char getTileID(short l0, short l1) {
|
||||
// unsigned int si0, si1, si2;
|
||||
|
@ -55,7 +57,8 @@ enum TileType getTileType(int tileID) {
|
|||
return (enum TileType) track.tileTypes[tileID];
|
||||
}
|
||||
|
||||
unsigned short samplePixel(short xPos, short yPos) {
|
||||
#ifndef USE_ASM
|
||||
unsigned short __attribute__ ((hot)) samplePixel(int xPos, int yPos) {
|
||||
xPos += xOffset;
|
||||
yPos += yOffset;
|
||||
|
||||
|
@ -72,6 +75,7 @@ unsigned short samplePixel(short xPos, short yPos) {
|
|||
return /*track.*/palette[index];
|
||||
// return index;
|
||||
}
|
||||
#endif
|
||||
|
||||
// unsigned short samplePixelFast(short xPos, short yPos) {
|
||||
// xPos += xOffset;
|
||||
|
|
|
@ -3,8 +3,9 @@
|
|||
|
||||
#include <stdbool.h>
|
||||
|
||||
unsigned char getTileID(short xPos, short yPos);
|
||||
// unsigned char getTileID(short xPos, short yPos);
|
||||
unsigned char getTileID(int xPos, int yPos);
|
||||
enum TileType getTileType(int tileID);
|
||||
unsigned short samplePixel(short xPos, short yPos);
|
||||
unsigned short samplePixel(int xPos, int yPos);
|
||||
|
||||
#endif // _TILEMAP_H
|
||||
|
|
Loading…
Reference in New Issue