Add a PRAM heap and move some of the data there

It's a 32-bit-access-only heap. On the Ultimate Doom WAD the amount of
data moved is about 17 kiB, which is not a lot, but arrays with 4-byte
elements are not easy to find in Doom.
This commit is contained in:
Lephenixnoir 2021-08-12 21:01:35 +02:00
parent 1639adce59
commit 751d5f186d
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
7 changed files with 175 additions and 13 deletions

2
README
View File

@ -28,8 +28,6 @@ WAD support TODO:
Technical support TODO:
-> Supply more VRAM memory to internal allocator
=> Merge internal heap into Z_Zone? (< 50 kB)
=> Rewrite video code to use long PRAM0 access? (138 kB)
=> Identify arrays that can go to PRAM, eg. lumpcache
=> Remove multiply-avoiding lookup tables?
-> Rate-limit the game when overclocking
-> Add more SHORT() to avoid having to copy-align lumps

97
cgdoom/cgdoom-alloc.c Normal file
View File

@ -0,0 +1,97 @@
#include "cgdoom-alloc.h"
#include "z_zone.h"
/* Header placed immediately before every block of the PRAM heap. All blocks,
   free or allocated, are chained in a doubly-linked list.
   The node type is volatile and we use -fstrict-volatile-bitfields to enforce
   the 32-bit access size: the two bit-fields below add up to 32 bits and are
   declared with a 32-bit type. */
struct node;
typedef volatile struct node node_t;
struct node {
/* Neighbors, or NULL at the head and tail of the list */
node_t *prev, *next;
/* Size of the block's payload in bytes (this header is not counted) */
uint32_t size :24;
/* Whether the block is free (non-zero) or allocated (zero) */
uint32_t free :8;
};
/* First node of the list; NULL until CGD_PRAM_Init() succeeds, in which case
   every allocation falls back to Z_Malloc(). */
static node_t *arena = NULL;
/* Bounds of the arena (start inclusive, end exclusive), used by
   CGD_PRAM_Free() to find whether data has been allocated here. */
static void *arena_start, *arena_end;
/* Shrink `node` to `size` payload bytes and turn the bytes cut off into a new
   free node linked right after it. When fewer than 32 bytes (header included)
   would be cut off, the node is left untouched. */
static void split(node_t *node, int size)
{
    int leftover = node->size - size;

    if(leftover >= 32) {
        /* The new header starts right after the payload kept by `node` */
        node_t *tail = (node_t *)((char *)node + sizeof(node_t) + size);
        node_t *after = node->next;

        tail->prev = node;
        tail->next = after;
        tail->size = leftover - sizeof(node_t);
        tail->free = 1;

        node->size = size;
        node->next = tail;

        /* Re-attach the former successor, if any, behind the new node */
        if(after) after->prev = tail;
    }
}
/* Absorb the successor of `node` into `node`: the successor's header and
   payload both become payload of `node`. The caller must make sure that both
   nodes are free. Does nothing when `node` is the last node of the list. */
static void merge_with_next(node_t *node)
{
    node_t *absorbed = node->next;
    if(absorbed == NULL) return;

    node_t *after = absorbed->next;

    node->size += sizeof(node_t) + absorbed->size;
    node->next = after;
    if(after != NULL) after->prev = node;
}
/* Set up the heap on the memory range [start, end) as a single free block.
   Ranges shorter than 256 bytes are rejected: the heap is then left empty and
   every allocation is served by Z_Malloc() instead. */
void CGD_PRAM_Init(void *start, void *end)
{
    /* Forget any previous arena first. With NULL bounds CGD_PRAM_Free() sends
       every pointer to Z_Free(), which is what we want if we bail out below;
       leaving stale bounds would make it treat old pointers as list nodes. */
    arena = NULL;
    arena_start = NULL;
    arena_end = NULL;

    if((char *)end - (char *)start < 256) return;

    arena = start;
    arena->prev = NULL;
    arena->next = NULL;
    /* The header of the first node is taken out of the usable space */
    arena->size = ((char *)end - (char *)start) - sizeof(node_t);
    arena->free = 1;

    arena_start = start;
    arena_end = end;
}
/* Allocate `size` bytes, rounded up to a multiple of 4. The first free block
   of the list that is large enough is used; when there is none, the request
   is forwarded to Z_Malloc() with the PU_STATIC tag. */
void *CGD_PRAM_Malloc(size_t size)
{
    /* Round up to whole 32-bit words */
    size = (size + 3) & ~(size_t)3;

    /* Walk the list until a free block of sufficient size shows up */
    node_t *block = arena;
    while(block && !(block->free && block->size >= size))
        block = block->next;

    if(!block) return Z_Malloc(size, PU_STATIC, 0);

    /* Give back the unused end of the block, then hand out its payload */
    split(block, size);
    block->free = 0;
    return (void *)(block + 1);
}
void CGD_PRAM_Free(void *ptr)
{
if(!ptr) return;
if(ptr < arena_start || ptr >= arena_end) return Z_Free(ptr);
node_t *node = (void *)ptr - sizeof(node_t);
node->free = 1;
if(node->next && node->next->free) merge_with_next(node);
if(node->prev && node->prev->free) merge_with_next(node->prev);
}
/* Allocate `size` bytes with CGD_PRAM_Malloc() and clear them using 32-bit
   writes only. Returns NULL if the underlying allocation returns NULL. */
void *CGD_PRAM_Zalloc(size_t size)
{
    void *ptr = CGD_PRAM_Malloc(size);
    if(!ptr) return NULL;

    /* CGD_PRAM_Malloc() rounds the size up to a multiple of 4, so the last,
       possibly partial, word belongs to the block and must be cleared too. */
    size_t word_count = (size + 3) / 4;

    /* volatile keeps every store an individual 32-bit write; a plain loop
       could be turned into a memset() call by the compiler, and memset()
       gives no guarantee on the access size. */
    volatile uint32_t *words = ptr;
    for(size_t i = 0; i < word_count; i++) words[i] = 0;

    return ptr;
}

56
cgdoom/cgdoom-alloc.h Normal file
View File

@ -0,0 +1,56 @@
#ifndef CGDOOM_ALLOC_H
#define CGDOOM_ALLOC_H
#include "platform.h"
#include <stddef.h>
/* The simple CGDoom allocator from SPU2 memory
In CGDoom, the main bottleneck is memory. Speed is a concern but the CG-50
is good enough at it that most titles and levels are pretty playable.
However, a level that doesn't load is never playable. Therefore, memory
limits cause more problems to the user experience than any other problem.
To deal with this, several tools have been used; mainly the Doom allocator
in z_zone.c has been extended to support multiple zones, which are supplied
in the modified I_ZoneBase() function. This, and of course different memory
areas have been freed up of whatever data they held in order to be used as
heap.
However, there are some areas that cannot be included there. Even the OS
heap can be used as a default in Z_Malloc() with some effort, but one of
the resources escapes even these options: SPU2 memory.
I don't want to delve into the specifics of SPU2 memory as it's extremely
strange; there are only two things that you should know about it:
* There is a 160 kiB area called PRAM0 that only supports 32-bit accesses.
* There is one 168 kiB area and two 48 kiB areas, called XRAM0, YRAM0 and
YRAM1, that only support 32-bit accesses and every access only addresses
24 bits of actual memory (so they span 224 kiB and 64 kiB of pointers).
PRAM0 can be used fairly easily but we must guarantee that only 32-bit
accesses are used. This means it's restricted to arrays of pointers, ints,
and fixed_t mainly. In addition to the data though, the control structures
of the heap must also use only 32-bit accesses, which would require pretty
large changes in Z_Malloc.
Instead, CGDoom provides a very, very simple heap structure on PRAM0. This
is a trivial doubly-linked list with merging, intended to move a handful of
static buffers out of the main heap. Its use is deliberately marginal.
For stability, the allocator defaults to Z_Malloc() on failure. This is
because a number of arrays that we direct to PRAM0 have variable size (like
the WAD lump cache) and this extra flexibility is required to consistently
work on a variety of WADs. (Diversions to the OS heap in previous versions
of CGDoom had such problems, and I myself moved the lump cache to Z_Malloc
because it didn't fit in some games.) */
/* Initialize the area: the heap is set up on the range [start, end). If the
   range is shorter than 256 bytes no heap is created and every allocation
   falls back to Z_Malloc(). */
void CGD_PRAM_Init(void *start, void *end);
/* Allocation functions.
   CGD_PRAM_Malloc() rounds the size up to a multiple of 4 and defaults to
   Z_Malloc() with the PU_STATIC tag when no free block is large enough.
   CGD_PRAM_Free() ignores NULL and passes pointers that are outside of the
   area to Z_Free().
   CGD_PRAM_Zalloc() allocates like CGD_PRAM_Malloc() and clears the block
   with 32-bit writes; the size should be a multiple of 4. */
void *CGD_PRAM_Malloc(size_t size);
void CGD_PRAM_Free(void *ptr);
void *CGD_PRAM_Zalloc(size_t size);
#endif /* CGDOOM_ALLOC_H */

View File

@ -1,6 +1,7 @@
#include "platform.h"
#include "os.h"
#include "cgdoom-ui.h"
#include "cgdoom-alloc.h"
#ifdef CG_EMULATOR
static int iAllocSum = 0;
@ -693,6 +694,12 @@ int main(void){
int key;
GetKey(&key);
}
/* Initialize the PRAM allocator */
void *PRAM0_start = (void *)0xfe200000;
void *PRAM0_end = (void *)0xfe228000;
PRAM0_start += gWADMap.miItemCount * sizeof(FileMappingItem);
CGD_PRAM_Init(PRAM0_start, PRAM0_end);
}
memset(VRAM, 0, WIDTH*HEIGHT*2);

View File

@ -30,6 +30,7 @@
#include "os.h"
#include "cgdoom-alloc.h"
#include "doomdef.h"
#include "doomstat.h"
@ -275,7 +276,7 @@ void D_DoomLoop (void)
}
// I_ShutdownGraphics();
free(lumpinfo);
free(lumpcache);
CGD_PRAM_Free(lumpcache);
I_ShutdownGraphics();
return;
}

View File

@ -41,6 +41,7 @@
#include "r_sky.h"
#include "os.h"
#include "cgdoom-alloc.h"
#include "r_data.h"
@ -482,13 +483,14 @@ void R_InitTextures (void)
numtextures = numtextures1 + numtextures2;
textures = (texture_t **)Z_Malloc (numtextures*4, PU_STATIC, 0);
texturecolumnlump = (short**)Z_Malloc (numtextures*4, PU_STATIC, 0);
texturecolumnofs = (unsigned short**)Z_Malloc (numtextures*4, PU_STATIC, 0);
texturecomposite = (byte**)Z_Malloc (numtextures*4, PU_STATIC, 0);
texturecompositesize = (int*)Z_Malloc (numtextures*4, PU_STATIC, 0);
texturewidthmask = (int*)Z_Malloc (numtextures*4, PU_STATIC, 0);
textureheight = (fixed_t*)Z_Malloc (numtextures*4, PU_STATIC, 0);
/* CGDoom: Allocate all of these in PRAM, since they have 4-byte elements */
textures = (texture_t **)CGD_PRAM_Malloc (numtextures*4);
texturecolumnlump = (short**)CGD_PRAM_Malloc (numtextures*4);
texturecolumnofs = (unsigned short**)CGD_PRAM_Malloc (numtextures*4);
texturecomposite = (byte**)CGD_PRAM_Malloc (numtextures*4);
texturecompositesize = (int*)CGD_PRAM_Malloc (numtextures*4);
texturewidthmask = (int*)CGD_PRAM_Malloc (numtextures*4);
textureheight = (fixed_t*)CGD_PRAM_Malloc (numtextures*4);
totalwidth = 0;
@ -575,7 +577,7 @@ void R_InitTextures (void)
R_GenerateLookup (i);
// Create translation table for global animation.
texturetranslation = (int *)Z_Malloc ((numtextures+1)*4, PU_STATIC, 0);
texturetranslation = (int *)CGD_PRAM_Malloc ((numtextures+1)*4);
for (i=0 ; i<numtextures ; i++)
texturetranslation[i] = i;
@ -598,7 +600,7 @@ void R_InitFlats (void)
numflats = lastflat - firstflat + 1;
// Create translation table for global animation.
flattranslation = (int *)Z_Malloc ((numflats+1)*4, PU_STATIC, 0);
flattranslation = (int *)CGD_PRAM_Malloc ((numflats+1)*4);
for (i=0 ; i<numflats ; i++)
flattranslation[i] = i;

View File

@ -38,6 +38,7 @@
#endif*/
#include "os.h"
#include "cgdoom-alloc.h"
#include "doomtype.h"
#include "m_swap.h"
@ -174,7 +175,7 @@ int W_InitMultipleFiles(void)
// set up caching
//printf ("numlumps = %i \n",numlumps);
lumpcache = (void **)CGDCalloc(numlumps * sizeof(*lumpcache));
lumpcache = (void **)CGD_PRAM_Zalloc(numlumps * sizeof(*lumpcache));
if (!lumpcache)
{