Add a PRAM heap and move some of the data there
It's a 32-bit-access-only heap. On the Ultimate Doom WAD the amount of data moved is about 17 kiB, which is not a lot, but arrays with 4-byte elements are not easy to find in Doom.
This commit is contained in:
parent
1639adce59
commit
751d5f186d
2
README
2
README
|
@ -28,8 +28,6 @@ WAD support TODO:
|
|||
Technical support TODO:
|
||||
-> Supply more VRAM memory to internal allocator
|
||||
=> Merge internal heap into Z_Zone? (< 50 kB)
|
||||
=> Rewrite video code to use long PRAM0 access? (138 kB)
|
||||
=> Identify arrays that can go to PRAM, eg. lumpcache
|
||||
=> Remove multiply-avoiding lookup tables?
|
||||
-> Rate-limit the game when overclocking
|
||||
-> Add more SHORT() to avoid having to copy-align lumps
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
#include "cgdoom-alloc.h"
|
||||
#include "z_zone.h"
|
||||
|
||||
/* We use -fstrict-volatile-bitfields to enforce the 32-bit access size. */
|
||||
struct node;
/* Block headers are always accessed through a volatile-qualified type. */
typedef volatile struct node node_t;

/* Header stored in front of every block of the arena. */
struct node {
    /* Previous block in the list; NULL for the first block */
    node_t *prev;
    /* Next block in the list; NULL for the last block */
    node_t *next;
    /* Number of payload bytes following this header */
    uint32_t size :24;
    /* Non-zero while the block is available for allocation */
    uint32_t free :8;
};

/* Head of the block list; NULL while no arena is set up. */
static node_t *arena = NULL;

/* Limits of the arena; CGD_PRAM_Free() uses them to tell whether a pointer
   was handed out from here. */
static void *arena_start;
static void *arena_end;
|
||||
|
||||
/* Split a free node into two (if there's enough space for a second one). */
|
||||
static void split(node_t *node, int size)
|
||||
{
|
||||
int remainder = node->size - size;
|
||||
if(remainder < 32) return;
|
||||
|
||||
node_t *right = (void *)node + sizeof(node_t) + size;
|
||||
right->prev = node;
|
||||
right->next = node->next;
|
||||
right->size = remainder - sizeof(node_t);
|
||||
right->free = 1;
|
||||
|
||||
node->size = size;
|
||||
node->next = right;
|
||||
if(right->next) right->next->prev = right;
|
||||
}
|
||||
|
||||
/* Merge this free node with the next one (also needs to be free). */
|
||||
static void merge_with_next(node_t *node)
|
||||
{
|
||||
if(!node->next) return;
|
||||
node->size += sizeof(node_t) + node->next->size;
|
||||
node->next = node->next->next;
|
||||
if(node->next) node->next->prev = node;
|
||||
}
|
||||
|
||||
void CGD_PRAM_Init(void *start, void *end)
|
||||
{
|
||||
arena = NULL;
|
||||
if(end - start < 256) return;
|
||||
|
||||
arena = start;
|
||||
arena->prev = NULL;
|
||||
arena->next = NULL;
|
||||
arena->size = (end - start) - sizeof(node_t);
|
||||
arena->free = 1;
|
||||
|
||||
arena_start = start;
|
||||
arena_end = end;
|
||||
}
|
||||
|
||||
void *CGD_PRAM_Malloc(size_t size)
|
||||
{
|
||||
node_t *candidate;
|
||||
size = (size + 3) & -4;
|
||||
|
||||
/* Find a free block in the list */
|
||||
for(candidate = arena; candidate; candidate = candidate->next) {
|
||||
if(candidate->free && candidate->size >= size) break;
|
||||
}
|
||||
if(!candidate) return Z_Malloc(size, PU_STATIC, 0);
|
||||
|
||||
/* Prepare and return that block */
|
||||
split(candidate, size);
|
||||
candidate->free = 0;
|
||||
return (void *)candidate + sizeof(node_t);
|
||||
}
|
||||
|
||||
void CGD_PRAM_Free(void *ptr)
|
||||
{
|
||||
if(!ptr) return;
|
||||
if(ptr < arena_start || ptr >= arena_end) return Z_Free(ptr);
|
||||
|
||||
node_t *node = (void *)ptr - sizeof(node_t);
|
||||
node->free = 1;
|
||||
if(node->next && node->next->free) merge_with_next(node);
|
||||
if(node->prev && node->prev->free) merge_with_next(node->prev);
|
||||
}
|
||||
|
||||
/* Allocate `size` bytes with CGD_PRAM_Malloc() and clear them.

   Returns NULL if the underlying allocation returns NULL. The block is
   cleared in whole 32-bit words, covering the size rounded up to a multiple
   of 4 (the same rounding CGD_PRAM_Malloc() applies), so the trailing bytes
   of a size that is not a multiple of 4 are zeroed as well. */
void *CGD_PRAM_Zalloc(size_t size)
{
    /* volatile keeps each store a distinct 32-bit access; a plain store
       loop may be replaced by the compiler with a memset() call, which
       gives no guarantee on the access width */
    volatile uint32_t *words = CGD_PRAM_Malloc(size);
    if(!words) return NULL;

    size_t count = (size + 3) / 4;
    for(size_t i = 0; i < count; i++) words[i] = 0;

    return (void *)words;
}
|
|
@ -0,0 +1,56 @@
|
|||
#ifndef CGDOOM_ALLOC_H
|
||||
#define CGDOOM_ALLOC_H
|
||||
|
||||
#include "platform.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/* The simple CGDoom allocator from SPU2 memory
|
||||
|
||||
In CGDoom, the main bottleneck is memory. Speed is a concern but the CG-50
|
||||
is good enough at it that most titles and levels are pretty playable.
|
||||
However, a level that doesn't load is never playable. Therefore, memory
|
||||
limits cause more problems to the user experience than any other problem.
|
||||
|
||||
To deal with this, several tools have been used; mainly the Doom allocator
|
||||
in z_zone.c has been extended to support multiple zones, which are supplied
|
||||
in the modified I_ZoneBase() function. In addition, various memory areas
|
||||
have been emptied of whatever data they held so that they can be used as
|
||||
heap.
|
||||
|
||||
However, there are some areas that cannot be included there. Even the OS
|
||||
heap can be used as a default in Z_Malloc() with some effort, but one of
|
||||
the resources escapes even these options: SPU2 memory.
|
||||
|
||||
I don't want to delve into the specifics of SPU2 memory as it's extremely
|
||||
strange; there are only two things that you should know about it:
|
||||
* There is a 160 kiB area called PRAM0 that only supports 32-bit accesses.
|
||||
* There is one 168 kiB area and two 48 kiB areas, called XRAM0, YRAM0 and
|
||||
YRAM1, that only support 32-bit accesses and every access only addresses
|
||||
24 bits of actual memory (so they span 224 kiB and 64 kiB of pointers).
|
||||
|
||||
PRAM0 can be used fairly easily but we must guarantee that only 32-bit
|
||||
accesses are used. This means it's restricted to arrays of pointers, ints,
|
||||
and fixed_t mainly. In addition to the data though, the control structures
|
||||
of the heap must also use only 32-bit accesses, which would require pretty
|
||||
large changes in Z_Malloc.
|
||||
|
||||
Instead, CGDoom provides a very, very simple heap structure on PRAM0. This
|
||||
is a trivial doubly-linked list with merging, intended to move a handful
|
||||
of static buffers out of the main heap. Its use is deliberately marginal.
|
||||
|
||||
For stability, the allocator defaults to Z_Malloc() on failure. This is
|
||||
because a number of arrays that we direct to PRAM0 have variable size (like
|
||||
the WAD lump cache) and this extra flexibility is required to consistently
|
||||
work on a variety of WADs. (Diversions to the OS heap in previous versions
|
||||
of CGDoom had such problems, and I myself moved the lump cache to Z_Malloc
|
||||
because it didn't fit in some games.) */
|
||||
|
||||
/* Initialize the area. */
|
||||
void CGD_PRAM_Init(void *start, void *end);
|
||||
|
||||
/* Allocation functions. */
|
||||
void *CGD_PRAM_Malloc(size_t size);
|
||||
void CGD_PRAM_Free(void *ptr);
|
||||
void *CGD_PRAM_Zalloc(size_t size);
|
||||
|
||||
#endif /* CGDOOM_ALLOC_H */
|
|
@ -1,6 +1,7 @@
|
|||
#include "platform.h"
|
||||
#include "os.h"
|
||||
#include "cgdoom-ui.h"
|
||||
#include "cgdoom-alloc.h"
|
||||
|
||||
#ifdef CG_EMULATOR
|
||||
static int iAllocSum = 0;
|
||||
|
@ -693,6 +694,12 @@ int main(void){
|
|||
int key;
|
||||
GetKey(&key);
|
||||
}
|
||||
|
||||
/* Initialize the PRAM allocator */
|
||||
void *PRAM0_start = (void *)0xfe200000;
|
||||
void *PRAM0_end = (void *)0xfe228000;
|
||||
PRAM0_start += gWADMap.miItemCount * sizeof(FileMappingItem);
|
||||
CGD_PRAM_Init(PRAM0_start, PRAM0_end);
|
||||
}
|
||||
|
||||
memset(VRAM, 0, WIDTH*HEIGHT*2);
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
|
||||
#include "os.h"
|
||||
#include "cgdoom-alloc.h"
|
||||
|
||||
#include "doomdef.h"
|
||||
#include "doomstat.h"
|
||||
|
@ -275,7 +276,7 @@ void D_DoomLoop (void)
|
|||
}
|
||||
// I_ShutdownGraphics();
|
||||
free(lumpinfo);
|
||||
free(lumpcache);
|
||||
CGD_PRAM_Free(lumpcache);
|
||||
I_ShutdownGraphics();
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "r_sky.h"
|
||||
|
||||
#include "os.h"
|
||||
#include "cgdoom-alloc.h"
|
||||
|
||||
#include "r_data.h"
|
||||
|
||||
|
@ -482,13 +483,14 @@ void R_InitTextures (void)
|
|||
|
||||
numtextures = numtextures1 + numtextures2;
|
||||
|
||||
textures = (texture_t **)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
texturecolumnlump = (short**)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
texturecolumnofs = (unsigned short**)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
texturecomposite = (byte**)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
texturecompositesize = (int*)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
texturewidthmask = (int*)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
textureheight = (fixed_t*)Z_Malloc (numtextures*4, PU_STATIC, 0);
|
||||
/* CGDoom: Allocate all of these in PRAM, since they have 4-byte elements */
|
||||
textures = (texture_t **)CGD_PRAM_Malloc (numtextures*4);
|
||||
texturecolumnlump = (short**)CGD_PRAM_Malloc (numtextures*4);
|
||||
texturecolumnofs = (unsigned short**)CGD_PRAM_Malloc (numtextures*4);
|
||||
texturecomposite = (byte**)CGD_PRAM_Malloc (numtextures*4);
|
||||
texturecompositesize = (int*)CGD_PRAM_Malloc (numtextures*4);
|
||||
texturewidthmask = (int*)CGD_PRAM_Malloc (numtextures*4);
|
||||
textureheight = (fixed_t*)CGD_PRAM_Malloc (numtextures*4);
|
||||
|
||||
totalwidth = 0;
|
||||
|
||||
|
@ -575,7 +577,7 @@ void R_InitTextures (void)
|
|||
R_GenerateLookup (i);
|
||||
|
||||
// Create translation table for global animation.
|
||||
texturetranslation = (int *)Z_Malloc ((numtextures+1)*4, PU_STATIC, 0);
|
||||
texturetranslation = (int *)CGD_PRAM_Malloc ((numtextures+1)*4);
|
||||
|
||||
for (i=0 ; i<numtextures ; i++)
|
||||
texturetranslation[i] = i;
|
||||
|
@ -598,7 +600,7 @@ void R_InitFlats (void)
|
|||
numflats = lastflat - firstflat + 1;
|
||||
|
||||
// Create translation table for global animation.
|
||||
flattranslation = (int *)Z_Malloc ((numflats+1)*4, PU_STATIC, 0);
|
||||
flattranslation = (int *)CGD_PRAM_Malloc ((numflats+1)*4);
|
||||
|
||||
for (i=0 ; i<numflats ; i++)
|
||||
flattranslation[i] = i;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#endif*/
|
||||
|
||||
#include "os.h"
|
||||
#include "cgdoom-alloc.h"
|
||||
|
||||
#include "doomtype.h"
|
||||
#include "m_swap.h"
|
||||
|
@ -174,7 +175,7 @@ int W_InitMultipleFiles(void)
|
|||
|
||||
// set up caching
|
||||
//printf ("numlumps = %i \n",numlumps);
|
||||
lumpcache = (void **)CGDCalloc(numlumps * sizeof(*lumpcache));
|
||||
lumpcache = (void **)CGD_PRAM_Zalloc(numlumps * sizeof(*lumpcache));
|
||||
|
||||
if (!lumpcache)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue