From 751d5f186db1141bd90e707bb63d18d9e7615dbe Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Thu, 12 Aug 2021 21:01:35 +0200 Subject: [PATCH] Add a PRAM heap and move some of the data there It's a 32-bit-access-only heap. On the Ultimate Doom WAD the amount of data moved is about 17 kiB, which is not a lot, but arrays with 4-byte elements are not easy to find in Doom. --- README | 2 - cgdoom/cgdoom-alloc.c | 97 +++++++++++++++++++++++++++++++++++++++++++ cgdoom/cgdoom-alloc.h | 56 +++++++++++++++++++++++++ cgdoom/cgdoom.c | 7 ++++ cgdoom/d_main.c | 3 +- cgdoom/r_data.c | 20 +++++---- cgdoom/w_wad.c | 3 +- 7 files changed, 175 insertions(+), 13 deletions(-) create mode 100644 cgdoom/cgdoom-alloc.c create mode 100644 cgdoom/cgdoom-alloc.h diff --git a/README b/README index 1b91874..6535879 100644 --- a/README +++ b/README @@ -28,8 +28,6 @@ WAD support TODO: Technical support TODO: -> Supply more VRAM memory to internal allocator => Merge internal heap into Z_Zone? (< 50 kB) - => Rewrite video code to use long PRAM0 access? (138 kB) - => Identify arrays that can go to PRAM, eg. lumpcache => Remove multiply-avoiding lookup tables? -> Rate-limit the game when overclocking -> Add more SHORT() to avoid having to copy-align lumps diff --git a/cgdoom/cgdoom-alloc.c b/cgdoom/cgdoom-alloc.c new file mode 100644 index 0000000..125ea33 --- /dev/null +++ b/cgdoom/cgdoom-alloc.c @@ -0,0 +1,97 @@ +#include "cgdoom-alloc.h" +#include "z_zone.h" + +/* We use -fstrict-volatile-bitfields to enforce the 32-bit access size. */ +struct node; +typedef volatile struct node node_t; + +struct node { + /* Neighbors, or NULL at the head and tail of the list */ + node_t *prev, *next; + /* Size of the block, in bytes */ + uint32_t size :24; + /* Whether the block is free */ + uint32_t free :8; +}; + +/* First node of the list. */ +static node_t *arena = NULL; +/* Bounds of the arena, used to find whether data has been allocated here. 
*/
+static void *arena_start, *arena_end;
+
+/* Split a free node in two: keep [size] bytes, make the rest a new free node. */
+static void split(node_t *node, int size)
+{
+    int remainder = node->size - size;
+    if(remainder < 32) return; /* Leftover too small: leave the node whole */
+
+    node_t *right = (void *)node + sizeof(node_t) + size;
+    right->prev = node;
+    right->next = node->next;
+    right->size = remainder - sizeof(node_t); /* Header comes out of the leftover */
+    right->free = 1;
+
+    node->size = size;
+    node->next = right;
+    if(right->next) right->next->prev = right;
+}
+
+/* Absorb the next node (header and payload) into this one; both must be free. */
+static void merge_with_next(node_t *node)
+{
+    if(!node->next) return;
+    node->size += sizeof(node_t) + node->next->size;
+    node->next = node->next->next;
+    if(node->next) node->next->prev = node;
+}
+
+void CGD_PRAM_Init(void *start, void *end)
+{
+    arena = NULL;
+    if(end - start < 256) return; /* Too small: all requests go to Z_Malloc() */
+
+    arena = start; /* A single free node covers the whole area */
+    arena->prev = NULL;
+    arena->next = NULL;
+    arena->size = (end - start) - sizeof(node_t);
+    arena->free = 1;
+
+    arena_start = start;
+    arena_end = end;
+}
+
+void *CGD_PRAM_Malloc(size_t size)
+{
+    node_t *candidate;
+    size = (size + 3) & -4; /* Round up to a multiple of 4 bytes */
+
+    /* Find the first free block that is large enough */
+    for(candidate = arena; candidate; candidate = candidate->next) {
+        if(candidate->free && candidate->size >= size) break;
+    }
+    if(!candidate) return Z_Malloc(size, PU_STATIC, 0); /* Nothing fits here */
+
+    /* Prepare and return that block; the data starts right after the header */
+    split(candidate, size);
+    candidate->free = 0;
+    return (void *)candidate + sizeof(node_t);
+}
+
+void CGD_PRAM_Free(void *ptr)
+{
+    if(!ptr) return;
+    /* Outside the arena: release with Z_Free(), matching the Z_Malloc() fallback */
+    if(ptr < arena_start || ptr >= arena_end) { Z_Free(ptr); return; }
+    node_t *node = (void *)ptr - sizeof(node_t); /* Header sits right before the data */
+    node->free = 1;
+    if(node->next && node->next->free) merge_with_next(node);
+    if(node->prev && node->prev->free) merge_with_next(node->prev);
+}
+
+void *CGD_PRAM_Zalloc(size_t size)
+{
+    uint32_t *ptr = CGD_PRAM_Malloc(size);
+    if(!ptr) return NULL;
+    for(size_t i = 0; i < (size + 3) / 4; i++) ptr[i] = 0; /* Whole words, partial tail included */
+    return ptr;
+}
diff --git 
a/cgdoom/cgdoom-alloc.h b/cgdoom/cgdoom-alloc.h new file mode 100644 index 0000000..424ef4c --- /dev/null +++ b/cgdoom/cgdoom-alloc.h @@ -0,0 +1,56 @@ +#ifndef CGDOOM_ALLOC_H +#define CGDOOM_ALLOC_H + +#include "platform.h" +#include <stddef.h> + +/* The simple CGDoom allocator from SPU2 memory + + In CGDoom, the main bottleneck is memory. Speed is a concern but the CG-50 + is good enough at it that most titles and levels are pretty playable. + However, a level that doesn't load is never playable. Therefore, memory + limits cause more problems to the user experience than any other problem. + + To deal with this, several tools have been used; mainly the Doom allocator + in z_zone.c has been extended to support multiple zones, which are supplied + in the modified I_ZoneBase() function. In addition, different memory + areas have been cleared of whatever data they held in order to be used as + heap. + + However, there are some areas that cannot be included there. Even the OS + heap can be used as a default in Z_Malloc() with some effort, but one of + the resources escapes even these options: SPU2 memory. + + I don't want to delve into the specifics of SPU2 memory as it's extremely + strange; there are only two things that you should know about it: + * There is a 160 kiB area called PRAM0 that only supports 32-bit accesses. + * There is one 168 kiB area and two 48 kiB areas, called XRAM0, YRAM0 and + YRAM1, that only support 32-bit accesses and every access only addresses + 24 bits of actual memory (so they span 224 kiB and 64 kiB of pointers). + + PRAM0 can be used fairly easily but we must guarantee that only 32-bit + accesses are used. This means it's restricted to arrays of pointers, ints, + and fixed_t mainly. In addition to the data though, the control structures + of the heap must also use only 32-bit accesses, which would require pretty + large changes in Z_Malloc. + + Instead, CGDoom provides a very, very simple heap structure on PRAM0. 
This + is a trivial doubly-linked list with merging, intended to move a handful + of static buffers out of the main heap. Its use is deliberately marginal. + + For stability, the allocator defaults to Z_Malloc() on failure. This is + because a number of arrays that we direct to PRAM0 have variable size (like + the WAD lump cache) and this extra flexibility is required to consistently + work on a variety of WADs. (Diversions to the OS heap in previous versions + of CGDoom had such problems, and I myself moved the lump cache to Z_Malloc + because it didn't fit in some games.) */ + +/* Initialize the area. */ +void CGD_PRAM_Init(void *start, void *end); + +/* Allocation functions. */ +void *CGD_PRAM_Malloc(size_t size); +void CGD_PRAM_Free(void *ptr); +void *CGD_PRAM_Zalloc(size_t size); + +#endif /* CGDOOM_ALLOC_H */ diff --git a/cgdoom/cgdoom.c b/cgdoom/cgdoom.c index bcb5b5b..4c42c4b 100644 --- a/cgdoom/cgdoom.c +++ b/cgdoom/cgdoom.c @@ -1,6 +1,7 @@ #include "platform.h" #include "os.h" #include "cgdoom-ui.h" +#include "cgdoom-alloc.h" #ifdef CG_EMULATOR static int iAllocSum = 0; @@ -693,6 +694,12 @@ int main(void){ int key; GetKey(&key); } + + /* Initialize the PRAM allocator */ + void *PRAM0_start = (void *)0xfe200000; + void *PRAM0_end = (void *)0xfe228000; + PRAM0_start += gWADMap.miItemCount * sizeof(FileMappingItem); + CGD_PRAM_Init(PRAM0_start, PRAM0_end); } memset(VRAM, 0, WIDTH*HEIGHT*2); diff --git a/cgdoom/d_main.c b/cgdoom/d_main.c index d1784bd..6fa725e 100644 --- a/cgdoom/d_main.c +++ b/cgdoom/d_main.c @@ -30,6 +30,7 @@ #include "os.h" +#include "cgdoom-alloc.h" #include "doomdef.h" #include "doomstat.h" @@ -275,7 +276,7 @@ void D_DoomLoop (void) } // I_ShutdownGraphics(); free(lumpinfo); - free(lumpcache); + CGD_PRAM_Free(lumpcache); I_ShutdownGraphics(); return; } diff --git a/cgdoom/r_data.c b/cgdoom/r_data.c index 1e356b2..bfb6d0b 100644 --- a/cgdoom/r_data.c +++ b/cgdoom/r_data.c @@ -41,6 +41,7 @@ #include "r_sky.h" #include "os.h" +#include 
"cgdoom-alloc.h" #include "r_data.h" @@ -482,13 +483,14 @@ void R_InitTextures (void) numtextures = numtextures1 + numtextures2; - textures = (texture_t **)Z_Malloc (numtextures*4, PU_STATIC, 0); - texturecolumnlump = (short**)Z_Malloc (numtextures*4, PU_STATIC, 0); - texturecolumnofs = (unsigned short**)Z_Malloc (numtextures*4, PU_STATIC, 0); - texturecomposite = (byte**)Z_Malloc (numtextures*4, PU_STATIC, 0); - texturecompositesize = (int*)Z_Malloc (numtextures*4, PU_STATIC, 0); - texturewidthmask = (int*)Z_Malloc (numtextures*4, PU_STATIC, 0); - textureheight = (fixed_t*)Z_Malloc (numtextures*4, PU_STATIC, 0); + /* CGDoom: Allocate all of these in PRAM, since they have 4-byte elements */ + textures = (texture_t **)CGD_PRAM_Malloc (numtextures*4); + texturecolumnlump = (short**)CGD_PRAM_Malloc (numtextures*4); + texturecolumnofs = (unsigned short**)CGD_PRAM_Malloc (numtextures*4); + texturecomposite = (byte**)CGD_PRAM_Malloc (numtextures*4); + texturecompositesize = (int*)CGD_PRAM_Malloc (numtextures*4); + texturewidthmask = (int*)CGD_PRAM_Malloc (numtextures*4); + textureheight = (fixed_t*)CGD_PRAM_Malloc (numtextures*4); totalwidth = 0; @@ -575,7 +577,7 @@ void R_InitTextures (void) R_GenerateLookup (i); // Create translation table for global animation. - texturetranslation = (int *)Z_Malloc ((numtextures+1)*4, PU_STATIC, 0); + texturetranslation = (int *)CGD_PRAM_Malloc ((numtextures+1)*4); for (i=0 ; i