From fe82cef7c300311e6ec835bee246e95695c82b7f Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Wed, 25 Aug 2021 11:09:35 +0200 Subject: [PATCH] Add a powerful profiler (libprof) --- cgdoom/cgdoom.c | 7 ++++ cgdoom/cgdoom.h | 15 ++++++++ cgdoom/doomdef.h | 1 + cgdoom/i_system.c | 37 +++++++++++++++++++- cgdoom/i_video.c | 8 +++++ cgdoom/r_main.c | 6 +++- cgdoom/st_stuff.c | 13 +++++++ cgdoom/w_wad.c | 10 ++++++ cgdoom/z_zone.c | 11 ++++-- src-cg/keyboard.c | 1 + src-cg/libprof.c | 26 ++++++++++++++ src-cg/libprof.h | 89 +++++++++++++++++++++++++++++++++++++++++++++++ src-cg/platform.h | 2 ++ 13 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 src-cg/libprof.c create mode 100644 src-cg/libprof.h diff --git a/cgdoom/cgdoom.c b/cgdoom/cgdoom.c index cd69200..b1ebd79 100644 --- a/cgdoom/cgdoom.c +++ b/cgdoom/cgdoom.c @@ -2,6 +2,7 @@ #include "os.h" #include "cgdoom.h" #include +#include "libprof.h" #ifndef CG_EMULATOR # include "cgdoom-alloc.h" @@ -387,6 +388,9 @@ int gDevUnalignedLumpSize = 0; /* Global options */ int CGD_TrustUnalignedLumps = 0; +/* Performance counters */ +struct CGD_Perf CGD_Perf; + /* Read next sector from file, while caching into a buffer. */ const void *ReadNextSector(FileAccessCache *fc, int *size) { @@ -629,6 +633,8 @@ int main(void) Bdisp_FrameAndColor(3, 16); Bdisp_FrameAndColor(1, 0); + prof_init(); + WADFileInfo wads[16]; int wad_count = FindWADs(wads, 16); @@ -737,6 +743,7 @@ int main(void) GetKey(&key); } + prof_quit(); return 1; } diff --git a/cgdoom/cgdoom.h b/cgdoom/cgdoom.h index c0f267a..a3b0372 100644 --- a/cgdoom/cgdoom.h +++ b/cgdoom/cgdoom.h @@ -4,6 +4,7 @@ /* CGDoom-specific definitions that cover both calculator and native builds. */ #include +#include "libprof.h" /* VRAM pointer and size */ extern uint16_t *VRAM; @@ -36,6 +37,7 @@ extern int startmap, startepisode; #define SKEY_FREEMEM 0x10006 #define SKEY_FPSCOUNTER 0x10007 #define SKEY_FRAMESKIP 0x10008 +#define SKEY_PROFILER 0x10009 // Scan keyboard (the previous state is also retained). void UpdateKeyboardState(void); @@ -44,4 +46,17 @@ void UpdateKeyboardState(void); int KeyWasJustPressed(int key); int KeyWasJustReleased(int key); +//--- +// Performance metrics +//--- + +struct CGD_Perf +{ + prof_t DynamicAllocation; + prof_t GraphicsRendering; + prof_t DisplayInterface; + prof_t LumpLoading; + prof_t UnalignedLumpLoading; +}; + #endif /* CGDOOM_H */ diff --git a/cgdoom/doomdef.h b/cgdoom/doomdef.h index 203d171..d3c2744 100644 --- a/cgdoom/doomdef.h +++ b/cgdoom/doomdef.h @@ -310,6 +310,7 @@ void CGSwitchClip(); void CGFreeMem(); void CGRefreshSwitch(); void CGCycleGamma(); +void CGProfilerResults(); #endif // __DOOMDEF__ diff --git a/cgdoom/i_system.c b/cgdoom/i_system.c index aa505cd..28363a7 100644 --- a/cgdoom/i_system.c +++ b/cgdoom/i_system.c @@ -259,9 +259,14 @@ static void R_SetViewSizeChange(int iDiff) void I_StartTic (void) { + extern int giRefreshMask; + + //--- + // FPS counter + //--- + extern boolean fpscounteractive; extern int fpscounter_data; - extern int giRefreshMask; /* Number of frames since FPS count started */ static int fps_frames = 0; @@ -283,6 +288,10 @@ void I_StartTic (void) fps_frames = 0; } + //--- + // Special keys + //--- + UpdateKeyboardState(); /* Capture events for special keys */ @@ -303,6 +312,32 @@ void I_StartTic (void) if (KeyWasJustPressed(SKEY_FRAMESKIP)) CGRefreshSwitch(); + //--- + // Profiler + //--- + + /* Number of ticks left before reporting results */ + static int profiler_ticks = -1; + + if (KeyWasJustPressed(SKEY_PROFILER)) { + prof_t *counters = (prof_t *)&CGD_Perf; + for(int i = 0; i < sizeof CGD_Perf / sizeof (prof_t); i++) { + counters[i] = prof_make(); + } + profiler_ticks = 40; + } + else { + profiler_ticks--; + if(profiler_ticks == 0) { + CGProfilerResults(); + profiler_ticks = -1; + } + } + + //--- + // Normal keys + //--- + static int keys[] = { KEY_LEFTARROW, KEY_RIGHTARROW, KEY_UPARROW, KEY_DOWNARROW, KEY_RCTRL, KEY_TAB, KEY_PAUSE, KEY_SLEFTARROW, KEY_SRIGHTARROW, diff --git a/cgdoom/i_video.c b/cgdoom/i_video.c index 3e621b7..872254c 100644 --- a/cgdoom/i_video.c +++ b/cgdoom/i_video.c @@ -139,6 +139,8 @@ static int firstflipdone = 0; void I_Flip (void) { + prof_enter(CGD_Perf.DisplayInterface); + if (!firstflipdone) { SetWindow(0, 0, 396, 224); @@ -182,6 +184,8 @@ void I_Flip (void) j++; } } + + prof_leave(CGD_Perf.DisplayInterface); } void I_InitGraphics(void) @@ -207,6 +211,8 @@ void I_ReinitAfterError(void) void I_Flip (void) { + prof_enter(CGD_Perf.DisplayInterface); + /* Set a black frame if the current one is white */ if ((Bdisp_FrameAndColor(0, 0) & 1) == 0) { @@ -223,6 +229,8 @@ void I_Flip (void) } } Bdisp_PutDisp_DD(); + + prof_leave(CGD_Perf.DisplayInterface); } void I_InitGraphics(void) diff --git a/cgdoom/r_main.c b/cgdoom/r_main.c index ef3ed8c..10c981c 100644 --- a/cgdoom/r_main.c +++ b/cgdoom/r_main.c @@ -25,8 +25,8 @@ -//#include #include "os.h" +#include "cgdoom.h" #include "doomdef.h" @@ -758,6 +758,8 @@ void R_SetupFrame (player_t* player) // void R_RenderPlayerView (player_t* player) { + prof_enter(CGD_Perf.GraphicsRendering); + R_SetupFrame (player); // Clear buffers. R_ClearClipSegs (); @@ -772,4 +774,6 @@ void R_RenderPlayerView (player_t* player) R_DrawMasked (); //I_Error("poof"); + + prof_leave(CGD_Perf.GraphicsRendering); } diff --git a/cgdoom/st_stuff.c b/cgdoom/st_stuff.c index 3d2e588..74ae378 100644 --- a/cgdoom/st_stuff.c +++ b/cgdoom/st_stuff.c @@ -27,6 +27,7 @@ #include "os.h" +#include "cgdoom.h" #include "i_system.h" #include "i_video.h" @@ -517,6 +518,18 @@ void CGCycleGamma() plyr->message = message; } +void CGProfilerResults() +{ + static char message[72]; + sprintf(message, "DA:%ums GR:%ums DI:%ums LL:%ums ULL:%ums", + ((unsigned int)prof_time(CGD_Perf.DynamicAllocation) + 500) / 1000, + ((unsigned int)prof_time(CGD_Perf.GraphicsRendering) + 500) / 1000, + ((unsigned int)prof_time(CGD_Perf.DisplayInterface) + 500) / 1000, + ((unsigned int)prof_time(CGD_Perf.LumpLoading) + 500) / 1000, + ((unsigned int)prof_time(CGD_Perf.UnalignedLumpLoading) + 500) / 1000); + plyr->message = message; +} + // Respond to keyboard input events, // intercept cheats. boolean ST_Responder (event_t* ev) diff --git a/cgdoom/w_wad.c b/cgdoom/w_wad.c index f633f75..7962252 100644 --- a/cgdoom/w_wad.c +++ b/cgdoom/w_wad.c @@ -38,6 +38,7 @@ #endif*/ #include "os.h" +#include "cgdoom.h" #include "cgdoom-alloc.h" #include "doomtype.h" @@ -322,8 +323,17 @@ void * W_ReadLumpWithZ_Malloc(int lump,int tag,int iEnableFlash) the heap would provide. */ if(!full || !aligned) { + prof_enter(CGD_Perf.LumpLoading); + if(!aligned) + prof_enter(CGD_Perf.UnalignedLumpLoading); + + Z_Malloc (lumpinfo[lump].size, tag, &lumpcache[lump]); W_ReadLump (lump, lumpcache[lump]); + + if(!aligned) + prof_leave(CGD_Perf.UnalignedLumpLoading); + prof_leave(CGD_Perf.LumpLoading); } return lumpcache[lump]; diff --git a/cgdoom/z_zone.c b/cgdoom/z_zone.c index 0b8a3ca..76d6b9c 100644 --- a/cgdoom/z_zone.c +++ b/cgdoom/z_zone.c @@ -21,12 +21,12 @@ // //----------------------------------------------------------------------------- -//static const char - #include "z_zone.h" #include "i_system.h" #include "doomdef.h" +#include "cgdoom.h" + // // ZONE MEMORY ALLOCATION @@ -152,6 +152,7 @@ void Z_Free (const void* ptr) return; } + prof_enter(CGD_Perf.DynamicAllocation); block = (memblock_t *) ( (byte *)ptr - sizeof(memblock_t)); if (block->id != ZONEID) @@ -203,6 +204,8 @@ void Z_Free (const void* ptr) zone->rover = block; } } + + prof_leave(CGD_Perf.DynamicAllocation); } @@ -216,6 +219,8 @@ void Z_Free (const void* ptr) void* Z_Malloc( int size, int tag, void* user ) { + prof_enter(CGD_Perf.DynamicAllocation); + static int iCalled = 0; int extra; memblock_t* start; @@ -305,6 +310,7 @@ void* Z_Malloc( int size, int tag, void* user ) else { I_ErrorI ("Z_Malloc failure", size, 0, 0, 0); + prof_leave(CGD_Perf.DynamicAllocation); return NULL; } } @@ -354,6 +360,7 @@ void* Z_Malloc( int size, int tag, void* user ) base->id = ZONEID; + prof_leave(CGD_Perf.DynamicAllocation); return (void *) ((byte *)base + sizeof(memblock_t)); } diff --git a/src-cg/keyboard.c b/src-cg/keyboard.c index 740a032..bb8629b 100644 --- a/src-cg/keyboard.c +++ b/src-cg/keyboard.c @@ -118,6 +118,7 @@ static int DoomKeyToKeycode(int key) case SKEY_FREEMEM: return KEYCODE_FD; case SKEY_FPSCOUNTER: return KEYCODE_LEFTP; case SKEY_FRAMESKIP: return KEYCODE_VARS; + case SKEY_PROFILER: return KEYCODE_RIGHTP; default: return -1; } diff --git a/src-cg/libprof.c b/src-cg/libprof.c new file mode 100644 index 0000000..a0b116e --- /dev/null +++ b/src-cg/libprof.c @@ -0,0 +1,26 @@ +#include "libprof.h" + +/* prof_init(): Initialize the profiler's timer */ +int prof_init(void) +{ + *TMU0_TCOR = 0xffffffff; + *TMU0_TCNT = 0xffffffff; + *TMU0_TCR = 0x0000; /* UNIE=0 Pphi/4 */ + + *TMU_TSTR |= 1; + return 0; +} + +/* prof_quit(): Free the profiler's timer */ +void prof_quit(void) +{ + *TMU_TSTR &= ~1; +} + +/* prof_time(): Time spent in a given context, in microseconds */ +uint32_t prof_time(prof_t prof) +{ + /* Hardcoded for the default overclock settings */ + int Pphi = 29491200; + return ((uint64_t)prof.elapsed * 4 * 1000000) / Pphi; +} diff --git a/src-cg/libprof.h b/src-cg/libprof.h new file mode 100644 index 0000000..51e326f --- /dev/null +++ b/src-cg/libprof.h @@ -0,0 +1,89 @@ +//--- +// libprof: A manual profiling library for gint +// +// THIS VERSION IS MODIFIED FOR CGDOOM/LIBFXCG. +//--- + +#ifndef LIBPROF_LIBPROF +#define LIBPROF_LIBPROF + +#include + +//--- +// Initialization +//--- + +/* prof_init(): Initialize the profiler's timer */ +int prof_init(void); + +/* prof_quit(): Free the profiler's timer */ +void prof_quit(void); + +//--- +// Runtime time measurement +//--- + +#define TMU0_TCOR ((volatile uint32_t *)0xa4490008) +#define TMU0_TCNT ((volatile uint32_t *)0xa449000c) +#define TMU0_TCR ((volatile uint16_t *)0xa4490010) +#define TMU_TSTR ((volatile uint8_t *)0xa4490004) + +/* Context object, has an elasped delay and a recursion level. This object can + be created on the stack of a function that measures its execution time, + except if the function is recursive, in which case it should be either + static or global. */ +typedef struct prof_t +{ + uint32_t rec; + uint32_t elapsed; +} prof_t; + +/* prof_make(): Create a new context object + A context can be cleared by assigning it prof_make() again. */ +#define prof_make() ((prof_t){ 0, 0 }) + +/* Timer counter */ +#define PROF_TCNT TMU0_TCNT + +/* prof_enter(): Start counting time for a function + This macro should be called at the start of the context scope. If the + function was already executing then the deepest instance in the stack is + used instead of creating a new counter. */ +#define prof_enter(prof) { \ + if(!prof.rec++) prof.elapsed += *PROF_TCNT; \ +} + +/* prof_leave(): Stop counting time for a function + This should be called at the end of the context scope; it only actually + stops if there is no deeper instance of the context in the stack. If there + are not as exactly as many prof_leave()'s as prof_enter()'s then the + resulting time measure will not be relevant at all. */ +#define prof_leave(prof) { \ + if(!--prof.rec) prof.elapsed -= *PROF_TCNT; \ +} + +/* prof_exec(): Measure a single block of code + This operation can be used when profiling is not required, and instead + libprof is used to measure the performance of a single bit of code. Use it + like this: + + uint32_t elasped_us = prof_exec({ + exec_code(); + }); */ +#define prof_exec(code) ({ \ + prof_t prof = prof_make(); \ + prof_enter(prof); \ + code; \ + prof_leave(prof); \ + prof_time(prof); \ +}) + +//--- +// Post-measurement analysis +//--- + +/* prof_time(): Time spent in a given context, in microseconds + Should only be called when the context is not currently executing. */ +uint32_t prof_time(prof_t prof); + +#endif /* LIBPROF_LIBPROF */ diff --git a/src-cg/platform.h b/src-cg/platform.h index e0e9386..25652df 100644 --- a/src-cg/platform.h +++ b/src-cg/platform.h @@ -79,6 +79,8 @@ extern int CGD_TrustUnalignedLumps; # define CGDOOM_SCREENS_BASE SaveVRAMBuffer #endif +extern struct CGD_Perf CGD_Perf; + //--- #include "keyboard.hpp"