#include #include #include #include #include #include #include #include extern void memory_read(volatile uint8_t *area, uint32_t size); extern void memory_write(volatile uint8_t *area, uint32_t size); extern void memory_dsp_xram_memset(volatile uint8_t *area, uint32_t size); extern void memory_dsp_yram_memset(volatile uint8_t *area, uint32_t size); extern void memory_dsp_xyram_memcpy(volatile uint8_t *dst, volatile uint8_t *src, uint32_t size); GILRAM GALIGNED(32) static char ilram_buffer[0x800]; GXRAM GALIGNED(32) static char xram_buffer[0x800]; GYRAM GALIGNED(32) static char yram_buffer[0x800]; struct results { void *address; uint32_t size; int rounds; /* In microseconds for the whole area */ uint32_t read_C_u8_time; uint32_t write_C_u8_time; uint32_t read_u8_time; uint32_t write_u8_time; uint32_t memcpy_time; uint32_t memset_time; uint32_t dma_memcpy_time; uint32_t dma_memset_time; uint32_t dsp_xram_memset_time; uint32_t dsp_yram_memset_time; uint32_t dsp_xyram_memcpy_time; /* In kbytes/second */ uint32_t read_C_u8_speed; uint32_t write_C_u8_speed; uint32_t read_u8_speed; uint32_t write_u8_speed; uint32_t memcpy_speed; uint32_t memset_speed; uint32_t dma_memcpy_speed; uint32_t dma_memset_speed; uint32_t dsp_xram_memset_speed; uint32_t dsp_yram_memset_speed; uint32_t dsp_xyram_memcpy_speed; }; static void test(struct results *r, void *address, uint32_t size, int rounds) { volatile uint8_t *area = address; volatile uint8_t x; r->address = address; r->size = size; r->rounds = rounds; /* Defaults for conditional tests */ r->dsp_xram_memset_time = 1; r->dsp_yram_memset_time = 1; r->dsp_xyram_memcpy_time = 1; r->read_C_u8_time = prof_exec({ for(int i = 0; i < rounds; i++) { for(uint index = 0; index < size; index++) x = area[index]; } }); r->write_C_u8_time = prof_exec({ for(int i = 0; i < rounds; i++) { for(uint index = 0; index < size; index++) area[index] = x; } }); r->read_u8_time = prof_exec({ for(int i = 0; i < rounds; i++) memory_read(area, size); }); r->write_u8_time = prof_exec({ for(int i = 0; i < rounds; i++) memory_write(area, size); }); r->memset_time = prof_exec({ for(int i = 0; i < rounds; i++) memset(address, 0, size); }); r->memcpy_time = 2 * prof_exec({ for(int i = 0; i < rounds; i++) memcpy(address + size / 2, address, size / 2); }); r->dma_memset_time = prof_exec({ #ifdef FXCG50 for(int i = 0; i < rounds; i++) dma_memset(address, 0, size); #endif }); r->dma_memcpy_time = 2 * prof_exec({ #ifdef FXCG50 for(int i = 0; i < rounds; i++) dma_memcpy(address + size / 2, address, size / 2); #endif }); if(address == &xram_buffer) { /* Since the buffers are small, repeat 16 times */ r->dsp_xram_memset_time = prof_exec({ for(int i = 0; i < rounds; i++) memory_dsp_xram_memset(address, size); }); } if(address == &yram_buffer) { r->dsp_yram_memset_time = prof_exec({ for(int i = 0; i < rounds; i++) memory_dsp_yram_memset(address, size); }); } if(address == &xram_buffer) { void *x = xram_buffer; void *y = yram_buffer; /* Since the buffers are small, repeat 16 times */ r->dsp_xyram_memcpy_time = prof_exec({ for(int i = 0; i < rounds; i++) memory_dsp_xyram_memcpy(y, x, size); }); } if(address == &yram_buffer) { void *x = xram_buffer; void *y = yram_buffer; r->dsp_xyram_memcpy_time = prof_exec({ for(int i = 0; i < rounds; i++) memory_dsp_xyram_memcpy(x, y, size); }); } /* Convert from us/(size bytes) to kb/(1 second) */ uint64_t factor = size * 1000 * rounds; r->read_C_u8_speed = factor / r->read_C_u8_time; r->write_C_u8_speed = factor / r->write_C_u8_time; r->read_u8_speed = factor / r->read_u8_time; r->write_u8_speed = factor / r->write_u8_time; r->memcpy_speed = factor / r->memcpy_time; r->memset_speed = factor / r->memset_time; r->dma_memcpy_speed = factor / r->dma_memcpy_time; r->dma_memset_speed = factor / r->dma_memset_time; r->dsp_xram_memset_speed = factor / r->dsp_xram_memset_time; r->dsp_yram_memset_speed = factor / r->dsp_yram_memset_time; r->dsp_xyram_memcpy_speed = factor / r->dsp_xyram_memcpy_time; } static void results_line(int row, uint32_t time, uint32_t speed) { dprint_opt(260, row_y(row), C_BLACK, C_NONE, DTEXT_RIGHT, DTEXT_TOP, "%d us", time); dprint_opt(370, row_y(row), C_BLACK, C_NONE, DTEXT_RIGHT, DTEXT_TOP, "%3.3j MB/s", speed); } /* gintctl_perf_memory(): Memory primitives and reading/writing speed */ void gintctl_perf_memory(void) { /* TODO: Memory performance on SH3 */ if(isSH3()) return; int key = 0; struct results r = { 0 }; while(key != KEY_EXIT) { dclear(C_WHITE); row_title("Memory access speed"); #ifdef FXCG50 row_print( 3, 1, "Naive C-loop u8 read:"); row_print( 4, 1, "Naive C-loop u8 write:"); row_print( 5, 1, "Rolled asm u8 read:"); row_print( 6, 1, "Rolled asm u8 write:"); row_print( 7, 1, "gint's memcpy():"); row_print( 8, 1, "gint's memset():"); row_print( 9, 1, "gint's dma_memcpy():"); row_print(10, 1, "gint's dma_memset():"); if(r.address == &xram_buffer) row_print(11, 1, "DSP XRAM memset():"); if(r.address == &yram_buffer) row_print(11, 1, "DSP YRAM memset():"); if(r.address == &xram_buffer || r.address == &yram_buffer) row_print(12, 1, "DSP XRAM->YRAM memcpy():"); if(!r.address) { row_print(1, 1, "No test yet"); } else { row_print(1, 1, "Results for area %08x (%d bytes, %d " "round%s)", (uint32_t)r.address, r.size, r.rounds, (r.rounds > 1) ? "s" : ""); results_line(3, r.read_C_u8_time, r.read_C_u8_speed); results_line(4, r.write_C_u8_time, r.write_C_u8_speed); results_line(5, r.read_u8_time, r.read_u8_speed); results_line(6, r.write_u8_time, r.write_u8_speed); results_line(7, r.memcpy_time, r.memcpy_speed); results_line(8, r.memset_time, r.memset_speed); results_line(9, r.dma_memcpy_time, r.dma_memcpy_speed); results_line(10,r.dma_memset_time, r.dma_memset_speed); if(r.address == &xram_buffer) { results_line(11, r.dsp_xram_memset_time, r.dsp_xram_memset_speed); } if(r.address == &yram_buffer) { results_line(11, r.dsp_yram_memset_time, r.dsp_yram_memset_speed); } if(r.address==&xram_buffer || r.address==&yram_buffer) { results_line(12, r.dsp_xyram_memcpy_time, r.dsp_xyram_memcpy_speed); } } fkey_button(1, "RAM"); fkey_button(2, "ILRAM"); fkey_button(3, "XRAM"); fkey_button(4, "YRAM"); #endif dupdate(); key = getkey().key; if(key == KEY_F1) test(&r, gint_vram, _(0x400,0x8000), 1); if(key == KEY_F2) test(&r, &ilram_buffer, 0x800, 64); if(key == KEY_F3) test(&r, &xram_buffer, 0x800, 64); if(key == KEY_F4) test(&r, &yram_buffer, 0x800, 64); } }