forked from Lephenixnoir/gintctl
perf/cpucache: 2-ILP parallelism and cache size (w/ plotting util)
Adds a CPU-cache performance test. The libprof measure is extended into Iϕ cycles (resolution: 16 Iϕ cycles) to check the 2-ILP parallelism hypothesis (confirmed on fx-9860G and fx-CG 50). The time needed to traverse a small buffer multiple times is also measured with the same method in an attempt to determine cache size. No definite result could be obtained. This change includes a plotting utility.
This commit is contained in:
parent
b86b14f3cd
commit
fb561d1310
|
@ -8,6 +8,9 @@
|
|||
/* gintctl_perf_libprof(): Basic libprof tests using timers */
|
||||
void gintctl_perf_libprof(void);
|
||||
|
||||
/* gintctl_perf_cpucache(): CPU speed and cache size */
|
||||
void gintctl_perf_cpucache(void);
|
||||
|
||||
/* gintctl_perf_interrupts(): Interrupt handling */
|
||||
void gintctl_perf_interrupts(void);
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
//---
|
||||
// gintctl:plot - Simple plotting utility
|
||||
//---
|
||||
|
||||
#ifndef GINTCTL_PLOT
|
||||
#define GINTCTL_PLOT
|
||||
|
||||
/* Tick specification for one axis of a plot */
struct plot_ticks
{
	/* At most 64 ticks can be specified currently (easy to improve) */

	/* If set to non-zero, ticks are placed at multiples of this. By
	   default all of them are primary, use [subtick_divisions] to make
	   some ticks secondary */
	int multiples;
	/* No other way to specify ticks yet */

	/* Number of subticks per tick; subticks are marked but values are not
	   printed. Has no effect when 0 or 1. With a value of n, only the
	   ticks located at multiples of [multiples * n] are primary */
	int subtick_divisions;

	/* sprintf() format producing the text for ticks; defaults to "%d" */
	char const *format;
	/* Formatter function for specific needs; overrides [format]. It is
	   given the output buffer, its size, and the value of the tick */
	void (*formatter)(char *str, size_t size, int32_t v);
};
|
||||
|
||||
/* Description of a graph. The caller fills in the first part; the internal
   parameters at the end are overwritten by plot() */
struct plot
{
	/* Plot area, including axes and ticks */
	struct {
		int x, y, w, h;
	} area;

	/* Data points and color; point number i is (data_x[i], data_y[i]) and
	   there are [data_len] of them, joined by lines of the given color */
	int32_t *data_x;
	int32_t *data_y;
	int data_len;
	int color;

	/* Tick specification */
	struct plot_ticks ticks_x;
	struct plot_ticks ticks_y;

	/* Grid specification */
	struct {
		enum {
			PLOT_NOGRID = 0,
			PLOT_MAINGRID,
			PLOT_FULLGRID,
		} level;

		/* Color of the grid lines at primary and secondary ticks */
		int primary_color;
		int secondary_color;
		/* When non-zero, grid lines only cover every other pixel */
		int dotted;
	} grid;

	/** Internal parameters, computed by plot() **/

	/* Logical bounds: range of the data values on each axis. plot()
	   starts from 0 on both ends, so the range always contains 0 */
	int min_x, max_x;
	int min_y, max_y;

	/* Concrete bounds: part of [area] where the data is drawn, once room
	   has been set aside for the tick texts */
	struct {
		int x, y, w, h;
	} graph;
};
|
||||
|
||||
/* plot(): Render a graph */
|
||||
void plot(struct plot *plot);
|
||||
|
||||
#endif /* GINTCTL_PLOT */
|
|
@ -59,6 +59,7 @@ struct menu menu_perf = {
|
|||
_("Performance", "Performance benchmarks"), .entries = {
|
||||
|
||||
{ "libprof basics", gintctl_perf_libprof },
|
||||
{ "CPU and cache", gintctl_perf_cpucache },
|
||||
{ "Interrupt stress", gintctl_perf_interrupts },
|
||||
{ "Rendering functions", gintctl_perf_render },
|
||||
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
|
||||
.global _cpucache_nop1024
|
||||
.global _cpucache_rounds
|
||||
|
||||
#define r4(x) x; x; x; x
|
||||
|
||||
# cpucache_nop1024: Execute a block of 1024 nop, as many times as requested
# r4: Number of repeats
# NOTE(review): a repeat count of 0 would wrap around in dt, so presumably
# callers always pass at least 1 - confirm
_cpucache_nop1024:
# Five nested 4x repetitions of the macro give 4^5 = 1024 nop
	r4(r4(r4(r4(r4(nop)))))

# Count one repeat down, dt sets T once the counter reaches zero
	dt	r4
	bt	1f
# Not finished yet, run the block again (the nop fills the bra delay slot)
	bra	_cpucache_nop1024
	nop

# Finished, return to the caller (the nop fills the rts delay slot)
1:	rts
	nop
|
||||
|
||||
|
||||
# cpucache_rounds: Read through a buffer byte by byte, several times over
# r4: Buffer to read from
# r5: Buffer size (multiple of 4)
# r6: Number of rounds
_cpucache_rounds:
# r1 = address of the last byte of the buffer, r0 = read pointer
	mov	r4, r1
	add	r5, r1
	add	#-1, r1
	mov	r4, r0

# Inner loop made of three post-incremented byte loads (the values land in r2
# and are never used), repeated until the read pointer reaches r1
1:	mov.b	@r0+, r2
	mov.b	@r0+, r2
	mov.b	@r0+, r2
	cmp/ge	r1, r0
	bf	1b
# NOTE(review): as written this load runs once after the loop exits, so each
# pass of the loop advances by 3 bytes. With a delayed branch (bf/s) it would
# be the fourth load of every pass, which is what a size multiple of 4
# suggests - confirm which one is intended before relying on the figures
	mov.b	@r0+, r2

# One round finished, start over from the top until r6 reaches zero
	dt	r6
	bf	_cpucache_rounds

	rts
	nop
|
|
@ -0,0 +1,158 @@
|
|||
#include <gint/display.h>
|
||||
#include <gint/keyboard.h>
|
||||
#include <gint/clock.h>
|
||||
|
||||
#include <gintctl/perf.h>
|
||||
#include <gintctl/util.h>
|
||||
#include <gintctl/plot.h>
|
||||
|
||||
#include <libprof.h>
|
||||
|
||||
#define CACHE_MAX 4096
|
||||
#define SAMPLES 129
|
||||
|
||||
extern void cpucache_nop1024(int repeats);
|
||||
extern void cpucache_rounds(uint8_t const *buf, size_t len, int rounds);
|
||||
|
||||
uint32_t test_nop4096(void)
|
||||
{
|
||||
prof_t prof = prof_make();
|
||||
|
||||
prof_enter(prof);
|
||||
cpucache_nop1024(4);
|
||||
prof_leave(prof);
|
||||
|
||||
/* Return the amount in Iphi cycles computed from the Pphi/4 measure */
|
||||
clock_frequency_t const *freq = clock_freq();
|
||||
uint64_t PLL_cycles = ((uint64_t)prof.elapsed * 4) * freq->Pphi_div;
|
||||
return PLL_cycles / freq->Iphi_div;
|
||||
}
|
||||
|
||||
uint32_t test_cpucache_rounds(uint8_t const *buf, size_t len, int rounds)
|
||||
{
|
||||
prof_t prof = prof_make();
|
||||
|
||||
prof_enter(prof);
|
||||
cpucache_rounds(buf, len, rounds);
|
||||
prof_leave(prof);
|
||||
|
||||
clock_frequency_t const *freq = clock_freq();
|
||||
uint64_t PLL_cycles = ((uint64_t)prof.elapsed * 4) * freq->Pphi_div;
|
||||
return PLL_cycles / freq->Iphi_div;
|
||||
}
|
||||
|
||||
#ifdef FX9860G
|
||||
/* tick_formatter(): Label ticks in thousands ("0", "1k", "2k", ...)
   @str   Output buffer
   @size  Size of the output buffer, in bytes
   @v     Value of the tick; the label shows v/1000, truncated toward zero */
static void tick_formatter(char *str, size_t size, int32_t v)
{
	if(v == 0)
	{
		snprintf(str, size, "0");
		return;
	}

	/* int32_t is not necessarily int, so convert explicitly to match %d */
	snprintf(str, size, "%dk", (int)(v / 1000));
}
|
||||
#endif
|
||||
|
||||
/* gintctl_perf_cpucache(): CPU speed and cache size */
|
||||
void gintctl_perf_cpucache(void)
|
||||
{
|
||||
int key = 0;
|
||||
|
||||
/* Test twice because this is sensitive to libprof initialization */
|
||||
uint32_t nop4096 = test_nop4096();
|
||||
nop4096 = test_nop4096();
|
||||
|
||||
uint8_t buf[CACHE_MAX];
|
||||
int32_t x_size[SAMPLES];
|
||||
int32_t y_time[SAMPLES];
|
||||
|
||||
struct plot plotspec = {
|
||||
.data_x = x_size,
|
||||
.data_y = y_time,
|
||||
.data_len = SAMPLES,
|
||||
|
||||
#ifdef FX9860G
|
||||
.area = {
|
||||
.x = 0, .y = 18,
|
||||
.w = 128, .h = 44,
|
||||
},
|
||||
.color = C_BLACK,
|
||||
.ticks_x = {
|
||||
.multiples = 250,
|
||||
.subtick_divisions = 4,
|
||||
.formatter = tick_formatter,
|
||||
},
|
||||
.ticks_y = {
|
||||
.multiples = 20000,
|
||||
.subtick_divisions = 2,
|
||||
.formatter = tick_formatter,
|
||||
},
|
||||
.grid = {
|
||||
.level = PLOT_MAINGRID,
|
||||
.primary_color = C_BLACK,
|
||||
.dotted = 1,
|
||||
},
|
||||
#endif
|
||||
|
||||
#ifdef FXCG50
|
||||
.area = {
|
||||
.x = 24, .y = 51,
|
||||
.w = 340, .h = 120,
|
||||
},
|
||||
.color = C_RED,
|
||||
.ticks_x = {
|
||||
.multiples = CACHE_MAX / 16,
|
||||
.subtick_divisions = 4,
|
||||
},
|
||||
.ticks_y = {
|
||||
.multiples = 10000,
|
||||
.subtick_divisions = 2,
|
||||
},
|
||||
.grid = {
|
||||
.level = PLOT_FULLGRID,
|
||||
.primary_color = C_RGB(20, 20, 20),
|
||||
.secondary_color = C_RGB(28, 28, 28),
|
||||
.dotted = 1,
|
||||
},
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
int y_min = -1;
|
||||
int y_max = -1;
|
||||
|
||||
for(int i = 0; i < SAMPLES; i++)
|
||||
{
|
||||
x_size[i] = (CACHE_MAX / (SAMPLES-1)) * i;
|
||||
y_time[i] = test_cpucache_rounds(buf, x_size[i], 16);
|
||||
|
||||
if(y_time[i] < y_min || y_min == -1) y_min = y_time[i];
|
||||
if(y_time[i] > y_max || y_max == -1) y_max = y_time[i];
|
||||
}
|
||||
|
||||
while(key != KEY_EXIT)
|
||||
{
|
||||
dclear(C_WHITE);
|
||||
row_title(_("CPU and cache", "CPU speed and cache size"));
|
||||
|
||||
#ifdef FX9860G
|
||||
row_print(2, 1, "4096 nop: %d Iϕ", nop4096);
|
||||
extern font_t font_hexa;
|
||||
font_t const *old = dfont(&font_hexa);
|
||||
plot(&plotspec);
|
||||
dfont(old);
|
||||
#endif
|
||||
|
||||
#ifdef FXCG50
|
||||
row_print(1, 1, "Time for 4096 nop (with overhead): %d Iphi",
|
||||
nop4096);
|
||||
row_print(2, 1, "Time needed to read a buffer multiple times:");
|
||||
|
||||
plot(&plotspec);
|
||||
|
||||
row_print(12, 1, "X: Size of buffer (bytes)");
|
||||
row_print(13, 1, "Y: Iphi cycles for 16 8-bit traversals");
|
||||
row_print(14, 1, "Last samples suggests: %.2j Iphi/byte access",
|
||||
100 * y_time[SAMPLES-1] / x_size[SAMPLES-1] / 16);
|
||||
#endif
|
||||
|
||||
dupdate();
|
||||
key = getkey().key;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,239 @@
|
|||
#include <gint/display.h>
|
||||
#include <gint/std/stdio.h>
|
||||
|
||||
#include <gintctl/plot.h>
|
||||
#include <gintctl/util.h>
|
||||
|
||||
/* Layout of the ticks of one axis, as computed by layout_ticks() */
struct tick_info
{
	/* Number of ticks; only the first [count] entries of each array below
	   are meaningful */
	int count;

	/* Logical value of each tick */
	int32_t value[64];
	/* Position of each tick on screen, from graph_x() or graph_y() */
	uint16_t px_pos[64];
	/* Non-zero for primary ticks, zero for secondary ones */
	int8_t primary[64];

	/* Largest width and height of the tick texts, as measured by dsize() */
	int max_width;
	int max_height;
};
|
||||
|
||||
static int graph_x(struct plot *s, int v)
|
||||
{
|
||||
return s->graph.x + (s->graph.w-1) * (v-s->min_x) / (s->max_x-s->min_x);
|
||||
}
|
||||
|
||||
static int graph_y(struct plot *s, int v)
|
||||
{
|
||||
return s->graph.y + s->graph.h -
|
||||
(s->graph.h-1) * (v-s->min_y) / (s->max_y-s->min_y);
|
||||
}
|
||||
|
||||
static void layout_ticks(struct plot *s, int xy, struct tick_info *i)
|
||||
{
|
||||
struct plot_ticks *t = xy ? &s->ticks_y : &s->ticks_x;
|
||||
|
||||
char str[256];
|
||||
char const *fmt = t->format ? t->format : "%d";
|
||||
|
||||
i->count = 0;
|
||||
i->max_width = 0;
|
||||
i->max_height = 0;
|
||||
|
||||
if(!t->multiples) return;
|
||||
|
||||
int subdivs = t->subtick_divisions > 0 ? t->subtick_divisions : 1;
|
||||
|
||||
/* Start at the minimum value and work towards the maximum one */
|
||||
int min = xy ? s->min_y : s->min_x;
|
||||
int max = xy ? s->max_y : s->max_x;
|
||||
|
||||
int v = min + t->multiples - 1;
|
||||
v -= (v % t->multiples);
|
||||
|
||||
while(v <= max && i->count < 64)
|
||||
{
|
||||
i->primary[i->count] = (v % (t->multiples * subdivs) == 0);
|
||||
i->value[i->count] = v;
|
||||
i->px_pos[i->count] = xy ? graph_y(s, v) : graph_x(s, v);
|
||||
(i->count)++;
|
||||
|
||||
if(t->formatter) t->formatter(str, 256, v);
|
||||
else snprintf(str, 256, fmt, v);
|
||||
|
||||
int w, h;
|
||||
dsize(str, NULL, &w, &h);
|
||||
|
||||
if(w > i->max_width) i->max_width = w;
|
||||
if(h > i->max_height) i->max_height = h;
|
||||
|
||||
v += t->multiples;
|
||||
}
|
||||
}
|
||||
|
||||
void plot(struct plot *s)
|
||||
{
|
||||
struct tick_info tx, ty;
|
||||
char str[256];
|
||||
int tick_w, tick_h;
|
||||
|
||||
int32_t *data_x = s->data_x;
|
||||
int32_t *data_y = s->data_y;
|
||||
|
||||
int axis_spacing = _(2,3);
|
||||
int tick_length = _(2,4);
|
||||
|
||||
/* Determine the bounds of the rendering area */
|
||||
|
||||
s->min_x = 0;
|
||||
s->max_x = 0;
|
||||
s->min_y = 0;
|
||||
s->max_y = 0;
|
||||
|
||||
for(int i = 0; i < s->data_len; i++)
|
||||
{
|
||||
if(data_x[i] < s->min_x) s->min_x = data_x[i];
|
||||
if(data_x[i] > s->max_x) s->max_x = data_x[i];
|
||||
|
||||
if(data_y[i] < s->min_y) s->min_y = data_y[i];
|
||||
if(data_y[i] > s->max_y) s->max_y = data_y[i];
|
||||
}
|
||||
|
||||
if(s->min_x == s->max_x || s->min_y == s->max_y) return;
|
||||
|
||||
/* Determine the number, position and size of ticks */
|
||||
|
||||
font_t const *f = dfont(NULL);
|
||||
dfont(f);
|
||||
|
||||
/* Start with vertical ticks */
|
||||
s->graph.y = s->area.y;
|
||||
s->graph.h = s->area.h - axis_spacing - f->line_height;
|
||||
|
||||
layout_ticks(s, 1, &ty);
|
||||
|
||||
/* Continue with horizontal ticks */
|
||||
s->graph.x = s->area.x + axis_spacing + ty.max_width;
|
||||
s->graph.w = s->area.w - (s->graph.x - s->area.x);
|
||||
|
||||
layout_ticks(s, 0, &tx);
|
||||
|
||||
/* Render grid, first secondary ticks, then primary ticks */
|
||||
|
||||
int dotted = s->grid.dotted;
|
||||
|
||||
for(int i = 0; i < ty.count; i++) if(!ty.primary[i])
|
||||
{
|
||||
int y = ty.px_pos[i];
|
||||
for(int x = s->graph.x; x < s->graph.x + s->graph.w; x++)
|
||||
{
|
||||
if(dotted && !((x^y) & 1)) continue;
|
||||
dpixel(x, y, s->grid.secondary_color);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < tx.count; i++) if(!tx.primary[i])
|
||||
{
|
||||
int x = tx.px_pos[i];
|
||||
for(int y = s->graph.y; y < s->graph.y + s->graph.h; y++)
|
||||
{
|
||||
if(dotted && !((x^y) & 1)) continue;
|
||||
dpixel(x, y, s->grid.secondary_color);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < ty.count; i++) if(ty.primary[i])
|
||||
{
|
||||
int y = ty.px_pos[i];
|
||||
for(int x = s->graph.x; x < s->graph.x + s->graph.w; x++)
|
||||
{
|
||||
if(dotted && !((x^y) & 1)) continue;
|
||||
dpixel(x, y, s->grid.primary_color);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < tx.count; i++) if(tx.primary[i])
|
||||
{
|
||||
int x = tx.px_pos[i];
|
||||
for(int y = s->graph.y; y < s->graph.y + s->graph.h; y++)
|
||||
{
|
||||
if(dotted && !((x^y) & 1)) continue;
|
||||
dpixel(x, y, s->grid.primary_color);
|
||||
}
|
||||
}
|
||||
|
||||
/* Render ticks */
|
||||
|
||||
int horz_axis = s->graph.y + s->graph.h;
|
||||
int vert_axis = s->graph.x;
|
||||
|
||||
for(int i = 0; i < ty.count; i++)
|
||||
{
|
||||
char const *format = s->ticks_y.format;
|
||||
if(!format) format = "%d";
|
||||
|
||||
if(s->ticks_y.formatter)
|
||||
s->ticks_y.formatter(str, 256, ty.value[i]);
|
||||
else snprintf(str, 256, format, ty.value[i]);
|
||||
dsize(str, NULL, NULL, &tick_h);
|
||||
|
||||
/* Try to center the text left of the tick, but move it up or
|
||||
down if it overflows from the render region */
|
||||
int y = ty.px_pos[i] - (tick_h >> 1);
|
||||
if(y < s->graph.y)
|
||||
y = s->graph.y;
|
||||
if(y + tick_h > s->graph.y + s->graph.h)
|
||||
y = s->graph.y + s->graph.h - tick_h;
|
||||
|
||||
dline(vert_axis, ty.px_pos[i], vert_axis + tick_length,
|
||||
ty.px_pos[i], C_BLACK);
|
||||
|
||||
if(!ty.primary[i]) continue;
|
||||
dtext_opt(vert_axis - axis_spacing, y, C_BLACK, C_NONE,
|
||||
DTEXT_RIGHT, DTEXT_TOP, str);
|
||||
}
|
||||
|
||||
for(int i = 0; i < tx.count; i++)
|
||||
{
|
||||
char const *format = s->ticks_y.format;
|
||||
if(!format) format = "%d";
|
||||
|
||||
if(s->ticks_x.formatter)
|
||||
s->ticks_x.formatter(str, 256, tx.value[i]);
|
||||
else snprintf(str, 256, format, tx.value[i]);
|
||||
dsize(str, NULL, &tick_w, NULL);
|
||||
|
||||
/* Try to center the text below the tick, but move it left or
|
||||
right if it overflows from the render region */
|
||||
int x = tx.px_pos[i] - ((tick_w+1) >> 1);
|
||||
if(x < s->graph.x)
|
||||
x = s->graph.x;
|
||||
if(x+tick_w > s->graph.x + s->graph.w)
|
||||
x = s->graph.x + s->graph.w - tick_w;
|
||||
|
||||
dline(tx.px_pos[i], horz_axis, tx.px_pos[i],
|
||||
horz_axis - tick_length, C_BLACK);
|
||||
|
||||
if(!tx.primary[i]) continue;
|
||||
dtext(x, horz_axis + axis_spacing, C_BLACK, str);
|
||||
}
|
||||
|
||||
/* Render axes */
|
||||
|
||||
int x2 = s->area.x + s->area.w - 1;
|
||||
dline(vert_axis, s->area.y, vert_axis, horz_axis, C_BLACK);
|
||||
dline(vert_axis, horz_axis, x2, horz_axis, C_BLACK);
|
||||
dline(vert_axis, s->area.y, x2, s->area.y, C_BLACK);
|
||||
dline(x2, s->area.y, x2, horz_axis, C_BLACK);
|
||||
|
||||
/* Plot data */
|
||||
|
||||
int last_x = 0;
|
||||
int last_y = 0;
|
||||
|
||||
for(int i = 0; i < s->data_len; i++)
|
||||
{
|
||||
int x = graph_x(s, s->data_x[i]);
|
||||
int y = graph_y(s, s->data_y[i]);
|
||||
|
||||
if(i > 0) dline(last_x, last_y, x, y, s->color);
|
||||
|
||||
last_x = x;
|
||||
last_y = y;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue