perf/cpucache: 2-ILP parallelism and cache size (w/ plotting util)

Adds a CPU-cache performance test. The libprof measure is extended into
Iϕ cycles (resolution: 16 Iϕ cycles) to check the 2-ILP parallelism
hypothesis (confirmed on fx-9860G and fx-CG 50).

The time needed to traverse a small buffer multiple times is also
measured with the same method in an attempt to determine cache size. No
definite result could be obtained.

This change includes a plotting utility.
This commit is contained in:
Lephe 2020-10-21 12:05:44 +02:00
parent b86b14f3cd
commit fb561d1310
Signed by untrusted user who does not match committer: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
6 changed files with 513 additions and 0 deletions

View File

@ -8,6 +8,9 @@
/* gintctl_perf_libprof(): Basic libprof tests using timers */
void gintctl_perf_libprof(void);
/* gintctl_perf_cpucache(): CPU speed and cache size
   Times a run of 4096 nop instructions and the traversal of buffers of
   increasing size, then shows the figures and a plot of the traversal times
   until EXIT is pressed. */
void gintctl_perf_cpucache(void);
/* gintctl_perf_interrupts(): Interrupt handling */
void gintctl_perf_interrupts(void);

73
include/gintctl/plot.h Normal file
View File

@ -0,0 +1,73 @@
//---
// gintctl:plot - Simple plotting utility
//---
#ifndef GINTCTL_PLOT
#define GINTCTL_PLOT
/* This header uses size_t and int32_t; include their definitions so that it
   can be included on its own */
#include <stddef.h>
#include <stdint.h>

/* struct plot_ticks: Tick specification for one axis of a plot */
struct plot_ticks
{
	/* At most 64 ticks can be specified currently (easy to improve) */

	/* If set to non-zero, ticks are placed at multiples of this. By
	   default all of them are primary, use [subtick_divisions] to make
	   some ticks secondary */
	int multiples;
	/* No other way to specify ticks yet */

	/* Number of subticks per tick; subticks are marked but values are not
	   printed. Has no effect when 0 or 1 */
	int subtick_divisions;

	/* sprintf()-style format producing the text for ticks; it receives the
	   tick value as its only argument and defaults to "%d" */
	char const *format;
	/* Formatter function for specific needs; overrides [format]. It
	   receives the output buffer, its size in bytes and the tick value */
	void (*formatter)(char *str, size_t size, int32_t v);
};
/* struct plot: Description of a graph to render with plot()
   The caller fills the fields down to [grid]; the remaining ones are
   overwritten by plot(). */
struct plot
{
/* Plot area, including axes and ticks (screen coordinates) */
struct {
int x, y, w, h;
} area;
/* Data points and color: [data_x] and [data_y] each hold [data_len] values
   and consecutive points are joined by segments of color [color] */
int32_t *data_x;
int32_t *data_y;
int data_len;
int color;
/* Tick specification */
struct plot_ticks ticks_x;
struct plot_ticks ticks_y;
/* Grid specification */
struct {
/* NOTE(review): names suggest no grid / grid at primary ticks / grid at
   all ticks - confirm against plot() */
enum {
PLOT_NOGRID = 0,
PLOT_MAINGRID,
PLOT_FULLGRID,
} level;
/* Colors of the grid lines at primary and secondary ticks */
int primary_color;
int secondary_color;
/* When non-zero, only every other pixel of a grid line is drawn */
int dotted;
} grid;
/** Internal parameters, computed by plot() **/
/* Logical bounds: extent of the data on each axis; both ranges start out
   as [0,0] so they always contain 0 */
int min_x, max_x;
int min_y, max_y;
/* Concrete bounds: the part of [area] left for the curve once room for the
   tick labels has been set aside */
struct {
int x, y, w, h;
} graph;
};
/* plot(): Render a graph
   Computes the internal parameters of [plot] and draws it. Nothing is drawn
   if the x range or the y range of the data (each extended to contain 0) is
   reduced to a single value. */
void plot(struct plot *plot);
#endif /* GINTCTL_PLOT */

View File

@ -59,6 +59,7 @@ struct menu menu_perf = {
_("Performance", "Performance benchmarks"), .entries = {
{ "libprof basics", gintctl_perf_libprof },
{ "CPU and cache", gintctl_perf_cpucache },
{ "Interrupt stress", gintctl_perf_interrupts },
{ "Rendering functions", gintctl_perf_render },

39
src/perf/cpucache.S Normal file
View File

@ -0,0 +1,39 @@
.global _cpucache_nop1024
.global _cpucache_rounds
# Repeat a statement four times; five nested uses give 4^5 = 1024 copies
#define r4(x) x; x; x; x
# _cpucache_nop1024: Run a block of 1024 nop as many times as requested
# r4: Number of repeats (declared in C as taking an int)
# NOTE(review): a count of 0 is not special-cased; dt would wrap around and
# the block would run a very large number of times - confirm callers
_cpucache_nop1024:
r4(r4(r4(r4(r4(nop)))))
# Count one repeat; dt sets T when the counter reaches zero
dt r4
bt 1f
# Counter not exhausted: run the block again (nop fills the delay slot)
bra _cpucache_nop1024
nop
1: rts
nop
# _cpucache_rounds: Read through a buffer several times using 8-bit loads
# r4: Buffer to read from
# r5: Buffer size (multiple of 4)
# r6: Number of rounds
_cpucache_rounds:
# r1 = address of the last byte of the buffer, r0 = read pointer
mov r4, r1
add r5, r1
add #-1, r1
mov r4, r0
# Three post-incremented loads (values are discarded), then a signed test
# of the read pointer against the last byte
1: mov.b @r0+, r2
mov.b @r0+, r2
mov.b @r0+, r2
cmp/ge r1, r0
# NOTE(review): plain bf has no delay slot, so the load that follows runs
# once per round after the loop and each pass advances by three bytes. The
# size being a multiple of 4 hints that four loads per pass (bf.s) may have
# been intended - confirm against the upstream file before relying on
# per-byte figures
bf 1b
mov.b @r0+, r2
# Count one round; branching back to the entry point recomputes r0 and r1
# NOTE(review): a round count of 0 is not special-cased
dt r6
bf _cpucache_rounds
rts
nop

158
src/perf/cpucache.c Normal file
View File

@ -0,0 +1,158 @@
#include <gint/display.h>
#include <gint/keyboard.h>
#include <gint/clock.h>
#include <gintctl/perf.h>
#include <gintctl/util.h>
#include <gintctl/plot.h>
#include <libprof.h>
/* Size in bytes of the buffer traversed by the cache test; also the largest
   traversal length that is sampled */
#define CACHE_MAX 4096
/* Number of traversal lengths sampled, evenly spaced from 0 to CACHE_MAX */
#define SAMPLES 129
/* Assembler routines; see the register usage notes next to their code */
extern void cpucache_nop1024(int repeats);
extern void cpucache_rounds(uint8_t const *buf, size_t len, int rounds);
uint32_t test_nop4096(void)
{
prof_t prof = prof_make();
prof_enter(prof);
cpucache_nop1024(4);
prof_leave(prof);
/* Return the amount in Iphi cycles computed from the Pphi/4 measure */
clock_frequency_t const *freq = clock_freq();
uint64_t PLL_cycles = ((uint64_t)prof.elapsed * 4) * freq->Pphi_div;
return PLL_cycles / freq->Iphi_div;
}
uint32_t test_cpucache_rounds(uint8_t const *buf, size_t len, int rounds)
{
prof_t prof = prof_make();
prof_enter(prof);
cpucache_rounds(buf, len, rounds);
prof_leave(prof);
clock_frequency_t const *freq = clock_freq();
uint64_t PLL_cycles = ((uint64_t)prof.elapsed * 4) * freq->Pphi_div;
return PLL_cycles / freq->Iphi_div;
}
#ifdef FX9860G
/* tick_formatter(): Compact tick labels, in thousands
   Writes "0" for zero and the value divided by 1000 followed by "k" for
   anything else. */
static void tick_formatter(char *str, size_t size, int32_t v)
{
	if(v != 0)
	{
		snprintf(str, size, "%dk", v/1000);
		return;
	}

	snprintf(str, size, "0");
}
#endif
/* gintctl_perf_cpucache(): CPU speed and cache size
   Measures the time taken by 4096 nop instructions, then the time taken by
   16 traversals of a buffer for SAMPLES traversal lengths ranging from 0 to
   CACHE_MAX bytes. The results are displayed along with a plot of traversal
   time against length until EXIT is pressed. */
void gintctl_perf_cpucache(void)
{
	int key = 0;

	/* Test twice because this is sensitive to libprof initialization */
	uint32_t nop4096 = test_nop4096();
	nop4096 = test_nop4096();

	/* The buffer is left uninitialized on purpose: it is only handed to
	   the traversal routine, which discards everything it loads */
	uint8_t buf[CACHE_MAX];
	int32_t x_size[SAMPLES];
	int32_t y_time[SAMPLES];

	struct plot plotspec = {
		.data_x = x_size,
		.data_y = y_time,
		.data_len = SAMPLES,

#ifdef FX9860G
		.area = {
			.x = 0, .y = 18,
			.w = 128, .h = 44,
		},
		.color = C_BLACK,
		.ticks_x = {
			.multiples = 250,
			.subtick_divisions = 4,
			.formatter = tick_formatter,
		},
		.ticks_y = {
			.multiples = 20000,
			.subtick_divisions = 2,
			.formatter = tick_formatter,
		},
		.grid = {
			.level = PLOT_MAINGRID,
			.primary_color = C_BLACK,
			.dotted = 1,
		},
#endif

#ifdef FXCG50
		.area = {
			.x = 24, .y = 51,
			.w = 340, .h = 120,
		},
		.color = C_RED,
		.ticks_x = {
			.multiples = CACHE_MAX / 16,
			.subtick_divisions = 4,
		},
		.ticks_y = {
			.multiples = 10000,
			.subtick_divisions = 2,
		},
		.grid = {
			.level = PLOT_FULLGRID,
			.primary_color = C_RGB(20, 20, 20),
			.secondary_color = C_RGB(28, 28, 28),
			.dotted = 1,
		},
#endif
	};

	/* Sample traversal lengths in steps of CACHE_MAX / (SAMPLES-1) bytes;
	   the extent of the data is computed by plot() itself */
	for(int i = 0; i < SAMPLES; i++)
	{
		x_size[i] = (CACHE_MAX / (SAMPLES-1)) * i;
		y_time[i] = test_cpucache_rounds(buf, x_size[i], 16);
	}

	while(key != KEY_EXIT)
	{
		dclear(C_WHITE);
		row_title(_("CPU and cache", "CPU speed and cache size"));

#ifdef FX9860G
		row_print(2, 1, "4096 nop: %d Iϕ", nop4096);

		/* Render the plot with the small font, then restore the
		   previous one */
		extern font_t font_hexa;
		font_t const *old = dfont(&font_hexa);
		plot(&plotspec);
		dfont(old);
#endif

#ifdef FXCG50
		row_print(1, 1, "Time for 4096 nop (with overhead): %d Iphi",
			nop4096);
		row_print(2, 1, "Time needed to read a buffer multiple times:");

		plot(&plotspec);

		row_print(12, 1, "X: Size of buffer (bytes)");
		row_print(13, 1, "Y: Iphi cycles for 16 8-bit traversals");
		/* Hundredths of a cycle per byte, from the largest sample */
		row_print(14, 1, "Last samples suggests: %.2j Iphi/byte access",
			100 * y_time[SAMPLES-1] / x_size[SAMPLES-1] / 16);
#endif

		dupdate();
		key = getkey().key;
	}
}

239
src/plot.c Normal file
View File

@ -0,0 +1,239 @@
#include <gint/display.h>
#include <gint/std/stdio.h>
#include <gintctl/plot.h>
#include <gintctl/util.h>
/* struct tick_info: Ticks of one axis, as computed by layout_ticks() */
struct tick_info
{
/* Number of valid entries in the arrays below (at most 64) */
int count;
/* Logical value of each tick */
int32_t value[64];
/* Position of each tick on the screen, along its axis */
uint16_t px_pos[64];
/* Non-zero for primary ticks, zero for secondary ticks */
int8_t primary[64];
/* Largest width and height reported by dsize() over all tick texts */
int max_width;
int max_height;
};
/* graph_x(): Screen x coordinate of a logical x value
   Maps [min_x, max_x] linearly onto the graph.w columns starting at graph.x.
   The caller must ensure that min_x != max_x (plot() returns early
   otherwise). */
static int graph_x(struct plot *s, int v)
{
	/* The product of a pixel width and a data offset easily exceeds 32
	   bits for large data values, so compute it on 64 bits */
	int64_t span = (int64_t)s->max_x - s->min_x;
	int64_t offset = (int64_t)(s->graph.w - 1) * ((int64_t)v - s->min_x);

	return s->graph.x + (int)(offset / span);
}
/* graph_y(): Screen y coordinate of a logical y value
   Maps [min_y, max_y] linearly onto screen rows, with larger values higher
   up: min_y lands on row graph.y + graph.h. The caller must ensure that
   min_y != max_y (plot() returns early otherwise). */
static int graph_y(struct plot *s, int v)
{
	/* The product of a pixel height and a data offset easily exceeds 32
	   bits for large data values, so compute it on 64 bits */
	int64_t span = (int64_t)s->max_y - s->min_y;
	int64_t offset = (int64_t)(s->graph.h - 1) * ((int64_t)v - s->min_y);

	return s->graph.y + s->graph.h - (int)(offset / span);
}
/* layout_ticks(): Compute the ticks of one axis
   @s   Plot; its logical bounds and the graph bounds along the requested
        axis must already be set
   @xy  0 for the x axis, non-zero for the y axis
   @i   Receives tick values, screen positions, primary flags and the largest
        text dimensions. Left empty when the axis has no tick step. */
static void layout_ticks(struct plot *s, int xy, struct tick_info *i)
{
	struct plot_ticks *t = xy ? &s->ticks_y : &s->ticks_x;
	char str[256];
	char const *fmt = t->format ? t->format : "%d";
	int const capacity = sizeof(i->value) / sizeof(i->value[0]);

	i->count = 0;
	i->max_width = 0;
	i->max_height = 0;

	/* No ticks without a step; a negative step would walk away from the
	   maximum and only produce out-of-range ticks */
	if(t->multiples <= 0) return;

	int step = t->multiples;
	int subdivs = t->subtick_divisions > 0 ? t->subtick_divisions : 1;

	/* Start at the minimum value and work towards the maximum one */
	int min = xy ? s->min_y : s->min_x;
	int max = xy ? s->max_y : s->max_x;

	/* First multiple of the step at or above the minimum. The remainder
	   has the sign of the minimum, so subtracting it already rounds up
	   for negative values; only a positive remainder needs one more
	   step */
	int rem = min % step;
	int v = min - rem;
	if(rem > 0) v += step;

	while(v <= max && i->count < capacity)
	{
		i->primary[i->count] = (v % (step * subdivs) == 0);
		i->value[i->count] = v;
		i->px_pos[i->count] = xy ? graph_y(s, v) : graph_x(s, v);
		(i->count)++;

		if(t->formatter) t->formatter(str, sizeof str, v);
		else snprintf(str, sizeof str, fmt, v);

		/* Track the largest label so that plot() can reserve room */
		int w, h;
		dsize(str, NULL, &w, &h);
		if(w > i->max_width) i->max_width = w;
		if(h > i->max_height) i->max_height = h;

		v += step;
	}
}
void plot(struct plot *s)
{
struct tick_info tx, ty;
char str[256];
int tick_w, tick_h;
int32_t *data_x = s->data_x;
int32_t *data_y = s->data_y;
int axis_spacing = _(2,3);
int tick_length = _(2,4);
/* Determine the bounds of the rendering area */
s->min_x = 0;
s->max_x = 0;
s->min_y = 0;
s->max_y = 0;
for(int i = 0; i < s->data_len; i++)
{
if(data_x[i] < s->min_x) s->min_x = data_x[i];
if(data_x[i] > s->max_x) s->max_x = data_x[i];
if(data_y[i] < s->min_y) s->min_y = data_y[i];
if(data_y[i] > s->max_y) s->max_y = data_y[i];
}
if(s->min_x == s->max_x || s->min_y == s->max_y) return;
/* Determine the number, position and size of ticks */
font_t const *f = dfont(NULL);
dfont(f);
/* Start with vertical ticks */
s->graph.y = s->area.y;
s->graph.h = s->area.h - axis_spacing - f->line_height;
layout_ticks(s, 1, &ty);
/* Continue with horizontal ticks */
s->graph.x = s->area.x + axis_spacing + ty.max_width;
s->graph.w = s->area.w - (s->graph.x - s->area.x);
layout_ticks(s, 0, &tx);
/* Render grid, first secondary ticks, then primary ticks */
int dotted = s->grid.dotted;
for(int i = 0; i < ty.count; i++) if(!ty.primary[i])
{
int y = ty.px_pos[i];
for(int x = s->graph.x; x < s->graph.x + s->graph.w; x++)
{
if(dotted && !((x^y) & 1)) continue;
dpixel(x, y, s->grid.secondary_color);
}
}
for(int i = 0; i < tx.count; i++) if(!tx.primary[i])
{
int x = tx.px_pos[i];
for(int y = s->graph.y; y < s->graph.y + s->graph.h; y++)
{
if(dotted && !((x^y) & 1)) continue;
dpixel(x, y, s->grid.secondary_color);
}
}
for(int i = 0; i < ty.count; i++) if(ty.primary[i])
{
int y = ty.px_pos[i];
for(int x = s->graph.x; x < s->graph.x + s->graph.w; x++)
{
if(dotted && !((x^y) & 1)) continue;
dpixel(x, y, s->grid.primary_color);
}
}
for(int i = 0; i < tx.count; i++) if(tx.primary[i])
{
int x = tx.px_pos[i];
for(int y = s->graph.y; y < s->graph.y + s->graph.h; y++)
{
if(dotted && !((x^y) & 1)) continue;
dpixel(x, y, s->grid.primary_color);
}
}
/* Render ticks */
int horz_axis = s->graph.y + s->graph.h;
int vert_axis = s->graph.x;
for(int i = 0; i < ty.count; i++)
{
char const *format = s->ticks_y.format;
if(!format) format = "%d";
if(s->ticks_y.formatter)
s->ticks_y.formatter(str, 256, ty.value[i]);
else snprintf(str, 256, format, ty.value[i]);
dsize(str, NULL, NULL, &tick_h);
/* Try to center the text left of the tick, but move it up or
down if it overflows from the render region */
int y = ty.px_pos[i] - (tick_h >> 1);
if(y < s->graph.y)
y = s->graph.y;
if(y + tick_h > s->graph.y + s->graph.h)
y = s->graph.y + s->graph.h - tick_h;
dline(vert_axis, ty.px_pos[i], vert_axis + tick_length,
ty.px_pos[i], C_BLACK);
if(!ty.primary[i]) continue;
dtext_opt(vert_axis - axis_spacing, y, C_BLACK, C_NONE,
DTEXT_RIGHT, DTEXT_TOP, str);
}
for(int i = 0; i < tx.count; i++)
{
char const *format = s->ticks_y.format;
if(!format) format = "%d";
if(s->ticks_x.formatter)
s->ticks_x.formatter(str, 256, tx.value[i]);
else snprintf(str, 256, format, tx.value[i]);
dsize(str, NULL, &tick_w, NULL);
/* Try to center the text below the tick, but move it left or
right if it overflows from the render region */
int x = tx.px_pos[i] - ((tick_w+1) >> 1);
if(x < s->graph.x)
x = s->graph.x;
if(x+tick_w > s->graph.x + s->graph.w)
x = s->graph.x + s->graph.w - tick_w;
dline(tx.px_pos[i], horz_axis, tx.px_pos[i],
horz_axis - tick_length, C_BLACK);
if(!tx.primary[i]) continue;
dtext(x, horz_axis + axis_spacing, C_BLACK, str);
}
/* Render axes */
int x2 = s->area.x + s->area.w - 1;
dline(vert_axis, s->area.y, vert_axis, horz_axis, C_BLACK);
dline(vert_axis, horz_axis, x2, horz_axis, C_BLACK);
dline(vert_axis, s->area.y, x2, s->area.y, C_BLACK);
dline(x2, s->area.y, x2, horz_axis, C_BLACK);
/* Plot data */
int last_x = 0;
int last_y = 0;
for(int i = 0; i < s->data_len; i++)
{
int x = graph_x(s, s->data_x[i]);
int y = graph_y(s, s->data_y[i]);
if(i > 0) dline(last_x, last_y, x, y, s->color);
last_x = x;
last_y = y;
}
}