From b942bc5d194534cd881f52eebaa15eac54ee70a1 Mon Sep 17 00:00:00 2001 From: Lephe Date: Sun, 15 May 2022 19:16:03 +0100 Subject: [PATCH] clock: add overclock support on fx-CG 10/20/50 --- CMakeLists.txt | 1 + TODO | 6 +- include/gint/clock.h | 64 ++++++++++- include/gint/gint.h | 7 ++ include/gint/image.h | 2 +- include/gint/mpu/bsc.h | 2 - src/cpg/overclock.c | 241 +++++++++++++++++++++++++++++++++++++++++ src/kernel/world.c | 14 +++ src/tmu/tmu.c | 23 ++++ 9 files changed, 351 insertions(+), 9 deletions(-) create mode 100644 src/cpg/overclock.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a88ff5..8c1f166 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ configure_file(include/gint/config.h.in include/gint/config.h) set(SOURCES_COMMON # Clock Pulse Generator driver src/cpg/cpg.c + src/cpg/overclock.c # CPU driver src/cpu/atomic.c src/cpu/cpu.c diff --git a/TODO b/TODO index 446b151..0d37de9 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,5 @@ -gint 2.8 image things: -* Design the new image formats, inspired from libimg -* Replace the [profile] attribute with [format] - Extensions on existing code: +* clock: mono support * usb: add PC->calc reading, and interrupt pipes * fs: support RAM files * fs: support USB streams as generic file descriptors @@ -19,7 +16,6 @@ Extensions on existing code: * core: run destructors when a task-switch results in leaving the app Future directions: -* Integrate overclock management * Audio playback using TSWilliamson's libsnd method * Serial communication * Make fx9860g projects work out of the box on fxcg50 diff --git a/include/gint/clock.h b/include/gint/clock.h index 2dc3d6c..057a03d 100644 --- a/include/gint/clock.h +++ b/include/gint/clock.h @@ -61,7 +61,69 @@ void cpg_compute_freq(void); // Overclock //--- -/* TODO: All overclock */ +/* The following enumerations define the clock speed settings supported by + gint. 
These are always the settings from Ftune/Ptune, which are the most + widely tested and which gint treats as the standard. */ +enum { + /* Combinations of hardware settings that are none of Ftune's levels */ + CLOCK_SPEED_UNKNOWN = 0, + + /* Ftune's 5 default overclock levels. The main settings are listed below, + though many more are involved. + + On SH4 fx-9860G-like: + (Not supported yet) + + On the fx G-III series: + (Not supported yet) + + On fx-CG 10/20: + F1: CPU @ 58 MHz, BFC @ 29 MHz [Default speed] + F2: CPU @ 58 MHz, BFC @ 29 MHz [Improved memory speed] + F3: CPU @ 118 MHz, BFC @ 58 MHz [Faster than F2] + F4: CPU @ 118 MHz, BFC @ 118 MHz [Fastest bus option] + F5: CPU @ 191 MHz, BFC @ 94 MHz [Fastest CPU option] + + On fx-CG 50: + F1: CPU @ 116 MHz, BFC @ 58 MHz [Default speed] + F2: CPU @ 58 MHz, BFC @ 29 MHz [Clearly slower: F2 < F3 < F1] + F3: CPU @ 94 MHz, BFC @ 47 MHz [Clearly slower: F2 < F3 < F1] + F4: CPU @ 232 MHz, BFC @ 58 MHz [Fastest CPU option] + F5: CPU @ 189 MHz, BFC @ 94 MHz [Fastest bus option] */ + CLOCK_SPEED_F1 = 1, + CLOCK_SPEED_F2 = 2, + CLOCK_SPEED_F3 = 3, + CLOCK_SPEED_F4 = 4, + CLOCK_SPEED_F5 = 5, + + /* The default clock speed is always Ftune's F1 */ + CLOCK_SPEED_DEFAULT = CLOCK_SPEED_F1, +}; + +#ifdef FXCG50 +/* clock_get_speed(): Determine the current clock speed + + This function compares the current hardware state with the settings for each + speed level and returns the current one. If the hardware state does not + correspond to any of Ftune's settings, CLOCK_SPEED_UNKNOWN is returned. */ +int clock_get_speed(void); + +/* clock_set_speed(): Set the current clock speed + + This function sets the clock speed to the desired level. This is "the + overclock function", although depending on the model or settings it is also + the downclocking function. + + The process of changing clock speeds is non-trivial, requires waiting for + the DMA to finish its work and slightly affects running timers.
You should + avoid changing the clock speed constantly if not necessary. If this function + detects that the desired clock speed is already in use, it returns without + performing any change. + + Currently the clock speed is not reset during a world switch nor when + leaving the add-in. */ +void clock_set_speed(int speed); +#endif //--- // Sleep functions diff --git a/include/gint/gint.h b/include/gint/gint.h index ddbbc6c..14dad5f 100644 --- a/include/gint/gint.h +++ b/include/gint/gint.h @@ -41,6 +41,13 @@ int gint_world_switch(gint_call_t function); __attribute__((deprecated("Use gint_world_switch() instead"))) void gint_switch(void (*function)(void)); +/* gint_world_sync(): Synchronize asynchronous drivers + + This function waits for asynchronous tasks to complete by unbinding all + drivers. This is useful in certain hardware operations while remaining in + gint. */ +void gint_world_sync(void); + /* gint_osmenu(): Call the calculator's main menu This function safely invokes the calculator's main menu with gint_switch(). diff --git a/include/gint/image.h b/include/gint/image.h index ce6cff1..23d8df1 100644 --- a/include/gint/image.h +++ b/include/gint/image.h @@ -485,7 +485,7 @@ image_t *image_vflip_alloc(image_t const *src); top-left corner of the full output is actually rendered. 
Formats: RGB16, P8 - Size requirement: none (clipping through image_linear_opt settings) + Size requirement: none (clipping is performed) Supports in-place: No */ struct image_linear_map { diff --git a/include/gint/mpu/bsc.h b/include/gint/mpu/bsc.h index c32d0d2..6f65a0b 100644 --- a/include/gint/mpu/bsc.h +++ b/include/gint/mpu/bsc.h @@ -142,8 +142,6 @@ typedef volatile struct } GPACKED(4) sh7305_bsc_t; #define SH7305_BSC (*(sh7305_bsc_t *)0xfec10000) -#define SH7305_BSC_SDMR2 (*(uint8_t *)0xfec14000) -#define SH7305_BSC_SDMR3 (*(uint8_t *)0xfec15000) #ifdef __cplusplus } diff --git a/src/cpg/overclock.c b/src/cpg/overclock.c new file mode 100644 index 0000000..d5a6656 --- /dev/null +++ b/src/cpg/overclock.c @@ -0,0 +1,241 @@ +//--- +// gint:cpg:overclock - Clock speed control +// +// Most of the data in this file has been reused from Sentaro21's Ftune and +// Ptune utilities, which have long been the standard for overclocking CASIO +// calculators. +// See: http://pm.matrix.jp/ftune2e.html +// +// SlyVTT also contributed early testing on both the fx-CG 10/20 and fx-CG 50. 
+//--- + +#include +#include +#include +#include +#include + +#ifdef FXCG50 + +#define CPG SH7305_CPG +#define BSC SH7305_BSC + +#define PLL_32x 0b011111 +#define PLL_26x 0b011001 +#define PLL_16x 0b001111 +#define DIV_2 0 +#define DIV_4 1 +#define DIV_8 2 +#define DIV_16 3 +#define DIV_32 4 +#define WAIT18 0b1011 + +struct overclock_setting +{ + uint32_t FLLFRQ, FRQCR; + uint32_t CS0BCR, CS2BCR, CS3BCR, CS5aBCR; + uint32_t CS0WCR, CS2WCR, CS3WCR, CS5aWCR; +}; + +#define SDMR3_CL2 ((volatile uint8_t *)0xFEC15040) +#define SDMR3_CL3 ((volatile uint8_t *)0xFEC15060) + +static struct overclock_setting settings_cg50[5] = { + /* CLOCK_SPEED_F1 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = 0x0F011112, + .CS0BCR = 0x36DA0400, + .CS2BCR = 0x36DA3400, + .CS3BCR = 0x36DB4400, + .CS5aBCR = 0x17DF0400, + .CS0WCR = 0x000003C0, + .CS2WCR = 0x000003C0, + .CS3WCR = 0x000024D1, + .CS5aWCR = 0x000203C1 }, + /* CLOCK_SPEED_F2 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_16x<<24)+(DIV_4<<20)+(DIV_8<<12)+(DIV_8<<8)+DIV_8, + .CS0BCR = 0x24920400, + .CS2BCR = 0x24923400, + .CS3BCR = 0x24924400, + .CS5aBCR = 0x17DF0400, + .CS0WCR = 0x00000340, + .CS2WCR = 0x000003C0, + .CS3WCR = 0x000024D1, + .CS5aWCR = 0x000203C1 }, + /* CLOCK_SPEED_F3 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_26x<<24)+(DIV_4<<20)+(DIV_8<<12)+(DIV_8<<8)+DIV_8, + .CS0BCR = 0x24920400, + .CS2BCR = 0x24923400, + .CS3BCR = 0x24924400, + .CS5aBCR = 0x17DF0400, + .CS0WCR = 0x00000240, + .CS2WCR = 0x000003C0, + .CS3WCR = 0x000024D1, + .CS5aWCR = 0x000203C1 }, + /* CLOCK_SPEED_F4 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_32x<<24)+(DIV_2<<20)+(DIV_4<<12)+(DIV_8<<8)+DIV_16, + .CS0BCR = 0x24920400, + .CS2BCR = 0x24923400, + .CS3BCR = 0x24924400, + .CS5aBCR = 0x17DF0400, + .CS0WCR = 0x000002C0, + .CS2WCR = 0x000003C0, + .CS3WCR = 0x000024D1, + .CS5aWCR = 0x000203C1 }, + /* CLOCK_SPEED_F5 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_26x<<24)+(DIV_2<<20)+(DIV_4<<12)+(DIV_4<<8)+DIV_8, + .CS0BCR = 
0x24920400, + .CS2BCR = 0x24923400, + .CS3BCR = 0x24924400, + .CS5aBCR = 0x17DF0400, + .CS0WCR = 0x00000440, + .CS2WCR = 0x000003C0, + .CS3WCR = 0x000024D1, + .CS5aWCR = 0x000203C1 }, +}; + +static struct overclock_setting settings_cg20[5] = { + /* CLOCK_SPEED_F1 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = 0x0F102203, + .CS0BCR = 0x24920400, + .CS2BCR = 0x24923400, + .CS5aBCR = 0x15140400, + .CS0WCR = 0x000001C0, + .CS2WCR = 0x00000140, + .CS5aWCR = 0x00010240 }, + /* CLOCK_SPEED_F2 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_32x<<24)+(DIV_8<<20)+(DIV_16<<12)+(DIV_16<<8)+DIV_32, + .CS0BCR = 0x04900400, + .CS2BCR = 0x04903400, + .CS5aBCR = 0x15140400, + .CS0WCR = 0x00000140, + .CS2WCR = 0x000100C0, + .CS5aWCR = 0x00010240 }, + /* CLOCK_SPEED_F3 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_32x<<24)+(DIV_4<<20)+(DIV_8<<12)+(DIV_8<<8)+DIV_32, + .CS0BCR = 0x24900400, + .CS2BCR = 0x04903400, + .CS5aBCR = 0x15140400, + .CS0WCR = 0x000002C0, + .CS2WCR = 0x000201C0, + .CS5aWCR = 0x00010240 }, + /* CLOCK_SPEED_F4 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_32x<<24)+(DIV_4<<20)+(DIV_4<<12)+(DIV_4<<8)+DIV_32, + .CS0BCR = 0x44900400, + .CS2BCR = 0x04903400, + .CS5aBCR = 0x15140400, + .CS0WCR = 0x00000440, + .CS2WCR = 0x00040340, + .CS5aWCR = 0x00010240 }, + /* CLOCK_SPEED_F5 */ + { .FLLFRQ = 0x00004000 + 900, + .FRQCR = (PLL_26x<<24)+(DIV_2<<20)+(DIV_4<<12)+(DIV_4<<8)+DIV_16, + .CS0BCR = 0x34900400, + .CS2BCR = 0x04903400, + .CS5aBCR = 0x15140400, + .CS0WCR = 0x000003C0, + .CS2WCR = 0x000402C0, + .CS5aWCR = 0x00010240 }, +}; + +static struct overclock_setting *get_settings(void) +{ + if(gint[HWCALC] == HWCALC_FXCG50) + return settings_cg50; + if(gint[HWCALC] == HWCALC_PRIZM) + return settings_cg20; + return NULL; +} + +int clock_get_speed(void) +{ + struct overclock_setting *settings = get_settings(); + if(!settings) + return CLOCK_SPEED_UNKNOWN; + + for(int i = 0; i < 5; i++) { + struct overclock_setting *s = &settings[i]; + + if(CPG.FLLFRQ.lword == 
s->FLLFRQ + && CPG.FRQCR.lword == s->FRQCR + && BSC.CS0BCR.lword == s->CS0BCR + && BSC.CS2BCR.lword == s->CS2BCR + && BSC.CS3BCR.lword == s->CS3BCR + && BSC.CS5ABCR.lword == s->CS5aBCR + && BSC.CS0WCR.lword == s->CS0WCR + && BSC.CS2WCR.lword == s->CS2WCR + && BSC.CS3WCR.lword == s->CS3WCR + && BSC.CS5AWCR.lword == s->CS5aWCR) + return CLOCK_SPEED_F1 + i; + } + + return CLOCK_SPEED_UNKNOWN; +} + +void clock_set_speed(int level) +{ + if(level < CLOCK_SPEED_F1 || level > CLOCK_SPEED_F5) + return; + if(clock_get_speed() == level) + return; + + struct overclock_setting *settings = get_settings(); + if(!settings) + return; + + struct overclock_setting *s = &settings[level - CLOCK_SPEED_F1]; + uint32_t old_Pphi = clock_freq()->Pphi_f; + + /* Wait for asynchronous tasks to complete */ + gint_world_sync(); + + /* Disable interrupts during the change */ + cpu_atomic_start(); + + /* Set the clock settings */ + + BSC.CS0WCR.WR = WAIT18; + + CPG.FLLFRQ.lword = s->FLLFRQ; + CPG.FRQCR.lword = s->FRQCR; + CPG.FRQCR.KICK = 1; + while(CPG.LSTATS != 0) {} + + BSC.CS0BCR.lword = s->CS0BCR; + BSC.CS0WCR.lword = s->CS0WCR; + BSC.CS2BCR.lword = s->CS2BCR; + BSC.CS2WCR.lword = s->CS2WCR; + + if(gint[HWCALC] == HWCALC_FXCG50) { + BSC.CS3BCR.lword = s->CS3BCR; + BSC.CS3WCR.lword = s->CS3WCR; + + if(BSC.CS3WCR.A3CL == 1) + *SDMR3_CL2 = 0; + else + *SDMR3_CL3 = 0; + } + + BSC.CS5ABCR.lword = s->CS5aBCR; + BSC.CS5AWCR.lword = s->CS5aWCR; + + /* Determine the change in frequency for Pϕ and recompute CPG data */ + cpg_compute_freq(); + uint32_t new_Pphi = clock_freq()->Pphi_f; + + /* Update timers' TCNT and TCOR to match the new clock speed */ + void timer_rescale(uint32_t old_Pphi, uint32_t new_Pphi); + timer_rescale(old_Pphi, new_Pphi); + + cpu_atomic_end(); +} + +#endif diff --git a/src/kernel/world.c b/src/kernel/world.c index 0cd779d..87ba362 100644 --- a/src/kernel/world.c +++ b/src/kernel/world.c @@ -38,6 +38,20 @@ void gint_world_free(gint_world_t world) free(world); } +//--- +// 
Synchronization +//--- + +void gint_world_sync(void) +{ + /* Unbind all drivers, which waits for async tasks to complete */ + for(int i = gint_driver_count() - 1; i >= 0; i--) + { + gint_driver_t *d = &gint_drivers[i]; + if(d->unbind) d->unbind(); + } +} + //--- // World switch with driver state saves //--- diff --git a/src/tmu/tmu.c b/src/tmu/tmu.c index 8e57eb3..b2d7990 100644 --- a/src/tmu/tmu.c +++ b/src/tmu/tmu.c @@ -260,6 +260,29 @@ void timer_spinwait(int id) } } +//--- +// Overclock adjustment +//--- + +void timer_rescale(uint32_t old_Pphi, uint32_t new_Pphi_0) +{ + uint64_t new_Pphi = new_Pphi_0; + + for(int id = 0; id < 3; id++) + { + tmu_t *T = &TMU[id]; + /* Skip timers that are not running */ + if(T->TCNT == 0xffffffff && T->TCOR == 0xffffffff) + continue; + + /* For libprof: keep timers with max TCOR as they are */ + if(T->TCOR != 0xffffffff) { + T->TCOR = ((uint64_t)T->TCOR * new_Pphi) / old_Pphi; + } + T->TCNT = ((uint64_t)T->TCNT * new_Pphi) / old_Pphi; + } +} + //--- // Deprecated API //---