From e5abe03b89f544e4cf7f5fb5aa93a6d3d60ca73e Mon Sep 17 00:00:00 2001 From: Lephe Date: Thu, 17 Sep 2020 14:48:54 +0200 Subject: [PATCH] kernel: dynamic loading of GMAPPED functions to user RAM This commit introduces a large architectural change. Unlike previous models of the fx-9860G series, the G-III models have a new user RAM address different from 8801c000. The purpose of this change is to dynamically load GMAPPED functions to this address by querying the TLB, and call them through a function pointer whose address is determined when loading. Because of the overhead of using a function pointer in both assembly and C code, changes have been made to avoid GMAPPED functions altogether. Currently, only cpu_setVBR() and gint_inth_callback() are left, the second being used specifically to enable TLB misses when needed. * Add a .gint.mappedrel section for the function pointers holding addresses to GMAPPED functions; add function pointers for cpu_setVBR() and gint_inth_callback() * Move rram to address 0 instead of the hardcoded 0x8801c000 * Load GMAPPED functions at their linked address + the physical address user RAM is mapped to, and compute their function pointers * Remove the GMAPPED macro since no user function needs it anymore * Add section flags "ax" (code) or "aw" (data) to every custom .section in assembler code, as they default to unpredictable values that can cause the section to be marked NOLOAD by the linker * Update the main kernel, TMU, ETMU and RTC interrupt handlers to use the new indirect calling method This is made possible by new MMU functions giving direct access to the physical area behind any virtualized page. * Add an mmu_translate() function to query the TLB * Add an mmu_uram() function to access user RAM from P1 The exception catching mechanism has been modified to avoid the use of GMAPPED functions altogether. 
* Set SR.BL=0 and SR.IMASK=15 before calling exception catchers * Move gint_exc_skip() to normal text ROM * Also fix registers not being popped off the stack before a panic The timer drivers have also been modified to avoid GMAPPED functions. * Invoke timer_stop() through gint_inth_callback() and move it to ROM * Move and expand the ETMU driver to span 3 blocks at 0xd00 (ETMU4) * Remove the timer_clear() function by inlining it into the ETMU handler (TCR is provided within the storage block of each timer) * Also split src/timer/inth.s into src/timer/inth-{tmu,etmu}.s Additionally, VBR addresses are now determined at runtime to further reduce hardcoded memory layout addresses in the linker script. * Determine fx-9860G VBR addresses dynamically from mmu_uram() * Determine fx-CG 50 VBR addresses dynamically from mmu_uram() * Remove linker symbols for VBR addresses Comments and documentation have been updated throughout the code to reflect the changes. --- TODO | 3 +- fx9860g.ld | 50 ++++---- fxcg50.ld | 45 ++----- include/gint/defs/attributes.h | 3 - include/gint/exc.h | 11 +- include/gint/gint.h | 22 +++- include/gint/mmu.h | 27 ++++ src/kernel/cpu.h | 5 +- src/kernel/cpu.s | 10 +- src/kernel/exch.c | 2 +- src/kernel/exch.s | 24 +++- src/kernel/inth.S | 10 +- src/kernel/kernel.c | 26 ++-- src/kernel/mmu.c | 52 +++++++- src/kernel/start.c | 53 +++++--- src/kernel/tlbh.S | 2 +- src/kernel/vbr.h | 11 -- src/rtc/inth.s | 8 +- src/tmu/inth-etmu.s | 104 +++++++++++++++ src/tmu/inth-tmu.s | 121 ++++++++++++++++++ src/tmu/inth.s | 227 --------------------------------- src/tmu/tmu.c | 49 +++---- 22 files changed, 473 insertions(+), 392 deletions(-) create mode 100644 src/tmu/inth-etmu.s create mode 100644 src/tmu/inth-tmu.s delete mode 100644 src/tmu/inth.s diff --git a/TODO b/TODO index 5c9a794..b455488 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,6 @@ Extensions on existing code: -* tmu: make interrupt handlers more elegant +* kernel: group linker script symbols in a single 
header file +* kernel: be consistent about *tlb_mapped_memory() in hw_detect() * bopti: try to display fullscreen images with TLB access + DMA on fxcg50 * dma: fx9860g support (need to switch it on and update the Makefile) * core: try to leave add-in without reset in case of panic diff --git a/fx9860g.ld b/fx9860g.ld index 9a85e09..9a37ef7 100644 --- a/fx9860g.ld +++ b/fx9860g.ld @@ -15,14 +15,20 @@ MEMORY { /* Userspace mapping of the add-in (G1A header takes 0x200 bytes) */ rom (rx): o = 0x00300200, l = 500k - /* This is mapped to RAM; 8k on SH3, 32k on SH4. Since gint uses the - last 2k for its VBR and mapped code, 6k are left */ + + /* User RAM is mapped at 0x08100000 through MMU; 8k on SH3, 32k on SH4. + Currently gint provides access to 8k, with three blocks: + * 0x200 bytes for text accessed without the TLB when SR.BL=1, linked + into the rram region below, then loaded dynamically + * 6k for user content + * 0x600 bytes for the VBR space, also without MMU */ ram (rw): o = 0x08100200, l = 6k - /* This is the VBR space. The [ram] region is mapped to 8801c000. This - region is used at load time and mentioned here for completeness */ - vbr (rwx): o = 0x8801da00, l = 1536 - /* These first bits of RAM are used for gint's perma-mapped code */ - rram (rwx): o = 0x8801c000, l = 512 + + /* This region represents the first block of user RAM. Linker arranges + sections as if linked to address 0, then gint's runtime determines + the location and relocates references (which are manual) */ + rram (rwx): o = 0x00000000, l = 512 + /* On-chip IL memory */ ilram (rwx): o = 0xe5200000, l = 4k /* On-chip X and Y memory */ @@ -141,6 +147,10 @@ SECTIONS _ldata = LOADADDR(.data); _rdata = . ; + _lreloc = . ; + *(.gint.mappedrel); + _sreloc = ABSOLUTE(. - _lreloc); + *(.data .data.*) *(D) *(.gint.data.sh3) @@ -205,24 +215,16 @@ SECTIONS /* - ** RRAM sections + ** Relocated no-MMU RAM sections */ - /* VBR address: let's just start at the beginning of the RRAM area. 
- There's an unused 0x100-byte gap at the start of the VBR space. - The VBR space is already a large block (~2 kiB), so I'm cutting off - the gap to spare some memory */ - _gint_vbr_fx9860g = ORIGIN(vbr) - 0x100; - . = ORIGIN(rram); - /* Code that must remain permanently mapped (.gint.mapped) */ + /* Code that must remain permanently mapped (.gint.mapped); relocated + to start of user RAM at startup, accessed through P1 */ .gint.mapped ALIGN(4) : ALIGN(4) { _lgmapped = LOADADDR(.gint.mapped); - _rgmapped = . ; - *(.gint.mapped) - . = ALIGN(16); } > rram AT> rom @@ -231,21 +233,19 @@ SECTIONS /* - ** Other sections + ** Unused sections */ - /* Unwanted sections going to meet Dave Null: - - Debug sections, often come out of libgcc - - Java classes registration (why is there any of this here?) - - Asynchronous unwind tables: no C++ exception handling for now ^^ - - Comments or anything the compiler might put in its assembler - - A stack section sometimes generated for build/version.o */ /DISCARD/ : { + /* Debug sections (often from libgcc) */ *(.debug_info .debug_abbrev .debug_loc .debug_aranges .debug_ranges .debug_line .debug_str) + /* Java class registration (why are they even here?!) */ *(.jcr) + /* Asynchronous unwind tables: no C++ exception handling */ *(.eh_frame_hdr) *(.eh_frame) + /* Comments or anything the compiler might generate */ *(.comment) } } diff --git a/fxcg50.ld b/fxcg50.ld index d5edfd2..5ce4b1e 100644 --- a/fxcg50.ld +++ b/fxcg50.ld @@ -15,14 +15,9 @@ MEMORY /* Userspace mapping of the add-in (without G3A header) */ rom (rx): o = 0x00300000, l = 2M /* Static RAM; stack grows down from the end of this region. - The first 0x2000 bytes are reserved by gint, see below */ + The first 5k (0x1400 bytes) are reserved by gint for the VBR space, + which is loaded dynamically and accessed through P1 */ ram (rw): o = 0x08101400, l = 507k - /* gint's VBR space at the start of the user stack on fx-CG 50 and on - fx-CG 20. 
This address needs not be determined now because VBR code - is position-independent and stored in ROM so the linker doesn't even - need to know the value */ - vbr50 (rwx): o = 0x8c160000, l = 5k - vbr20 (rwx): o = 0x88160000, l = 5k /* On-chip IL memory */ ilram (rwx): o = 0xe5200000, l = 4k /* On-chip X and Y memory */ @@ -127,6 +122,8 @@ SECTIONS _rdata = . ; *(.data .data.*) + /* References to mapped code - no relocation needed */ + *(.gint.mappedrel) . = ALIGN(16); } > ram AT> rom @@ -177,27 +174,10 @@ SECTIONS _sxram = SIZEOF(.xram); _syram = SIZEOF(.yram); - - - /* - ** gint-related sections (VBR space, .gint.data and .gint.bss) - */ - - /* VBR address: let's just start at the beginning of the RAM area. - There's an unused 0x100-byte gap at the start of the VBR space. - The VBR space is already a large block (> 2 kiB), so I'm cutting off - the gap to spare some memory */ - _gint_vbr_fxcg50 = ORIGIN(vbr50) - 0x100; - _gint_vbr_fxcg20 = ORIGIN(vbr20) - 0x100; - - . = ORIGIN(ram) + _sbss + _sdata; - /* gint's uninitialized BSS section, going to static RAM. All the large data arrays will be located here */ .gint.bss (NOLOAD) : { - /* Since it's uninitialized, the location doesn't matter */ *(.gint.bss) - . = ALIGN(16); } > ram :NONE @@ -206,24 +186,21 @@ SECTIONS /* - ** Other sections + ** Unused sections */ - /* Unwanted sections going to meet Dave Null: - - SH3-only data sections - - Debug sections, often come out of libgcc - - Java classes registration (why is there any of this here?) - - Asynchronous unwind tables: no C++ exception handling for now ^^ - - Comments or anything the compiler might put in its assembler */ /DISCARD/ : { - *(.gint.data.sh3) - *(.gint.bss.sh3) - + /* SH3-only data sections */ + *(.gint.data.sh3 .gint.bss.sh3) + /* Debug sections (often from libgcc) */ *(.debug_info .debug_abbrev .debug_loc .debug_aranges .debug_ranges .debug_line .debug_str) + /* Java class registration (why are they even here?!) 
*/ *(.jcr) + /* Asynchronous unwind tables: no C++ exception handling */ *(.eh_frame_hdr) *(.eh_frame) + /* Comments or anything the compiler might generate */ *(.comment) } } diff --git a/include/gint/defs/attributes.h b/include/gint/defs/attributes.h index db31c22..aa2f9c7 100644 --- a/include/gint/defs/attributes.h +++ b/include/gint/defs/attributes.h @@ -16,9 +16,6 @@ #define GILRAM __attribute__((section(".ilram"))) #define GXRAM __attribute__((section(".xram"))) #define GYRAM __attribute__((section(".yram"))) -/* Objects that must remain mapped; placed in "real RAM" (some P1 area) on - fx9860g, and ILRAM on fxcg50 */ -#define GMAPPED __attribute__((section(".gint.mapped"))) /* Unused parameters or variables */ #define GUNUSED __attribute__((unused)) diff --git a/include/gint/exc.h b/include/gint/exc.h index 46d20d8..f3ef7bc 100644 --- a/include/gint/exc.h +++ b/include/gint/exc.h @@ -28,16 +28,13 @@ void gint_panic_set(GNORETURN void (*panic)(uint32_t code)); Sets up an exception-catching function. If an exception occurs, before a panic is raised, the exception-catching function is executed with interrupt - mode and is given a chance to handle the exception. Passing NULL disables - this feature. + disabled and is given a chance to handle the exception. Passing NULL + disables this feature. The exception-catching function can do anything that does not use interrupts or cause an exception, such as logging the exception or any other useful - mechanism. TLB misses count as exceptions and are disabled, so this function - must absolutely be mapped *before* it runs! The only real way to ensure this - is to have it mapped at all times using the GMAPPED attribute. Note that - GMAPPED puts the function in a generally small memory region so you should - defer as much handling as possible until after the exception is caught. + mechanism. TLB misses are enabled. In general, this function should be as + short as possible. 
What happens next depends on the return value: * If it returns 0, the exception is considered handled and execution diff --git a/include/gint/gint.h b/include/gint/gint.h index 65907e0..0b2649c 100644 --- a/include/gint/gint.h +++ b/include/gint/gint.h @@ -110,15 +110,31 @@ void *gint_inthandler(int event_code, void const *handler, size_t size); Calls the specified function with the given argument after saving the user context, enabling interrupts and going to user bank. This function is used to call user code from interrupt handlers, typically from timer or RTC - callbacks. + callbacks. You can think of it as a way to escape the SR.BL=1 environment to + safely call back virtualized and interrupt-based functions during interrupt + handling. It is not safe to call from C code in any capacity and is mentioned here only for documentation purposes; you should really only call this from - an interrupt handler's assembler code. + an interrupt handler's assembler code, typically like this: + + mov.l .callback, r0 + mov.l @r0, r0 # because function pointer + + mov , r4 + mov , r5 + jsr @r0 + nop + + .callback: + .long _gint_inth_callback + + This function is loaded to a platform-dependent address determined at + runtime; call it indirectly through the function pointer. @callback Callback function, may take no argument in practice @arg Argument Returns the return value of the callback. */ -int gint_inth_callback(int (*function)(void *arg), void *arg); +int (*gint_inth_callback)(int (*function)(void *arg), void *arg); #endif /* GINT_GINT */ diff --git a/include/gint/mmu.h b/include/gint/mmu.h index b799c7a..fbb7dab 100644 --- a/include/gint/mmu.h +++ b/include/gint/mmu.h @@ -7,6 +7,27 @@ #include +//--- +// Unified interface +//--- + +/* mmu_translate(): Get the physical address for a virtual page + Looks for a translation with the specified virtual address as start, and + returns the corresponding physical address. Only works if the argument is + page-aligned. 
+ + @virtual Virtual page address + Returns the page's physical address, or -1 if not mapped. */ +uint32_t mmu_translate(uint32_t page); + +/* mmu_uram(): Get pointer to physical start of user RAM + + Returns a pointer to the physical location behind 0x08100000. The physical + location can be used to access without the TLB, which is useful when + interrupts are processed with SR.BL=1. However, the location is highly + platform-dependent. */ +void *mmu_uram(void); + //--- // SH7705 TLB //--- @@ -34,6 +55,9 @@ tlb_data_t const *tlb_data(uint way, uint E); @ram Pointer to amount of mapped RAM */ void tlb_mapped_memory(uint32_t *p_rom, uint32_t *p_ram); +/* tlb_translate(): Get the physical address for a virtual page */ +uint32_t tlb_translate(uint32_t page); + //--- // SH7305 Unified TLB //--- @@ -59,4 +83,7 @@ utlb_data_t const *utlb_data(uint E); @ram Pointer to amount of mapped RAM */ void utlb_mapped_memory(uint32_t *rom, uint32_t *ram); +/* utlb_translate(): Get the physical address for a virtual page */ +uint32_t utlb_translate(uint32_t page); + #endif /* GINT_MMU */ diff --git a/src/kernel/cpu.h b/src/kernel/cpu.h index 03241a8..22ef2e2 100644 --- a/src/kernel/cpu.h +++ b/src/kernel/cpu.h @@ -14,11 +14,14 @@ configure the INTC in a way that is safe for the new VBR controller, including disabling all interrupts that it cannot handle. + This function is loaded to a platform-dependent address determined at + runtime; call it indirectly through the function pointer. + @vbr New VBR address @conf_intc Configuration function @arg Additional argument for conf_intc Returns the previous VBR address. 
*/ -uint32_t cpu_setVBR(uint32_t vbr, void (*conf_intc)(int arg), int arg); +uint32_t (*cpu_setVBR)(uint32_t vbr, void (*conf_intc)(int arg), int arg); /* cpu_setCPUOPM(): Change the CPU Operation Mode register diff --git a/src/kernel/cpu.s b/src/kernel/cpu.s index 4b8aeb5..3bb17a6 100644 --- a/src/kernel/cpu.s +++ b/src/kernel/cpu.s @@ -6,10 +6,10 @@ .global _cpu_setCPUOPM .global _cpu_getCPUOPM -.section .gint.mapped - /* cpu_setVBR(): Change VBR address */ -_cpu_setVBR: + +.section .gint.mapped, "ax" +_cpu_setVBR_reloc: mov.l r8, @-r15 sts.l pr, @-r15 stc.l sr, @-r15 @@ -37,6 +37,10 @@ _cpu_setVBR: rts mov.l @r15+, r8 +.section .gint.mappedrel, "aw" +_cpu_setVBR: + .long _cpu_setVBR_reloc + .text /* cpu_setCPUOPM(): Change the CPU Operation Mode register */ diff --git a/src/kernel/exch.c b/src/kernel/exch.c index cdecc6f..2098d7b 100644 --- a/src/kernel/exch.c +++ b/src/kernel/exch.c @@ -142,7 +142,7 @@ void gint_exc_catch(int (*handler)(uint32_t code)) } /* gint_exc_skip(): Skip pending exception instructions */ -GMAPPED void gint_exc_skip(int instructions) +void gint_exc_skip(int instructions) { uint32_t spc; diff --git a/src/kernel/exch.s b/src/kernel/exch.s index d7937ee..ccd97d5 100644 --- a/src/kernel/exch.s +++ b/src/kernel/exch.s @@ -1,5 +1,5 @@ .global _gint_exch -.section .gint.exch +.section .gint.exch, "ax" .align 4 _gint_exch: @@ -8,6 +8,7 @@ _gint_exch: sts.l mach, @-r15 sts.l macl, @-r15 mov.l r8, @-r15 + mov.l r9, @-r15 /* Get the first word of the gint hardware array (HWMPU). 
If it has the last bit set, we're SH3 */ @@ -25,9 +26,19 @@ catch: tst r0, r0 bt panic - /* Leave if the catcher returns zero */ + /* Set BL=0, IMASK=15 */ + stc sr, r9 + mov.l .SR_set_IMASK, r1 + or r9, r1 + mov.l .SR_clear_BL, r2 + and r2, r1 + ldc r1, sr + + /* Call the catcher and leave if it returns zero (exception handled) */ jsr @r0 mov.l @r8, r4 + + ldc r9, sr tst r0, r0 bt end @@ -36,7 +47,11 @@ panic: mov.l @r8, r4 ldc r4, r4_bank + mov.l @r15+, r9 mov.l @r15+, r8 + lds.l @r15+, macl + lds.l @r15+, mach + ldc.l @r15+, gbr lds.l @r15+, pr /* Here we switch banks so r0..r7 change meaning! */ @@ -49,6 +64,7 @@ panic: nop end: + mov.l @r15+, r9 mov.l @r15+, r8 lds.l @r15+, macl lds.l @r15+, mach @@ -69,3 +85,7 @@ end: .long _gint_exc_catcher .panic: .long _gint_exc_panic +.SR_set_IMASK: + .long (0xf << 4) +.SR_clear_BL: + .long ~(1 << 28) diff --git a/src/kernel/inth.S b/src/kernel/inth.S index e5656a7..7593240 100644 --- a/src/kernel/inth.S +++ b/src/kernel/inth.S @@ -193,8 +193,6 @@ _inth_remap: switches back to user bank and executes the callback. It does not save other registers (pr/mach/macl/gbr) which are managed by the handler entry. */ -.section .gint.mapped - /* gint_inth_callback() Calls the specified function with the given argument after saving the user context, enabling interrupts and going to user bank. @@ -202,7 +200,9 @@ _inth_remap: @r4 Callback function (volatile void * -> int) @r5 Argument (volatile void *) Returns the return value of the callback (int). 
*/ -_gint_inth_callback: + +.section .gint.mapped, "ax" +_gint_inth_callback_reloc: stc.l r0_bank, @-r15 stc.l r1_bank, @-r15 stc.l r2_bank, @-r15 @@ -283,3 +283,7 @@ _gint_inth_callback: .long (0xf << 4) .gint: .long _gint + +.section .gint.mappedrel, "aw" +_gint_inth_callback: + .long _gint_inth_callback_reloc diff --git a/src/kernel/kernel.c b/src/kernel/kernel.c index ad93f2b..455d891 100644 --- a/src/kernel/kernel.c +++ b/src/kernel/kernel.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "cpu.h" @@ -105,14 +106,15 @@ static void kinit_cpu(void) /* kinit(): Install and start gint */ void kinit(void) { - /* VBR address, provided by the linker script */ #ifdef FX9860G - gint_ctx.VBR = (uint32_t)&gint_vbr_fx9860g; + /* VBR is loaded 0x600 bytes before end of the user RAM (0x100 bytes at + the start of the VBR space are unused) */ + gint_ctx.VBR = (uint32_t)mmu_uram() + 0x1a00 - 0x100; #endif + #ifdef FXCG50 - gint_ctx.VBR = (gint[HWCALC] == HWCALC_FXCG50) - ? (uint32_t)&gint_vbr_fxcg50 - : (uint32_t)&gint_vbr_fxcg20; + /* VBR is loaded at the start of the user RAM */ + gint_ctx.VBR = (uint32_t)mmu_uram(); #endif /* Event handler entry points */ @@ -128,7 +130,7 @@ void kinit(void) /* Take control of the VBR and roll! 
*/ drivers_wait(); - sys_ctx.VBR = cpu_setVBR(gint_ctx.VBR, drivers_save_and_init, 0); + sys_ctx.VBR = (*cpu_setVBR)(gint_ctx.VBR, drivers_save_and_init, 0); } /* Due to dire space restrictions on SH3, event codes that are translated to @@ -151,9 +153,9 @@ static const uint16_t sh3_vbr_map[] = { 0x440, /* TMU2 underflow */ 0x460, /* (gint custom: TMU helper) */ 0x9e0, /* ETMU0 underflow */ - 0xc20, /* ETMU1 underflow (used as helper on SH3) */ - 0xc40, /* ETMU2 underflow (used as helper on SH3) */ - 0xc60, /* (gint custom: ETMU helper) */ + 0xd00, /* ETMU4 underflow (used as helper on SH3) */ + 0xd20, /* (gint custom: ETMU helper) */ + 0xd40, /* (gint custom: ETMU helper) */ 0xaa0, /* RTC Periodic Interrupt */ 1, /* (Filler to maintain the gap between 0xaa0 and 0xae0) */ 0xae0, /* (gint custom: RTC helper) */ @@ -202,13 +204,13 @@ void gint_switch(void (*function)(void)) { /* Switch from gint to the OS after a short wait */ drivers_wait(); - cpu_setVBR(sys_ctx.VBR, drivers_switch, 1); + (*cpu_setVBR)(sys_ctx.VBR, drivers_switch, 1); if(function) function(); /* Then switch back to gint once the OS finishes working */ drivers_wait(); - cpu_setVBR(gint_ctx.VBR, drivers_switch, 0); + (*cpu_setVBR)(gint_ctx.VBR, drivers_switch, 0); } /* kquit(): Quit gint and give back control to the system */ @@ -217,5 +219,5 @@ void kquit(void) /* Wait for hardware tasks then restore all of the drivers' state and return the VBR space to the OS */ drivers_wait(); - cpu_setVBR(sys_ctx.VBR, drivers_restore, 0); + (*cpu_setVBR)(sys_ctx.VBR, drivers_restore, 0); } diff --git a/src/kernel/mmu.c b/src/kernel/mmu.c index 733861d..1c25c27 100644 --- a/src/kernel/mmu.c +++ b/src/kernel/mmu.c @@ -5,6 +5,23 @@ #include #include +//--- +// Unified interface +//--- + +/* mmu_translate(): Get the physical address for a virtual page */ +uint32_t mmu_translate(uint32_t page) +{ + return isSH3() ? 
tlb_translate(page) : utlb_translate(page); +} + +/* mmu_uram(): Get pointer to physical start of user RAM */ +void *mmu_uram(void) +{ + /* Use P1 access */ + return (void *)(mmu_translate(0x08100000) | 0x80000000); +} + //--- // SH7705 TLB //--- @@ -53,6 +70,24 @@ void tlb_mapped_memory(uint32_t *p_rom, uint32_t *p_ram) gint[HWURAM] = ram; } +/* tlb_translate(): Get the physical address for a virtual page */ +uint32_t tlb_translate(uint32_t page) +{ + for(int way = 0; way < 4; way++) + for(int E = 0; E < 32; E++) + { + const tlb_addr_t *addr = tlb_addr(way, E); + const tlb_data_t *data = tlb_data(way, E); + if(!addr->V || !data->V) continue; + + uint32_t src; + if(data->SZ) src = (((addr->VPN >> 2) | E) << 12); + else src = (addr->VPN | (E << 2)) << 10; + + if(src == page) return data->PPN << 10; + } + return -1; +} #endif //--- @@ -84,7 +119,8 @@ void utlb_mapped_memory(uint32_t *p_rom, uint32_t *p_ram) const utlb_data_t *data = utlb_data(E); if(!addr->V || !data->V) continue; - /* Magic formula to get the size without using an array */ + /* Magic formula to get the size without using an array since + this code is used even before global data is initialized */ int sz = ((data->SZ1 << 1) | data->SZ2) << 3; int size = 1 << ((0x14100c0a >> sz) & 0xff); @@ -98,3 +134,17 @@ void utlb_mapped_memory(uint32_t *p_rom, uint32_t *p_ram) gint[HWURAM] = ram; } + +/* utlb_translate(): Get the physical address for a virtual page */ +uint32_t utlb_translate(uint32_t page) +{ + for(int E = 0; E < 64; E++) + { + const utlb_addr_t *addr = utlb_addr(E); + const utlb_data_t *data = utlb_data(E); + if(!addr->V || !data->V) continue; + + if((uint32_t)addr->VPN << 10 == page) return data->PPN << 10; + } + return -1; +} diff --git a/src/kernel/start.c b/src/kernel/start.c index 344d9d1..59aa165 100644 --- a/src/kernel/start.c +++ b/src/kernel/start.c @@ -26,7 +26,8 @@ extern uint32_t sbss, rbss; /* User's BSS section */ #ifdef FX9860G extern uint32_t - lgmapped, sgmapped, rgmapped; /* 
Permanently mapped functions */ + lgmapped, sgmapped, /* Permanently mapped functions */ + lreloc, sreloc; /* Relocatable references */ #endif /* Constructor and destructor arrays */ @@ -60,7 +61,7 @@ static void regcpy(uint32_t * restrict l, int32_t s, uint32_t * restrict r) s -= 16; } } -#define regcpy(l, s, r) regcpy(&l, (int32_t)&s, &r) +#define regcpy(l, s, r) regcpy(l, (int32_t)s, r) /* regclr(): Clear a memory region using symbol information @r Source pointer (base address) @@ -76,7 +77,7 @@ static void regclr(uint32_t *r, int32_t s) s -= 16; } } -#define regclr(r, s) regclr(&r, (int32_t)&s) +#define regclr(r, s) regclr(r, (int32_t)s) /* callarray(): Call an array of functions (constructors or destructors) @f First element of array @@ -98,30 +99,41 @@ int start(int isappli, int optnum) Do not disturb the operating system. - gint will silently run in an isolated part of the memory (fx9860g) - or at the start of the RAM (fxcg50). The kernel will redirect - interrupts and load its own drivers, so we can't rely too much on - the system. Ladies and gentlemen, let's have fun! ;D */ + gint loads its important code and data at the start of the user RAM + area. The kernel redirects interrupts and uses its own drivers, so + we can't rely too much on syscalls. Ladies and gentlemen, let's have + fun! ;D */ /* For now, we use the system's memory mapper for ROM. We'll still do it later in our TLB miss handler once we're installed. RAM is always - fully mapped, but we need to initialize it. We also need to do some - hardware detection because old fx9860g models have a different - processor with some incompatible features. */ + fully mapped, but we need to initialize it. We also need to perform + hardware detection because there are many models and emulators with + varying processor, peripherals, and configuration. 
*/ - /* Detect architecture; this will tell SH3 from SH4 on fx9860g */ + /* Detect hardware; this will mainly tell SH3 from SH4 on fx-9860G */ hw_detect(); /* Load data sections and wipe the bss section. This has to be done first for static and global variables to be initialized */ - regcpy(ldata, sdata, rdata); + regcpy(&ldata, &sdata, &rdata); + regcpy(&lilram, &silram, &rilram); + regcpy(&lxram, &sxram, &rxram); + regcpy(&lyram, &syram, &ryram); + regclr(&rbss, &sbss); + #ifdef FX9860G - regcpy(lgmapped, sgmapped, rgmapped); + /* Copy permanently-mapped code to start of user RAM (on fx-CG 50 it + is loaded along ILRAM contents) */ + void *rgmapped = mmu_uram(); + regcpy(&lgmapped, &sgmapped, rgmapped); + + /* Relocate references to this code */ + uint32_t volatile *fixups = &lreloc; + for(uint i = 0; i < (uint32_t)&sreloc / 4; i++) + { + fixups[i] += (uint32_t)rgmapped; + } #endif - regcpy(lilram, silram, rilram); - regcpy(lxram, sxram, rxram); - regcpy(lyram, syram, ryram); - regclr(rbss, sbss); /* Install gint, switch VBR and initialize drivers */ kinit(); @@ -137,16 +149,15 @@ int start(int isappli, int optnum) callarray(&bctors, &ectors); - int ret; + int rc = 1; while(1) { - ret = main(isappli, optnum); + rc = main(isappli, optnum); if(!gint_restart) break; gint_osmenu(); } callarray(&bdtors, &edtors); - /* Before leaving the application, we need to clean everything we changed to hardware settings and peripheral modules. The OS is bound to be confused (and hang, or crash, or any other kind of giving up) @@ -155,5 +166,5 @@ int start(int isappli, int optnum) /* Unload gint and give back control to the system. 
Driver settings will be restored while interrupts are disabled */ kquit(); - return ret; + return rc; } diff --git a/src/kernel/tlbh.S b/src/kernel/tlbh.S index f0b325c..ff51acd 100644 --- a/src/kernel/tlbh.S +++ b/src/kernel/tlbh.S @@ -1,5 +1,5 @@ .global _gint_tlbh -.section .gint.tlbh +.section .gint.tlbh, "ax" .align 4 _gint_tlbh: diff --git a/src/kernel/vbr.h b/src/kernel/vbr.h index 574c043..dc0c1a8 100644 --- a/src/kernel/vbr.h +++ b/src/kernel/vbr.h @@ -5,17 +5,6 @@ #ifndef GINT_CORE_VBR #define GINT_CORE_VBR -#include - -/* VBR addresses from the linker script */ -#ifdef FX9860G -extern char gint_vbr_fx9860g[]; -#endif -#ifdef FXCG50 -extern char gint_vbr_fxcg50[]; -extern char gint_vbr_fxcg20[]; -#endif - /* The kernel's interrupt and exception handlers' entry points */ void gint_exch(void); void gint_tlbh(void); diff --git a/src/rtc/inth.s b/src/rtc/inth.s index 5c067d8..4c6ec0e 100644 --- a/src/rtc/inth.s +++ b/src/rtc/inth.s @@ -15,14 +15,12 @@ _inth_rtc_pri: /* Invoke the callback function with its argument */ sts.l pr, @-r15 mov.l .gint_inth_callback, r0 + mov.l @r0, r0 mov.l 1f, r4 mov.l 2f, r5 jsr @r0 nop - /* Save the return value */ - mov r0, r3 - /* Jump to another gate to finish the work: - 0xc is the size of storage below - 0x20 is the size of the gap before next gate (alarm interrupt) */ @@ -38,6 +36,9 @@ _inth_rtc_pri: _inth_rtc_pri_helper: + /* Save the return value */ + mov r0, r3 + .clear: /* Clear the interrupt flag */ mov.l .RCR2, r1 @@ -57,6 +58,5 @@ _inth_rtc_pri_helper: lds.l @r15+, pr rts nop - nop .RCR2: .long 0xa413fede /* RCR2 address, edited at startup on SH3 */ diff --git a/src/tmu/inth-etmu.s b/src/tmu/inth-etmu.s new file mode 100644 index 0000000..1695f28 --- /dev/null +++ b/src/tmu/inth-etmu.s @@ -0,0 +1,104 @@ +/* +** gint:tmu:inth-etmu - Interrupt handlers for the RTC-bound timers +*/ + +/* Gates for the extra timers (informally called ETMU) */ +.global _inth_etmu4 +.global _inth_etmux + +.section .gint.blocks, "ax" 
+.align 4 + +/* EXTRA TMU INTERRUPT HANDLERS - 96 BYTES + To implement the same functionalities as the standard timers, several blocks + are once again needed. This time, 2 empty blocks after ETMU4 (0xd20, 0xd40) + are used for convenience. + + It would be possible to communicate between any interrupt handlers in non- + consecutive gates. A simple way is to store at runtime a pointer to the + desired object in one handler. But that costs a lot of space. If the + relative position of interrupt handlers is known, the best option left is + the unnatural @(disp,pc) addressing mode, and it doesn't even work with the + SH3's compact VBR scheme. */ + +/* FIRST GATE - ETMU4 and two empty blocks */ +_inth_etmu4: + mova .storage_etmu4, r0 + mov #7, r2 + +.shared: + mov.l r2, @-r15 + mov.l r8, @-r15 + sts.l pr, @-r15 + mov r0, r1 + + /* Clear interrupt flag in TCR */ + mov.l @(8, r1), r3 +1: + mov.b @r3, r0 + tst #0x02, r0 + and #0xfd, r0 + bf/s 1b + mov.b r0, @r3 + + /* Invoke the callback function with its argument */ + mov.l .gint_inth_callback, r8 + mov.l @r8, r8 + mov.l @r1, r4 + jsr @r8 + mov.l @(4, r1), r5 + tst r0, r0 + bt 2f + + /* If the return value is non-zero, stop the timer */ + mov.l .timer_stop, r4 + jsr @r8 + mov.l @(8, r15), r5 + + /* Restore saved registers and return */ +2: + lds.l @r15+, pr + mov.l @r15+, r8 + rts + add #4, r15 + + .zero 24 + +.gint_inth_callback: + .long _gint_inth_callback +.timer_stop: + .long _timer_stop + +.storage_etmu4: + .long 0 /* Callback: Configured dynamically */ + .long 0 /* Argument: Configured dynamically */ + .long 0 /* TCR: Configured dynamically */ + +/* SECOND GATE - All other ETMU entries, falling back to ETMU4 */ +_inth_etmux: + /* Dynamically compute the target of the jump */ + stc vbr, r3 + mov.l 1f, r2 + add r2, r3 + + mova .storage_etmux, r0 + mov.w .id_etmux, r2 + jmp @r3 + nop + +.id_etmux: + .word 0 /* Timer ID */ + + /* Offset from VBR where extra timer 4 is located: + * 0x600 to reach the interrupt 
handlers + * 0x040 to jump over the entry gate + * 0x900 to reach the handler of ETMU4 + * Skip over the first instructions + This is different on SH3 due to the compact scheme so it's edited + dynamically at install time. */ +1: .long 0xf40 + (.shared - _inth_etmu4) + +.storage_etmux: + .long 0 /* Callback: Configured dynamically */ + .long 0 /* Argument: Configured dynamically */ + .long 0 /* TCR: Configured dynamically */ diff --git a/src/tmu/inth-tmu.s b/src/tmu/inth-tmu.s new file mode 100644 index 0000000..30ecb02 --- /dev/null +++ b/src/tmu/inth-tmu.s @@ -0,0 +1,121 @@ +/* +** gint:tmu:inth-tmu - Interrupt handlers for the timer units +** Perhaps the most technical of my interrupt handlers. They implement a +** simple kind of interrupt handler communication by letting the control flow +** from each interrupt handler to the next. +*/ + +/* Gates for the standard Timer Unit (TMU) */ +.global _inth_tmu /* 128 bytes */ + +.section .gint.blocks, "ax" +.align 4 + +/* TMU INTERRUPT HANDLERS - 128 BYTES + Unfortunately I did not manage to write a handler that cleared the interrupt + flag and invoked a callback in less than 34 bytes data included. So I + decided to make several gates operate as a whole and add a bit more features + in them. Basically, these handlers: + - Clear the interrupt flag + - Invoke a callback function and pass it a user-provided argument + - Stop the timer if the callback returns non-zero + - Host their own callback pointers and arguments + + It is important to notice that the code of the following gates looks like + they are contiguous in memory. The assembler will make that assumption, and + turn any address reference between two gates into a *relative displacement*. + If the gates don't have the same relative location at runtime, the code will + crash because we will have broken the references. This is why we can only do + it with handlers that are mapped to consecutive event codes. 
*/ + +_inth_tmu: + +/* FIRST GATE - TMU0 entry, clear underflow flag and call back */ +_inth_tmu_0: + mova .storage0, r0 + mov #0, r1 + +/*** This is the first shared section ***/ +.shared1: + mov.l r8, @-r15 + sts.l pr, @-r15 + mov.l r1, @-r15 + + /* Load the TCR address */ + mov.l .mask, r3 + not r3, r4 + mov.l @(8, r0), r1 + + /* Clear the interrupt flag */ +1: mov.w @r1, r2 + tst r4, r2 + and r3, r2 + mov.w r2, @r1 + bf 1b + + /* Prepare callback and jump to second section */ + mov.l .gint_inth_callback, r8 + bra .shared2 + mov.l @r8, r8 + +/* SECOND GATE - TMU1 entry and stop timer */ +_inth_tmu_1: + mova .storage1, r0 + bra .shared1 + mov #1, r1 + +/*** This is the second shared section ***/ +.shared2: + /* Invoke callback */ + mov.l @r0, r4 + jsr @r8 + mov.l @(4, r0), r5 + + /* Stop the timer if the return value is not zero */ + mov.l @r15+, r5 + tst r0, r0 + bt 2f + mov.l .timer_stop, r4 + jsr @r8 + nop + +2: + lds.l @r15+, pr + rts + mov.l @r15+, r8 + + .zero 2 + +/* THIRD GATE - TMU2 entry and storage for TMU0 */ +_inth_tmu_2: + mova .storage2, r0 + bra .shared1 + mov #2, r1 + + .zero 10 + +.gint_inth_callback: + .long _gint_inth_callback + +.storage0: + .long 0 /* Callback: Configured dynamically */ + .long 0 /* Argument: Configured dynamically */ + .long 0xa4490010 /* TCR0: Overridden at startup on SH3 */ + +/* FOURTH GATE - Storage for TMU1, TMU2 and other values */ +_inth_tmu_storage: + +.mask: + .long 0xfffffeff +.timer_stop: + .long _timer_stop + +.storage1: + .long 0 /* Callback: Configured dynamically */ + .long 0 /* Argument: Configured dynamically */ + .long 0xa449001c /* TCR1: Overridden at startup on SH3 */ + +.storage2: + .long 0 /* Callback: Configured dynamically */ + .long 0 /* Argument: Configured dynamically */ + .long 0xa4490028 /* TCR2: Overridden at startup on SH3 */ diff --git a/src/tmu/inth.s b/src/tmu/inth.s deleted file mode 100644 index 13cddb5..0000000 --- a/src/tmu/inth.s +++ /dev/null @@ -1,227 +0,0 @@ -/* -** gint:tmu:inth - 
Interrupt handlers for the timer units -** Perhaps the most technical of my interrupt handlers. They implement a -** simple kind of interrupt handler communication by letting the control flow -** from each interrupt handler to the next. -*/ - -/* Gates for the standard Timer Unit (TMU) */ -.global _inth_tmu /* 128 bytes */ - -/* Gates for the extra timers (informally called ETMU) */ -.global _inth_etmu2 /* 32 bytes */ -.global _inth_etmu_help /* 32 bytes */ -.global _inth_etmux /* 32 bytes */ - -.section .gint.blocks, "ax" -.align 4 - -/* TMU INTERRUPT HANDLERS - 128 BYTES - Unfortunately I did not manage to write a handler that cleared the interrupt - flag and invoked a callback in less than 34 bytes data included. So I - decided to make several gates operate as a whole and add a bit more features - in them. Basically, these handlers: - - Clear the interrupt flag - - Invoke a callback function and pass it a user-provided argument - - Stop the timer if the callback returns non-zero - - Host their own callback pointers and arguments - - It is important to notice that the code of the following gates looks like - they are contiguous in memory. The assembler will make that assumption, and - turn any address reference between two gates into a *relative displacement*. - If the gates don't have the same relative location at runtime, the code will - crash because we will have broken the references. This is why we can only do - it with handlers that are mapped to consecutive event codes. 
*/ - -_inth_tmu: - -/* FIRST GATE - TMU0 entry, clear underflow flag and call back */ -_inth_tmu_0: - mova .storage0, r0 - mov #0, r1 - -/*** This is the first shared section ***/ -.shared1: - sts.l pr, @-r15 - mov.l r1, @-r15 - - /* Load the TCR address */ - mov.l .mask, r3 - not r3, r4 - mov.l @(8, r0), r1 - - /* Clear the interrupt flag */ -1: mov.w @r1, r2 - tst r4, r2 - and r3, r2 - mov.w r2, @r1 - bf 1b - - /* Prepare callback and jump to second section */ - mov.l .gint_inth_callback_1, r1 - mov.l @r0, r4 - bra .shared2 - mov.l @(4, r0), r5 - -/* SECOND GATE - TMU1 entry and stop timer */ -_inth_tmu_1: - mova .storage1, r0 - bra .shared1 - mov #1, r1 - -/*** This is the second shared section ***/ -.shared2: - /* Invoke callback */ - jsr @r1 - nop - - /* Stop the timer if the return value is not zero */ - mov.l @r15+, r4 - tst r0, r0 - bt .shared3 - mov.l .timer_stop_1, r1 - jsr @r1 - nop - -.shared3: - lds.l @r15+, pr - rts - nop - - .zero 4 - -/* THIRD GATE - TMU2 entry and storage for TMU0 */ -_inth_tmu_2: - mova .storage2, r0 - bra .shared1 - mov #2, r1 - - .zero 10 - -.gint_inth_callback_1: - .long _gint_inth_callback - -.storage0: - .long 0 /* Callback: Configured dynamically */ - .long 0 /* Argument: Configured dynamically */ - .long 0xa4490010 /* TCR0: Overridden at startup on SH3 */ - -/* FOURTH GATE - Storage for TMU1, TMU2 and other values */ -_inth_tmu_storage: - -.mask: - .long 0xfffffeff -.timer_stop_1: - .long _timer_stop - -.storage1: - .long 0 /* Callback: Configured dynamically */ - .long 0 /* Argument: Configured dynamically */ - .long 0xa449001c /* TCR1: Overridden at startup on SH3 */ - -.storage2: - .long 0 /* Callback: Configured dynamically */ - .long 0 /* Argument: Configured dynamically */ - .long 0xa4490028 /* TCR2: Overridden at startup on SH3 */ - - - -/* EXTRA TMU INTERRUPT HANDLERS - 96 BYTES - To implement the same functionalities as the standard timers, several blocks - are once again needed. 
But the handlers for the extra timers are not located - in adjacent gates, except for ETMU1 and ETMU2 which have event codes 0xc20 - and 0xc40. Since handler 0xc60 is free on SH4 and not redirected to on SH3, - I use it to build a three-handler block similar to that of the TMU above. - - On SH4 this means that an extra gate has to be installed, but no interrupt - point here. On SH3 this means that four gates are used for the only extra - timer, but the incurred cost is minimal (96 bytes on the binary file) - because the size of the VBR area can hardly be shrunk anyway. - - It *is* possible to do generalized communication between interrupt handlers - that do not reside in consecutive gates. The general way of performing a - jump or data access between two interrupt handlers would be to store at - runtime the address of the target resource in a reserved longword in the - source handler. But longwords are costly in 32-byte areas. Even if the event - codes of the interrupt handlers are known at development time, the best I - can think of is hardcoding the relative displacements, and one would need to - use the unnatural and unmaintainable @(disp, pc) addressing modes. 
*/ - -/* FIRST GATE - ETMU2 entry, invoke callback and prepare clear flag */ -_inth_etmu2: - mova .storage_etmu2, r0 - mov #5, r1 - -.shared4: - mov.l r1, @-r15 - sts.l pr, @-r15 - mov.l r0, @-r15 - - /* Clear interrupt flag */ - mov.l .timer_clear, r2 - jsr @r2 - mov r1, r4 - - /* Prepare invoking the callback function */ - mov.l @r15+, r0 - mov.l .gint_inth_callback_2, r1 - bra _inth_etmu_help - mov.l @r0, r4 - -.storage_etmu2: - .long 0 /* Callback: Configured dynamically */ - .long 0 /* Argument: Configured dynamically */ - -/* SECOND GATE - Helper entry, invoke callback and stop timer if requested */ -_inth_etmu_help: - - /* Invoke callback; if return value is non-zero, stop timer */ - jsr @r1 - mov.l @(4, r0), r5 - tst r0, r0 - bt .shared5 - mov.l .timer_stop_2, r1 - jsr @r1 - mov.l @(4, r15), r4 - - /* Clear the flag and possibly stop the timer */ - -.shared5: - lds.l @r15+, pr - rts - add #4, r15 - -.gint_inth_callback_2: - .long _gint_inth_callback -.timer_clear: - .long _timer_clear -.timer_stop_2: - .long _timer_stop - -/* THIRD GATE - All other ETMU entries, deferred to the previous ones */ -_inth_etmux: - /* Dynamically compute the target of the jump */ - stc vbr, r3 - mov.l 1f, r2 - add r2, r3 - - mova .storage_etmux, r0 - mov.l .id_etmux, r1 - jmp @r3 - nop - nop - - /* Offset from VBR where extra timer 2 is located: - * 0x600 to reach the interrupt handlers - * 0x040 to jump over the entry gate - * 0x840 to reach the handler of ETMU2 - * Skip over the first instructions - This is different on SH3 due to the compact scheme so it's edited - dynamically at install time. 
*/ -1: .long 0xe80 + (.shared4 - _inth_etmu2) - -.id_etmux: - .long 0 /* Timer ID */ -.storage_etmux: - .long 0 /* Callback: Configured dynamically */ - .long 0 /* Argument: Configured dynamically */ diff --git a/src/tmu/tmu.c b/src/tmu/tmu.c index fe6ea7e..cb893a8 100644 --- a/src/tmu/tmu.c +++ b/src/tmu/tmu.c @@ -233,7 +233,7 @@ void timer_pause(int id) } /* timer_stop() - stop and free a timer */ -GMAPPED void timer_stop(int id) +void timer_stop(int id) { /* Stop the timer and disable UNIE to indicate that it's free */ timer_pause(id); @@ -289,18 +289,6 @@ int timer_timeout(void volatile *arg) return TIMER_STOP; } -//--- -// Low-level functions -//--- - -/* timer_clear() - clear an ETMU flag - @timer Timer ID, must be an ETMU */ -GMAPPED void timer_clear(int id) -{ - do ETMU[id-3].TCR.UNF = 0; - while(ETMU[id-3].TCR.UNF); -} - //--- // Driver initialization //--- @@ -308,8 +296,7 @@ GMAPPED void timer_clear(int id) /* Interrupt handlers for standard timers (4 gates) */ extern void inth_tmu(void); /* Interrupt handlers for extra timers */ -extern void inth_etmu2(void); -extern void inth_etmu_help(void); +extern void inth_etmu4(void); extern void inth_etmux(void); #ifdef FX9860G @@ -362,30 +349,28 @@ static void init(void) while(T->TCR.byte); } - /* Install the extra timers. The interrupt handler takes 3 gates, so we - install 3 even on SH3 where there's only one */ - int limit = isSH3() ? 6 : 9; - - for(int i = 3; i < limit; i++) + /* Install the extra timers. On SH3, only ETMU0 is available */ + for(int i = 3; i < timer_count(); i++) if(i != 7) { - void *handler = (i == 5) ? 
inth_etmu2 : inth_etmux; - void *h = gint_inthandler(etmu_event[i-3], handler, 32); + void *h = gint_inthandler(etmu_event[i-3], inth_etmux, 32); + timers[i] = h + 20; - timers[i] = h + 24; - - if(i == 5) continue; - - /* On SH3, the ETMU handler is not at an offset of 0x840 (event - code 0xc40) but at an offset of 0xc0 */ + /* On SH3, the ETMU handler is not at an offset of 0x900 (event + code 0xd00) but at an offset of 0xa0 */ uint32_t *etmu_offset = h + 16; - if(isSH3()) *etmu_offset = *etmu_offset - 0xe80 + 0x2c0; + if(isSH3()) *etmu_offset = *etmu_offset - 0xf40 + 0x2a0; - uint32_t *data_id = h + 20; + uint16_t *data_id = h + 14; *data_id = i; + + uint32_t *TCR = h + 28; + *TCR = (uint32_t)&ETMU[i-3].TCR; } - /* Also install the helper handler */ - gint_inthandler(0xc60, inth_etmu_help, 32); + /* Also install ETMU4, even on SH3, because it contains common code */ + h = gint_inthandler(etmu_event[4], inth_etmu4, 96); + timers[7] = h + 84; + *(uint32_t *)(h + 92) = (uint32_t)&ETMU[4].TCR; /* Enable TMU0 at level 13, TMU1 at level 11, TMU2 at level 9 */ intc_priority(INTC_TMU_TUNI0, 13);