Compare commits

...

3 Commits

Author SHA1 Message Date
Lephe fc7aab6eba
dma: finalize dma_memset() and dma_memcpy()
Adds support for dma_memcpy(), and uses a proper ILRAM allocation scheme
(static linking here) for the temporary buffer in dma_memset().
2019-09-15 19:30:57 +02:00
Lephe 15558c8fb3
support data loading in ILRAM, XRAM and YRAM
This change adds support for three sections .ilram, .xram and .yram,
along with three macros GILRAM, GXRAM and GYRAM, that can be used to
statically load data to on-chip memory.
2019-09-15 19:29:47 +02:00
Lephe bb77e4588d
dma: fix freezes when transferring to/from IL memory
The IL memory is unavailable when the processor goes to sleep, causing
any involved DMA transfer to stall. The dma_transfer_wait() normally
sleeps to save battery power, but this causes the whole system to freeze
and never wake up.

This change lets dma_transfer_wait() decide dynamically whether to sleep
or spinlock. There is no concrete improvement over dma_transfer_noint()
when using IL memory, but it makes dma_transfer() fully generic.

Obviously the same goes for X and Y memory.
2019-09-15 15:20:23 +02:00
8 changed files with 169 additions and 31 deletions

View File

@ -15,19 +15,24 @@ MEMORY
{
/* Userspace mapping of the add-in (0x200 B are for the G1A header).
220k is the maximum amount of simultaneously-mappable code */
rom (rx): o = 0x00300200, l = 220k
rom (rx): o = 0x00300200, l = 220k
/* This is mapped to RAM; 8k on SH3, apparently 32k on SH4 */
ram (rw): o = 0x08100000, l = 8k
ram (rw): o = 0x08100000, l = 8k
/* gint's VBR space, mentioned here for completeness */
vbr (rwx): o = 0x8800e000, l = 5k
vbr (rwx): o = 0x8800e000, l = 5k
/* Some RAM region from P1 area; gint's data will reside here */
rram (rwx): o = 0x8800f400, l = 3k
rram (rwx): o = 0x8800f400, l = 3k
/* On-chip IL memory */
ilram (rwx): o = 0xe5200000, l = 4k
/* On-chip X and Y memory */
xram (rwx): o = 0xe5007000, l = 8k
yram (rwx): o = 0xe5017000, l = 8k
}
SECTIONS
{
/*
** ROM sections
** ROM sections
*/
/* First address to be mapped to ROM (including G1A header) */
@ -94,13 +99,16 @@ SECTIONS
- Resources or assets from fxconv or similar converters
- Data marked read-only by the compiler (.rodata and .rodata.*) */
.rodata : SUBALIGN(4) {
/* Put these first, they need to be 4-aligned */
*(.rodata.4)
*(.rodata .rodata.*)
} > rom
/*
** RAM sections
** RAM sections
*/
. = ORIGIN(ram);
@ -142,12 +150,48 @@ SECTIONS
_sdata = SIZEOF(.data) + SIZEOF(.data.4);
/* On-chip memory sections: IL, X and Y memory */
. = ORIGIN(ilram);
.ilram ALIGN(4) : ALIGN(4) {
_lilram = LOADADDR(.ilram);
_rilram = . ;
*(.ilram)
. = ALIGN(16);
} > ilram AT> rom
. = ORIGIN(xram);
.xram ALIGN(4) : ALIGN(4) {
_lxram = LOADADDR(.xram);
_rxram = . ;
*(.xram)
. = ALIGN(16);
} > xram AT> rom
. = ORIGIN(yram);
.yram ALIGN(4) : ALIGN(4) {
_lyram = LOADADDR(.yram);
_ryram = . ;
*(.yram)
. = ALIGN(16);
} > yram AT> rom
_silram = SIZEOF(.ilram);
_sxram = SIZEOF(.xram);
_syram = SIZEOF(.yram);
/*
** RRAM sections
** 8800e000:4k VBR space
** 8800f000:4k .gint.data and .gint.bss
** RRAM sections
** 8800e000:4k VBR space
** 8800f000:4k .gint.data and .gint.bss
*/
/* VBR address: let's just start at the beginning of the RRAM area.
@ -185,7 +229,7 @@ SECTIONS
/*
** Other sections
** Other sections
*/
/* Unwanted sections going to meet Dave Null:

View File

@ -13,20 +13,25 @@ ENTRY(_start)
MEMORY
{
/* Userspace mapping of the add-in (without G3A header) */
rom (rx): o = 0x00300000, l = 220k
rom (rx): o = 0x00300000, l = 220k
/* Static RAM; stack grows down from the end of this region.
The first 0x2000 bytes are reserved by gint, see below */
ram (rw): o = 0x08102000, l = 512k
ram (rw): o = 0x08102000, l = 512k
/* gint's VBR space, mentioned here for completeness */
vbr (rwx): o = 0x8c160000, l = 5k
vbr (rwx): o = 0x8c160000, l = 5k
/* Some RAM region from P1 area; gint's data will reside here */
rram (rwx): o = 0x8c161400, l = 3k
rram (rwx): o = 0x8c161400, l = 3k
/* On-chip IL memory */
ilram (rwx): o = 0xe5200000, l = 4k
/* On-chip X and Y memory */
xram (rwx): o = 0xe5007000, l = 8k
yram (rwx): o = 0xe5017000, l = 8k
}
SECTIONS
{
/*
** ROM sections
** ROM sections
*/
/* First address to be mapped to ROM */
@ -87,7 +92,7 @@ SECTIONS
- Data marked read-only by the compiler (.rodata and .rodata.*) */
.rodata : SUBALIGN(4) {
/* Put these first, they need to be 4-aligned */
*(.rodata.assets)
*(.rodata.4)
*(.rodata .rodata.*)
} > rom
@ -95,7 +100,7 @@ SECTIONS
/*
** RAM sections
** RAM sections
*/
. = ORIGIN(ram);
@ -130,12 +135,48 @@ SECTIONS
_sdata = SIZEOF(.data) + SIZEOF(.data.4);
/* On-chip memory sections: IL, X and Y memory */
. = ORIGIN(ilram);
.ilram ALIGN(4) : ALIGN(4) {
_lilram = LOADADDR(.ilram);
_rilram = . ;
*(.ilram)
. = ALIGN(16);
} > ilram AT> rom
. = ORIGIN(xram);
.xram ALIGN(4) : ALIGN(4) {
_lxram = LOADADDR(.xram);
_rxram = . ;
*(.xram)
. = ALIGN(16);
} > xram AT> rom
. = ORIGIN(yram);
.yram ALIGN(4) : ALIGN(4) {
_lyram = LOADADDR(.yram);
_ryram = . ;
*(.yram)
. = ALIGN(16);
} > yram AT> rom
_silram = SIZEOF(.ilram);
_sxram = SIZEOF(.xram);
_syram = SIZEOF(.yram);
/*
** gint-related sections
** 8c160000:4k VBR space
** 8c161000:4k .gint.data and .gint.bss
** gint-related sections
** 8c160000:4k VBR space
** 8c161000:4k .gint.data and .gint.bss
*/
/* VBR address: let's just start at the beginning of the RAM area.
@ -173,7 +214,7 @@ SECTIONS
/*
** Other sections
** Other sections
*/
/* Unwanted sections going to meet Dave Null:

View File

@ -13,6 +13,10 @@
/* Additional sections that are only needed on SH3 */
#define GDATA3 __attribute__((section(".gint.data.sh3")))
#define GBSS3 __attribute__((section(".gint.bss.sh3")))
/* Objects for the ILRAM, XRAM and YRAM regions */
#define GILRAM __attribute__((section(".ilram")))
#define GXRAM __attribute__((section(".xram")))
#define GYRAM __attribute__((section(".yram")))
/* Unused parameters or variables */
#define GUNUSED __attribute__((unused))

View File

@ -57,7 +57,7 @@ void dma_transfer(int channel, dma_size_t size, uint length,
void const *src, dma_address_t src_mode,
void *dst, dma_address_t dst_mode);
/* dma_transfer_wait() - Wait for a transfer on channel 0 to finish
/* dma_transfer_wait() - Wait for a transfer to finish
You should call this function when you need the transfer to be complete
before continuing execution. If you are sure that the transfer is finished,
@ -83,16 +83,31 @@ void dma_transfer_noint(int channel, dma_size_t size, uint blocks,
//---
/* dma_memset(): Fast 32-aligned memset
This function is your typical memset, except that the destination and size
This function is your typical memset(), except that the destination and size
must be 32-aligned, and that the pattern is 4 bytes instead of one. It is
replicated to 32 bytes then used to fill the destination area. This 4-byte
fixed size may be lifted in future versions.
This function cannot be used with virtualized (P0) addresses.
@dst Destination address (32-aligned)
@pattern 4-byte pattern to fill @dst
@size Size of destination area (32-aligned) */
void *dma_memset(void *dst, uint32_t pattern, size_t size);
/* dma_memcpy(): Fast 32-aligned memcpy
This function works exactly like memcpy(), but it expects 32-aligned source,
destination, and size, and uses the DMA to efficiently copy.
This function cannot be used with virtualized (P0) addresses.
@dst Destination address (32-aligned)
@src Source address (32-aligned)
@size Size of region (32-aligned) */
void *dma_memcpy(void * restrict dst, const void * restrict src, size_t size);
#endif /* FXCG50 */
#endif /* GINT_DMA */

View File

@ -20,6 +20,9 @@ extern uint32_t
brom, srom, /* Limits of ROM mappings */
lgdata, sgdata, rgdata, /* gint's data section */
ldata, sdata, rdata, /* User's data section */
lilram, silram, rilram, /* IL memory section */
lxram, sxram, rxram, /* X memory section */
lyram, syram, ryram, /* Y memory section */
sbss, rbss, /* User's BSS section */
btors, mtors, etors; /* Constructor/destructor arrays */
extern gint_driver_t
@ -120,7 +123,10 @@ int start(int isappli, int optnum)
/* Load data sections and wipe the bss section. This has to be done
first for static and global variables to be initialized */
regcpy(lgdata, sgdata, rgdata);
regcpy(ldata, sdata, rdata);
regcpy(ldata, sdata, rdata);
regcpy(lilram, silram, rilram);
regcpy(lxram, sxram, rxram);
regcpy(lyram, syram, ryram);
regclr(rbss, sbss);
bootlog_loaded();

View File

@ -41,7 +41,7 @@ static uint32_t dma_translate(void const *address)
return a;
/* First additional on-chip memory area (XRAM) */
if(a >= 0xe5007000 && a < 0xE5009000)
if(a >= 0xe5007000 && a < 0xe5009000)
return a;
/* Second on-chip memory area (YRAM) */
@ -123,8 +123,18 @@ void dma_transfer_wait(int channel)
channel_t *ch = dma_channel(channel);
if(!ch) return;
/* Wait for the channel to be disabled by the interrupt handler */
while(ch->CHCR.DE) sleep();
/* Wait for the channel to be disabled by the interrupt handler.
When the source or the destination of the transfer is X, Y or IL
memory, refrain from sleeping as this also stops the transfer. */
int onchip = 0;
if(ch->SAR >= 0xe5007000 && ch->SAR < 0xe5204000) onchip = 1;
if(ch->DAR >= 0xe5007000 && ch->DAR < 0xe5204000) onchip = 1;
while(ch->CHCR.DE)
{
if(!onchip) sleep();
}
}
/* dma_transfer_noint(): Perform a data transfer without interruptions */

10
src/dma/memcpy.c Normal file
View File

@ -0,0 +1,10 @@
#include <gint/dma.h>
/* dma_memcpy(): Fast 32-aligned memcpy
   Copies @size bytes from @src to @dst as 32-byte blocks on DMA channel 1,
   waits for that channel's transfer to finish, then returns @dst. */
void *dma_memcpy(void * restrict dst, const void * restrict src, size_t size)
{
	/* The length handed to the DMA is a count of 32-byte blocks; any
	   remainder of @size modulo 32 is not transferred */
	size_t const blocks = size / 32;

	dma_transfer(1, DMA_32B, blocks, src, DMA_INC, dst, DMA_INC);
	dma_transfer_wait(1);

	return dst;
}

View File

@ -1,12 +1,20 @@
#include <gint/dma.h>
/* Allocate a 32-byte buffer in ILRAM */
GALIGNED(32) GILRAM static uint32_t ILbuf[8];
/* dma_memset(): Fast 32-aligned memset */
void *dma_memset(void *dst, uint32_t l, size_t size)
{
/* TODO: Use a proper IL memory allocation scheme */
uint32_t *IL = (void *)0xe5200000;
for(int i = 0; i < 8; i++) IL[i] = l;
/* Prepare the ILRAM buffer. We need to use ILRAM because the DMA will
have to read the operand once per block, as opposed to an assembler
routine that would hold it in a register. If we place it in RAM, the
DMA will perform twice as many RAM accesses as the handwritten
assembler, which would be very slow. By using ILRAM we use two
different memory regions, making the DMA faster than the CPU. */
for(int i = 0; i < 8; i++) ILbuf[i] = l;
dma_transfer_noint(1, DMA_32B, size >> 5, IL, DMA_FIXED, dst, DMA_INC);
dma_transfer(1, DMA_32B, size >> 5, ILbuf, DMA_FIXED, dst, DMA_INC);
dma_transfer_wait(1);
return dst;
}