/*
 * Patch summary: special-case DMA transfers that touch the address range
 * 0xfe200000..0xfe3fffff, which the in-patch comments call "SPU memory".
 *
 * src/dma/dma.c, dma_setup():
 *   After the existing check on ch->DAR against 0xe5007000..0xe5204000, two
 *   checks are added: if ch->SAR or ch->DAR lies in 0xfe200000..0xfe3fffff
 *   (inclusive), dma_sleep_blocking[channel] is set to true.
 *
 * src/dma/memcpy.c, dma_memcpy():
 *   The transfer previously always used DMA_32B with a block count of
 *   size >> 5. It now switches to DMA_4B with a block count of size >> 2
 *   when either src or dst falls in [0xfe200000, 0xfe400000). The channel
 *   number (1), the DMA_INC modes and the returned dst are unchanged.
 *
 * src/dma/memset.c, dma_memset():
 *   Same block-size selection, based on dst only. The source remains ILbuf
 *   with DMA_FIXED, and dst still uses DMA_INC.
 *
 * Review notes:
 *   - The range test is spelled "<= 0xfe3fffff" in dma.c and "< 0xfe400000"
 *     in memcpy.c and memset.c; for integer addresses both select the same
 *     range.
 *   - The block count is obtained by a right shift of size, so any remainder
 *     of size below the block size does not contribute to the count.
 *     Presumably callers pass suitably aligned sizes; confirm against
 *     dma_transfer_sync() and its callers.
 *   - Pointers are compared after a cast to uint32_t, which assumes 32-bit
 *     addresses on the target; confirm.
 *   - NOTE(review): this copy of the patch has all of its lines joined into
 *     one; the original line breaks must be restored before it can be
 *     applied with git apply or patch.
 */
diff --git a/src/dma/dma.c b/src/dma/dma.c index 944d14d..7090b3a 100644 --- a/src/dma/dma.c +++ b/src/dma/dma.c @@ -116,6 +116,11 @@ static int dma_setup(int channel, dma_size_t size, uint blocks, if(ch->DAR >= 0xe5007000 && ch->DAR <= 0xe5204000) dma_sleep_blocking[channel] = true; + if(ch->SAR >= 0xfe200000 && ch->SAR <= 0xfe3fffff) + dma_sleep_blocking[channel] = true; + if(ch->DAR >= 0xfe200000 && ch->DAR <= 0xfe3fffff) + dma_sleep_blocking[channel] = true; + return 0; } diff --git a/src/dma/memcpy.c b/src/dma/memcpy.c index 5d8b25c..de603e8 100644 --- a/src/dma/memcpy.c +++ b/src/dma/memcpy.c @@ -4,6 +4,18 @@ void *dma_memcpy(void * __restrict dst, const void * __restrict src, size_t size) { - dma_transfer_sync(1, DMA_32B, size >> 5, src, DMA_INC, dst, DMA_INC); + int block_size = DMA_32B; + int block_count = size >> 5; + + /* Use 4-byte transfers to access SPU memory */ + if(((uint32_t)src >= 0xfe200000 && (uint32_t)src < 0xfe400000) || + ((uint32_t)dst >= 0xfe200000 && (uint32_t)dst < 0xfe400000)) + { + block_size = DMA_4B; + block_count = size >> 2; + } + + dma_transfer_sync(1, block_size, block_count, src, DMA_INC, dst, + DMA_INC); return dst; } diff --git a/src/dma/memset.c b/src/dma/memset.c index 74a182f..14783c4 100644 --- a/src/dma/memset.c +++ b/src/dma/memset.c @@ -14,6 +14,17 @@ void *dma_memset(void *dst, uint32_t l, size_t size) different memory regions, making the DMA faster than the CPU. */ for(int i = 0; i < 8; i++) ILbuf[i] = l; - dma_transfer_sync(1, DMA_32B, size>>5, ILbuf, DMA_FIXED, dst, DMA_INC); + int block_size = DMA_32B; + int block_count = size >> 5; + + /* Use 4-byte transfers to access SPU memory */ + if((uint32_t)dst >= 0xfe200000 && (uint32_t)dst < 0xfe400000) + { + block_size = DMA_4B; + block_count = size >> 2; + } + + dma_transfer_sync(1, block_size, block_count, ILbuf, DMA_FIXED, dst, + DMA_INC); return dst; }