From af5c16a3d3c8338273315afb4c60cb01c8c0aec2 Mon Sep 17 00:00:00 2001 From: Lephe Date: Sat, 4 Feb 2023 21:02:59 +0100 Subject: [PATCH] usb: massively improve writing logic * Move logic around tracking transfers to asyncio.c. * Add a "short buffer" holding 0-3 bytes between writes, so that the driver performs only 4-byte writes in the FIFO and a short write in the commit, if needed. - This is partially due to me thinking at some point that degrading writing size was impossible, but it might actually be possible by writing to FIFO/FIFO+2 or FIFO/FIFO+1/FIFO+2/FIFO+3. - In any case I think this new approach wins on performance. * Get rid of unit_size since we now always use 4 bytes. * Add a waiting function which is used in usb_close() (and once tested should be used in world switches too). * Eliminate some of the special cases for the DCP, though not all (in particular I can't get the commit to rely on the BEMP interrupt yet, nor can I properly clear PID to NAK when unbinding). --- CMakeLists.txt | 2 + TODO | 2 + {src/usb => include/gint/drivers}/asyncio.h | 91 ++++-- include/gint/usb.h | 50 ++- src/usb/asyncio.c | 70 +++++ src/usb/classes/ff-bulk-gray.c | 8 +- src/usb/classes/ff-bulk.c | 14 +- src/usb/pipes.c | 318 ++++++++++---------- src/usb/setup.c | 2 +- src/usb/usb.c | 5 +- src/usb/usb_private.h | 42 ++- src/usb/write4.S | 136 +++++++++ 12 files changed, 513 insertions(+), 227 deletions(-) rename {src/usb => include/gint/drivers}/asyncio.h (63%) create mode 100644 src/usb/asyncio.c create mode 100644 src/usb/write4.S diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e74e25..f0fc47f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,12 +123,14 @@ set(SOURCES_COMMON src/tmu/sleep.c src/tmu/tmu.c # USB driver + src/usb/asyncio.c src/usb/classes/ff-bulk.c src/usb/configure.c src/usb/pipes.c src/usb/setup.c src/usb/string.c src/usb/usb.c + src/usb/write4.S ) set(SOURCES_FX # Gray engine diff --git a/TODO b/TODO index 56c3c5a..a88cfc1 100644 --- a/TODO +++ 
b/TODO @@ -16,6 +16,8 @@ Extensions on existing code: * core: review forgotten globals and MPU addresses not in * core: run destructors when a task-switch results in leaving the app * fs: support read-only files backed with GetBlockAddress() on fx-CG +* kernel: SH4- or G-III-specific linker scripts? +* keysc: global shortcut SHIFT+0+EXIT for abort() as an infinite loop break Future directions: * Audio playback using TSWilliamson's libsnd method diff --git a/src/usb/asyncio.h b/include/gint/drivers/asyncio.h similarity index 63% rename from src/usb/asyncio.h rename to include/gint/drivers/asyncio.h index 05b4e53..7943d3c 100644 --- a/src/usb/asyncio.h +++ b/include/gint/drivers/asyncio.h @@ -11,7 +11,7 @@ /* Data tracking the progress of a multi-part multi-round async I/O operation. * Multi-part refers to writes being constructed over several calls to - write(2) followed by a "commit" with sync(2) (for async file descriptors; + write(2) followed by a "commit" with fsync(2) (for async file descriptors; synchronous file descriptors are committed at every write). * Multi-round refers to the operation interacting multiple times with hardware in order to communicate the complete data. @@ -29,16 +29,16 @@ | DONE write(2) | | | interrupt | | | | | Data exhausted - | sync(2): start | v - FLYING-COMMIT <------------ IN-PROGRESS - transmission + | fsync(2): start | v + FLYING-SYNC <------------ IN-PROGRESS + transmission Initially the operation is in the IDLE state. When a write(2) is issued, it interacts with hardware then transitions to the IN-PROGRESS state, where it - remains for any subsequent write(2). A sync(2) will properly commit data to + remains for any subsequent write(2). A fsync(2) will properly commit data to the hardware, finish the operation and return to the IDLE state. 
- The FLYING-WRITE and FLYING-COMMIT states refer to waiting periods, after + The FLYING-WRITE and FLYING-SYNC states refer to waiting periods, after issuing hardware commands, during which hardware communicates. Usually an interrupt signals when hardware is ready to resume work. @@ -47,6 +47,10 @@ directly from IDLE or IN-PROGRESS, to PENDING, to IN-PROGRESS, without actually communicating with the outside world. + An asynchronous write(2) might return to the caller as soon as writing is + finished even if the operation is left in the FLYING-WRITE state, and it may + even return while the operation is in the WRITING state if the DMA is used. + The invariants and meaning for each state are as follow: State Characterization Description @@ -55,9 +59,9 @@ PENDING data_w && !flying_w \ Ready to write pending data && round_size == 0 WRITING round_size > 0 CPU/DMA write to HW in progress - FLYING-WRITE flying_w && !committed_w HW transmission in progress - IN-PROGRESS data_w != NULL && !flying_w Waiting for write(2) or sync(2) - FLYING-COMMIT flying_w && committed_w HW commit in progress + FLYING-WRITE flying_w && type == WRITE HW transmission in progress + IN-PROGRESS !data_w && type == WRITE Waiting for write(2) or fsync(2) + FLYING-SYNC flying_w && type == SYNC HW commit in progress ============================================================================ For a read: @@ -90,20 +94,16 @@ States can be checked and transitioned with the API functions below. 
*/ -enum { ASYNCIO_NONE, ASYNCIO_READ, ASYNCIO_WRITE }; +enum { ASYNCIO_NONE, ASYNCIO_READ, ASYNCIO_WRITE, ASYNCIO_SYNC }; -typedef struct +typedef volatile struct { /** User-facing information **/ - /* Direction of I/O operation */ + /* Type of I/O operation (read/write/fsync) */ uint8_t type; /* Whether the DMA should be used for hardware access */ bool dma; - /* Whether the data has been committed by sync(2) [write] */ - bool committed_w; - /* Operation's unit size (meaning depends on hardware) */ - uint8_t unit_size; union { /* Address of data to transfer, incremented gradually [write] */ @@ -124,17 +124,64 @@ typedef struct be asynchronous if it's using the DMA) */ uint16_t round_size; /* Hardware resource being used for access (meaning depends on hardware). - Usually, this is assigned during hardware transactions, ie.: - - During a write, a controller is assigned when leaving the IDLE state - and returned when re-entering the IDLE state. - - During a read, a controller is assigne when leaving the IDLE-EMPTY - state and returned when re-entering the IDLE-EMPTY state. */ + Usually, this is assigned for the duration of hardware transaction. + This value is user-managed and not modified by asyncio_op functions. */ uint8_t controller; /* Whether a hardware operation is in progress ("flying" write states) */ + // TODO: Do we actually set and maintain this member?! bool flying_w; + /** Internal information **/ + + /* Number of bytes in short buffer (0..3) */ + uint8_t shbuf_size; + /* Short buffer */ + uint32_t shbuf; + } asyncio_op_t; -/* */ +//--- +// Initialization and query functions +//--- + +/* asyncio_op_clear(): Initialize/clear the storage for an I/O operation */ +void asyncio_op_clear(asyncio_op_t *op); + +/* asyncio_op_busy(): Check whether the transfer is busy for syscalls + + This function checks whether the transfer is in a state where the CPU is + busy wrt. starting a new syscall, ie. read(2), write(2) or fsync(2). 
Returns + true if the CPU is busy and the call has to wait, false if the call can + proceed immediately. */ +bool asyncio_op_busy(asyncio_op_t const *op); + +//--- +// State transition functions +//--- + +/* asyncio_op_start_write(): Start a write call */ +void asyncio_op_start_write(asyncio_op_t *op, void const *data, size_t size, + bool use_dma, gint_call_t const *callback); + +/* asyncio_op_start_sync(): Transition a write I/O operation to a fsync call */ +void asyncio_op_start_sync(asyncio_op_t *op, gint_call_t const *callback); + +/* asyncio_op_finish_call(): Update state after a read/write/fsync call + + This function should be called when the read(2)/write(2)/fsync(2) call last + started on the operation has concluded, including all of the hardware + effects. This isn't the moment when the syscall returns, rather it is the + moment when it completes its work. */ +void asyncio_op_finish_call(asyncio_op_t *op); + +//--- +// Write call functions +//--- + +/* asyncio_op_start_write_round(): Start a single-block write to hardware */ +void asyncio_op_start_write_round(asyncio_op_t *op, size_t size); + +/* asyncio_op_finish_write_round(): Finish a write round and advance data */ +void asyncio_op_finish_write_round(asyncio_op_t *op); #endif /* GINT_USB_ASYNCIO */ diff --git a/include/gint/usb.h b/include/gint/usb.h index ab84b07..66e4a63 100644 --- a/include/gint/usb.h +++ b/include/gint/usb.h @@ -175,23 +175,22 @@ int usb_interface_pipe(usb_interface_t const *interface, int endpoint); /* usb_write_sync(): Synchronously write to a USB pipe - This functions writes (size) bytes of (data) into the specified pipe, by - units of (unit_size) bytes. The unit size must be 1, 2 or 4, and both (data) - and (size) must be multiples of the unit size. In general, you should try to - use the largest possible unit size, as it will be much faster. In a sequence - of writes that concludes with a commit, all the writes must use the same - unit size. 
+ This function writes (size) bytes of (data) into the specified pipe. If the + data fits into the pipe, this function returns right away, and the data is + *not* transmitted. Otherwise, data is written until the pipe is full, at + which point it is automatically transmitted. After the transfer, this + function resumes writing, returning only once everything is written. Even + then the last bytes will still not have been transmitted, to allow for other + writes to follow. After the last write in a sequence, use usb_commit_sync() + or usb_commit_async() to transmit the last bytes. - If the data fits into the pipe, this function returns right away, and the - data is *not* transmitted. Otherwise, data is written until the pipe is - full, at which point it is automatically transmitted. After the transfer, - this function resumes writing, returning only once everything is written. - Even then the last bytes will still not have been transmitted, to allow for - other writes to follow. After the last write in a sequence, use - usb_commit_sync() or usb_commit_async() to transmit the last bytes. - - If (use_dma=true), the write is performed with the DMA instead of the CPU, - which is generally faster. + If (use_dma=true), the write is performed with the DMA instead of the CPU. + This requires at least 4-byte alignment on: + 1. The input data; + 2. The size of this write; + 3. The amount of data previously written to the pipe not yet committed. + This is because using the DMA does not allow any insertion of CPU logic to + handle unaligned stuff. This function will use a FIFO controller to access the pipe. The FIFO controller will be reserved for further writes until the contents of the @@ -204,17 +203,15 @@ int usb_interface_pipe(usb_interface_t const *interface, int endpoint); waits for the ressources to become available then proceeds normally.
@pipe Pipe to write into - @data Source data (unit_size-aligned) - @size Size of source (multiple of unit_size) - @unit_size FIFO access size (must be 1, 2, or 4) + @data Source data + @size Size of source @dma Whether to use the DMA to perform the write -> Returns an error code (0 on success). */ -int usb_write_sync(int pipe, void const *data, int size, int unit_size, - bool use_dma); +int usb_write_sync(int pipe, void const *data, int size, bool use_dma); /* usb_write_sync_timeout(): Synchronously write, with a timeout */ int usb_write_sync_timeout(int pipe, void const *data, int size, - int unit_size, bool use_dma, timeout_t const *timeout); + bool use_dma, timeout_t const *timeout); /* usb_write_async(): Asynchronously write to a USB pipe @@ -236,14 +233,13 @@ int usb_write_sync_timeout(int pipe, void const *data, int size, is idle and USB_WRITE_BUSY otherwise. @pipe Pipe to write into - @data Source data (unit_size-aligned) - @size Size of source (multiple of unit_size) - @unit_size FIFO access size (must be 1, 2, or 4) + @data Source data + @size Size of source @dma Whether to use the DMA to perform the write @callback Optional callback to invoke when the write completes -> Returns an error code (0 on success). 
*/ -int usb_write_async(int pipe, void const *data, int size, int unit_size, - bool use_dma, gint_call_t callback); +int usb_write_async(int pipe, void const *data, int size, bool use_dma, + gint_call_t callback); /* usb_commit_sync(): Synchronously commit a write diff --git a/src/usb/asyncio.c b/src/usb/asyncio.c new file mode 100644 index 0000000..5bf7fa3 --- /dev/null +++ b/src/usb/asyncio.c @@ -0,0 +1,70 @@ +#include <gint/drivers/asyncio.h> +#include <string.h> + +void asyncio_op_clear(asyncio_op_t *op) +{ + memset((void *)op, 0, sizeof *op); +} + +bool asyncio_op_busy(asyncio_op_t const *op) +{ + /* WAITING and READING states are busy */ + if(op->type == ASYNCIO_READ) + return op->round_size || op->data_r != NULL; + /* WRITING, FLYING-WRITE, FLYING-COMMIT and PENDING states are busy */ + if(op->type == ASYNCIO_WRITE) + return op->round_size || op->flying_w || op->data_w != NULL; + + return false; +} + +void asyncio_op_start_write(asyncio_op_t *op, void const *data, size_t size, + bool use_dma, gint_call_t const *callback) +{ + op->type = ASYNCIO_WRITE; + op->dma = use_dma; + op->data_w = data; + op->size = size; + op->callback = *callback; +} + +void asyncio_op_start_write_round(asyncio_op_t *op, size_t size) +{ + op->round_size = size; +} + +void asyncio_op_finish_write_round(asyncio_op_t *op) +{ + op->buffer_used += op->round_size; + op->data_w += op->round_size; + op->size -= op->round_size; + op->round_size = 0; +} + +void asyncio_op_start_sync(asyncio_op_t *op, gint_call_t const *callback) +{ + if(op->type != ASYNCIO_WRITE) + return; + + op->type = ASYNCIO_SYNC; + op->callback = *callback; +} + +void asyncio_op_finish_call(asyncio_op_t *op) +{ + gint_call(op->callback); + + /* Clean up the operation, unless it is a write, in which case keep + relevant states until the transaction finishes after a fsync(2).
*/ + if(op->type == ASYNCIO_WRITE) { + op->dma = false; + op->data_w = NULL; + op->size = 0; + op->callback = GINT_CALL_NULL; + op->round_size = 0; + op->flying_w = false; + } + else { + asyncio_op_clear(op); + } +} diff --git a/src/usb/classes/ff-bulk-gray.c b/src/usb/classes/ff-bulk-gray.c index e6c3df8..aab01e8 100644 --- a/src/usb/classes/ff-bulk-gray.c +++ b/src/usb/classes/ff-bulk-gray.c @@ -22,10 +22,10 @@ static void capture_vram_gray(GUNUSED bool onscreen, char const *type) subheader.pixel_format = htole32(USB_FXLINK_IMAGE_GRAY); int pipe = usb_ff_bulk_output(); - usb_write_sync(pipe, &header, sizeof header, 4, false); - usb_write_sync(pipe, &subheader, sizeof subheader, 4, false); - usb_write_sync(pipe, light, 1024, 4, false); - usb_write_sync(pipe, dark, 1024, 4, false); + usb_write_sync(pipe, &header, sizeof header, false); + usb_write_sync(pipe, &subheader, sizeof subheader, false); + usb_write_sync(pipe, light, 1024, false); + usb_write_sync(pipe, dark, 1024, false); usb_commit_sync(pipe); } diff --git a/src/usb/classes/ff-bulk.c b/src/usb/classes/ff-bulk.c index 4aa910d..d73ea46 100644 --- a/src/usb/classes/ff-bulk.c +++ b/src/usb/classes/ff-bulk.c @@ -108,9 +108,9 @@ static void capture_vram(GUNUSED bool onscreen, char const *type) subheader.pixel_format = htole32(format); int pipe = usb_ff_bulk_output(); - usb_write_sync(pipe, &header, sizeof header, 4, false); - usb_write_sync(pipe, &subheader, sizeof subheader, 4, false); - usb_write_sync(pipe, source, size, 4, false); + usb_write_sync(pipe, &header, sizeof header, false); + usb_write_sync(pipe, &subheader, sizeof subheader, false); + usb_write_sync(pipe, source, size, false); usb_commit_sync(pipe); } @@ -123,16 +123,12 @@ void usb_fxlink_text(char const *text, int size) { if(size == 0) size = strlen(text); - int unit_size = 4; - if((uint32_t)text & 3 || size & 3) unit_size = 2; - if((uint32_t)text & 1 || size & 1) unit_size = 1; - usb_fxlink_header_t header; usb_fxlink_fill_header(&header, 
"fxlink", "text", size); int pipe = usb_ff_bulk_output(); - usb_write_sync(pipe, &header, sizeof header, unit_size, false); - usb_write_sync(pipe, text, size, unit_size, false); + usb_write_sync(pipe, &header, sizeof header, false); + usb_write_sync(pipe, text, size, false); usb_commit_sync(pipe); } diff --git a/src/usb/pipes.c b/src/usb/pipes.c index b11973b..5ccf8a8 100644 --- a/src/usb/pipes.c +++ b/src/usb/pipes.c @@ -6,7 +6,7 @@ #include -#include "asyncio.h" +#include #include "usb_private.h" #define USB SH7305_USB @@ -69,8 +69,9 @@ void usb_pipe_clear(int pipe) USB.PIPECTR[pipe-1].ACLRM = 1; USB.PIPECTR[pipe-1].ACLRM = 0; - /* Clear the sequence bit (important after a world switch since we restore - hardware registers but the host connection is starting from scratch!) */ + /* Clear the sequence bit (important after a world switch since we + restore hardware registers but the connection to the hosts restarts + from scratch!) */ USB.PIPECTR[pipe-1].SQCLR = 1; usb_while(USB.PIPECTR[pipe-1].SQMON != 0); } @@ -106,48 +107,43 @@ static fifo_t fifo_find_available_controller(int pipe) } /* fifo_bind(): Bind a FIFO to a pipe in reading or writing mode */ -static void fifo_bind(fifo_t ct, int pipe, int mode, int size) +static void fifo_bind(fifo_t ct, int pipe, int mode) { - size = (size - (size == 4) - 1) & 3; + int reading = (mode == FIFO_READ); + int writing = (mode == FIFO_WRITE); - if(pipe == 0) { - if(USB.CFIFOSEL.ISEL == 1 && USB.DCPCTR.PID == 1) - return; - - if(mode == FIFO_WRITE) - USB.DCPCTR.PID = PID_BUF; - /* RCNT=0 REW=0 MBW=size BIGEND=1 ISEL=mode CURPIPE=0 */ - USB.CFIFOSEL.word = 0x0100 | (mode << 5) | (size << 10); + /* RCNT=0 REW=0 MBW=2 BIGEND=1 ISEL=mode CURPIPE=0 */ + if(ct == CF) { + USB.CFIFOSEL.word = 0x0900 | (mode << 5); usb_while(!USB.CFIFOCTR.FRDY || USB.CFIFOSEL.ISEL != mode); - return; } - - __typeof__(USB.D0FIFOSEL) sel; - sel.RCNT = 0; - sel.REW = 0; - sel.DCLRM = (mode == FIFO_READ); - sel.DREQE = 0; - sel.MBW = size; - sel.BIGEND = 
1; - sel.CURPIPE = pipe; - + /* RCNT=0 REW=0 DCLRM=reading DREQE=0 MBW=2 BIGEND=1 CURPIPE=pipe */ if(ct == D0F) { - USB.D0FIFOSEL.word = sel.word; + USB.D0FIFOSEL.word = 0x0900 | (reading << 13) | pipe; usb_while(!USB.D0FIFOCTR.FRDY || USB.PIPECFG.DIR != mode); } if(ct == D1F) { - USB.D1FIFOSEL.word = sel.word; + USB.D1FIFOSEL.word = 0x0900 | (reading << 13) | pipe; usb_while(!USB.D1FIFOCTR.FRDY || USB.PIPECFG.DIR != mode); } /* Enable USB comunication! */ - USB.PIPECTR[pipe-1].PID = PID_BUF; + if(pipe == 0 && writing) + USB.DCPCTR.PID = PID_BUF; + if(pipe != 0) + USB.PIPECTR[pipe-1].PID = PID_BUF; } /* fifo_unbind(): Unbind a FIFO */ static void fifo_unbind(fifo_t ct) { int pipe = -1; + /* TODO: USB (DCP normalization): NAK when unbinding? + if(ct == CF) { + USB.DCPCTR.CCPL = 0; + USB.DCPCTR.PID = PID_NAK; + usb_while(!USB.DCPCTR.PBUSY); + } */ if(ct == D0F) pipe = USB.D0FIFOSEL.CURPIPE; if(ct == D1F) pipe = USB.D1FIFOSEL.CURPIPE; if(pipe <= 0) @@ -171,52 +167,69 @@ static void fifo_unbind(fifo_t ct) // Writing operations //--- -/* Current operation waiting to be performed on each pipe. There are two - possible states for a pipe's transfer data: - -> Either there is a transfer going on, in which case (data != NULL), - (size != 0), and (buffer_used) has no meaning. - -> Either there is no transfer going on, and (data = NULL), (size = 0). +/* The writing logic is asynchronous, which makes it sometimes hard to track. + The series of calls for a write I/O is zero or more usb_write_async() + followed by a usb_commit_async(): - A controller is assigned to t->controller when a write first occurs until - the pipe is fully committed. (ct = NOF) indicates an unused pipe, while - (ct != NOF) indicates that stuff has been written and is waiting a commit.
+ write_io ::= usb_write_async* usb_commit_async - Additionally, between a call to write_round() and the corresponding - finish_write(), the (round_size) attribute is set to a non-zero value - indicating how many bytes are waiting for write completion. */ + A usb_write_async() will write to the hardware buffer as many times as it + takes to exhaust the input, including 1 time if the hardware buffer can hold + the entire input and 0 times if there is no input. Each _round_ consists of + a call to write_round() to copy with the CPU or start the copy with the DMA, + and a call to finish_round() when the copy is finished. -/* Multi-round operations to be continued whenever buffers are ready */ -GBSS static asyncio_op_t volatile pipe_transfers[10]; + If the round fills the buffer, finish_round() is triggered by the BEMP + interrupt after the hardware finishes transferring. Otherwise finish_round() + is triggered directly when writing finishes. + + complete_round ::= write_round + <CPU/DMA copy finishes> + <hardware transmits the full buffer> + <BEMP interrupt> + finish_round + + partial_round ::= write_round + <CPU/DMA copy finishes> + finish_round + + Note that the "<CPU/DMA copy finishes>" event is asynchronous if the DMA is used. A + full write will take zero or more complete rounds followed by zero or one + partial round before finish_call() is called: + + usb_write_async ::= complete_round* partial_round? finish_call + + And a commit will trigger a transmission of whatever is left in the buffer + (including nothing) and wait for the BEMP interrupt. + + usb_commit_async ::= <transmit what is left in the buffer> + <BEMP interrupt> + finish_call + + Most functions can execute either in the main thread or within an interrupt + handler.
*/ +GBSS static asyncio_op_t pipe_transfers[10]; void usb_pipe_init_transfers(void) { - memset((void *)pipe_transfers, 0, sizeof pipe_transfers); + for(int i = 0; i < 10; i++) + asyncio_op_clear(&pipe_transfers[i]); } -static void write_8(uint8_t const *data, int size, uint8_t volatile *FIFO) +void usb_wait_all_transfers(bool await_long_writes) { - for(int i = 0; i < size; i++) *FIFO = data[i]; -} -static void write_16(uint16_t const *data, int size, uint16_t volatile *FIFO) -{ - for(int i = 0; i < size; i++) *FIFO = data[i]; -} -static void write_32(uint32_t const *data, int size, uint32_t volatile *FIFO) -{ - for(int i = 0; i < size; i++) *FIFO = data[i]; -} - -/* Check whether a pipe is busy with a multi-round write or a transfer */ -GINLINE static bool pipe_busy(int pipe) -{ - /* Multi-round write still not finished */ - if(pipe_transfers[pipe].data_w) return true; - /* Transfer in progress */ - if(pipe && !USB.PIPECTR[pipe-1].BSTS) return true; - /* Callback for a just-finished transfer not yet called */ - if(pipe_transfers[pipe].round_size) return true; - /* All good */ - return false; + while(1) { + bool all_done = true; + for(int i = 0; i < 10; i++) { + asyncio_op_t const *t = &pipe_transfers[i]; + all_done &= !asyncio_op_busy(t); + if(await_long_writes) + all_done &= (t->type != ASYNCIO_WRITE); + } + if(all_done) + return; + sleep(); + } } /* Size of a pipe's buffer area, in bytes */ @@ -229,91 +242,70 @@ static int pipe_bufsize(int pipe) return (USB.PIPEBUF.BUFSIZE + 1) * 64; } -/* finish_transfer(): Finish a multi-round write transfer - - This function is called when the final round of a transfer has completed, - either by the handler of the BEMP interrupt or by the usb_commit_async() - function if the pipe is being committed when empty. */ -static void finish_transfer(asyncio_op_t volatile *t, int pipe) +/* This function is called when a read/write/fsync call and its associated + hardware interactions all complete. 
*/ +static void finish_call(asyncio_op_t *t, int pipe) { - /* Free the FIFO controller */ - fifo_unbind(t->controller); - t->controller = NOF; + /* Unbind the USB controller used for the call, except for writes since + the USB module requires us to keep it until the final commit */ + if(t->type != ASYNCIO_WRITE) { + fifo_unbind(t->controller); + t->controller = NOF; + } - /* Mark the transfer as unused */ - t->committed_w = false; - t->buffer_used = 0; - - /* Disable the interrupt */ - if(pipe != 0) + /* Disable interrupts */ + if((t->type == ASYNCIO_WRITE || t->type == ASYNCIO_SYNC) && pipe != 0) USB.BEMPENB &= ~(1 << pipe); - gint_call(t->callback); - USB_TRACE("finish_transfer()"); + asyncio_op_finish_call(t); + USB_TRACE("finish_call()"); } -/* finish_round(): Update transfer logic after a write round completes - - This function is called when a write round completes, either by the handler - of the BEMP interrupt if the round filled the FIFO, or by the handler of the - DMA transfer or the write_round() function itself if it didn't. - - It the current write operation has finished with this round, this function - invokes the write_async callback. */ -static void finish_round(asyncio_op_t volatile *t, int pipe) +/* This function is called when a round of writing has completed, including all + hardware interactions. If the FIFO got filled by the writing, this is after + the transmission and BEMP interrupt; otherwise this is when the CPU/DMA + finished writing. */ +static void finish_round(asyncio_op_t *t, int pipe) { - /* Update the pointer as a result of the newly-finished write */ - t->buffer_used += t->round_size; - t->data_w += t->round_size; - t->size -= t->round_size; - t->round_size = 0; +// USB_LOG("[PIPE%d] finish_round() for %d bytes\n", pipe, t->round_size); + asyncio_op_finish_write_round(t); /* Account for auto-transfers */ if(t->buffer_used == pipe_bufsize(pipe)) t->buffer_used = 0; - /* At the end, free the FIFO and invoke the callback. 
Hold the - controller until the pipe is committed */ - if(t->size == 0) { - t->data_w = NULL; - gint_call(t->callback); - } - USB_TRACE("finish_round()"); -} + if(t->size == 0) + finish_call(t, pipe); +} /* write_round(): Write up to a FIFO's worth of data to a pipe If this is a partial round (FIFO not going to be full), finish_round() is invoked after the write. Otherwise the FIFO is transmitted automatically and the BEMP handler will call finish_round() after the transfer. */ -static void write_round(asyncio_op_t volatile *t, int pipe) +static void write_round(asyncio_op_t *t, int pipe) { fifo_t ct = t->controller; - void volatile *FIFO = NULL; + void volatile *FIFO = NULL; if(ct == CF) FIFO = &USB.CFIFO; if(ct == D0F) FIFO = &USB.D0FIFO; if(ct == D1F) FIFO = &USB.D1FIFO; - fifo_bind(ct, pipe, FIFO_WRITE, t->unit_size); /* Amount of data that can be transferred in a single run */ - int available = pipe_bufsize(pipe) - (pipe == 0 ? 0 : t->buffer_used); + int available = pipe_bufsize(pipe) - t->buffer_used; int size = min(t->size, available); - t->round_size = size; /* If this is a partial write (size < available), call finish_round() after the copy to notify the user that the pipe is ready. Otherwise, a USB transfer will occur and the BEMP handler will do it. */ bool partial = (size < available); + asyncio_op_start_write_round(t, size); + if(t->dma) { - /* TODO: USB: Can we use 32-byte DMA transfers? */ - int block_size = DMA_1B; - if(t->unit_size == 2) block_size = DMA_2B, size >>= 1; - if(t->unit_size == 4) block_size = DMA_4B, size >>= 2; - gint_call_t callback = partial ? GINT_CALL(finish_round, (void *)t, pipe) : GINT_CALL_NULL; @@ -321,60 +313,56 @@ static void write_round(asyncio_op_t volatile *t, int pipe) /* Use DMA channel 3 for D0F and 4 for D1F */ int channel = (ct == D0F) ? 3 : 4; - bool ok = dma_transfer_async(channel, block_size, size, + /* TODO: USB: Can we use 32-byte DMA transfers? 
*/ + bool ok = dma_transfer_async(channel, DMA_4B, size >> 2, t->data_w, DMA_INC, (void *)FIFO, DMA_FIXED, callback); if(!ok) USB_LOG("DMA async failed on channel %d!\n", channel); } else { - if(t->unit_size == 1) write_8(t->data_w, size, FIFO); - if(t->unit_size == 2) write_16(t->data_w, size >> 1, FIFO); - if(t->unit_size == 4) write_32(t->data_w, size >> 2, FIFO); + usb_pipe_write4(t->data_w, size, &t->shbuf, &t->shbuf_size, + FIFO); if(partial) finish_round(t, pipe); } USB_TRACE("write_round()"); } -int usb_write_async(int pipe, void const *data, int size, int unit_size, - bool use_dma, gint_call_t callback) +int usb_write_async(int pipe, void const *data, int size, bool use_dma, + gint_call_t callback) { - if(pipe_busy(pipe)) return USB_WRITE_BUSY; + asyncio_op_t *t = &pipe_transfers[pipe]; + if(asyncio_op_busy(t)) + return USB_WRITE_BUSY; - asyncio_op_t volatile *t = &pipe_transfers[pipe]; - if(!data || !size) return 0; - - /* Re-use the controller from a previous write if there is one, - otherwise try to get a new free one */ + /* If this is the first write of a series, find a controller. */ /* TODO: usb_write_async(): TOC/TOU race on controller being free */ - fifo_t ct = t->controller; - if(ct == NOF) ct = fifo_find_available_controller(pipe); - if(ct == NOF) return USB_WRITE_NOFIFO; + if(t->controller == NOF) { + fifo_t ct = fifo_find_available_controller(pipe); + if(ct == NOF) + return USB_WRITE_NOFIFO; + fifo_bind(ct, pipe, FIFO_WRITE); + t->controller = ct; + } - t->data_w = data; - t->size = size; - t->unit_size = (pipe == 0) ? 1 : unit_size; - t->dma = use_dma; - t->committed_w = false; - t->controller = ct; - t->callback = callback; + asyncio_op_start_write(t, data, size, use_dma, &callback); /* Set up the Buffer Empty interrupt to refill the buffer when it gets empty, and be notified when the transfer completes.
*/ - if(pipe) USB.BEMPENB |= (1 << pipe); + USB.BEMPENB |= (1 << pipe); write_round(t, pipe); return 0; } -int usb_write_sync_timeout(int pipe, void const *data, int size, int unit_size, - bool use_dma, timeout_t const *timeout) +int usb_write_sync_timeout(int pipe, void const *data, int size, bool use_dma, + timeout_t const *timeout) { volatile int flag = 0; while(1) { - int rc = usb_write_async(pipe, data, size, unit_size, use_dma, + int rc = usb_write_async(pipe, data, size, use_dma, GINT_CALL_SET(&flag)); if(rc == 0) break; @@ -396,39 +384,45 @@ int usb_write_sync_timeout(int pipe, void const *data, int size, int unit_size, return 0; } -int usb_write_sync(int pipe, void const *data, int size, int unit, bool dma) +int usb_write_sync(int pipe, void const *data, int size, bool dma) { - return usb_write_sync_timeout(pipe, data, size, unit, dma, NULL); + return usb_write_sync_timeout(pipe, data, size, dma, NULL); } int usb_commit_async(int pipe, gint_call_t callback) { - asyncio_op_t volatile *t = &pipe_transfers[pipe]; - if(pipe_busy(pipe)) return USB_COMMIT_BUSY; + asyncio_op_t *t = &pipe_transfers[pipe]; + if(asyncio_op_busy(t)) + return USB_COMMIT_BUSY; + if(t->type != ASYNCIO_WRITE || t->controller == NOF) + return USB_COMMIT_INACTIVE; - if(t->controller == NOF) return USB_COMMIT_INACTIVE; + /* Flush any remaining bytes in the short buffer. This cannot fill the + buffer and create an auto-transmission situation; instead the module + remains idle after this write. This is because we only use 32-bit + writes, therefore at worst the buffer is 4 bytes away from being + full, and will not be filled by an extra 0-3 bytes. 
*/ + void volatile *FIFO = NULL; + if(t->controller == CF) FIFO = &USB.CFIFO; + if(t->controller == D0F) FIFO = &USB.D0FIFO; + if(t->controller == D1F) FIFO = &USB.D1FIFO; + usb_pipe_flush4(t->shbuf, t->shbuf_size, FIFO); - t->committed_w = true; - t->callback = callback; + /* Switch from WRITE to SYNC type; this influences the BEMP handler and + the final finish_call() */ + asyncio_op_start_sync(t, &callback); - /* TODO: Handle complex commits on the DCP */ - if(pipe == 0) - { - finish_transfer(t, pipe); + /* TODO: Figure out why previous attempts to use BEMP to finish commit + TODO| calls on the DCP failed with a freeze */ + if(pipe == 0) { USB.CFIFOCTR.BVAL = 1; - return 0; - } - - /* Committing an empty pipe ends the transfer on the spot */ - if(t->buffer_used == 0) - { - finish_transfer(t, pipe); + finish_call(t, pipe); return 0; } /* Set BVAL=1 and inform the BEMP handler of the commitment with the - committed_w flag; the handler will invoke finish_transfer() */ - fifo_bind(t->controller, pipe, FIFO_WRITE, t->unit_size); + SYNC type; the handler will invoke finish_call() */ + USB.BEMPENB |= (1 << pipe); if(t->controller == D0F) USB.D0FIFOCTR.BVAL = 1; if(t->controller == D1F) USB.D1FIFOCTR.BVAL = 1; USB_LOG("[PIPE%d] Committed transfer\n", pipe); @@ -471,11 +465,11 @@ void usb_commit_sync(int pipe) /* usb_pipe_write_bemp(): Callback for the BEMP interrupt on a pipe */ void usb_pipe_write_bemp(int pipe) { - asyncio_op_t volatile *t = &pipe_transfers[pipe]; + asyncio_op_t *t = &pipe_transfers[pipe]; - if(t->committed_w) + if(t->type == ASYNCIO_SYNC) { - finish_transfer(t, pipe); + finish_call(t, pipe); } else { diff --git a/src/usb/setup.c b/src/usb/setup.c index cd0f54d..36d4c50 100644 --- a/src/usb/setup.c +++ b/src/usb/setup.c @@ -8,7 +8,7 @@ #define USB SH7305_USB -#define dcp_write(data, size) usb_write_sync(0, data, size, 1, false) +#define dcp_write(data, size) usb_write_sync(0, data, size, false) //--- // SETUP requests diff --git a/src/usb/usb.c 
b/src/usb/usb.c index 458033e..cebee5b 100644 --- a/src/usb/usb.c +++ b/src/usb/usb.c @@ -223,6 +223,9 @@ void usb_open_wait(void) void usb_close(void) { + usb_wait_all_transfers(false); + usb_pipe_init_transfers(); + intc_priority(INTC_USB, 0); hpoweroff(); USB_LOG("---- usb_close ----\n"); @@ -277,7 +280,7 @@ static void usb_interrupt_handler(void) uint16_t status = USB.BEMPSTS; USB.BEMPSTS = 0; - for(int i = 1; i <= 9; i++) + for(int i = 0; i <= 9; i++) { if(status & (1 << i)) usb_pipe_write_bemp(i); } diff --git a/src/usb/usb_private.h b/src/usb/usb_private.h index bbca9bb..592858b 100644 --- a/src/usb/usb_private.h +++ b/src/usb/usb_private.h @@ -132,6 +132,46 @@ void usb_pipe_write_bemp(int pipe); /* usb_pipe_init_transfers(): Initialize transfer information */ void usb_pipe_init_transfers(void); +/* usb_wait_all_transfers(): Wait for all transfers to finish + + This function waits for all current operations on the pipes to finish their + current read/write/fsync call. Once the waiting period is finished, the + calls are guaranteed to be finished, but write transactions might not (as + they require multiple calls finishing with a fsync(2)). + + If `await_long_writes` is set, this function also waits for all writes to be + committed, which only makes sense if said writes are executed in a thread + that is able to run while this is waiting. */ +void usb_wait_all_transfers(bool await_long_writes); + +/* usb_pipe_write4(): Copy arbitrary ranges of memory to a 4-byte USB FIFO + + This function copies arbitrarily-aligned data of any size into a 4-byte + USB FIFO register. It rearranges data so as to perform only 4-byte aligned + writes. If the data size isn't a multiple of 4 bytes, it stores the + remainder into a short buffer (holding between 0 and 3 bytes), to be + combined with fresh data on the next call. The remainder of the buffer can + be discharged eventually with usb_pipe_flush4(). 
+ + @data Data to write into the FIFO + @size Number of bytes to write + @buffer Address of short buffer + @buffer_size Address of short buffer's size tracker + @FIFO FIFO to output to */ +void usb_pipe_write4(void const *data, int size, uint32_t volatile *buffer, + uint8_t volatile *buffer_size, uint32_t volatile *FIFO); + +/* usb_pipe_flush4(): Flush usb_pipe_write4()'s short buffer + + This function is used after a sequence of usb_pipe_write4() to flush the + last few bytes remaining in the short buffer. + + @buffer Contents of short buffer + @buffer_size Number of bytes held in the short buffer (0 to 3) + @FIFO FIFO to output to */ +void usb_pipe_flush4(uint32_t buffer, int buffer_size, + uint32_t volatile *FIFO); + //--- // Timeout waits //--- @@ -171,7 +211,7 @@ enum { void usb_req_setup(void); //--- -// Enumerations and stuff +// Enumerated constants //--- enum { diff --git a/src/usb/write4.S b/src/usb/write4.S new file mode 100644 index 0000000..240a872 --- /dev/null +++ b/src/usb/write4.S @@ -0,0 +1,136 @@ +.global _usb_pipe_write4 +.global _usb_pipe_flush4 + +#define _fifo r3 +#define _data r4 +#define _size r5 +#define _buf r6 +#define _bufsize r7 + +_usb_pipe_write4: + /* Skip to writing the data if the buffer's empty. This test is free + because having it simplifies a later while loop into a do/while. */ + mov.b @_bufsize, r1 + mov #4, r0 + mov.l @r15, _fifo + tst r1, r1 + bt.s .write_data + sub r1, r0 /* Bytes required to fill the buffer */ + + /* If we can't even fill the buffer, skip to the end push. */ + mov.l @_buf, r2 + cmp/gt _size, r0 + bt .push_buffer + + /* Precompute the amount of data left after filling the buffer */ + sub r0, _size + + /* Fill the buffer by reading unaligned bytes */ +1: mov.b @_data+, r1 + shll8 r2 + dt r0 + extu.b r1, r1 + bf.s 1b + or r1, r2 + + /* Commit the filled buffer */ + mov.l r2, @_fifo + +.write_data: + /* Check if we have enough data to run this loop */ + /* TODO: For small sizes use another loop, so we can unroll? 
*/ + mov #4, r0 + cmp/gt _size, r0 + bt 4f + + /* Determine whether we need to use unaligned reads */ + mov #3, r0 + tst r0, _data + mov _size, r1 + bt.s 3f + shlr2 r1 + + /* Unaligned write loop */ +2: movua.l @_data+, r0 + dt r1 + bf.s 2b + mov.l r0, @_fifo + + bra 4f + nop + + /* Aligned write loop */ +3: mov.l @_data+, r0 + dt r1 + bf.s 3b + mov.l r0, @_fifo + +4: mov #3, r0 + and r0, _size + + mov #0, r2 + mov #0, r1 + +.push_buffer: + /* Here r1 = buffer size, r2 = buffer contents, _size = data left */ + + /* Check if there is any data left to push into the short buffer */ + tst _size, _size + mov r1, r0 + add _size, r0 + bt.s .end + mov.b r0, @_bufsize + + /* Push loop */ +5: mov.b @_data+, r1 + shll8 r2 + dt _size + extu.b r1, r1 + bf.s 5b + or r1, r2 + +.end: + rts + mov.l r2, @_buf + +#undef _fifo +#undef _data +#undef _size +#undef _buf +#undef _bufsize + +/* --- */ + +#define _buf r4 +#define _bufsize r5 +#define _fifo r6 + +_usb_pipe_flush4: + /* Jump table. We skip 4*_bufsize bytes, which lands us right on labels + 0:, 1:, 2: or 3: depending on the value of _bufsize. */ + shll2 _bufsize + braf _bufsize + mov _buf, r0 + +0: /* No extra data to write out */ + rts + nop + +1: /* Single byte */ + rts + mov.b r0, @_fifo + +2: /* Two bytes */ + rts + mov.w r0, @_fifo + +3: /* Three bytes */ + mov r0, r1 + shlr8 r1 + mov.w r1, @_fifo + rts + mov.b r0, @(2, _fifo) + +#undef _buf +#undef _bufsize +#undef _fifo