diff --git a/CMakeLists.txt b/CMakeLists.txt
index f0fc47f..c8c8243 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,6 +127,7 @@ set(SOURCES_COMMON
   src/usb/classes/ff-bulk.c
   src/usb/configure.c
   src/usb/pipes.c
+  src/usb/read4.S
   src/usb/setup.c
   src/usb/string.c
   src/usb/usb.c
diff --git a/src/usb/classes/ff-bulk.c b/src/usb/classes/ff-bulk.c
index 745ce5f..c90af33 100644
--- a/src/usb/classes/ff-bulk.c
+++ b/src/usb/classes/ff-bulk.c
@@ -12,7 +12,7 @@ static void notify_read(int endpoint);
 static usb_dc_interface_t dc_interface = {
 	.bLength = sizeof(usb_dc_interface_t),
 	.bDescriptorType = USB_DC_INTERFACE,
-	.bInterfaceNumber = -1 /* Set by driver */,
+	.bInterfaceNumber = -1, /* Set by driver */
 	.bAlternateSetting = 0,
 	.bNumEndpoints = 2,
 	.bInterfaceClass = 0xff, /* Vendor-Specific */
@@ -283,10 +283,11 @@ void usb_fxlink_set_notifier(void (*notifier_function)(void))
 void usb_fxlink_drop_transaction(void)
 {
 	int block = USB_READ_BLOCK;
+	char buf[512];
 
 	while(1) {
 		timeout_t tm = timeout_make_ms(1000);
-		int rc = usb_read_async(usb_ff_bulk_input(), NULL, 512,
+		int rc = usb_read_async(usb_ff_bulk_input(), buf, 512,
 			USB_READ_WAIT | block, NULL, &tm, GINT_CALL_NULL);
 
 		/* Break on error or short read (end of transaction) */
diff --git a/src/usb/pipes.c b/src/usb/pipes.c
index e988f44..316f664 100644
--- a/src/usb/pipes.c
+++ b/src/usb/pipes.c
@@ -36,7 +36,7 @@ void usb_pipe_configure(int address, endpoint_t const *ep)
 	USB.PIPECFG.TYPE = type;
 	USB.PIPECFG.BFRE = 0;
 	/* Enable continuous mode on all bulk transfer pipes
-	   TODO: Also make it double mode*/
+	   TODO: Also make it double mode */
 	USB.PIPECFG.DBLB = 0;
 	USB.PIPECFG.CNTMD = (type == TYPE_BULK);
 	USB.PIPECFG.SHTNAK = 1;
@@ -532,11 +532,17 @@ static void USB_LOG_TR(char const *p, asyncio_op_t *t, char const *fmt, ...)
 	int E = USB.INTENB0.BRDYE;
 	USB.INTENB0.BRDYE = 0;
 
+	char shbuf[16];
+	if(t->shbuf_size >= 4)
+		sprintf(shbuf, "!!%d", t->shbuf_size);
+	else
+		snprintf(shbuf, t->shbuf_size * 2 + 1, "%08x", t->shbuf);
+
 	char str[128];
-	snprintf(str, sizeof str - 1, "%s: %s buf=%d%s%s req=%d/%d%s ",
+	snprintf(str, sizeof str - 1, "%s: %s buf=%d%s%s req=%d/%d%s |%s| ",
 		p, t->type == ASYNCIO_READ ? "READ" : "NONE",
 		t->buffer_used, t->cont_r ? "+":"", t->interrupt_r ? "!":"",
-		t->round_size, t->size, t->autoclose_r ? "#" : "");
+		t->round_size, t->size, t->autoclose_r ? "#" : "", shbuf);
 
 	va_list args;
 	va_start(args, fmt);
@@ -623,31 +629,9 @@ static bool read_round(asyncio_op_t *t, int pipe)
 	if(t->controller == D0F) FIFO = &USB.D0FIFO;
 	if(t->controller == D1F) FIFO = &USB.D1FIFO;
 
-	void *dataptr = t->data_r;
-	if(dataptr) {
-		for(int i = 0; i < round_size / 4; i++) {
-			*(uint32_t *)dataptr = *FIFO;
-			dataptr += 4;
-		}
-		if(round_size & 2) {
-			*(uint16_t *)dataptr = *(uint16_t volatile *)FIFO;
-			dataptr += 2;
-		}
-		if(round_size & 1) {
-			*(uint8_t *)dataptr = *(uint8_t volatile *)FIFO;
-			dataptr += 1;
-		}
-	}
-	else {
-		volatile int x;
-		for(int i = 0; i < round_size / 4; i++)
-			x = *FIFO;
-		if(round_size & 2)
-			x = *(uint16_t volatile *)FIFO;
-		if(round_size & 1)
-			x = *(uint8_t volatile *)FIFO;
-		(void)x;
-	}
+	int fifosize = t->buffer_used - t->shbuf_size;
+	usb_pipe_read4(t->data_r, round_size, FIFO, fifosize, &t->shbuf,
+		&t->shbuf_size);
 
 	finish_read_round(t, pipe);
 	return false;
diff --git a/src/usb/read4.S b/src/usb/read4.S
new file mode 100644
index 0000000..cbf837b
--- /dev/null
+++ b/src/usb/read4.S
@@ -0,0 +1,181 @@
+.global _usb_pipe_read4
+
+/* User buffer and round size */
+#define _data r4
+#define _datasize r5
+/* FIFO address and amount of data available in there */
+#define _fifo r6
+#define _fifosize r7
+/* Short buffer address and *pointer to* its size (both passed on the stack) */
+#define _buf r8
+#define _bufsize r9
+
+/* Copy _datasize bytes from _fifo to _data, using USB FIFO access rules and
+   storing excess data in the short buffer.
+   Requires: 1 ≤ _datasize ≤ _fifosize + *_bufsize */
+_usb_pipe_read4:
+	mov.l	@(4, r15), r1
+	mov.l	r8, @-r15
+	mov.l	r9, @-r15
+
+	mov	r1, _bufsize
+	mov.b	@_bufsize, r1
+
+/* Step #1: If _datasize ≤ *_bufsize < 4, then we fill user data from the short
+   buffer, and return immediately. */
+
+	/* 1 cycle lost here due to early use of r1 */
+
+	mov.l	@(8, r15), _buf
+	cmp/ge	_datasize, r1
+
+	bt	.short_buffer_only
+	tst	r1, r1
+
+/* Step #2: If *_bufsize > 0, copy *_bufsize bytes from the short buffer to
+   _data, emptying the short buffer. */
+
+	bt	2f
+	mov	_buf, r3
+
+1:	mov.b	@r3+, r2
+	dt	_datasize
+
+	dt	r1
+	mov.b	r2, @_data
+
+	bf.s	1b
+	add	#1, _data
+
+	mov.b	r1, @_bufsize
+	nop
+
+/* Step #3: Copy longwords from the FIFO to the user buffer as long as there
+   are at least 4 bytes in both. Since at this stage _datasize ≤ _fifosize we
+   can simply check _datasize. */
+
+2:	/* Update _datasize and _fifosize in advance */
+	mov	#-4, r0
+	and	_datasize, r0
+
+	/* If _datasize ≤ 3, skip this step */
+	mov	#3, r3
+	cmp/ge	_datasize, r3
+
+	bt.s	3f
+	sub	r0, _fifosize
+
+	tst	r3, _data
+	nop
+
+	/* r2 is the number of 4-byte reads; since _datasize > 3, r2 ≥ 1 */
+	mov	_datasize, r2
+	shlr2	r2
+
+	bf.s	.unaligned
+	and	r3, _datasize
+
+#define VERSION 0
+
+.aligned:
+
+#if VERSION == 0
+	mov.l	@_fifo, r3
+	dt	r2
+
+	mov.l	r3, @_data
+	add	#4, _data
+
+	bf.s	.aligned
+	nop
+#elif VERSION == 1
+	mov	_fifo, r3
+	nop
+
+9:	movs.l	@r3, x0
+	dt	r2
+
+	bf.s	9b
+	movs.l	x0, @_data+
+#elif VERSION == 2
+	ldrs	.al_b
+	ldre	.al_e
+	ldrc	r2
+	mov	_fifo, r3
+
+.al_b:	movs.l	@r3, x0
+.al_e:	movs.l	x0, @_data+
+#endif
+
+	bra	3f
+	nop
+
+.unaligned:
+	mov.l	@_fifo, r0
+	dt	r2
+
+	mov.b	r0, @(3, _data)
+	nop
+
+	shlr8	r0
+	mov.b	r0, @(2, _data)
+
+	shlr8	r0
+	mov.b	r0, @(1, _data)
+
+	shlr8	r0
+	mov.b	r0, @_data
+
+	bf.s	.unaligned
+	add	#4, _data
+
+/* Step #4: Load the final bytes of the round from the FIFO into the short
+   buffer (can be either nothing or a standard 4-byte read), then finish with a
+   copy to user data. */
+
+3:	/* If we finished the read, don't load anything */
+	tst	_datasize, _datasize
+	mov	#3, r3
+
+	bt	.epilogue
+	cmp/hi	r3, _fifosize
+
+	bf	4f
+	mov	#4, _fifosize
+
+	/* Make a 4-byte read from the FIFO. If there are fewer than 4 bytes
+	   left this will pad with zeros. */
+4:	mov.l	@_fifo, r3
+	mov	_fifosize, r1
+
+	mov.l	r3, @_buf
+	nop
+
+/* Step #1/#5: Copy 0 < _datasize ≤ *_bufsize bytes from the short buffer to
+   _data, then return. r1 must be *_bufsize. */
+.short_buffer_only:
+	mov.l	@_buf, r3
+	mov	#0, r0
+
+	sub	_datasize, r1
+	nop
+
+	/* The loop accesses memory with @(r0, _) but also shifts the contents
+	   of _buf in r3 (it's more convenient to use the available EX slot
+	   than shift by 8*_datasize outside the loop) */
+
+5:	mov.b	@(r0, _buf), r2
+	dt	_datasize
+
+	mov.b	r2, @(r0, _data)
+	add	#1, r0
+
+	bf.s	5b
+	shll8	r3
+
+.epilogue:
+	mov.l	r3, @_buf
+	mov.b	r1, @_bufsize
+	mov.l	@r15+, r9
+	rts
+	mov.l	@r15+, r8
diff --git a/src/usb/usb_private.h b/src/usb/usb_private.h
index 58be9c7..68f1a20 100644
--- a/src/usb/usb_private.h
+++ b/src/usb/usb_private.h
@@ -175,6 +175,23 @@ void usb_pipe_write4(void const *data, int size, uint32_t volatile *buffer,
 void usb_pipe_flush4(uint32_t buffer, int buffer_size,
 	uint32_t volatile *FIFO);
 
+/* usb_pipe_read4(): Copy arbitrary ranges of memory from a 4-byte USB FIFO
+
+   This function performs arbitrarily-aligned reads of any size from a 4-byte
+   USB FIFO register to regular memory. It performs only 4-byte reads on the
+   FIFO (the last one may hold fewer than 4 bytes of data) and copies read
+   data to the supplied buffer. Any excess bytes read from the FIFO are stored
+   in a short buffer to be used on the next call.
+
+   @data         Buffer to read into (no NULL check is performed)
+   @size         Read size (1 ≤ size ≤ fifo_size + *buffer_size)
+   @FIFO         USB FIFO register to read from
+   @fifo_size    Amount of data left in the FIFO (excluding short buffer!)
+   @buffer       Address of short buffer
+   @buffer_size  Address of short buffer's size tracker */
+void usb_pipe_read4(void *data, int size, uint32_t volatile *FIFO,
+	int fifo_size, uint32_t volatile *buffer, uint8_t volatile *buffer_size);
+
 //---
 // Timeout waits
 //---