gint/src/usb/read4.S

182 lines
3.2 KiB
ArmAsm

.global _usb_pipe_read4
/* Register aliases used throughout _usb_pipe_read4. r4..r7 are used as they
are on entry to the routine; r8 and r9 are loaded from the stack by the
routine itself after it has saved their previous contents. */
/* User buffer (destination address) and round size (bytes to deliver) */
#define _data r4
#define _datasize r5
/* FIFO address and amount of data (in bytes) available in there */
#define _fifo r6
#define _fifosize r7
/* Short buffer address and *pointer to* its size: _bufsize holds the address
of a byte that contains the number of valid bytes in the short buffer */
#define _buf r8
#define _bufsize r9
/* _usb_pipe_read4: Copy _datasize bytes from _fifo to _data, using USB FIFO
access rules (the FIFO is only ever read with 4-byte mov.l accesses at the
fixed address _fifo) and storing excess data in the short buffer.

Inputs (register aliases are #defined just above this routine):
  _data (r4)      Destination address; any alignment is accepted
  _datasize (r5)  Number of bytes to deliver
  _fifo (r6)      Address read with mov.l for every FIFO access; it is never
                  incremented
  _fifosize (r7)  Number of bytes currently available in the FIFO
  @(0, r15)       Address of the short buffer (loaded into _buf); it is
                  accessed with mov.l as one longword
  @(4, r15)       Address of a byte holding the number of valid bytes in the
                  short buffer (loaded into _bufsize)
  (presumably the two stack slots are the 5th and 6th arguments of a C
  prototype -- confirm against the caller)

Requires: 1 <= _datasize <= _fifosize + *_bufsize

Effects:
  - _datasize bytes are written to _data, taken first from the short buffer
    and then from the FIFO.
  - The short buffer and *_bufsize are rewritten; unconsumed bytes are kept
    at the start of the short buffer.
  - r0..r3, _data, _datasize, _fifosize and T are clobbered; r8 and r9 are
    saved on entry and restored before returning. No return value appears
    to be intended (r0 is only used as scratch).

The lone nop instructions look like scheduling/alignment padding from the
original author; they have no effect on the data flow. */
_usb_pipe_read4:
/* Fetch the _bufsize pointer before the pushes below move r15 */
mov.l @(4, r15), r1
/* Save r8/r9 before using them as _buf/_bufsize */
mov.l r8, @-r15
mov.l r9, @-r15
mov r1, _bufsize
/* r1 = *_bufsize (current number of bytes in the short buffer) */
mov.b @_bufsize, r1
/* Step #1: If _datasize <= *_bufsize < 4, then we fill user data from the short
buffer, and return immediately. */
/* 1 cycle lost here due to early use of r1 */
/* r15 dropped by 8 after the two pushes, so this is the @(0, r15) entry slot */
mov.l @(8, r15), _buf
/* T = (*_bufsize >= _datasize), signed comparison */
cmp/ge _datasize, r1
bt .short_buffer_only
/* T = (*_bufsize == 0) */
tst r1, r1
/* Step #2: If *_bufsize > 0, copy *_bufsize bytes from the short buffer to
_data, emptying the short buffer. */
bt 2f
mov _buf, r3
/* Byte copy loop. The bf.s tests the T bit of the second dt (r1), so the loop
runs *_bufsize times; _datasize stays positive because *_bufsize < _datasize
on this path. The add in the delay slot advances _data on every iteration. */
1: mov.b @r3+, r2
dt _datasize
dt r1
mov.b r2, @_data
bf.s 1b
add #1, _data
/* r1 is 0 here: record that the short buffer is now empty */
mov.b r1, @_bufsize
nop
/* Step #3: Copy longwords from the FIFO to the user buffer as long as there is
at least 4 bytes in both. Since at this stage _datasize <= _fifosize we can
simply check _datasize. */
2: /* Update _datasize and _fifosize in advance */
/* r0 = _datasize rounded down to a multiple of 4 = bytes moved by this step */
mov #-4, r0
and _datasize, r0
/* If _datasize ≤ 3, skip this step */
mov #3, r3
cmp/ge _datasize, r3
/* The delay-slot sub runs whether or not the branch is taken; when it is
taken r0 is 0, so _fifosize is left unchanged */
bt.s 3f
sub r0, _fifosize
/* T = (_data is 4-aligned). The mov and shlr2 below leave T untouched, so it
is still valid for the bf.s that selects the copy loop. */
tst r3, _data
nop
/* r2 is the number of 4-byte reads; since _datasize > 3, r2 ≥ 1 */
mov _datasize, r2
shlr2 r2
/* Delay slot: _datasize &= 3, the bytes left over for Step #4 */
bf.s .unaligned
and r3, _datasize
/* Selects one of the three aligned copy loops below at preprocessing time;
only the VERSION == 0 variant is assembled. */
#define VERSION 0
.aligned:
#if VERSION == 0
/* Plain longword loop: one FIFO read and one aligned store per iteration. r3
is used as a temporary here; Step #4 reloads it with 3. The add does not
modify T, so bf.s still tests the result of dt r2. */
mov.l @_fifo, r3
dt r2
mov.l r3, @_data
add #4, _data
bf.s .aligned
nop
#elif VERSION == 1
/* Alternative loop (not assembled) moving data through x0 with movs.l;
presumably requires the DSP extension -- confirm before enabling */
mov _fifo, r3
nop
9: movs.l @r3, x0
dt r2
bf.s 9b
movs.l x0, @_data+
#elif VERSION == 2
/* Alternative loop (not assembled) built on ldrs/ldre/ldrc with r2 as the
count; presumably a DSP hardware repeat loop -- confirm before enabling */
ldrs .al_b
ldre .al_e
ldrc r2
mov _fifo, r3
.al_b: movs.l @r3, x0
.al_e: movs.l x0, @_data+
#endif
bra 3f
nop
/* Unaligned destination: read one FIFO longword and store it byte by byte,
lowest byte of r0 at offset 3 up to the highest byte at offset 0 (the same
memory image as a big-endian longword store -- assumes the CPU runs
big-endian, TODO confirm). The byte stores and shlr8 leave T untouched, so
bf.s tests the result of dt r2; the delay slot advances _data by 4. */
.unaligned:
mov.l @_fifo, r0
dt r2
mov.b r0, @(3, _data)
nop
shlr8 r0
mov.b r0, @(2, _data)
shlr8 r0
mov.b r0, @(1, _data)
shlr8 r0
mov.b r0, @_data
bf.s .unaligned
add #4, _data
/* Step #4: Load the final bytes of the round from the FIFO into the short
buffer (can be either nothing or a standard 4-byte read), then finish with a
copy to user data. */
3: /* If we finished the read, don't load anything */
tst _datasize, _datasize
mov #3, r3
/* NOTE(review): when this branch is taken, r1 is 0 (Step #2 either was skipped
because r1 was 0 or counted it down to 0) and r3 is 3, so the epilogue stores
size 0 and the value 3 into the short buffer; the contents look meaningless
but are covered by the zero size. */
bt .epilogue
/* Clamp _fifosize to at most 4 (unsigned compare against r3 = 3) */
cmp/hi r3, _fifosize
bf 4f
mov #4, _fifosize
/* Make a 4-byte read from the FIFO. If there are less than 4 bytes
left this will pad with zeros. */
/* r1 = number of bytes just loaded into the short buffer */
4: mov.l @_fifo, r3
mov _fifosize, r1
mov.l r3, @_buf
nop
/* Step #1/#5: Copy 0 < _datasize <= *_bufsize bytes from the short buffer to
_data, then return. r1 must be *_bufsize. */
.short_buffer_only:
/* r3 = short buffer contents, r0 = byte index, r1 = size left after the copy */
mov.l @_buf, r3
mov #0, r0
sub _datasize, r1
nop
/* The loop accesses memory with @(r0, _) but also shifts the contents
of _buf in r3 (it's more convenient to use the available EX slot
than shift by 8*_datasize outside the loop) */
/* The shll8 sits in the delay slot, so r3 is shifted once per copied byte,
including on the final iteration; it does not modify T. */
5: mov.b @(r0, _buf), r2
dt _datasize
mov.b r2, @(r0, _data)
add #1, r0
bf.s 5b
shll8 r3
/* Write back the shifted short buffer contents and its new size, then restore
the saved registers; r8 is popped in the delay slot of rts. */
.epilogue:
mov.l r3, @_buf
mov.b r1, @_bufsize
mov.l @r15+, r9
rts
mov.l @r15+, r8