.section .text.memmove,"ax",%progbits
.global memmove
.global memcpy
.type memmove, %function
.type memcpy, %function
@ memmove / memcpy: both symbols share one overlap-safe implementation.
@   In:   R0 = dest, R1 = src, R2 = len (bytes)
@   Out:  R0 = dest (the original value is kept on the stack and popped on return)
@   R1 and R2 are consumed while copying; R3 (and, depending on the build variant,
@   R12 and LR) serve as copy scratchpads. The 32 byte loops also use R4, which they
@   save and restore themselves.
@   Direction: src > dest copies upwards from the start, src < dest copies downwards from the end.
memmove: @ Copy memory forward or backward, allows overlapping src/dest
memcpy: @ args: R0: dest, R1: src, R2: len; returns: dest
@ Check if we need to go backwards (unsigned pointer comparison)
cmp r1, r0 @ TEST src - dst
@ NOTE: this branch leaves before the push further down, so the [backward] path
@ cannot rely on the stack frame that is created for the forward path here
bcc 9f @ IF src < dest: THEN GOTO [backward]
@ If src == dst we don't need to do anything at all
@ (R0 still holds dest and nothing has been pushed yet, so a plain return is enough)
it eq @ IF src == dest:
bxeq lr @ THEN return dest
@ Forward copy: keep the original dest and the return address for the final ldmfd {r0, pc}
stmfd sp!, {r0, lr} @ SAVE return_addr orig_dest @ STACK: orig_dest return_addr
@ (fallthrough)
#if defined(SQUEEZE)
@ Absolute minimum size forward implementation:
@ A single byte loop; the length test sits at the top, so a len of 0 copies nothing
1: @ DO:
@ Stupidly copy a byte at a time
subs r2, r2, #1 @ len--
@ Returning pops the saved dest into R0 and the saved return address into PC
it lt @ IF len < 0:
ldmfdlt sp!, {r0, pc} @ THEN RETURN orig_dest
ldrb r3, [r1], #1 @ R3 = *src++ (byte)
strb r3, [r0], #1 @ *dest++ = R3 (byte)
b 1b @ WHILE true
@ End of absolute minimum size forward implementation
#elif defined(OPTIMIZE_SIZE)
@ Size optimized forward implementation:
@ Word copies are only used when both pointers are 4 byte aligned, everything else is copied bytewise
@ While in word mode, len is kept 4 below the real remaining count (the len offset noted on the right)
@ Skip to tail if we have to copy less than 4 bytes
subs r2, r2, #4 @ len -= 4 @ len offset: -4
blt 2f @ IF len < 0: THEN GOTO [remainder]
@ Skip to tail if src or dest are misaligned
@ (the second test only runs if the first one found dest aligned, so NE means either one is misaligned)
tst r0, #3 @ TEST dest & 3
it eq @ IF !(dest & 3):
tsteq r1, #3 @ THEN TEST src & 3
bne 2f @ IF (dest & 3) OR (src & 3): THEN GOTO [remainder]
@ Copy 4 bytes at a time until less than 4 are remaining
1: @ DO:
ldr r3, [r1], #4 @ R3 = *src++ (word)
str r3, [r0], #4 @ *dest++ = R3 (word)
subs r2, r2, #4 @ len -= 4
bge 1b @ WHILE: len >= 0
@ (fallthrough)
@
2: @ [remainder]: copy the remainder byte by byte
@ Correct length offset from 4 byte copying mode
adds r2, r2, #4 @ len += 4 @ len offset: 0
1: @ DO:
@ Copy the remainder a byte at a time
@ The zero test uses the flags of the adds above on entry and those of the subs below on later rounds
it eq @ IF len == 0:
ldmfdeq sp!, {r0, pc} @ THEN RETURN orig_dest
ldrb r3, [r1], #1 @ R3 = *src++ (byte)
strb r3, [r0], #1 @ *dest++ = R3 (byte)
subs r2, r2, #1 @ len--
b 1b @ WHILE true
@ End of size optimized forward implementation
#else
@ Performance optimized forward implementation:
@ While chunks are being copied, len is kept biased: a len offset of -N (noted on the right)
@ means R2 == remaining bytes - N, so R2 >= 0 tells us that at least N bytes are left
@ Skip to 3 byte tail if we have to copy less than 4 bytes
subs r2, r2, #4 @ len -= 4 @ len offset: -4
blt 6f @ IF len < 0: THEN GOTO [remainder3]
@ If the destination is misaligned, align it
@ This will return to label 3 if the source is aligned after the destination was aligned
ands r12, r0, #3 @ misalignment = dest & 3
bne 7f @ IF misalignment != 0: THEN GOTO [align]
@ The destination is already aligned; if the source is not,
@ we have no option but to fall back to byte by byte copying
tst r1, #3 @ TEST src & 3
bne 8f @ IF src & 3: THEN GOTO [bytewise]
@ (fallthrough)
@
3: @ [aligned]: both src and dest are 4 byte aligned here
@ Skip to 11 byte tail if we have to copy less than 12 bytes
subs r2, r2, #8 @ len -= 8 @ len offset: -12
blt 5f @ IF len < 0: THEN GOTO [remainder11]
@ Skip to 31 byte tail if we have to copy less than 32 bytes
subs r2, r2, #0x14 @ len -= 20 @ len offset: -32
blt 4f @ IF len < 0: THEN GOTO [remainder31]
@ Save R4 so that we have an additional copying scratchpad register
@ (LR is used as a scratchpad as well; the return address was saved on the stack at function entry)
str r4, [sp, #-4]! @ SAVE R4 @ STACK: R4 orig_dest return_addr
1: @ DO:
@ Copy 32 bytes at a time, as two 16 byte multi-register transfers
ldmia r1!, {r3, r4, r12, lr} @ {R3,R4,R12,LR} = *src++ (16 bytes)
stmia r0!, {r3, r4, r12, lr} @ *dest++ = {R3,R4,R12,LR} (16 bytes)
ldmia r1!, {r3, r4, r12, lr} @ {R3,R4,R12,LR} = *src++ (16 bytes)
stmia r0!, {r3, r4, r12, lr} @ *dest++ = {R3,R4,R12,LR} (16 bytes)
subs r2, r2, #0x20 @ len -= 32
bge 1b @ WHILE len >= 0
@ Less than 32 bytes remaining, copy 16 if enough are remaining
@ (len offset is still -32, so len >= -16 means at least 16 bytes are left)
cmn r2, #0x10 @ TEST len + 16
ittt ge @ IF len >= -16:
ldmiage r1!, {r3, r4, r12, lr} @ THEN {R3,R4,R12,LR} = *src++ (16 bytes)
stmiage r0!, {r3, r4, r12, lr} @ THEN *dest++ = {R3,R4,R12,LR} (16 bytes)
subge r2, r2, #0x10 @ THEN len -= 16
@ No need for R4 anymore, restore it so that later code doesn't have to take care of it
ldr r4, [sp], #4 @ RESTORE R4 @ STACK: orig_dest return_addr
@ (fallthrough)
@
4: @ [remainder31]: we have less than 32 bytes remaining
@ Correct length offset from 32 byte copying mode
adds r2, r2, #0x14 @ len += 20 @ len offset: -12
1: @ DO:
@ Copy 12 bytes at a time, while enough are remaining
@ The first round is conditional on the flags of the adds above, later rounds on those of the subs
itttt ge @ IF len >= 0:
ldmiage r1!, {r3, r12, lr} @ THEN {R3,R12,LR} = *src++ (12 bytes)
stmiage r0!, {r3, r12, lr} @ THEN *dest++ = {R3,R12,LR} (12 bytes)
subsge r2, r2, #0x0c @ THEN len -= 12
bge 1b @ WHILE len >= 0 (at least 12 more bytes remaining)
@ (fallthrough)
@
5: @ [remainder11]: we have less than 12 bytes remaining
@ Correct length offset from 12 byte copying mode
adds r2, r2, #8 @ len += 8 @ len offset: -4
@ Skip to 3 byte tail if less than 4 bytes are remaining
blt 6f @ IF len < 0: THEN GOTO [remainder3]
@ We will copy at least 4 bytes, adjust length
subs r2, r2, #4 @ len -= 4
@ The two conditional groups below are mutually exclusive and both use the flags of this subs
@ If less than 8 bytes are remaining, copy 4 bytes
itt lt @ IF len < 0:
ldrlt r3, [r1], #4 @ THEN R3 = *src++ (word)
strlt r3, [r0], #4 @ THEN *dest++ = R3 (word)
@ If at least 8 bytes are remaining, copy 8 bytes
ittt ge @ IF len >= 0:
ldmiage r1!, {r3, r12} @ THEN {R3,R12} = *src++ (8 bytes)
stmiage r0!, {r3, r12} @ THEN *dest++ = {R3,R12} (8 bytes)
@ We have subtracted 4 bytes above but copied 8, adjust length
subge r2, r2, #4 @ THEN len -= 4
@ (fallthrough)
@
@
6: @ [remainder3]: we have less than 4 bytes remaining, copy them individually
@ Correct length offset from 4 byte copying mode
adds r2, r2, #4 @ len += 4 @ len offset: 0
@ If we're finished, return
it eq @ IF len == 0:
ldmfdeq sp!, {r0, pc} @ THEN RETURN orig_dest
@ len is 1, 2 or 3 from here on; compare once and reuse the flags for both conditional copies
@ (ldrb and strb do not modify the flags)
cmp r2, #2 @ TEST len - 2
@ We always have to copy at least one byte
ldrb r3, [r1], #1 @ R3 = *src++ (byte)
strb r3, [r0], #1 @ *dest++ = R3 (byte)
@ If we have to copy at least two, copy another one
itt ge @ IF len >= 2:
ldrbge r3, [r1], #1 @ THEN R3 = *src++ (byte)
strbge r3, [r0], #1 @ THEN *dest++ = R3 (byte)
@ If we have to copy more than two (which is always 3), copy another one
itt gt @ IF len > 2:
ldrbgt r3, [r1], #1 @ THEN R3 = *src++ (byte)
strbgt r3, [r0], #1 @ THEN *dest++ = R3 (byte)
@ Everything copied, return
ldmfd sp!, {r0, pc} @ RETURN orig_dest
@
7: @ [align]: the destination is misaligned, align it
@ On entry R12 = dest & 3 (1..3) and len carries the -4 offset, so at least 4 bytes are left
@ Negate the misalignment to figure out how much we have to adjust
rsb r12, r12, #4 @ misalignment = 4 - misalignment
@ Compare once and reuse the flags for both conditional copies (ldrb and strb do not modify the flags)
cmp r12, #2 @ TEST misalignment - 2
@ We always have to copy at least one byte
ldrb r3, [r1], #1 @ R3 = *src++ (byte)
strb r3, [r0], #1 @ *dest++ = R3 (byte)
@ If we have to copy at least two, copy another one
itt ge @ IF misalignment >= 2:
ldrbge r3, [r1], #1 @ THEN R3 = *src++ (byte)
strbge r3, [r0], #1 @ THEN *dest++ = R3 (byte)
@ If we have to copy more than two (which is always 3), copy another one
itt gt @ IF misalignment > 2:
ldrbgt r3, [r1], #1 @ THEN R3 = *src++ (byte)
strbgt r3, [r0], #1 @ THEN *dest++ = R3 (byte)
@ The destination is aligned now, check if there are at least 4 bytes remaining
subs r2, r2, r12 @ len -= misalignment
@ Skip to 3 byte tail if not
blt 6b @ IF len < 0: THEN GOTO [remainder3]
@ If the source is now misaligned, we have to copy byte by byte
tst r1, #3 @ TEST src & 3
@ If not, resume fast copying method above
beq 3b @ IF !(src & 3): GOTO [aligned]
@ (fallthrough)
@
8: @ [bytewise]: the destination is aligned, but the source isn't, copy byte by byte
@ len still carries the -4 offset here, so the loop stops with exactly 3 bytes left over
@ DO:
@ Stupidly copy a byte at a time
ldrb r3, [r1], #1 @ R3 = *src++ (byte)
strb r3, [r0], #1 @ *dest++ = R3 (byte)
subs r2, r2, #1 @ len--
bge 8b @ WHILE len >= 0
@ Less than 4 bytes remaining, use 3 byte tail copying code above
b 6b @ GOTO [remainder3]
@ End of performance optimized forward implementation
#endif
9: @ [backward]: the destination is above the source, so we need to copy backwards
@ The branch to this label is taken before the forward path pushes anything, but every
@ backward exit returns by popping {r0, pc}. Save the original dest and the return address
@ here, before R0 is advanced, so that those pops find a valid frame.
stmfd sp!, {r0, lr} @ SAVE return_addr orig_dest @ STACK: orig_dest return_addr
@ Jump to end of src and dest, and copy backwards
@ (both pointers now point one past the last byte, matching the pre-decrement accesses below)
add r1, r1, r2 @ src += len
add r0, r0, r2 @ dest += len
@ (fallthrough)
#if defined(SQUEEZE)
@ Absolute minimum size backward implementation:
@ A single byte loop with pre-decrement addressing; the length test sits at the top,
@ so a len of 0 copies nothing
1: @ DO:
@ Stupidly copy a byte at a time
subs r2, r2, #1 @ len--
@ Returning pops the dest value into R0 and the return address into PC
it lt @ IF len < 0:
ldmfdlt sp!, {r0, pc} @ THEN RETURN orig_dest
ldrb r3, [r1, #-1]! @ R3 = *--src (byte)
strb r3, [r0, #-1]! @ *--dest = R3 (byte)
b 1b @ WHILE true
@ End of absolute minimum size backward implementation
#elif defined(OPTIMIZE_SIZE)
@ Size optimized backward implementation:
@ R0 and R1 point just past the end of dest and src; all accesses pre-decrement
@ Word copies are only used when both end pointers are 4 byte aligned, everything else is copied bytewise
@ While in word mode, len is kept 4 below the real remaining count (the len offset noted on the right)
@ Skip to tail if we have to copy less than 4 bytes
subs r2, r2, #4 @ len -= 4 @ len offset: -4
blt 2f @ IF len < 0: THEN GOTO [remainder]
@ Skip to tail if src or dest are misaligned
@ (the second test only runs if the first one found dest aligned, so NE means either one is misaligned)
tst r0, #3 @ TEST dest & 3
it eq @ IF !(dest & 3):
tsteq r1, #3 @ THEN TEST src & 3
bne 2f @ IF (dest & 3) OR (src & 3): THEN GOTO [remainder]
@ Copy 4 bytes at a time until less than 4 are remaining
1: @ DO:
ldr r3, [r1, #-4]! @ R3 = *--src (word)
str r3, [r0, #-4]! @ *--dest = R3 (word)
subs r2, r2, #4 @ len -= 4
bge 1b @ WHILE: len >= 0
@ (fallthrough)
@
2: @ [remainder]: copy the remainder byte by byte
@ Correct length offset from 4 byte copying mode
adds r2, r2, #4 @ len += 4 @ len offset: 0
1: @ DO:
@ Copy the remainder a byte at a time
@ The zero test uses the flags of the adds above on entry and those of the subs below on later rounds
it eq @ IF len == 0:
ldmfdeq sp!, {r0, pc} @ THEN RETURN orig_dest
ldrb r3, [r1, #-1]! @ R3 = *--src (byte)
strb r3, [r0, #-1]! @ *--dest = R3 (byte)
subs r2, r2, #1 @ len--
b 1b @ WHILE true
@ End of size optimized backward implementation
#else
@ Performance optimized backward implementation:
@ R0 and R1 point just past the end of dest and src; every access decrements before it transfers
@ While chunks are being copied, len is kept biased: a len offset of -N (noted on the right)
@ means R2 == remaining bytes - N, so R2 >= 0 tells us that at least N bytes are left
@ Skip to 3 byte tail if we have to copy less than 4 bytes
subs r2, r2, #4 @ len -= 4 @ len offset: -4
blt 6f @ IF len < 0: THEN GOTO [remainder3]
@ If the destination is misaligned, align it
@ This will return to label 3 if the source is aligned after the destination was aligned
ands r12, r0, #3 @ misalignment = dest & 3
bne 7f @ IF misalignment != 0: THEN GOTO [align]
@ The destination is already aligned; if the source is not,
@ we have no option but to fall back to byte by byte copying
tst r1, #3 @ TEST src & 3
bne 8f @ IF src & 3: THEN GOTO [bytewise]
@ (fallthrough)
@
3: @ [aligned]: both src and dest are 4 byte aligned here
@ Skip to 11 byte tail if we have to copy less than 12 bytes
subs r2, r2, #8 @ len -= 8 @ len offset: -12
blt 5f @ IF len < 0: THEN GOTO [remainder11]
@ Skip to 31 byte tail if we have to copy less than 32 bytes
subs r2, r2, #0x14 @ len -= 20 @ len offset: -32
blt 4f @ IF len < 0: THEN GOTO [remainder31]
@ Save R4 so that we have an additional copying scratchpad register
@ (LR doubles as a copying scratchpad in the loop below as well)
str r4, [sp, #-4]! @ SAVE R4 @ STACK: R4 orig_dest return_addr
1: @ DO:
@ Copy 32 bytes at a time, as two 16 byte multi-register transfers
ldmdb r1!, {r3, r4, r12, lr} @ {R3,R4,R12,LR} = *--src (16 bytes)
stmdb r0!, {r3, r4, r12, lr} @ *--dest = {R3,R4,R12,LR} (16 bytes)
ldmdb r1!, {r3, r4, r12, lr} @ {R3,R4,R12,LR} = *--src (16 bytes)
stmdb r0!, {r3, r4, r12, lr} @ *--dest = {R3,R4,R12,LR} (16 bytes)
subs r2, r2, #0x20 @ len -= 32
bge 1b @ WHILE len >= 0
@ Less than 32 bytes remaining, copy 16 if enough are remaining
@ (len offset is still -32, so len >= -16 means at least 16 bytes are left)
cmn r2, #0x10 @ TEST len + 16
ittt ge @ IF len >= -16:
ldmdbge r1!, {r3, r4, r12, lr} @ THEN {R3,R4,R12,LR} = *--src (16 bytes)
stmdbge r0!, {r3, r4, r12, lr} @ THEN *--dest = {R3,R4,R12,LR} (16 bytes)
subge r2, r2, #0x10 @ THEN len -= 16
@ No need for R4 anymore, restore it so that later code doesn't have to take care of it
ldr r4, [sp], #4 @ RESTORE R4 @ STACK: orig_dest return_addr
@ (fallthrough)
@
4: @ [remainder31]: we have less than 32 bytes remaining
@ Correct length offset from 32 byte copying mode
adds r2, r2, #0x14 @ len += 20 @ len offset: -12
1: @ DO:
@ Copy 12 bytes at a time, while enough are remaining
@ The first round is conditional on the flags of the adds above, later rounds on those of the subs
itttt ge @ IF len >= 0:
ldmdbge r1!, {r3, r12, lr} @ THEN {R3,R12,LR} = *--src (12 bytes)
stmdbge r0!, {r3, r12, lr} @ THEN *--dest = {R3,R12,LR} (12 bytes)
subsge r2, r2, #0x0c @ THEN len -= 12
bge 1b @ WHILE len >= 0 (at least 12 more bytes remaining)
@ (fallthrough)
@
@
5: @ [remainder11]: we have less than 12 bytes remaining
@ R0 and R1 point just past the bytes that still have to be copied (we are copying downwards)
@ Correct length offset from 12 byte copying mode
adds r2, r2, #8 @ len += 8 @ len offset: -4
@ Skip to 3 byte tail if less than 4 bytes are remaining
blt 6f @ IF len < 0: THEN GOTO [remainder3]
@ We will copy at least 4 bytes, adjust length
subs r2, r2, #4 @ len -= 4
@ The two conditional groups below are mutually exclusive and both use the flags of this subs
@ If less than 8 bytes are remaining, copy 4 bytes
itt lt @ IF len < 0:
ldrlt r3, [r1, #-4]! @ THEN R3 = *--src (word)
strlt r3, [r0, #-4]! @ THEN *--dest = R3 (word)
@ If at least 8 bytes are remaining, copy 8 bytes
@ This has to be the decrement-before form like every other transfer in the backward path:
@ an increment-after transfer would read and write the 8 bytes above the pointers
@ and move both pointers in the wrong direction for the tail code that follows
ittt ge @ IF len >= 0:
ldmdbge r1!, {r3, r12} @ THEN {R3,R12} = *--src (8 bytes)
stmdbge r0!, {r3, r12} @ THEN *--dest = {R3,R12} (8 bytes)
@ We have subtracted 4 bytes above but copied 8, adjust length
subge r2, r2, #4 @ THEN len -= 4
@ (fallthrough)
@
@
6: @ [remainder3]: we have less than 4 bytes remaining, copy them individually
@ Correct length offset from 4 byte copying mode
adds r2, r2, #4 @ len += 4 @ len offset: 0
@ If we're finished, return
it eq @ IF len == 0:
ldmfdeq sp!, {r0, pc} @ THEN RETURN orig_dest
@ len is 1, 2 or 3 from here on; compare once and reuse the flags for both conditional copies
@ (ldrb and strb do not modify the flags)
cmp r2, #2 @ TEST len - 2
@ We always have to copy at least one byte
ldrb r3, [r1, #-1]! @ R3 = *--src (byte)
strb r3, [r0, #-1]! @ *--dest = R3 (byte)
@ If we have to copy at least two, copy another one
itt ge @ IF len >= 2:
ldrbge r3, [r1, #-1]! @ THEN R3 = *--src (byte)
strbge r3, [r0, #-1]! @ THEN *--dest = R3 (byte)
@ If we have to copy more than two (which is always 3), copy another one
itt gt @ IF len > 2:
ldrbgt r3, [r1, #-1]! @ THEN R3 = *--src (byte)
strbgt r3, [r0, #-1]! @ THEN *--dest = R3 (byte)
@ Everything copied, return
ldmfd sp!, {r0, pc} @ RETURN orig_dest
@
7: @ [align]: the destination is misaligned, align it
@ On entry R12 = dest & 3 (1..3) and len carries the -4 offset, so at least 4 bytes are left
@ No need to negate the misalignment here: we are going backwards, so dest & 3 is exactly
@ the number of bytes we have to copy to reach the next lower 4 byte boundary
@ Compare once and reuse the flags for both conditional copies (ldrb and strb do not modify the flags)
cmp r12, #2 @ TEST misalignment - 2
@ We always have to copy at least one byte
ldrb r3, [r1, #-1]! @ R3 = *--src (byte)
strb r3, [r0, #-1]! @ *--dest = R3 (byte)
@ If we have to copy at least two, copy another one
itt ge @ IF misalignment >= 2:
ldrbge r3, [r1, #-1]! @ THEN R3 = *--src (byte)
strbge r3, [r0, #-1]! @ THEN *--dest = R3 (byte)
@ If we have to copy more than two (which is always 3), copy another one
itt gt @ IF misalignment > 2:
ldrbgt r3, [r1, #-1]! @ THEN R3 = *--src (byte)
strbgt r3, [r0, #-1]! @ THEN *--dest = R3 (byte)
@ The destination is aligned now, check if there are at least 4 bytes remaining
subs r2, r2, r12 @ len -= misalignment
@ Skip to 3 byte tail if not
blt 6b @ IF len < 0: THEN GOTO [remainder3]
@ If the source is now misaligned, we have to copy byte by byte
tst r1, #3 @ TEST src & 3
@ If not, resume fast copying method above
beq 3b @ IF !(src & 3): GOTO [aligned]
@ (fallthrough)
@
8: @ [bytewise]: the destination is aligned, but the source isn't, copy byte by byte
@ len still carries the -4 offset here, so the loop stops with exactly 3 bytes left over
@ DO:
@ Stupidly copy a byte at a time
ldrb r3, [r1, #-1]! @ R3 = *--src (byte)
strb r3, [r0, #-1]! @ *--dest = R3 (byte)
subs r2, r2, #1 @ len--
bge 8b @ WHILE len >= 0
@ Less than 4 bytes remaining, use 3 byte tail copying code above
b 6b @ GOTO [remainder3]
@ End of performance optimized backward implementation
#endif
.size memmove, . - memmove
.size memcpy, . - memcpy