--- MiNT/src/quickmov.spp 2018/04/24 17:57:24 1.1.1.3 +++ MiNT/src/quickmov.spp 2018/04/24 17:58:56 1.1.1.4 @@ -14,13 +14,85 @@ ; +; quickmovb(char *dst, char *src, long nbytes): + +; like memcpy, does unaligned too... does not check for overlap (memmove). + +; + TEXT XDEF _quickmove + XDEF _quickmovb + + + +;%ifdef OWN_LIB + + XDEF _bcopy + + XDEF __bcopy + + XDEF _memcpy + + + +_bcopy: + +__bcopy: + + move.l 8(sp),a0 ; get dst + + move.l 4(sp),a1 ; get src + + bra.s _quickmovb1 + +;%endif + + + +; for gcc >= 2.5.8 + +_memcpy: + + + +_quickmovb: + + move.l 4(sp),a0 ; get dst + + move.l 8(sp),a1 ; get src + +_quickmovb1: + + move.w a0,d0 + + move.w a1,d1 + + eor.w d1,d0 ; bit 0 == unaligned + + lsr.w #1,d0 ; ...now in x flag + + move.l 12(sp),d0 ; get nbytes + + beq Ldone + roxr.w #1,d1 ; bit 0 == both odd, msb == unaligned + + bmi bytecopy ; unaligned, do the slow thing... + + bcc.s quickmov1 ; both even, ok + + subq.l #1,d0 ; both odd, can be fixed + + move.b (a1)+,(a0)+ + + bra.s quickmov1 + + _quickmove: @@ -30,13 +102,19 @@ _quickmove: move.l 12(sp),d0 ; get nbytes +quickmov1: + + move.w #$1ff,d1 + + and.w d0,d1 ; d1 = nbytes % 512 + lsr.l #8,d0 ; lsr.l #1,d0 ; d0 = nbytes / 512 - subq.l #1,d0 ; prepare for dbra loop + ble.s Leftover ; if <= 0, skip + - bmi.s Leftover ; if < 0, skip movem.l d1-d7/a2-a6,-(sp) ; save regs @@ -92,7 +170,7 @@ L1: subq.l #1,d0 - bge.s L1 + bgt.s L1 @@ -102,23 +180,65 @@ L1: Leftover: ; do the remaining bytes - move.l 12(sp),d1 + moveq.l #3,d0 + + and.w d1,d0 - and.w #$01ff,d1 ; d1 = nbytes % 512 + lsr.w #2,d1 subq.w #1,d1 ; prepare for dbra loop + bmi.s L4done + + lsr.w #1,d1 + + bcc.s L23 + +L2l: + + move.l (a1)+,(a0)+ + +L23: + + move.l (a1)+,(a0)+ + + dbra d1,L2l + +L4done: + + subq.w #1,d0 ; prepare for dbra loop + bmi.s Ldone L2: move.b (a1)+,(a0)+ - dbra d1,L2 + dbra d0,L2 Ldone: rts ; return + + +bytecopy: + + subq.l #1,d0 ; prepare for dbra loop + + move.l d0,d1 + + swap d1 + +L3: + + move.b (a1)+,(a0)+ + + dbra d0,L3 + + dbra d1,L3 + + rts ; return + END