|
|
/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
;
;           Copy bytes of data around. Handles overlapped data.
;
;           Change this to use Altivec later on, and maybe floating point.
;
;           NOTE: This file compiles and executes on both MacOS 8.x (Codewarrior)
;                 and MacOS X.  The "#if 0"s are treated as comments by CW so the
;                 stuff between them is included by CW and excluded on MacOS X.
;                 Same with the "#include"s.
;
;           Entry points in this file:
;               bcopy_nc(from, to, nbytes)   - copy without DCBZ of the sink
;               bcopy(from, to, nbytes)      - normal copy
;               memcpy(to, from, nbytes)     - same copy, arguments swapped
;
;           All three end up at copyit with:
;               R4 = sink, R5 = length, R6 = source
;           R3 is never written after entry.
;
;           The partial (less than one line) moves are driven by condition
;           register bits.  "mtcrf 3,rX" copies the low 8 bits of rX into
;           CR6/CR7, so that afterwards:
;               CR bit 27 = 16-byte piece to move
;               CR bit 28 =  8-byte piece to move
;               CR bit 29 =  4-byte piece to move
;               CR bit 30 =  2-byte piece to move
;               CR bit 31 =  1-byte piece to move
;
#include <ppc/asm.h>
#include <ppc/proc_reg.h>

;           Use CR5_lt to indicate non-cached
#define noncache 20
#if 0
noncache:   equ     20
#endif
#if 0
br0:        equ     0
#endif

;
;           bcopy_nc(from, to, nbytes)
;
;           bcopy_nc operates on non-cached memory so we can not use any kind
;           of cache instructions.
;
;           Sets the noncache flag and joins bcopy at bcpswap, so the
;           arguments are taken the same way: R3 = from, R4 = to, R5 = nbytes.
;           In the code below the flag gates the DCBZ (and the touch ahead)
;           in the forward line loop; the other DCBT/DCBTST touches are
;           issued regardless of the flag.
;



#if 0
            IF 0
#endif
ENTRY(bcopy_nc, TAG_NO_FRAME_USED)
#if 0
            ENDIF
            export  xbcopy_nc[DS]
            tc      xbcopy_nc[TC],xbcopy_nc[DS]
            csect   xbcopy_nc[DS]
            dc.l    .xbcopy_nc
            dc.l    TOC[tc0]
            export  .xbcopy_nc
            csect   xbcopy_nc[PR]
.xbcopy_nc:
#endif

            crset   noncache                    ; Set non-cached
            b       bcpswap

;
;           void bcopy(from, to, nbytes)
;
;           R3 = from, R4 = to, R5 = nbytes.  Returns immediately when the
;           two addresses are equal or when nbytes is 0.
;

#if 0
            IF 0
#endif
ENTRY(bcopy, TAG_NO_FRAME_USED)
#if 0
            ENDIF
            export  xbcopy[DS]
;           TOC entry is named after its descriptor, as for bcopy_nc and memcpy
            tc      xbcopy[TC],xbcopy[DS]
            csect   xbcopy[DS]
            dc.l    .xbcopy
            dc.l    TOC[tc0]
            export  .xbcopy
            csect   xbcopy[PR]
.xbcopy:
#endif

            crclr   noncache                    ; Set cached

bcpswap:    cmplw   cr1,r4,r3                   ; Compare "to" and "from"
            mr.     r5,r5                       ; Check if we have a 0 length
            mr      r6,r3                       ; Set source
            beqlr-  cr1                         ; Bail if "to" and "from" are the same
            beqlr-                              ; Bail if length is 0
            b       copyit                      ; Go copy it...

;
;           When we move the memory, forward overlays must be handled.  We
;           also can not use the cache instructions if we are from bcopy_nc.
;           We need to preserve R3 because it needs to be returned for memcpy.
;           We can be interrupted and lose control here.
;
;           There is no stack, so in order to use floating point, we would
;           need to take the FP exception. Any potential gains by using FP
;           would be more than eaten up by this.
;
;           Later, we should use Altivec for large moves.
;
;           memcpy(to, from, nbytes)
;
;           R3 = to, R4 = from, R5 = nbytes.  The arguments are moved into
;           the R4 = sink / R6 = source layout used by copyit, and R3 is
;           left holding "to".  Returns immediately when the two addresses
;           are equal or when nbytes is 0.
;

#if 0
            IF 0
#endif
ENTRY(memcpy, TAG_NO_FRAME_USED)
#if 0
            ENDIF
            export  xmemcpy[DS]
            tc      xmemcpy[TC],xmemcpy[DS]
            csect   xmemcpy[DS]
            dc.l    .xmemcpy
            dc.l    TOC[tc0]
            export  .xmemcpy
            csect   xmemcpy[PR]
.xmemcpy:
#endif
            cmplw   cr1,r3,r4                   ; "to" and "from" the same?
            mr      r6,r4                       ; Set the "from"
            mr.     r5,r5                       ; Length zero?
            crclr   noncache                    ; Set cached
            mr      r4,r3                       ; Set the "to"
            beqlr-  cr1                         ; "to" and "from" are the same
            beqlr-                              ; Length is 0

;
;           Common copy code.  On entry:
;               R4 = sink
;               R5 = length (not 0)
;               R6 = source
;
;           Computed here:
;               R12 = sink - source
;               R9  = absolute value of R12
;               R8  = 0x7FFFFFFF shifted right by the leading zero count of
;                     the length, used to keep the alignment move shorter
;                     than the total length
;               CR0 = unsigned compare of R12 with the length; "less than"
;                     means the sink starts inside the source area, so the
;                     copy has to run from the top down (fwdovrlap)
;               noncache is also set if source and sink are less than 32
;               bytes apart (CR bit 28 is CR7_lt from the compare of R9)
;
;           NOTE(review): the CR1 result of the "cmplwi cr1,r5,32" below does
;           not appear to be tested anywhere in this file.
;

copyit:     sub     r12,r4,r6                   ; Get potential overlap (negative if backward move)
            lis     r8,0x7FFF                   ; Start up a mask
            srawi   r11,r12,31                  ; Propagate the sign bit
            dcbt    br0,r6                      ; Touch in the first source line
            cntlzw  r7,r5                       ; Get the highest power of 2 factor of the length
            ori     r8,r8,0xFFFF                ; Make limit 0x7FFFFFFF
            xor     r9,r12,r11                  ; If sink - source was negative, invert bits
            srw     r8,r8,r7                    ; Get move length limitation
            sub     r9,r9,r11                   ; If sink - source was negative, add 1 and get absolute value
            cmplw   r12,r5                      ; See if we actually forward overlap
            cmplwi  cr7,r9,32                   ; See if at least a line between source and sink
            dcbtst  br0,r4                      ; Touch in the first sink line
            cmplwi  cr1,r5,32                   ; Are we moving more than a line?
            cror    noncache,noncache,28        ; Set to not DCBZ output line if not enough space
            blt-    fwdovrlap                   ; This is a forward overlapping area, handle it...

;
;           R4 = sink
;           R5 = length
;           R6 = source
;

;
;           Here we figure out how much we have to move to get the sink onto a
;           cache boundary.  If we can, and there are still more than 32 bytes
;           left to move, we can really speed things up by DCBZing the sink line.
;           We can not do this if noncache is set because we will take an
;           alignment exception.
;
;           The beq below tests CR0 from the rlwinm. (the unlimited alignment
;           count); mtcrf only writes CR6/CR7, so CR0 is still valid there.
;

            neg     r0,r4                       ; Get the number of bytes to move to align to a line boundary
            rlwinm. r0,r0,0,27,31               ; Clean it up and test it
            and     r0,r0,r8                    ; limit to the maximum front end move
            mtcrf   3,r0                        ; Make branch mask for partial moves
            sub     r5,r5,r0                    ; Set the length left to move
            beq     alline                      ; Already on a line...

            bf      31,alhalf                   ; No single byte to do...
            lbz     r7,0(r6)                    ; Get the byte
            addi    r6,r6,1                     ; Point to the next
            stb     r7,0(r4)                    ; Save the single
            addi    r4,r4,1                     ; Bump sink

;           Sink is halfword aligned here

alhalf:     bf      30,alword                   ; No halfword to do...
            lhz     r7,0(r6)                    ; Get the halfword
            addi    r6,r6,2                     ; Point to the next
            sth     r7,0(r4)                    ; Save the halfword
            addi    r4,r4,2                     ; Bump sink

;           Sink is word aligned here

alword:     bf      29,aldouble                 ; No word to do...
            lwz     r7,0(r6)                    ; Get the word
            addi    r6,r6,4                     ; Point to the next
            stw     r7,0(r4)                    ; Save the word
            addi    r4,r4,4                     ; Bump sink

;           Sink is double aligned here

aldouble:   bf      28,alquad                   ; No double to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            addi    r6,r6,8                     ; Point to the next
            stw     r7,0(r4)                    ; Save the first word
            stw     r8,4(r4)                    ; Save the second word
            addi    r4,r4,8                     ; Bump sink

;           Sink is quadword aligned here

alquad:     bf      27,alline                   ; No quad to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            lwz     r9,8(r6)                    ; Get the third word
            stw     r7,0(r4)                    ; Save the first word
            lwz     r11,12(r6)                  ; Get the fourth word
            addi    r6,r6,16                    ; Point to the next
            stw     r8,4(r4)                    ; Save the second word
            stw     r9,8(r4)                    ; Save the third word
            stw     r11,12(r4)                  ; Save the fourth word
            addi    r4,r4,16                    ; Bump sink

;           Sink is line aligned here
;
;           R0 = number of full 32-byte lines, CR6/CR7 = low bits of the
;           remaining length for the backend partial moves, R5 = residual
;           after the full lines.

alline:     rlwinm. r0,r5,27,5,31               ; Get the number of full lines to move
            mtcrf   3,r5                        ; Make branch mask for backend partial moves
            rlwinm  r11,r5,0,0,26               ; Get number of bytes we are going to move
            beq-    backend                     ; No full lines to move

            sub     r5,r5,r11                   ; Calculate the residual
            li      r10,96                      ; Stride for touch ahead

;           The subic. sets CR0 for the bgt+ at the bottom of the loop;
;           nothing in between changes CR0.

nxtline:    subic.  r0,r0,1                     ; Account for the line now

            bt-     noncache,skipz              ; Skip if we are not cached...
            dcbz    br0,r4                      ; Blow away the whole line because we are replacing it
            dcbt    r6,r10                      ; Touch ahead a bit

skipz:      lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            lwz     r9,8(r6)                    ; Get the third word
            stw     r7,0(r4)                    ; Save the first word
            lwz     r11,12(r6)                  ; Get the fourth word
            stw     r8,4(r4)                    ; Save the second word
            lwz     r7,16(r6)                   ; Get the fifth word
            stw     r9,8(r4)                    ; Save the third word
            lwz     r8,20(r6)                   ; Get the sixth word
            stw     r11,12(r4)                  ; Save the fourth word
            lwz     r9,24(r6)                   ; Get the seventh word
            stw     r7,16(r4)                   ; Save the fifth word
            lwz     r11,28(r6)                  ; Get the eighth word
            addi    r6,r6,32                    ; Point to the next
            stw     r8,20(r4)                   ; Save the sixth word
            stw     r9,24(r4)                   ; Save the seventh word
            stw     r11,28(r4)                  ; Save the eighth word
            addi    r4,r4,32                    ; Bump sink
            bgt+    nxtline                     ; Do the next line, if any...


;           Move backend quadword

backend:    bf      27,noquad                   ; No quad to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            lwz     r9,8(r6)                    ; Get the third word
            lwz     r11,12(r6)                  ; Get the fourth word
            stw     r7,0(r4)                    ; Save the first word
            addi    r6,r6,16                    ; Point to the next
            stw     r8,4(r4)                    ; Save the second word
            stw     r9,8(r4)                    ; Save the third word
            stw     r11,12(r4)                  ; Save the fourth word
            addi    r4,r4,16                    ; Bump sink

;           Move backend double

noquad:     bf      28,nodouble                 ; No double to do...
            lwz     r7,0(r6)                    ; Get the first word
            lwz     r8,4(r6)                    ; Get the second word
            addi    r6,r6,8                     ; Point to the next
            stw     r7,0(r4)                    ; Save the first word
            stw     r8,4(r4)                    ; Save the second word
            addi    r4,r4,8                     ; Bump sink

;           Move backend word

nodouble:   bf      29,noword                   ; No word to do...
            lwz     r7,0(r6)                    ; Get the word
            addi    r6,r6,4                     ; Point to the next
            stw     r7,0(r4)                    ; Save the word
            addi    r4,r4,4                     ; Bump sink

;           Move backend halfword

noword:     bf      30,nohalf                   ; No halfword to do...
            lhz     r7,0(r6)                    ; Get the halfword
            addi    r6,r6,2                     ; Point to the next
            sth     r7,0(r4)                    ; Save the halfword
            addi    r4,r4,2                     ; Bump sink

;           Move backend byte

nohalf:     bflr    31                          ; Leave cuz we are all done...
            lbz     r7,0(r6)                    ; Get the byte
            stb     r7,0(r4)                    ; Save the single

            blr                                 ; Leave cuz we are all done...

;
;           (Offset diagram; the original column alignment is not preserved here.)
;
;           0123456789ABCDEF0123456789ABCDEF
;           0123456789ABCDEF0123456789ABCDEF
;           F
;           DE
;           9ABC
;           12345678
;           123456789ABCDEF0
;           0

;
;           Here is where we handle a forward overlapping move.  These will be slow
;           because we can not kill the cache of the destination until after we have
;           loaded/saved the source area.  Also, because reading memory backwards is
;           slower when the cache line needs to be loaded because the critical
;           doubleword is loaded first, i.e., the last, then it goes back to the first,
;           and on in order.  That means that when we are at the second to last DW we
;           have to wait until the whole line is in cache before we can proceed.
;
;           The copy runs from the highest address down.  R4 and R6 are first
;           moved to point one byte past the end of the sink and the source, and
;           every access below uses a negative displacement.  The low bits of
;           the sink end address (limited by R8) give the number of bytes to
;           move to bring the sink down to a line boundary.
;

fwdovrlap:  add     r4,r5,r4                    ; Point past the last sink byte
            add     r6,r5,r6                    ; Point past the last source byte
            and     r0,r4,r8                    ; Apply movement limit
            li      r12,-1                      ; Make sure we touch in the actual line
            mtcrf   3,r0                        ; Figure out the best way to move backwards
            dcbt    r12,r6                      ; Touch in the last line of source
            rlwinm. r0,r0,0,27,31               ; Calculate the length to adjust to cache boundary
            dcbtst  r12,r4                      ; Touch in the last line of the sink
            beq-    balline                     ; Already on cache line boundary

            sub     r5,r5,r0                    ; Precalculate move length left after alignment

            bf      31,balhalf                  ; No single byte to do...
            lbz     r7,-1(r6)                   ; Get the byte
            subi    r6,r6,1                     ; Point to the next
            stb     r7,-1(r4)                   ; Save the single
            subi    r4,r4,1                     ; Bump sink

;           Sink is halfword aligned here

balhalf:    bf      30,balword                  ; No halfword to do...
            lhz     r7,-2(r6)                   ; Get the halfword
            subi    r6,r6,2                     ; Point to the next
            sth     r7,-2(r4)                   ; Save the halfword
            subi    r4,r4,2                     ; Bump sink

;           Sink is word aligned here

balword:    bf      29,baldouble                ; No word to do...
            lwz     r7,-4(r6)                   ; Get the word
            subi    r6,r6,4                     ; Point to the next
            stw     r7,-4(r4)                   ; Save the word
            subi    r4,r4,4                     ; Bump sink

;           Sink is double aligned here

baldouble:  bf      28,balquad                  ; No double to do...
            lwz     r7,-8(r6)                   ; Get the first word
            lwz     r8,-4(r6)                   ; Get the second word
            subi    r6,r6,8                     ; Point to the next
            stw     r7,-8(r4)                   ; Save the first word
            stw     r8,-4(r4)                   ; Save the second word
            subi    r4,r4,8                     ; Bump sink

;           Sink is quadword aligned here

balquad:    bf      27,balline                  ; No quad to do...
            lwz     r7,-16(r6)                  ; Get the first word
            lwz     r8,-12(r6)                  ; Get the second word
            lwz     r9,-8(r6)                   ; Get the third word
            lwz     r11,-4(r6)                  ; Get the fourth word
            stw     r7,-16(r4)                  ; Save the first word
            subi    r6,r6,16                    ; Point to the next
            stw     r8,-12(r4)                  ; Save the second word
            stw     r9,-8(r4)                   ; Save the third word
            stw     r11,-4(r4)                  ; Save the fourth word
            subi    r4,r4,16                    ; Bump sink

;           Sink is line aligned here

balline:    rlwinm. r0,r5,27,5,31               ; Get the number of full lines to move
            mtcrf   3,r5                        ; Make branch mask for backend partial moves
            beq-    bbackend                    ; No full lines to move
#if 0
            stwu    r1,-8(r1)                   ; Dummy stack for MacOS
            stw     r2,4(r1)                    ; Save RTOC
#endif


;           Registers in use: R0, R1, R3, R4, R5, R6
;           Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them
;
;           R5 is the one made free: its low bits were already copied into
;           CR6/CR7 by the mtcrf at balline and it is not read again after
;           this point, so the loop uses it as the eighth data register.
;           R2 is also used as a data register, which is why the MacOS build
;           saves and restores RTOC around the loop.
;
;           The whole source line is loaded before the last seven words are
;           stored.  The subic. sets CR0 for both the ble- and the bgt+
;           below; nothing in between changes CR0.

bnxtline:   subic.  r0,r0,1                     ; Account for the line now

            lwz     r7,-32(r6)                  ; Get the first word
            lwz     r5,-28(r6)                  ; Get the second word
            lwz     r2,-24(r6)                  ; Get the third word
            lwz     r12,-20(r6)                 ; Get the fourth word
            lwz     r11,-16(r6)                 ; Get the fifth word
            lwz     r10,-12(r6)                 ; Get the sixth word
            lwz     r9,-8(r6)                   ; Get the seventh word
            lwz     r8,-4(r6)                   ; Get the eighth word
            subi    r6,r6,32                    ; Point to the next

            stw     r7,-32(r4)                  ; Save the first word
            ble-    bnotouch                    ; Last time, skip touch of source...
            dcbt    br0,r6                      ; Touch in next source line

bnotouch:   stw     r5,-28(r4)                  ; Save the second word
            stw     r2,-24(r4)                  ; Save the third word
            stw     r12,-20(r4)                 ; Save the fourth word
            stw     r11,-16(r4)                 ; Save the fifth word
            stw     r10,-12(r4)                 ; Save the sixth word
            stw     r9,-8(r4)                   ; Save the seventh word
            stw     r8,-4(r4)                   ; Save the eighth word
            subi    r4,r4,32                    ; Bump sink

            bgt+    bnxtline                    ; Do the next line, if any...
#if 0
            lwz     r2,4(r1)                    ; Restore RTOC
            lwz     r1,0(r1)                    ; Pop dummy stack
#endif

;
;           Note: We touched these lines in at the beginning
;

;           Move backend quadword

bbackend:   bf      27,bnoquad                  ; No quad to do...
            lwz     r7,-16(r6)                  ; Get the first word
            lwz     r8,-12(r6)                  ; Get the second word
            lwz     r9,-8(r6)                   ; Get the third word
            lwz     r11,-4(r6)                  ; Get the fourth word
            stw     r7,-16(r4)                  ; Save the first word
            subi    r6,r6,16                    ; Point to the next
            stw     r8,-12(r4)                  ; Save the second word
            stw     r9,-8(r4)                   ; Save the third word
            stw     r11,-4(r4)                  ; Save the fourth word
            subi    r4,r4,16                    ; Bump sink

;           Move backend double

bnoquad:    bf      28,bnodouble                ; No double to do...
            lwz     r7,-8(r6)                   ; Get the first word
            lwz     r8,-4(r6)                   ; Get the second word
            subi    r6,r6,8                     ; Point to the next
            stw     r7,-8(r4)                   ; Save the first word
            stw     r8,-4(r4)                   ; Save the second word
            subi    r4,r4,8                     ; Bump sink

;           Move backend word

bnodouble:  bf      29,bnoword                  ; No word to do...
            lwz     r7,-4(r6)                   ; Get the word
            subi    r6,r6,4                     ; Point to the next
            stw     r7,-4(r4)                   ; Save the word
            subi    r4,r4,4                     ; Bump sink

;           Move backend halfword

bnoword:    bf      30,bnohalf                  ; No halfword to do...
            lhz     r7,-2(r6)                   ; Get the halfword
            subi    r6,r6,2                     ; Point to the next
            sth     r7,-2(r4)                   ; Save the halfword
            subi    r4,r4,2                     ; Bump sink

;           Move backend byte

bnohalf:    bflr    31                          ; Leave cuz we are all done...
            lbz     r7,-1(r6)                   ; Get the byte
            stb     r7,-1(r4)                   ; Save the single

            blr                                 ; Leave cuz we are all done...
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.