|
|
/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#define STANDALONE 0

#if STANDALONE
#include "asm.h"
#include "assym.h"
#include "proc_reg.h"	/* For CACHE_LINE_SIZE */

#else

#include <mach/ppc/asm.h>
#if 0
/* #include <assym.h> */
#include <ppc/proc_reg.h>	/* For CACHE_LINE_SIZE */
#endif /* 0 */
#endif

/*
 * xsum_assym - 16-bit one's-complement sum of a byte buffer, folded
 * together with a previously accumulated sum.
 *
 * Inputs:
 *	Reg 3 - Pointer to data
 *	Reg 4 - Length of data in bytes
 *	Reg 5 - Accumulated sum value
 *	Reg 6 - Starting on odd boundary flag (relative to byte 0 of the
 *		checksummed data); non-zero means the first byte of this
 *		buffer sits at an odd offset of the overall data
 *
 * Output:
 *	Reg 3 - Folded 16-bit sum (buffer sum plus the value passed in r5)
 *
 * Registers written: r3-r12, cr0 and the XER carry bit.
 *
 * Internal register roles:
 *	r8  - running 32-bit sum; carries are chained through XER[CA] by
 *	      adde, so nothing between two adde instructions may alter CA
 *	      (addi, add, subf, cmpi/cmpli, and./andi., lwz/lhz/lbz, srw,
 *	      slwi/srwi and cntlzw all leave CA alone)
 *	r11 - non-zero when the two bytes of the result must be swapped
 *	r12 - copy of the passed-in sum, added after the optional swap
 *	r5, r6, r7, r9, r10 - scratch (loaded data, step sizes, masks)
 */

ENTRY(xsum_assym, TAG_NO_FRAME_USED)

	mr	r11, r6			; Swapped flag
	addi	r8, 0, 0		; Running sum starts at zero
	addi	r10, 0, 0x1f		; Mask for offset within a 32-byte line
	addi	r7, 0, 1
	addic	r7, r7, 0		; This clears the carry bit!
	mr	r12, r5			; Save the passed-in checksum value

/*
 * Sum bytes before cache line boundary
 */

	cmpi	cr0,0,r4,0		; Check for length of 0
	beq	Lleftovers

	and.	r9, r3, r10
	beq	Laligned32		; 32 byte aligned

	andi.	r9, r3, 0x3
	beq	Laligned4

	andi.	r9, r3, 0x1
	beq	Laligned2		; 2 byte aligned

/*
 * Odd address: the first byte goes into the low lane, which inverts the
 * byte lanes of everything summed below.  That inversion has to be
 * combined with the caller's odd-offset flag rather than replace it: if
 * the buffer already starts at an odd offset of the data, the two
 * inversions cancel and no final swap is needed.
 */
	cntlzw	r11, r11		; 32 if the flag was 0, less than 32 otherwise
	srwi	r11, r11, 5		; r11 = (flag == 0) ? 1 : 0, i.e. toggled
	lbz	r8, 0(r3)
	add	r3, r3, r7
	subf.	r4, r7, r4
	beq	Ldone

Laligned2:
	cmpi	cr0,0,r4,2		; If remaining length is less than two - go to wrap-up
	blt	Lleftovers
	andi.	r9, r3, 0x3		; If aligned on a 4-byte boundary, go to that code
	beq	Laligned4
	lhz	r5, 0(r3)		; Load and add a halfword to the checksum
	adde	r8, r8, r5
	slwi	r7, r7, 1		; r7 is 1 on every path here, so the step becomes 2
	add	r3, r3, r7
	subf.	r4, r7, r4
	beq	Ldone


/*
 * Add longwords up to the 32 byte boundary
 */

Laligned4:
	addi	r7, 0, 4
Lloop4:
	cmpi	cr0,0,r4,4
	blt	Lleftovers
	and.	r9, r3, r10
	beq	Laligned32
	lwz	r5, 0(r3)
	adde	r8, r8, r5
	add	r3, r3, r7
	subf.	r4, r7, r4
	bne	Lloop4
	b	Ldone


/*
 * We're aligned on a 32 byte boundary now - add 8 longwords to checksum
 * until the remaining length is less than 32
 */
Laligned32:
	cmpli	cr0,0,r4,32		; Unsigned: fewer than 32 bytes left?
	blt	Lleftovers

Lmainloop:
	addi	r9, 0, 64
	addi	r10, 0, 32
	cmpi	cr0,0,r4,64
	blt	Lnopretouch
	dcbt	r3, r10			; Touch one cache-line ahead
Lnopretouch:
	lwz	r5, 0(r3)

/*
 * This is the main meat of the checksum. I attempted to arrange this code
 * such that the processor would execute as many instructions as possible
 * in parallel.
 */

Lloop:
	cmpi	cr0,0,r4,96
	blt	Lnotouch
	dcbt	r3, r9			; Touch two cache lines ahead
Lnotouch:
	adde	r8, r8, r5
	lwz	r5, 4(r3)
	lwz	r6, 8(r3)
	lwz	r7, 12(r3)
	adde	r8, r8, r5
	lwz	r5, 16(r3)
	adde	r8, r8, r6
	lwz	r6, 20(r3)
	adde	r8, r8, r7
	lwz	r7, 24(r3)
	adde	r8, r8, r5
	lwz	r5, 28(r3)
	add	r3, r3, r10
	adde	r8, r8, r6
	adde	r8, r8, r7
	adde	r8, r8, r5
	subf	r4, r10, r4
	; Compare the whole remaining length, not only its low 16 bits:
	; a remainder that is a multiple of 0x10000 must keep looping.
	cmpli	cr0,0,r4,32
	blt	Lleftovers
	lwz	r5, 0(r3)
	b	Lloop

/*
 * Handle whatever bytes are left (fewer than 32 on every path that
 * reaches this label)
 */

Lleftovers:
	cmpi	cr0,0,r4,0
	beq	Ldone

	addi	r7, 0, 1
	addi	r10, 0, 0x7ffc

	and.	r9, r4, r10		; Any whole longwords left?
	bne	Lfourormore
	srw	r10, r10, r7		; Mask becomes 0x3ffe
	and.	r9, r4, r10		; Any whole halfword left?
	bne	Ltwoormore
	b	Loneleft

Lfourormore:
	addi	r10, 0, 4

Lfourloop:
	lwz	r5, 0(r3)
	adde	r8, r8, r5
	add	r3, r3, r10
	subf	r4, r10, r4
	andi.	r6, r4, 0xfffc
	bne	Lfourloop

Ltwoormore:
	andi.	r6, r4, 0xfffe
	beq	Loneleft
	lhz	r5, 0(r3)
	adde	r8, r8, r5
	addi	r3, r3, 2
	subi	r4, r4, 2

Loneleft:
	cmpi	cr0,0,r4,0
	beq	Ldone
	lbz	r5, 0(r3)
	slwi	r5, r5, 8		; A trailing byte occupies the high lane of its halfword
	adde	r8, r8, r5

/*
 * Wrap the longword around, adding the two 16-bit portions
 * to each other along with any previous and subsequent carries.
 */
Ldone:
	addze	r8, r8			; Add the carry
	addze	r8, r8			; Add the carry again (the last add may have carried)
	andis.	r6, r8, 0xffff		; Stuff r6 with the high order 16 bits of sum word
	srwi	r6, r6, 16		; Shift it to the low order word
	andi.	r8, r8, 0xffff		; Zero out the high order word
	add	r8, r8, r6		; Add the two halves

	andis.	r6, r8, 0xffff		; Do the above again in case we carried into the
	srwi	r6, r6, 16		; high order word with the last add.
	andi.	r8, r8, 0xffff
	add	r3, r8, r6

	cmpi	cr0,0,r11,0		; Check to see if we need to swap the bytes
	beq	Ldontswap

/*
 * The byte lanes of our sum are inverted relative to byte 0 of the
 * checksummed data, so we need to swap the checksum bytes.
 */
	slwi	r8, r3, 8		; shift byte 0 to byte 1
	clrlwi	r8, r8, 16		; Clear top 16 bits
	srwi	r3, r3, 8		; shift byte 1 to byte 0
	or	r3, r8, r3		; or them

Ldontswap:
	add	r3, r3, r12		; Add in the passed-in checksum
	andis.	r6, r3, 0xffff		; Wrap and add any carries into the top 16 bits
	srwi	r6, r6, 16
	andi.	r3, r3, 0xffff
	add	r3, r3, r6

	andis.	r6, r3, 0xffff		; Do the above again in case we carried into the
	srwi	r6, r6, 16		; high order word with the last add.
	andi.	r3, r3, 0xffff
	add	r3, r3, r6
	blr

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.