|
|
/* $Id: udiv.S,v 1.4 1996/09/30 02:22:38 davem Exp $
 * udiv.S:	This routine was taken from glibc-1.09 and is covered
 *		by the GNU Library General Public License Version 2.
 */


/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
 * Division and remainder, from Appendix E of the Sparc Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input: dividend and divisor in %o0 and %o1 respectively.
 * Output: quotient in %o0 (0 if the divide-by-zero trap returns).
 *
 * m4 parameters:
 *  .udiv	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  false	false=true => signed; false=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register usage in the code below:
 *  %o2		Q
 *  %o3		R
 *  %o4		ITER
 *  %o5		V
 *  %g1		the constant 1 << (32 - 4 - 1), used as the "large" threshold
 *  %g7		count of single-bit divide steps (large-dividend path only)
 * All of these, plus the integer condition codes, are overwritten.
 *
 * Layout convention: an instruction indented by one extra space sits in
 * the delay slot of the branch directly above it.
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */


	.globl .udiv
	.globl _Udiv
.udiv:
_Udiv:	/* needed for export */

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor; also sets Z if divisor == 0
	bne	1f
	 mov	%o0, %o3		! R = dividend (delay slot, always executed)

		! Divide by zero trap.  If it returns, return 0 (about as
		! wrong as possible, but that is what SunOS does...).
		ta	0x2
		retl
		 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2			! Q = 0 on both paths

	sethi	%hi(1 << (32 - 4 - 1)), %g1	! %g1 = 1 << 27; its low 10 bits are
					! zero, so sethi alone loads it

	cmp	%o3, %g1
	blu	Lnot_really_big		! R < 1 << 27: 4-bits-at-a-time path
	 clr	%o4			! ITER = 0 on both paths

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
	1:
		cmp	%o5, %g1
		bgeu	3f		! stop once V >= 1 << 27
		 mov	1, %g7		! (re)initialise the single-bit step count

		sll	%o5, 4, %o5	! V <<= N

		b	1b
		 add	%o4, 1, %o4	! ITER++ for every N-bit shift of V

	! Now compute %g7.
	2:
		addcc	%o5, %o5, %o5	! V <<= 1, carry set if a bit fell off the top
		bcc	Lnot_too_big
		 add	%g7, 1, %g7

		! We get here if the %o1 overflowed while shifting.
		! This means that %o3 has the high-order bit set.
		! Restore %o5 and subtract from %o3.
		sll	%g1, 4, %g1	! high order bit
		srl	%o5, 1, %o5		! rest of %o5
		add	%o5, %g1, %o5	! V is back to its value before the addcc

		b	Ldo_single_div
		 sub	%g7, 1, %g7	! undo the count for the shift just reverted

	Lnot_too_big:
	3:
		cmp	%o5, %o3
		blu	2b		! V < R: keep doubling V
		 nop

		be	Ldo_single_div	! V == R; since the back-up step below is
		 nop			! commented out, V > R falls through to
					! the same label
	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
	Ldo_single_div:
		subcc	%g7, 1, %g7
		bl	Lend_regular_divide	! no single-bit steps to perform
		 nop

		sub	%o3, %o5, %o3	! first step, unconditional: R -= V
		mov	1, %o2		! Q = 1

		b	Lend_single_divloop
		 nop
	Lsingle_divloop:
		sll	%o2, 1, %o2	! Q <<= 1 (sll leaves the condition codes alone)
		bl	1f		! tests R, via the tst in the delay slot of
					! the bge at the bottom of this loop
		 srl	%o5, 1, %o5	! V >>= 1 on both paths
		! %o3 >= 0
		sub	%o3, %o5, %o3
		b	2f
		 add	%o2, 1, %o2
	1:	! %o3 < 0
		add	%o3, %o5, %o3
		sub	%o2, 1, %o2
	2:
	Lend_single_divloop:
		subcc	%g7, 1, %g7
		bge	Lsingle_divloop
		 tst	%o3		! condition codes now reflect R

		b,a	Lend_regular_divide	! annulled: no delay-slot instruction runs

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! V <<= N

	cmp	%o5, %o3
	bleu	1b			! repeat while V <= R
	 addcc	%o4, 1, %o4		! ITER++ (executed on every pass)

	be	Lgot_result		! tests the addcc above: taken only if
					! ITER is now 0
	 sub	%o4, 1, %o4		! Ldivloop is entered once by falling
					! through, so pre-decrement the counter

	tst	%o3	! set up for initial iteration
	!
	! Main loop: one pass develops N = 4 quotient bits using a fully
	! unrolled decision tree.  At each depth the sign of R (taken from
	! the previous tst/subcc/addcc) selects the branch, V is halved in
	! the delay slot, and R has V subtracted (R was >= 0) or added
	! (R was < 0).  Each leaf adds its odd, signed 4-step result
	! (2*accumulated +/- 1) to Q.  Label L.d.k is the "remainder is
	! negative" target at depth d with accumulated bits k - 16.
	!
Ldivloop:
	sll	%o2, 4, %o2
		! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
			! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
			! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4		! one fewer N-bit pass remaining
	bge	Ldivloop
	 tst	%o3			! condition codes reflect R, both for the
					! next pass and for the fixup test below

	bl,a	Lgot_result		! R < 0: take the branch and run the delay
					! slot; otherwise the slot is annulled and
					! control falls through to the same label
	! non-restoring fixup here (one instruction only!)
	sub	%o2, 1, %o2

Lgot_result:

	retl
	 mov	%o2, %o0		! return Q in %o0

	! Variant that uses the udiv instruction instead of the software loop.
	! %y is cleared first; udiv sits in the delay slot of retl, so the
	! quotient is in %o0 when control reaches the caller.
	! NOTE(review): the name suggests this sequence is copied over .udiv
	! at run time on CPUs with hardware divide, with the trailing nop as
	! padding -- confirm against the code that references .udiv_patch.
	! NOTE(review): nop/nop/retl presumably cover the delayed write to
	! %y before udiv reads it -- confirm against the V8 manual.
	.globl .udiv_patch
.udiv_patch:
	wr	%g0, 0x0, %y
	nop
	nop
	retl
	 udiv	%o0, %o1, %o0
	nop
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.