Annotation of 43BSDTahoe/usr.lib/libm/national/sqrt.s, revision 1.1.1.1

1.1       root        1: ; Copyright (c) 1985 Regents of the University of California.
                      2: ; All rights reserved.
                      3: ;
                      4: ; Redistribution and use in source and binary forms are permitted
                      5: ; provided that the above copyright notice and this paragraph are
                      6: ; duplicated in all such forms and that any documentation,
                      7: ; advertising materials, and other materials related to such
                      8: ; distribution and use acknowledge that the software was developed
                      9: ; by the University of California, Berkeley.  The name of the
                     10: ; University may not be used to endorse or promote products derived
                     11: ; from this software without specific prior written permission.
                     12: ; THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
                     13: ; IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
                     14: ; WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
                     15: ;
                     16: ; All recipients should regard themselves as participants in an ongoing
                     17: ; research project and hence should feel obligated to report their
                     18: ; experiences (good or bad) with these elementary function codes, using
                     19: ; the sendbug(8) program, to the authors.
                     20: ;
                     21: ;      @(#)sqrt.s      5.3 (Berkeley) 6/30/88
                     22: ;
                     23: 
                     24: ; double sqrt(x)
                     25: ; double x;
                     26: ; IEEE double precision sqrt
                     27: ; code in NSC assembly by K.C. Ng
                     28: ; 12/13/85
                     29: ;
                     30: ; Method:
                     31: ;      Use Kahan's trick to get 8 bits initial approximation
                     32: ;      by integer shift and add/subtract. Then three Newton
                     33: ;      iterations to get down error to within one ulp. Finally
                     34: ;      twiddle the last bit to make to correctly rounded 
                     35: ;      according to the rounding mode.
                     36: ;
                     37:        .vers   2
                     38:        .text
                     39:        .align  2
                     40:        .globl  _sqrt
                     41: _sqrt:
                     42:        enter   [r3,r4,r5,r6,r7],44
                     43:        movl    f4,tos
                     44:        movl    f6,tos
                     45:        movd    2146435072,r2   ; r2 = 0x7ff00000
                     46:        movl    8(fp),f0        ; f2 = x 
                     47:        movd    12(fp),r3       ; r3 = high part of x
                     48:        movd    r3,r4           ; make a copy of high part of x in r4
                     49:        andd    r2,r3           ; r3 become the bias exponent of x
                     50:        cmpd    r2,r3           ; if r3 = 0x7ff00000 then x is INF or NAN
                     51:        bne     L22
                     52:                                ; to see if x is INF
                     53:        movd    8(fp),r0        ; r0 = low part of x
                     54:        movd    r4,r1           ; r1 is high part of x again
                     55:        andd    0xfff00000,r1   ; mask off the sign and exponent of x
                     56:        ord     r0,r1           ; or with low part, if 0 then x is INF
                     57:        cmpqd   0,r1            ; 
                     58:        bne     L1              ; not 0; therefore x is NaN; return x.
                     59:        cmpqd   0,r4            ; now x is Inf, is it +inf?
                     60:        blt     L1              ; +INF, return x 
                     61:                                ; -INF, return NaN by doing 0/0
                     62: nan:   movl    0f0.0,f0        ; 
                     63:        divl    f0,f0
                     64:        br      L1
                     65: L22:                           ; now x is finite
                     66:        cmpl    0f0.0,f0        ; x = 0 ?
                     67:        beq     L1              ; return x if x is +0 or -0
                     68:        cmpqd   0,r4            ; Is x < 0 ?
                     69:        bgt     nan             ; if x < 0 return NaN
                     70:        movqd   0,r5            ; r5 == scalx initialize to zero
                     71:        cmpqd   0,r3            ; is x is subnormal ?  (r3 is the exponent)
                     72:        bne     L21             ; if x is normal, goto L21
                     73:        movl    L30,f2          ; f2 = 2**54
                     74:        mull    f2,f0           ; scale up x by 2**54
                     75:        subd    0x1b00000,r5    ; off set the scale factor by -27 in exponent
                     76: L21:
                     77:                                ; now x is normal
                     78:                                ; notations:
                     79:                                ;    r1 == copy of fsr
                     80:                                ;    r2 == mask of e inexact enable flag
                     81:                                ;    r3 == mask of i inexact flag
                     82:                                ;    r4 == mask of r rounding mode
                     83:                                ;    r5 == x's scale factor (already defined)
                     84: 
                     85:        movd    0x20,r2
                     86:        movd    0x40,r3
                     87:        movd    0x180,r4
                     88:        sfsr    r0              ; store fsr to r0
                     89:        movd    r0,r1           ; make a copy of fsr to r1
                     90:        bicd    [5,6,7,8],r0    ; clear e,i, and set r to round to nearest
                     91:        lfsr    r0
                     92: 
                     93:                                ; begin to compute sqrt(x)
                     94:        movl    f0,-8(fp)
                     95:        movd    -4(fp),r0       ; r0 the high part of modified x
                     96:        lshd    -1,r0           ; r0 >> 1
                     97:        addd    0x1ff80000,r0   ; add correction to r0  ...got 5 bits approx.
                     98:        movd    r0,r6
                     99:        lshd    -13,r6          ; r6 = r0>>-15
                    100:        andd    0x7c,r6         ; obtain 4*leading 5 bits of r0
                    101:        addrd   L29,r7          ; r7 = address of L29 = table[0]
                    102:        addd    r6,r7           ; r6 = address of L29[r6] = table[r6]
                    103:        subd    0(r7),r0        ; r0 = r0 - table[r6]
                    104:        movd    r0,-4(fp)
                    105:        movl    -8(fp),f2       ; now f2 = y approximate sqrt(f0) to 8 bits
                    106: 
                    107:        movl    0f0.5,f6        ; f6 = 0.5
                    108:        movl    f0,f4
                    109:        divl    f2,f4           ; t = x/y
                    110:        addl    f4,f2           ; y = y + x/y
                    111:        mull    f6,f2           ; y = 0.5(y+x/y) got 17 bits approx.
                    112:        movl    f0,f4
                    113:        divl    f2,f4           ; t = x/y
                    114:        addl    f4,f2           ; y = y + x/y
                    115:        mull    f6,f2           ; y = 0.5(y+x/y) got 35 bits approx.
                    116:        movl    f0,f4
                    117:        divl    f2,f4           ; t = x/y
                    118:        subl    f2,f4           ; t = x/y - y
                    119:        mull    f6,f4           ; t = 0.5(x/y-y)
                    120:        addl    f4,f2           ; y = y + 0.5(x/y -y) 
                    121:                                ; now y approx. sqrt(x) to within 1 ulp
                    122: 
                    123:                                ; twiddle last bit to force y correctly rounded
                    124:        movd    r1,r0           ; restore the old fsr
                    125:        bicd    [6,7,8],r0      ; clear inexact bit but retain inexact enable
                    126:        ord     0x80,r0         ; set rounding mode to round to zero
                    127:        lfsr    r0
                    128: 
                    129:        movl    f0,f4
                    130:        divl    f2,f4           ; f4 = x/y
                    131:        sfsr    r0
                    132:        andd    r3,r0           ; get the inexact flag
                    133:        cmpqd   0,r0
                    134:        bne     L18
                    135:                                ; if x/y exact, then ...
                    136:        cmpl    f2,f4           ; if y == x/y 
                    137:        beq     L2
                    138:        movl    f4,-8(fp)
                    139:        subd    1,-8(fp)
                    140:        subcd   0,-4(fp)        
                    141:        movl    -8(fp),f4       ; f4 = f4 - ulp
                    142: L18:
                    143:        bicd    [6],r1
                    144:        ord     r3,r1           ; set inexact flag in r1
                    145: 
                    146:        andd    r1,r4           ; r4 = the old rounding mode
                    147:        cmpqd   0,r4            ; round to nearest?
                    148:        bne     L17
                    149:        movl    f4,-8(fp)
                    150:        addd    1,-8(fp)
                    151:        addcd   0,-4(fp)
                    152:        movl    -8(fp),f4       ; f4 = f4 + ulp
                    153:        br      L16
                    154: L17:
                    155:        cmpd    0x100,r4        ; round to positive inf ?
                    156:        bne     L16
                    157:        movl    f4,-8(fp)       
                    158:        addd    1,-8(fp)
                    159:        addcd   0,-4(fp)
                    160:        movl    -8(fp),f4       ; f4 = f4 + ulp
                    161: 
                    162:        movl    f2,-8(fp)       
                    163:        addd    1,-8(fp)
                    164:        addcd   0,-4(fp)
                    165:        movl    -8(fp),f2       ; f2 = f2 + ulp
                    166: L16:
                    167:        addl    f4,f2           ; y  = y + t
                    168:        subd    0x100000,r5     ; scalx = scalx - 1
                    169: L2:
                    170:        movl    f2,-8(fp)       
                    171:        addd    r5,-4(fp)
                    172:        movl    -8(fp),f0
                    173:        lfsr    r1
                    174: L1:
                    175:        movl    tos,f6
                    176:        movl    tos,f4
                    177:        exit    [r3,r4,r5,r6,r7]
                    178:        ret     0
                    179:        .data
                    180: L28:   .byte   64,40,35,41,115,113,114,116,46,99
                    181:        .byte   9,49,46,49,32,40,117,99,98,46
                    182:        .byte   101,108,101,102,117,110,116,41,32,57
                    183:        .byte   47,49,57,47,56,53,0
                    184: L29:   .blkb   4
                    185:        .double 1204
                    186:        .double 3062
                    187:        .double 5746
                    188:        .double 9193
                    189:        .double 13348
                    190:        .double 18162
                    191:        .double 23592
                    192:        .double 29598
                    193:        .double 36145
                    194:        .double 43202
                    195:        .double 50740
                    196:        .double 58733
                    197:        .double 67158
                    198:        .double 75992
                    199:        .double 85215
                    200:        .double 83599
                    201:        .double 71378
                    202:        .double 60428
                    203:        .double 50647
                    204:        .double 41945
                    205:        .double 34246
                    206:        .double 27478
                    207:        .double 21581
                    208:        .double 16499
                    209:        .double 12183
                    210:        .double 8588
                    211:        .double 5674
                    212:        .double 3403
                    213:        .double 1742
                    214:        .double 661
                    215:        .double 130
                    216: L30:   .blkb   4
                    217:        .double 1129316352      ;L30:   .double 0,0x43500000
                    218: L31:   .blkb   4
                    219:        .double 0x1ff00000
                    220: L32:   .blkb   4
                    221:        .double 0x5ff00000

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.