/* Source to machdep/i386/fp_emul/fp_lipsq.s */
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
* Reserved. This file contains Original Code and/or Modifications of
* Original Code as defined in and that are subject to the Apple Public
* Source License Version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. Please obtain a copy of the
* License at http://www.apple.com/publicsource and read it before using
* this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
* License for the specific language governing rights and limitations
* under the License."
*
* @APPLE_LICENSE_HEADER_END@
*/
.file "lipsq.s"
.ident "@(#)kern-fp:lipsq.s 1.1"
// ***************************************************************************
//
// l i p s q . m o d
// =================
//
// ===============================================================
// intel corporation proprietary information
// this software is supplied under the terms of a license
// agreement or non-disclosure agreement with intel corporation
// and may not be copied nor disclosed except in accordance with
// the terms of that agreement.
// ===============================================================
//
// functions:
// implements the loading of the constants:
// one, log base 2 of ten, log base 2 of e, pi,
// log base 10 of 2, log base e of 2, and zero.
// implements the 80387 square root instruction.
// implements the 80387 integer part instruction.
//
// public procedures:
// load_con sqrt intpt
//
// *****************************************************************************
//
//...september 16, 1983...
//
// .file *a_mli*
//
//$nolist
#include "fp_e80387.h"
//$list
.text //a_med segment er public
//
// extrn put_si_result,sticky_right_shift,round
// extrn addition_normalize,left_shift,getx
// extrn set_up_indefinite,clear_6w,set_6w
// extrn test_4w,left_shift_result_cl
// extrn left_shift_frac1_cl,left_shift_frac2_cl
// extrn add_to_frac_2,gradual_underflow
// extrn subtraction_normalize,put_indefinite
// extrn i_masked_,set_i_masked_,set_d_masked_
// extrn affine_infinity_,get_precision
//
.globl load_con
.globl intpt
.globl sqrt
//
// temp real floating point numbers for push constant instructions
//
// layout: each entry is six 16-bit words (12 bytes), lowest address first.
// load_con copies an entry into the result record as follows:
//   word 0    -> result_dword_frac+frac80 (bits below the 64-bit fraction)
//   words 1-2 -> result_dword_frac+frac64
//   words 3-4 -> result_dword_frac+frac32
//   word 5    -> dword_result_expon (zero-extended to 32 bits)
// the table sits in .text and is read with a cs: segment override.
// load_con biases the table address by load_1_op*12, so the entry order
// presumably follows the load-constant opcode order starting at load_1_op
// -- confirm against the instruction dispatcher before reordering.
//
treal_table:
.value 0x00000,0x00000,0x00000,0x00000,0x08000,0x03fff //treal_one
.value 0x04000,0x08afe,0x0cd1b,0x0784b,0x0d49a,0x04000 //treal_l2t
.value 0x0c000,0x0f0bb,0x05c17,0x03b29,0x0b8aa,0x03fff //treal_l2e
.value 0x0c000,0x0c234,0x02168,0x0daa2,0x0c90f,0x04000 //treal_pi
.value 0x0a000,0x0f798,0x0fbcf,0x09a84,0x09a20,0x03ffd //treal_lg2
.value 0x0e000,0x079ab,0x0d1cf,0x017f7,0x0b172,0x03ffe //treal_ln2
.value 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000 //treal_0
//$eject
// ***************************************************************
// load_con
// ******
// function:
// implements the load constants instructions.
// all load constant instructions share this single entry point.
//
// inputs:
// bl  - selects the table entry (see the index computation below).
// ebp - base register for the result fields written here.
//
// outputs:
// constant value on top of stack. the value is delivered by the
// tail jump to put_si_result with edi = offset_result; this routine
// never executes a ret of its own.
//
// data accessed:
// - treal_table (read with a cs: segment override)
// - offset_result
// - before_error_signals
//
// data changed:
// - result fraction, dword_result_expon, result_sign
// - a-bit of gs:sr_flags (cleared)
// NOTE(review): mem_operand_pointer and getx, which were named in the
// historical header, are not referenced by the code below.
//
// procedures called:
// round addition_normalize put_si_result
//
// *******************************************************************
ALIGN
load_con: //proc near
//
// bl is halved, multiplied by 6 and biased by treal_table - load_1_op*12
// to form the address of one 12-byte entry of treal_table.
// NOTE(review): for this to land on an entry boundary the caller must
// pass bl = 4 * (constant opcode index) -- confirm against the dispatcher.
// the entry is copied into the result record, rounded and normalized,
// and put_si_result stores it.
//
shrb $1,%bl //take care of extra shift: bl <- bl / 2
mov $0x06,%eax //eax <- 6 (upper bytes zero)
mulb %bl // ax <- al * bl = 6 * (bl / 2)
add $(treal_table-(load_1_op*12)),%eax // eax <- address of selected entry
mov %eax, %esi
movl $0, result_dword_frac(%ebp) //clear lowest dword of result fraction
movw %cs:(%esi), %ax //table word 0
movw %ax, result_dword_frac+frac80(%ebp)
mov %cs:2(%esi), %eax //table words 1-2
mov %eax, result_dword_frac+frac64(%ebp)
mov %cs:6(%esi), %eax //table words 3-4
mov %eax, result_dword_frac+frac32(%ebp)
movzwl %cs:10(%esi), %eax //table word 5, zero-extended
mov %eax, dword_result_expon
xor %eax, %eax
mov %eax, result_sign(%ebp) //dword of zero stored at result_sign
movb prec64, %dl //same precision argument intpt passes to round
mov $offset_result, %edi //eax is still zero when round is called
call round
mov $offset_result, %edi
call addition_normalize
andb $~a_mask, %gs:sr_flags //clear the a-bit
// order matters below: xor sets zf and clears cf, the mov does not alter
// flags, and lahf then copies the low flags byte into ah.
// NOTE(review): what put_si_result expects in eax is not visible here --
// confirm before rearranging these three instructions.
xor %eax, %eax
mov before_error_signals(%ebp), %eax
lahf
mov $offset_result, %edi
jmp put_si_result //tail call; put_si_result returns to our caller
//load_con endp
//$eject
// ***********************************************************************
// intpt:
// *****
// function:
// implements 80387 integer part instruction
//
// inputs:
// zf  - set on entry when there is no stack error (first branch below).
// ebp - base register for the operand 1 fields.
//
// outputs:
// every path that produces a value leaves through put_op1_result.
// a plain ret (intpt_done) is taken instead when an invalid or denormal
// condition is signalled and its bit in gs:sr_masks is clear.
//
// data accessed:
// - offset_result offset_operand1
// - tag1 expon1
// - word_frac1 msb_frac1
// - rnd1_inexact added_one
// - gs:sr_masks
//
// data changed:
// - tag1 expon1
// - msb_frac1 (quiet bit, when an snan is converted)
// - gs:sr_errors gs:sr_flags
//
// procedures called:
// set_stk_u_error set_up_indefinite
// gradual_underflow round
// test_4w subtraction_normalize
// put_op1_result
//
// note: the labels kind_of_nan_ and give_op1 are also jump targets of
// the sqrt routine later in this file, so they must keep their meaning.
//
// **************************************************************************
ALIGN
intpt: //proc
// movb true,%ah / load constant
jz separate_cases // branch if no stack error
call set_stk_u_error
testb invalid_mask, %gs:sr_masks
jz intpt_done //mask bit clear: return without a result
jmp put_op1_result //math stack error, so return indef
ALIGN
separate_cases:
andb $~a_mask, %gs:sr_flags //initialize a-bit to zero
movb tag1(%ebp), %al // load op1 tag
cmpb valid,%al
je valid_case
cmpb denormd,%al
je denormalized_operand
cmpb unsupp,%al
jne check_nan
movl $offset_operand1, %edi //unsupported format: op1 <- indefinite
call set_up_indefinite
set_i_err:
orb invalid_mask, %gs:sr_errors //signal i_error
testb invalid_mask, %gs:sr_masks
jz intpt_done //mask bit clear: return without a result
jmp put_op1_result // masked_i error so return indef
ALIGN
intpt_done:
ret
ALIGN
check_nan:
cmpb inv, %al
je kind_of_nan_
jmp put_op1_result //infinity or zero, same answer
ALIGN
kind_of_nan_: //also entered from sqrt
testb $0x40, msb_frac1 //bit 0x40 of the top fraction byte marks a qnan
jz make_qnan
jmp put_op1_result // op1 is a qnan so pass it through
ALIGN
make_qnan:
orb $0x40, msb_frac1 //op1 is an snan so make it a qnan
jmp set_i_err //and signal i_error
ALIGN
denormalized_operand:
orb denorm_mask, %gs:sr_errors //signal d_error
testb denorm_mask, %gs:sr_masks
jz intpt_done //mask bit clear: return without a result
movw $0x0001,expon1(%ebp) //if masked d-error, make valid
movb valid,tag1(%ebp)
valid_case: //denormalized_operand falls through to here
movl $0x403e,%eax
cmpl %eax,dword_expon1 //if expon >=63, then number
jge give_op1 // is already an integer
// NOTE(review): eax still holds 0x403e at the call below; presumably it
// is the target exponent for gradual_underflow -- confirm.
mov $offset_operand1,%edi //gradual uflow until expon=63
push %edi //keep operand offset across the call
call gradual_underflow
pop %edi //round to precision 64
movb prec64,%dl
movb false,%al
call round
cmpb true, rnd1_inexact
jne detect_zero //rounding was exact
orb $inexact_mask, %gs:sr_errors //inexact: signal precision error
cmpb true,added_one
jne detect_zero
orb $a_mask, %gs:sr_flags //rounding added one: set the a-bit
detect_zero:
xor %eax,%eax
movl $dword_frac1+4,%edi //if fraction = 0, result = 0
call test_4w
jz zero_result
mov $offset_operand1,%edi //normalize
call subtraction_normalize
jmp put_op1_result
ALIGN
zero_result:
// NOTE(review): relies on eax being zero here, i.e. on test_4w leaving
// eax = 0 whenever it returns with zf set -- confirm.
movl %eax,dword_expon1 //set result to true zero
movb special,tag1(%ebp)
give_op1: //also entered from sqrt (via put_op1)
jmp put_op1_result
//intpt endp
//$eject
// ***************************************************************************
// sqrtx:
//
// function:
// fractional square root routine.
//
// inputs:
// assumes valid, non-zero,positive, normalized
// fraction is in frac1.
// ebp is the base register for the frac1, frac2 and result fields.
//
// outputs
// leaves fractional square root in result.
// returns to the caller through the final tail jump to
// left_shift_result_cl.
//
// data accessed:
// - offset_operand1 lsb_frac1
// - offset_operand2 word_frac2
// - offset_result lsb_frac2
// - result_word_frac lsb_result
// - msb_frac1
//
// data changed:
// - word_frac2 lsb_frac2
// - result_word_frac lsb_result
// - frac1 (shifted left two bits per iteration)
//
// procedures called:
// left_shift_result_cl left_shift_frac1_cl
// left_shift_frac2_cl clear_6w
// set_6w add_to_frac_2
// test_4w
//
// loop bookkeeping:
// the loop count and a saved flags image (cf = current quotient bit)
// live on the stack. at enter_sqrt_loop the flags image is on top with
// the loop count directly beneath it. flags produced by the additions
// into frac2 survive pop and loop, so the carry out of each addition
// becomes the next quotient bit.
//
// ***********************************************************************
ALIGN
sqrtx: //proc
movl $result_dword_frac,%edi //during this computation,
call clear_6w //the lsb of the result will
mov $dword_frac2,%edi // hold g and s, and the msb
call set_6w // will hold carry-out bits
and $0x000000ff,%ecx //clear bits 8-31 of ecx
stc //cf holds the quotient bit
push $65 // iterate 65 times
pushf // stack the quotient bit
jmp enter_sqrt_loop
ALIGN
sqrt_loop:
push %ecx //save remaining loop count
pushf // stack the q_bit
movb $1,%cl
call left_shift_result_cl //shift result left one bit
popf // inject the new q_bit into
pushf // the least significant byte
// NOTE(review): adcb computes cl + cl + cf, so this ors in the bare q_bit
// only if left_shift_result_cl returns with cl = 0 -- confirm.
adcb %cl,%cl
orb %cl,1+lsb_result
movb $2,%cl // into the lsb
call left_shift_frac2_cl //shift frac2 left 2 bits
enter_sqrt_loop:
movb msb_frac1,%al //g and s bits of frac2 <--
andb $0x0c0,%al // top 2 bits of frac1
movb %al,lsb_frac2
movb $2,%cl // shift frac1 left 2
call left_shift_frac1_cl
movb $0x0c0,%al //al <- the .11 appended below the result
movb 1+lsb_result,%ah
popf //test q_bit
jc q_bit_set
shl $16,%eax //move al/ah above the extra low word
call add_to_frac2 //frac2.gs <-- frac2.gs +
jmp set_q_bit //result.11
q_bit_set:
notb %ah //frac2.gs <-- frac2.gs +
shl $16,%eax //take care of extra low word
add %eax,dword_frac2(%ebp) //not(result).11
mov result_dword_frac+frac64(%ebp),%eax
not %eax
adc %eax,dword_frac2+frac64(%ebp) //carry chained from the add above
mov result_dword_frac+frac32(%ebp),%eax
not %eax
adc %eax,dword_frac2+frac32(%ebp) //final carry is the next q_bit
set_q_bit:
pop %ecx // reload loop count
loop sqrt_loop // loop until done
rcrb $1,%cl // set g bit of result to q_bit
orb %cl,lsb_result //ecx is 0 after loop, so cl is cf in bit 7
movw result_word_frac,%ax //frac2 <- frac2 +
incb %ah // result + 1
shl $16,%eax
call add_to_frac2
mov $dword_frac2+frac64,%edi //if frac2 = 0 then
xor %eax,%eax //s_bit of result = 0,
call test_4w //otherwise 1.
orb 1+lsb_frac2,%al
jz left_adjust_result
orb $0x40,lsb_result //s bit is bit 0x40 of lsb_result
left_adjust_result:
movb $8,%cl //shift result left 8 bits
jmp left_shift_result_cl // to eliminate carry
// local helper: points esi at the result fraction and edi at frac2, then
// tail-jumps to add_to_frac_2. eax is passed through unchanged.
add_to_frac2:
mov $result_dword_frac,%esi // si points to the addend
mov $dword_frac2,%edi // di points to the result
jmp add_to_frac_2 // add result frac to frac2
//sqrtx endp
//$eject
// ***********************************************************************
// sqrt:
//
// function:
// implements the 80387 sqrt instruction
//
// inputs:
// assume op1 is set up
// zf  - set on entry when there is no stack error (first branch below).
// ebp - base register for the operand 1 and result fields.
//
// outputs:
// result
// computed and indefinite results leave through put_si_result;
// operands returned unchanged leave through give_op1 in intpt;
// a plain ret (sqrt_done) is taken when an error is signalled and
// the corresponding masked test reports zf set.
//
// data accessed:
// - result_record_offset result_expon
// - offset_operand1 tag1
// - sign1 expon1
// - msb_frac1 offset_operand1
// - offset_result result_sign
// - result_tag
// - rnd1_inexact added_one
//
// data changed:
// - expon1 result_sign
// - result_tag result_expon
// - gs:sr_errors gs:sr_flags
//
// procedures called:
// set_stk_u_error set_up_indefinite
// set_i_masked_ set_d_masked_
// norm_denorm sticky_right_shift
// sqrtx get_precision
// round addition_normalize
// put_si_result
// also jumps to the labels kind_of_nan_ and give_op1 inside intpt.
//
// ***********************************************************************
ALIGN
sqrt: //proc
jz sqrt_cont // if stack error, sqrt done
call set_stk_u_error
testb invalid_mask, %gs:sr_masks
jnz put_op1 //masked stack error, so return indef
sqrt_done:
ret
ALIGN
sqrt_cont:
andb $~a_mask, %gs:sr_flags //initialize a-bit to zero
movb tag1(%ebp),%al // load tag for op1
cmpb valid,%al
jne op1_denorm_
cmpb positive, sign1(%ebp)
jne i_error //negative valid operand is invalid
jmp sqrt_valid_case
op1_denorm_:
cmpb denormd,%al //if op1 denormalized, then give
jne op1_zero_
cmpb positive, sign1(%ebp)
jne i_error //negative denormal: i_error
jmp d_error //positive denormal: d_error
op1_zero_:
cmpb special,%al //if op1 = 0, then give 0 as the
je put_op1 //result
cmpb unsupp,%al
je i_error
cmpb inv,%al //if op1 is a nan, reuse the nan
jne inf_op1 // handling of intpt
jmp kind_of_nan_
inf_op1:
cmpb positive, sign1(%ebp) //infinity case
je put_op1 // -infinity is invalid
i_error: //reached for a negative valid or
call set_i_masked_ // denormal operand, an unsupported
jz sqrt_done // format, or -infinity
mov $offset_result,%edi
call set_up_indefinite //if masked i_error, then give
jmp sqrt_give_result //indefinite
put_op1: //op1 = zero or +inf, or a masked
jmp give_op1 // stack error: give op1 as the result
d_error:
call set_d_masked_ //op1 is denormalized
jz sqrt_done
movl $offset_operand1, %edi
call norm_denorm //if d_error masked, make valid
sqrt_valid_case:
// the exponent is unbiased and made even before halving: an odd exponent
// is decremented, an even one has the fraction shifted right one bit.
subl $exponent_bias,dword_expon1
testl $0x0001,dword_expon1
jz even_expon
decl dword_expon1 //if expon1 odd, then expon1 <--
jmp halve_exponent // expon1 - 1
even_expon:
movb $1,%cl //if expon1 even, then shift
xorb %al,%al // frac1 right one bit
mov $offset_operand1,%edi
call sticky_right_shift
halve_exponent:
movl dword_expon1,%eax
sarl $1,%eax //arithmetic halving keeps the sign
addl $exponent_bias,%eax //restore the bias
movl %eax,dword_result_expon
call sqrtx // calculate fraction
sqrt_round_result:
xorl %eax,%eax // not second rounding
movl %eax,result_sign(%ebp) // set sign and tag
movl $offset_result,%edi // round result
call get_precision
call round
mov $offset_result,%edi
call addition_normalize //(possible renormalize)
cmpb true, rnd1_inexact
jne sqrt_give_result //rounding was exact
orb $inexact_mask, %gs:sr_errors //inexact: signal precision error
cmpb true, added_one
jne sqrt_give_result
orb $a_mask, %gs:sr_flags //rounding added one: set the a-bit
sqrt_give_result:
mov $offset_result,%edi
jmp put_si_result //tail call; returns to our caller
//sqrt endp
//
//a_med ends
//
// end