Source to machdep/i386/fp_emul/fp_remsc.s
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
* Reserved. This file contains Original Code and/or Modifications of
* Original Code as defined in and that are subject to the Apple Public
* Source License Version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. Please obtain a copy of the
* License at http://www.apple.com/publicsource and read it before using
* this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
* License for the specific language governing rights and limitations
* under the License."
*
* @APPLE_LICENSE_HEADER_END@
*/
.file "remsc.s"
.ident "@(#)kern-fp:remsc.s 1.1"
// ********************************************************************
//
// r e m s c . m o d
// =================
//
// ===============================================================
// intel corporation proprietary information
// this software is supplied under the terms of a license
// agreement or non-disclosure agreement with intel corporation
// and may not be copied nor disclosed except in accordance with
// the terms of that agreement.
// ===============================================================
//
// functions:
// implements 80387 fprem instruction.
// implements 80387 fprem1 instruction.
// implements 80387 fscale instruction.
//
// public procedures:
// remr scale
// add_to_frac add_to_frac_2
//
// internal procedures:
// remrx
//
// ************************************************************************
//
//...March 3, 1987...
//
//
//$nolist
#include "fp_e80387.h"
//$list
.data //a_msr segment rw public
// extrn %gs:sr_masks,%gs:sr_flags,%gs:sr_errors,%gs:sr_controls
//a_msr ends
//
// assume %ds:a_msr
//
.text //a_med segment er public
//
// extrn put_result,set_up_indefinite,test_4w
// extrn subtraction_normalize,move_op_to_result
// extrn underflow_response,right_shift_frac1_cl
// extrn right_shift_frac2_cl,left_shift_frac1_1
// extrn fix32,overflow_response,set_stk_u_error
// extrn subadd,put_arith_result,set_up_nan_return
// extrn test_6w,clear_6w,norm_denorm
// extrn left_shift_frac2_cl
.globl remr
.globl add_to_frac
.globl add_to_frac_2
.globl scale
.globl remrx
//
// ***************************************************************************
// remrx:
// ******
// function:
// fractional remainder.
//
// inputs:
// assumes dividend is in op1, divisor is in op2,
// number of quotient bits to generate is in ax,
// and q is cleared to all zeroes.
//
// outputs:
// it returns the fractional remainder in op1, and
// the low bits of the quotient in q.
//
// data accessed:
// - offset_operand1 offset_operand2
// - offset_result
//
// data changed:
// - frac1 frac2
//
// procedures called:
// right_shift_frac1_cl right_shift_frac2_cl
// left_shift_frac1_1 add_to_frac
//
// ************************************************************************
ALIGN
remrx: //proc
push %eax // save the loop count
movb $1,%cl // shift frac1 right 1 bit
call right_shift_frac1_cl
movb $1,%cl // shift frac2 right 1 bit
call right_shift_frac2_cl // returns cf clear, ch = 0
mov $offset_operand2,%esi // result_frac <-- - frac2
mov $offset_result,%edi
movw $0,(%ebp,%esi) // clear extra bytes
mov $0x03,%ecx // load loop count
FALLSTHRU
complement_frac2:
mov $0x0000,%eax // clear ax, leave cf intact
sbb (%ebp,%esi),%eax // 0 - frac2
mov %eax,(%ebp,%edi) // store into result frac
inc %esi // bump offsets
inc %esi // (doesnt affect cf)
inc %esi
inc %esi
inc %edi
inc %edi
inc %edi
inc %edi
LOOP(complement_frac2) // loop until result = -frac2
jmp enter_loop
ALIGN
form_next_frac1:
push %ecx // stack loop count
call left_shift_frac1_1 // shift frac1 left one bit
mov $offset_operand2,%esi
testb $0x01,q // if lsb of q = 1 then
jz do_add // frac1 <-- frac1 + frac2
enter_loop:
mov $offset_result,%esi // else, frac1 <-- frac1 - frac2
do_add:
mov $offset_operand1,%edi
call add_to_frac // move carry-out from add
rclb $1,q // shift carry-out into q
// adc hi_q, 0 ; hi_q counts non-0 quotient bits
pop %ecx // above bit b7 of the quotient.
LOOP(form_next_frac1) // if looping done,
testb $0x01,q // then one last iteration
jnz last_shift // frac1 <-- frac1 + frac2
mov $offset_operand2,%esi
mov $offset_operand1,%edi
call add_to_frac
last_shift:
jmp left_shift_frac1_1 // shift frac1 left 1 bit
//remrx endp
//
// **************************************************************************
// add_to_frac:
// ***********
// function:
// adds a 10-byte fraction to another 10-byte fraction.
//
// inputs:
// ss:ebp+esi points to the source fraction, and ss:ebp+edi points
// to the destination fraction.
//
// outputs:
// carry flag set if there was a carry out, else reset.
//
// data accessed:
//
// data changed:
//
// ***************************************************************************
ALIGN
add_to_frac: //proc
mov (%ebp,%esi),%eax
xorw %ax,%ax
add_to_frac_2:
add %eax,(%ebp,%edi)
mov frac64(%ebp,%esi),%eax
adc %eax,frac64(%ebp,%edi)
mov frac32(%ebp,%esi),%eax
adc %eax,frac32(%ebp,%edi)
simple_return:
ret
ALIGN
//add_to_frac endp
// *************************************************************************
ALIGN
mov_esi_edi: //proc
add %ebp, %edi //add global record offsets
add %ebp, %esi
// push %ds //save a_msr ??
// push %ss //load source segment register
// pop %ds
// push %ss //load destination segment register
// pop %es //into es
/* FAST_MOVSL */
movl %ss:0(%esi),%ecx
movl %ecx,%ss:0(%esi)
movl %ss:4(%esi),%ecx
movl %ecx,%ss:4(%esi)
movl %ss:8(%esi),%ecx
movl %ecx,%ss:8(%esi)
// addl $12,%esi
// addl $12,%edi
// movl $0,%ecx
// pop %ds //reload a_msr
ret
//mov_esi_edi endp
// *************************************************************************
//
// ***************************************************************************
// remr:
// *****
// function:
// 80387 remainder instruction
//
// inputs:
// assumes the operand records are set up.
//
// outputs:
// results in result record.
//
// data accessed:
// - result_rec_offset offset_operand1
// - tag1 expon1
// - word_frac1 tag2
// - expon2 msb_frac2
// - offset_result
//
// data changed:
// - tag1 expon1
//
// procedures called:
// set_up_indefinite set_stk_u_error
// remrx subtraction_normalize
// move_op_to_result underflow_response
// put_result test_4w
// subadd mov_esi_edi
//
// ***************************************************************************
ALIGN
remr: //proc
lahf // save stack empty flag (86 z-flag)
andb $~(a_mask+c_mask),%gs:sr_flags// clear 87 a-flag, and also
// c-flag for 387 compatibility
movw $0,exp_tmp // initialize q and hi_q to 0.
sahf // restore stack empty flag (86 z-flag)
jz catch_special_cases // branch if no stack error
call set_stk_u_error // stack underflow occurred
jmp unmasked_i_error_
ALIGN
catch_special_cases:
movb tag1(%ebp),%al // both operands valid?
orb tag2(%ebp),%al
jnz special_cases_handler // no, branch to handler of special cases
valid_case:
mov dword_expon1,%eax // eax <- expon_diff = exp1 - exp2
sub dword_expon2,%eax // if expon_diff < zero,
mov $offset_operand1,%edi // then op1 is the modulus
jl rem_or_mod_
cmp $63,%eax // if expon_diff > 63,
jle calc_exponent // then set c-bit for incomplete
orb $c_mask,%gs:sr_flags // reduction and replace expon_diff
or $32,%eax // by ((expon_diff or 32) mod 64)
and $63,%eax
calc_exponent:
sub %eax,dword_expon1 // expon1 <- remainder exponent
inc %eax // num_quotient_bits = expon_diff + 1
call remrx // calculate remainder fraction
xor %eax,%eax // detect zero result
mov $dword_frac1+frac64,%edi
call test_4w
jnz do_normalize
testb $c_mask,%gs:sr_flags // was reduction complete?
jz remr_zero // if so, process q as well
movb %al,q // else, make q 0 (value of al)
jmp remr_zero
ALIGN
do_normalize:
mov $offset_operand1,%edi
call subtraction_normalize
// mov (ebp).tag1, valid ; unneeded if denormals retagged
reduction_incomplete_:
testb $c_mask,%gs:sr_flags
jz rem_or_mod_ // rem or mod matters only if complete
movb $0,q // if incomplete, make q 0.
jmp check_rem_underflow // branch to check unmasked underflow
ALIGN
rem_or_mod_:
lahf //save zf to indicate whether
//op2 needs restoration
cmpb $rem1_op,operation_type(%ebp)
jne check_rem_underflow
sahf
jnz op2_restored
movb $1, %cl
call left_shift_frac2_cl
op2_restored:
decl dword_expon2
push sign1(%ebp)
movb positive,sign2(%ebp)
movb positive,sign1(%ebp)
movb $sub_op,operation_type(%ebp)
mov dword_expon1, %eax
cmp dword_expon2, %eax
mov $offset_operand1, %esi
jle save_shiftable_op
mov $offset_operand2, %esi
save_shiftable_op:
mov $offset_cop, %edi
call mov_esi_edi
call subadd
mov $offset_result,%edi
call test_6w
pop sign1(%ebp)
mov $offset_operand1, %edi
jnz check_sign
tiebreaker:
testb $1,q
jz check_rem_underflow
incb q
notb sign1(%ebp)
jmp check_rem_underflow
ALIGN
check_sign:
mov $offset_cop, %esi
cmpb positive,result_sign(%ebp)
je restore_op2_
call mov_esi_edi
mov $offset_operand1, %edi
jmp check_rem_underflow
ALIGN
restore_op2_:
mov dword_expon1, %eax
cmp dword_expon2, %eax
je reduce_modulus
mov $offset_operand2, %edi
call mov_esi_edi
reduce_modulus:
incb q
incl dword_expon2
movb sign1(%ebp),%al
movb %al,sign2(%ebp)
call subadd
mov $offset_result,%edi
check_rem_underflow:
cmp $0x0001,expon(%ebp,%edi) // if expon1 < 1, then underflow
jge do_put_result
mov %edi,%esi // give std underflow response
call move_op_to_result
push %gs:sr_masks
orb prec64,%gs:sr_controls // *or* works only because prec64
movb false,rnd1_inexact // sets all bits in the pc field
call underflow_response
pop %gs:sr_masks
move_result_to_result:
mov $offset_result,%edi
jmp do_put_result
ALIGN
remr_zero:
mov %eax,dword_expon1 // if remainder fraction = 0,
movb special,tag1(%ebp) // then set result to 0
cmpb $rem1_op,operation_type(%ebp)
jne put_op1
movb positive,sign1(%ebp)
testb rnd_down,%gs:sr_controls
jz put_op1
testb rnd_up,%gs:sr_controls
jnz put_op1
movb negative,sign1(%ebp)
put_op1:
mov $offset_operand1,%edi
do_put_result:
mov offset_result_rec,%esi
call put_result
andb $~(zero_mask+a_mask+sign_mask),%gs:sr_flags
movb q,%al
shlb $6,%al // move 3 low bits of q to
jnc fix_z_bit // s, z, and a bits
orb $sign_mask,%gs:sr_flags
fix_z_bit:
shlb $1,%al
jnc fix_a_bit
orb $zero_mask,%gs:sr_flags
fix_a_bit:
shlb $1,%al
jnc remr_done
orb $a_mask,%gs:sr_flags
remr_done:
ret
ALIGN
special_cases_handler:
testb $0x10,%al // al contains (ebp).tag1 or (ebp).tag2
jz op1_nan_ // no branch if at least one op unsupported
invalid_operand:
orb invalid_mask,%gs:sr_errors
unmasked_i_error_:
testb invalid_mask,%gs:sr_masks// if unmasked, just exit
jz remr_done // (c-bit remains clear here,
// to indicate complete reduction.)
// else, return indefinite
remr_indef:
mov $offset_operand1,%edi
call set_up_indefinite
load_esi_for_result:
mov offset_result_rec,%esi
jmp put_arith_result
ALIGN
op1_nan_: // here, neither op is unsupported.
movb tag2(%ebp),%al
movb tag1(%ebp),%ah
cmpb inv,%ah
je op1_snan_
cmpb inv,%al
jne non_nan_supp_ops
jmp signal_invalid_
ALIGN
op1_snan_:
testb $0x40,msb_frac1
jz signal_for_snan
op2_also_nan_:
cmpb inv,%al
jne jmp_set_up_nan_return
signal_invalid_:
testb $0x40,msb_frac2
jnz jmp_set_up_nan_return
signal_for_snan:
orb invalid_mask,%gs:sr_errors// set i-error for signaling nan operand
testb invalid_mask,%gs:sr_masks// if unmasked, return forthwith
jz remr_done // else, masked, so return max nan.
jmp_set_up_nan_return:
jmp set_up_nan_return
ALIGN
non_nan_supp_ops:
cmpb $scale_op,operation_type(%ebp)
je scale_non_nan_supp_ops
rem_non_nan_supp_ops:
cmpb infinty,%ah
je invalid_operand
cmpb special,%al
je invalid_operand
cmpb denormd,%ah
jne op2_denorm_
orb denorm_mask,%gs:sr_errors
testb denorm_mask,%gs:sr_masks
jz remr_done
mov $offset_operand1,%edi
cmpb infinty,%al
jne norm_op1
testb $0x80,msb_frac1
jz op1_true_denormal
movl $1,dword_expon1
movb valid,tag1(%ebp)
jmp load_esi_for_result
ALIGN
op1_true_denormal:
testb underflow_mask,%gs:sr_masks
jnz load_esi_for_result
call norm_denorm
addl wrap_around_constant,dword_expon1// add wrap-around
orb underflow_mask,%gs:sr_errors
jmp load_esi_for_result
ALIGN
norm_op1:
push %eax
call norm_denorm
pop %eax
op2_denorm_:
cmpb denormd,%al
jne non_error_specials
orb denorm_mask,%gs:sr_errors
testb denorm_mask,%gs:sr_masks
jz remr_done
mov $offset_operand2,%edi
push %eax
call norm_denorm
pop %eax
non_error_specials:
cmpb special,%ah
je put_op1
op2_inf_:
cmpb infinty,%al
je put_op1
jmp valid_case
ALIGN
//
scale_non_nan_supp_ops:
cmpb infinty,%ah
jne scaler_denormd_
cmpb positive,sign1(%ebp)
jne op2_also_inf_
cmpb special,%al
je invalid_operand
cmpb denormd,%al
jne xfer_sign
orb denorm_mask,%gs:sr_errors
testb denorm_mask,%gs:sr_masks
jz exit_scale
xfer_sign:
movb sign2(%ebp),%al
movb %al,sign1(%ebp)
mov $offset_operand1,%edi
jmp put_scaled_result
ALIGN
op2_also_inf_:
cmpb infinty,%al
je invalid_operand
cmpb denormd,%al
jne zero_op2
orb denorm_mask,%gs:sr_errors
testb denorm_mask,%gs:sr_masks
jz exit_scale
zero_op2:
mov $offset_operand2,%edi
call clear_6w
mov %eax,dword_expon2 // clear eax
movb special,tag2(%ebp)
jmp put_scaled_result
ALIGN
scaler_denormd_:
cmpb denormd,%ah
jne scalend_denormd_
orb denorm_mask,%gs:sr_errors
testb denorm_mask,%gs:sr_masks
jnz scalend_denormd_
exit_scale:
ret
ALIGN
scalend_denormd_:
cmpb denormd,%al
jne give_op2
orb denorm_mask, %gs:sr_errors
testb denorm_mask, %gs:sr_masks
jz exit_scale
cmpb valid,%ah
jne check_unfl_mask
mov $offset_operand2,%edi
call norm_denorm
jmp valid_scale_case
ALIGN
check_unfl_mask:
testb underflow_mask,%gs:sr_masks
jnz give_op2
jmp scale_underflow
//remr endp
//
//
// **********************************************************************
// scale:
// ******
// function:
// emulates the 80387 scale instruction.
//
// inputs:
// scale term (scaler) from st(1) in operand1, and
// scalend (*to be scaled*) from st(0) in operand2
//
// outputs:
// scaled operand in operand2 record or result_record.
// error indicators set.
//
// data accessed:
// - result_rec_offset tag1
// - word_frac1 offset_operand2
// - tag2 expon2
// - offset_result extra_word_reg
//
// data changed:
// - expon2
//
// procedures called:
// set_up_indefinite fix32 put_result
// move_op_to_result underflow_response
// overflow_response
//
// *************************************************************************
ALIGN
scale: //proc
jz catch_spcl_scale_cases // branch if no stack error
call set_stk_u_error // stack underflow occurred
jmp unmasked_i_error_
ALIGN
catch_spcl_scale_cases:
andb $~(a_mask),%gs:sr_flags// clear 87 a-bit
movb tag1(%ebp),%al // both operands valid?
orb tag2(%ebp),%al
jnz special_cases_handler // no, branch to handler of special cases
valid_scale_case:
push %gs:sr_masks // save current rounding controls
orb rnd_to_zero,%gs:sr_controls // institute round by chopping
// this *or* works only because
// rnd_to_zero sets entire rc field
call fix32 // convert scale factor to int32
pop %gs:sr_masks // restore rnd control
jz add_scale_term // if zf=1, no overflow in fix32
cmpb positive,sign1(%ebp)
je get_least_sf_xtrm_ovfl
jmp get_grtst_sf_xtrm_unfl
ALIGN
add_scale_term:
mov dword_frac1+frac32(%ebp),%eax // int32 scale factor
cmp least_sf_xtrm_ovfl,%eax
jle check_xtrm_unfl
get_least_sf_xtrm_ovfl:
mov least_sf_xtrm_ovfl,%eax
jmp add_int32_to_dword_exp
ALIGN
check_xtrm_unfl:
cmp grtst_sf_xtrm_unfl,%eax
jge add_int32_to_dword_exp
get_grtst_sf_xtrm_unfl:
mov grtst_sf_xtrm_unfl,%eax
add_int32_to_dword_exp:
add %eax,dword_expon2 // add scale term to op2s expon
cmpl $0x7ffe,dword_expon2
jg scale_overflow
cmp $1,dword_expon2
jl scale_underflow
give_op2:
mov $offset_operand2,%edi
put_scaled_result:
mov offset_result_rec,%esi
jmp put_result
ALIGN
scale_overflow:
mov $offset_operand2,%esi // move op2 to result
call move_op_to_result
call overflow_response
jmp set_up_result
ALIGN
scale_underflow:
mov $offset_operand2,%esi // move op2 to result
call move_op_to_result
push %gs:sr_masks
orb prec64,%gs:sr_controls // *or* works only because prec64
movb false,rnd1_inexact // sets all bits in the pc field
call underflow_response
pop %gs:sr_masks
set_up_result:
mov $offset_result,%edi
jmp put_scaled_result
ALIGN
//scale endp
//
//a_med ends
//
// end