58217f5900
WOO HOO!
666 lines
17 KiB
ArmAsm
666 lines
17 KiB
ArmAsm
.file "reg_round.S"
|
|
/*---------------------------------------------------------------------------+
|
|
| reg_round.S |
|
|
| |
|
|
| Rounding/truncation/etc for FPU basic arithmetic functions. |
|
|
| |
|
|
| Copyright (C) 1993 |
|
|
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
|
| Australia. E-mail billm@vaxc.cc.monash.edu.au |
|
|
| |
|
|
| This code has four possible entry points. |
|
|
| The following must be entered by a jmp instruction: |
|
|
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
|
|
| |
|
|
| The _round_reg entry point is intended to be used by C code. |
|
|
| From C, call as: |
|
|
| void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
|
|
| |
|
|
| For correct "up" and "down" rounding, the argument must have the correct |
|
|
| sign. |
|
|
| |
|
|
+---------------------------------------------------------------------------*/
|
|
|
|
/*---------------------------------------------------------------------------+
|
|
| Four entry points. |
|
|
| |
|
|
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
|
|
| %eax:%ebx 64 bit significand |
|
|
| %edx 32 bit extension of the significand |
|
|
| %edi pointer to an FPU_REG for the result to be stored |
|
|
| stack calling function must have set up a C stack frame and |
|
|
| pushed %esi, %edi, and %ebx |
|
|
| |
|
|
| Needed just for the fpu_reg_round_sqrt entry point: |
|
|
| %cx A control word in the same format as the FPU control word. |
|
|
| Otherwise, PARAM4 must give such a value. |
|
|
| |
|
|
| |
|
|
| The significand and its extension are assumed to be exact in the |
|
|
| following sense: |
|
|
| If the significand by itself is the exact result then the significand |
|
|
| extension (%edx) must contain 0, otherwise the significand extension |
|
|
| must be non-zero. |
|
|
| If the significand extension is non-zero then the significand is |
|
|
| smaller than the magnitude of the correct exact result by an amount |
|
|
| greater than zero and less than one ls bit of the significand. |
|
|
| The significand extension is only required to have three possible |
|
|
| non-zero values: |
|
|
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
|
|
| bit smaller than the magnitude of the |
|
|
| true exact result. |
|
|
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
|
|
| smaller than the magnitude of the true |
|
|
| exact result. |
|
|
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
|
|
| bit smaller than the magnitude of the |
|
|
| true exact result. |
|
|
| |
|
|
+---------------------------------------------------------------------------*/
|
|
|
|
/*---------------------------------------------------------------------------+
|
|
| The code in this module has become quite complex, but it should handle |
|
|
| all of the FPU flags which are set at this stage of the basic arithmetic |
|
|
| computations. |
|
|
| There are a few rare cases where the results are not set identically to |
|
|
| a real FPU. These require a bit more thought because at this stage the |
|
|
| results of the code here appear to be more consistent... |
|
|
| This may be changed in a future version. |
|
|
+---------------------------------------------------------------------------*/
|
|
|
|
|
|
#include "fpu_asm.h"
|
|
#include "exception.h"
|
|
#include "control_w.h"
|
|
|
|
/*
 * Values stored in FPU_bits_lost.  The rounding code writes LOST_UP when
 * it increments the significand and LOST_DOWN when it discards non-zero
 * low-order bits; the byte is 0 when nothing was lost.
 */
#define LOST_DOWN		$1
#define LOST_UP			$2

/*
 * Values stored in FPU_denormal.  0 means the operand was not treated as
 * a de-normal on entry.
 */
#define DENORMAL		$1
#define UNMASKED_UNDERFLOW	$2


#ifdef REENTRANT_FPU
/*
 * Re-entrant build: the two flag bytes live on the stack, in the slot
 * that the extra push at fpu_reg_round_sqrt reserves.
 */
#define FPU_bits_lost		(%esp)
#define FPU_denormal		1(%esp)

#else
/*
 * Not re-entrant, so speed is gained by keeping the two flag bytes in a
 * static data area.
 */
	.data
	.align 2,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* REENTRANT_FPU */
|
|
|
|
|
|
/* Code section; the four entry points below are exported. */
	.text
	.align 2,144

	.globl	_round_reg
	.globl	fpu_reg_round
	.globl	fpu_reg_round_sqrt
	.globl	fpu_Arith_exit

/*
 * _round_reg -- entry point for callers written in C.
 *
 * Builds the stack frame that the shared exit path (fpu_Arith_exit)
 * unwinds: %ebp frame, then %esi, %edi and %ebx saved in that order.
 * It then loads the registers expected by fpu_reg_round_sqrt:
 *	%edi      <- PARAM1, the pointer to the register being rounded
 *	%eax:%ebx <- significand read through SIGH/SIGL of that register
 *	%edx      <- PARAM2, the significand extension
 *	%ecx      <- PARAM3, the control word
 * and continues at fpu_reg_round_sqrt (it never returns here).
 */
_round_reg:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi		/* destination / source register */
	movl	SIGH(%edi),%eax		/* ms 32 bits of the significand */
	movl	SIGL(%edi),%ebx		/* ls 32 bits of the significand */
	movl	PARAM2,%edx		/* extension */
	movl	PARAM3,%ecx		/* control word */
	jmp	fpu_reg_round_sqrt
|
|
|
|
/*
 * fpu_reg_round      -- normal entry; fetches the control word from PARAM4.
 * fpu_reg_round_sqrt -- entry used by wm_sqrt.S (and by _round_reg); the
 *                       control word is already in %ecx.
 *
 * On entry %eax:%ebx holds the significand, %edx its extension and %edi
 * points at the destination register.  The code classifies the operand
 * as de-normal or not, clears the "bits lost" flag, saves the complete
 * control word in %esi and dispatches on the precision-control field.
 */
fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx

fpu_reg_round_sqrt:		/* Entry point from wm_sqrt.S */

#ifdef REENTRANT_FPU
	/* Reserve the stack slot used for FPU_bits_lost / FPU_denormal */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpl	EXP_UNDER,EXP(%edi)
	jle	xMake_denorm		/* The number is a de-normal */

	movb	$0,FPU_denormal		/* 0 -> not a de-normal */

/* xMake_denorm and xUnmasked_underflow rejoin the main path here. */
xDenorm_done:
	movb	$0,FPU_bits_lost	/* No bits yet lost in rounding */

	movl	%ecx,%esi		/* keep the whole control word in %esi */
	andl	CW_PC,%ecx		/* isolate the precision-control field */
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

	/*
	 * Without PARANOID an unrecognised precision field simply falls
	 * through into whatever code follows this block.
	 */
#ifdef PARANOID
	jmp	L_bugged	/* There is no bug, just a bad control word */
#endif /* PARANOID */
|
|
|
|
|
|
/*
 * Round etc to 24 bit precision.
 *
 * The 24 bit result occupies the top 24 bits of %eax; the low 8 bits of
 * %eax, all of %ebx and the extension %edx are the bits to be discarded.
 * %esi holds the full control word; its rounding-control field selects
 * the mode.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate the rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_24	/* If positive then  down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	LRe_normalise		/* exact, nothing to do */

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:	/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* The carry from this add is examined at LCheck_Round_Overflow */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	LRe_normalise
|
|
|
|
|
|
/*
 * Round etc to 53 bit precision.
 *
 * The 53 bit result is %eax plus the top 21 bits of %ebx; the low 11
 * bits of %ebx and the extension %edx are the bits to be discarded.
 * %esi holds the full control word; its rounding-control field selects
 * the mode.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate the rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_53	/* If positive then  down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	LRe_normalise		/* exact, nothing to do */

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:	/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	/* The carry from this adc is examined at LCheck_Round_Overflow */
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	LRe_normalise
|
|
|
|
|
|
/*
 * Round etc to 64 bit precision.
 *
 * The whole of %eax:%ebx is kept; only the extension %edx is discarded.
 * %esi holds the full control word; its rounding-control field selects
 * the mode.
 *
 * LCheck_Round_Overflow is shared with the 24 and 53 bit paths, which
 * jump to it with the carry flag from their increment still live.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* isolate the rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx		/* any extension bits at all? */
	jnz	LDo_64_round_up
	jmp	LRe_normalise

LDown_64:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx		/* any extension bits at all? */
	jnz	LDo_64_round_up
	jmp	LRe_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half, no increment needed */

	jne	LDo_64_round_up		/* greater than half, increment needed */

	/*
	 * Now test for round-to-even.  A byte-sized test must name the
	 * byte register %bl, not %ebx: the operand sizes have to agree.
	 */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/* Entered with CF set if the increment carried out of the significand */
LCheck_Round_Overflow:
	jnc	LRe_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax			/* the carry becomes the new ms bit */
	rcrl	$1,%ebx
	incl	EXP(%edi)
	jmp	LRe_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	LRe_normalise		/* exact, nothing was lost */

/* Falls through into the code after this block once the flag is set */
LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
|
|
|
|
/*
 * Common tail of all rounding paths.
 *
 * If the operand was flagged in FPU_denormal, control goes to
 * xNormalise_result first.  Otherwise (or when that code jumps back to
 * xL_Normalised) the precision flag is raised according to
 * FPU_bits_lost, the result is tagged and stored through %edi, and the
 * exponent is checked for overflow.  %eax is 0 on the normal return
 * path from here.
 */
LRe_normalise:
	testb	$0xff,FPU_denormal
	jnz	xNormalise_result

xL_Normalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	xL_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	xL_precision_lost_down

/* The two precision-loss handlers jump back to this label */
xL_no_precision_loss:
	/* store the result */
	movb	TW_Valid,TAG(%edi)

xL_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	xorl	%eax,%eax	/* No errors detected. */

	cmpl	EXP_OVER,EXP(%edi)
	jge	L_overflow

fpu_reg_round_exit:
#ifdef REENTRANT_FPU
	/* Release the stack slot reserved at fpu_reg_round_sqrt */
	popl	%ebx		/* adjust the stack pointer */
#endif /* REENTRANT_FPU */

/*
 * Shared function epilogue: restores the registers that the calling
 * function pushed (%ebx, %edi, %esi), drops its frame and returns.
 */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
|
|
|
|
|
|
/*
 * Precision was lost because the result was rounded up: report it via
 * _set_precision_flag_up, keeping %eax intact across the call, then
 * resume at xL_no_precision_loss.
 */
xL_precision_lost_up:
	pushl	%eax
	call	_set_precision_flag_up
	popl	%eax
	jmp	xL_no_precision_loss

/*
 * Precision was lost because the result was truncated: report it via
 * _set_precision_flag_down, keeping %eax intact across the call, then
 * resume at xL_no_precision_loss.
 */
xL_precision_lost_down:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax
	jmp	xL_no_precision_loss
|
|
|
|
|
|
/*
 * xMake_denorm -- the exponent is at or below EXP_UNDER.
 *
 * The value is a de-normal (rounding may still lift it back to a normal
 * number).  With the underflow exception masked, the significand is
 * shifted right by (EXP_UNDER+1 - exponent) bits and the exponent is
 * raised to match; the shift is undone after rounding.  Bits shifted out
 * are folded into the extension %edx as "sticky" bits, so %edx stays
 * non-zero whenever anything non-zero was lost.
 *
 * %ecx (the control word) is saved on the stack while it is used as the
 * shift count and restored before rejoining at xDenorm_done.
 */
xMake_denorm:
	/* What happens next depends on whether underflow is masked */
	testb	CW_Underflow,%cl	/* Underflow mask. */
	jz	xUnmasked_underflow	/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx			/* Save */
	movl	EXP_UNDER+1,%ecx
	subl	EXP(%edi),%ecx		/* %ecx = number of bits to shift */

	cmpl	$64,%ecx		/* 64 or more: handled separately */
	jae	xDenorm_shift_more_than_63

	cmpl	$32,%ecx		/* shrd only works for 0..31 bits */
	jae	xDenorm_shift_more_than_32

	/*
	 * Straight-line path for the case assumed to be the most common:
	 * a small de-normalising shift of [1..31] bits.
	 */
	addl	%ecx,EXP(%edi)
	testl	%edx,%edx		/* extension */
	setne	%ch			/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	orb	%ch,%dl			/* keep the old extension as sticky */
	popl	%ecx
	jmp	xDenorm_done

/* Shift by [32..63] bits */
xDenorm_shift_more_than_32:
	addl	%ecx,EXP(%edi)
	subb	$32,%cl			/* remaining shift after a word move */
	testl	%edx,%edx
	setne	%ch
	orb	%ch,%bl
	xorl	%edx,%edx
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	testl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx		/* move everything down one word */
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	xDenorm_done

/* Shift by [64..) bits */
xDenorm_shift_more_than_63:
	cmpl	$64,%ecx
	jne	xDenorm_shift_more_than_64

	/* Exactly 64 bit shift: old %eax becomes the extension */
	addl	%ecx,EXP(%edi)
	xorl	%ecx,%ecx
	testl	%edx,%edx
	setne	%cl
	testl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al			/* sticky bit for the old %ebx / %edx */
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done

/* More than 64 bits: nothing of the significand survives */
xDenorm_shift_more_than_64:
	movl	EXP_UNDER+1,EXP(%edi)
	/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	xDenorm_done


/* Underflow is unmasked: leave the number alone and just record that */
xUnmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	xDenorm_done
|
|
|
|
|
|
/*
 * Undo the de-normalisation.
 *
 * Reached from LRe_normalise when FPU_denormal is non-zero.  For the
 * unmasked-underflow case control passes to xSignal_underflow.
 * Otherwise the rounded significand in %eax:%ebx is shifted left until
 * its ms bit is set, the exponent is lowered by the shift count, and a
 * (masked) underflow exception is raised if rounding lost any bits.
 * An all-zero significand goes to L_underflow_to_zero.
 */
xNormalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	xSignal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpl	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LNormalise_shift_done	/* Will be masked underflow */
#endif /* PECULIAR_486 */

	orl	%eax,%eax		/* ms bits */
	js	xL_Normalised		/* No longer a denormal */

	jnz	LNormalise_shift_up_to_31	/* Shift left 0 - 31 bits */

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

	/* Shift left 32 - 63 bits: first move the low word up */
	movl	%ebx,%eax
	xorl	%ebx,%ebx
	subl	$32,EXP(%edi)

LNormalise_shift_up_to_31:
	bsrl	%eax,%ecx	/* get the required shift in %ecx */
	subl	$31,%ecx
	negl	%ecx		/* %ecx = 31 - (index of the ms set bit) */
	shld	%cl,%ebx,%eax
	shl	%cl,%ebx
	subl	%ecx,EXP(%edi)

LNormalise_shift_done:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	jz	xL_Normalised

	/* There must be a masked underflow */
	push	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow		/* argument for _exception */
	call	_exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */
	jmp	xL_Normalised
|
|
|
|
|
|
/*
 * Masked response when the result is too small to represent: the
 * significand has become zero.  Raise the precision (down) flag and the
 * underflow exception, then store a zero with exponent EXP_UNDER via
 * xL_Store_significand.  %eax is preserved around both calls.
 */
L_underflow_to_zero:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax

	pushl	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow		/* argument for _exception */
	call	_exception
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */

	/* Reduce the exponent to EXP_UNDER */
	movl	EXP_UNDER,EXP(%edi)
	movb	TW_Zero,TAG(%edi)
	jmp	xL_Store_significand
|
|
|
|
|
|
/*
 * The result is too large to represent: hand the destination register
 * (%edi) to _arith_overflow as its single stack argument, remove that
 * argument again and leave through the normal exit path.
 */
L_overflow:
	pushl	%edi
	call	_arith_overflow
	popl	%edi
	jmp	fpu_reg_round_exit
|
|
|
|
|
|
/*
 * Unmasked-underflow handling after rounding.
 *
 * Rounding may have turned the number into a non-denormal, so the
 * exponent is tested again; if it is now above EXP_UNDER no underflow
 * is signalled.
 */
xSignal_underflow:
	cmpl	EXP_UNDER,EXP(%edi)
	jg	xL_Normalised		/* no longer an underflow */

/* Still at or below EXP_UNDER: signal the unmasked underflow */
xDo_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addl	$(3*(1<<13)),EXP(%edi)
	pushl	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow		/* argument for EXCEPTION */
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */
	jmp	xL_Normalised
|
|
|
|
|
|
#ifdef PARANOID
/*
 * If we ever get here then we have problems!
 *
 * Each handler raises an internal-error exception with its own code,
 * removes the argument from the stack, and leaves through
 * fpu_reg_round_exit with %eax = 1.
 */
L_bugged:
	pushl	EX_INTERNAL|0x201
	call	EXCEPTION
	popl	%ebx			/* discard the argument */
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x216
	call	EXCEPTION
	popl	%ebx			/* discard the argument */
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x217
	call	EXCEPTION
	popl	%ebx			/* discard the argument */
L_exception_exit:
	mov	$1,%eax
	jmp	fpu_reg_round_exit
#endif /* PARANOID */
|