/* --- Copyright University of Sussex 1997. All rights reserved. ----------
 * File:            S.sun4/src/afloat.s
 * Purpose:
 * Author:          John Gibson, Aug 19 1988 (see revisions)
 */

;;; ----------------- FLOATING POINT SUPPORT -----------------------------

#_<

#_INCLUDE 'asm.ph'

constant
	procedure Sys$-Float_qrem
	;

lconstant macro (
	;;; current exception bits in %fsr
	NXC	= 2:1e0,	;;; result inexact
	DZC	= 2:1e1,	;;; division by zero
	UFC	= 2:1e2,	;;; underflow
	OFC	= 2:1e3,	;;; overflow
	NVC	= 2:1e4,	;;; invalid operand

	NODC	= NVC || OFC || DZC,
	);

>_#

/********************* wrapping structures ***********************************/

ASM_TEXT_SECTION
	.word	Ltext_end-Ltext_start, C_LAB(Sys$-objmod_pad_key)
Ltext_start:
ASM_DATA_SECTION
	.word	Ldata_end-Ldata_start, C_LAB(Sys$-objmod_pad_key)
Ldata_start:

/**************************************************************************/

ASM_TEXT_SECTION

	;;; _pfcopy(_dest_addr, _src_addr)
	;;; copy a double float from one location to another
DEF_C_LAB (_pfcopy)
	ld	[%us], %o0		;;; source addr
	ld	[%us+4], %o1		;;; dest addr
	inc	8, %us
	ldd	[%o0], %f0
	retl
	std	%f0, [%o1]


;;; --- ARITHMETIC --------------------------------------------------------

/* 	These routines return their 1st arg (_dfaddr1) rather than <true>
	(just quicker, and nothing relies on them returning <true>).
	<false> is returned for overflow, etc.
*/

	;;; _pfadd(_dfaddr1, _dfaddr2) -> _dfaddr1 or <false>
	;;; add _dfaddr2 destructively into _dfaddr1
DEF_C_LAB (_pfadd)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
	faddd	%f0, %f2, %f0
	st	%fsr, [%us]		;;; get float status flags
	ld	[%us], %o1
	btst	_:NODC, %o1		;;; NVC, OFC or DZC set ?
	bz	retn_result		;;; return result if not
	inc	4, %us
	retl
	st	%r_false, [%us]		;;; else return false

	;;; _pfsub(_dfaddr1, _dfaddr2) -> _dfaddr1 or <false>
	;;; subtract _dfaddr2 destructively from _dfaddr1
DEF_C_LAB (_pfsub)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
	fsubd	%f0, %f2, %f0
	st	%fsr, [%us]		;;; get float status flags
	ld	[%us], %o1
	btst	_:NODC, %o1		;;; NVC, OFC or DZC set ?
	bz	retn_result		;;; return result if not
	inc	4, %us
	retl
	st	%r_false, [%us]		;;; else return false

	;;; _pfmult(_dfaddr1, _dfaddr2) -> _dfaddr1 or <false>
	;;; multiply _dfaddr1 destructively by _dfaddr2
DEF_C_LAB (_pfmult)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
	fmuld	%f0, %f2, %f0
	st	%fsr, [%us]		;;; get float status flags
	ld	[%us], %o1
	btst	_:NODC, %o1		;;; NVC, OFC or DZC set ?
	bz	retn_result		;;; return result if not
	inc	4, %us
	retl
	st	%r_false, [%us]		;;; else return false

	;;; _pfmult(_dfaddr1, _dfaddr2) -> _dfaddr1 or <false>
	;;; divide _dfaddr1 destructively by _dfaddr2
DEF_C_LAB (_pfdiv)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
;;; next instruction in hex due "as" problem: inserts fmovs %f0, %f0
;;; after an fdivd, which on Sparcstations resets %fsr
	.word 0x81a009c2
;;; is	fdivd	%f0, %f2, %f0
	st	%fsr, [%us]		;;; get float status flags
	ld	[%us], %o1
	btst	_:NODC, %o1		;;; NVC, OFC or DZC set ?
	bz	retn_result		;;; return result if not
	inc	4, %us
	retl
	st	%r_false, [%us]		;;; else return false

retn_result:
	retl
	std	%f0, [%o0]		;;; result back into _dfaddr1


;;; --- OTHER ARITHMETIC OPERATIONS ---------------------------------------

	;;; _pfabs(_dfaddr) (absolute value)
DEF_C_LAB (_pfabs)
	ld	[%us], %o0		;;; _dfaddr
	inc	4, %us
	ldub	[%o0], %o1		;;; top byte of ms part with sign
	bclr	0x80, %o1		;;; clear sign bit
	retl
	stb	%o1, [%o0]		;;; replace byte

	;;; _pfnegate(_dfaddr) (negate)
DEF_C_LAB (_pfnegate)
	ld	[%us], %o0		;;; _dfaddr
	inc	4, %us
	ldub	[%o0], %o1		;;; top byte of ms part with sign
	btog	0x80, %o1		;;; invert sign bit
	retl
	stb	%o1, [%o0]		;;; replace byte

	;;; dummy that just chain to -Float_qrem-
DEF_C_LAB (_pfqrem)
	sethi	%hi(XC_LAB(Sys$-Float_qrem)), %o0
	jmp	%o0+%lo(XC_LAB(Sys$-Float_qrem))
	nop



;;; --- PREDICATES --------------------------------------------------------

	;;; replace tos with true and return (o0 contains %hi(true) )
repl_true:
	bset	%lo(C_LAB(true)), %o0
	;;; replace tos with o0 and return
repl_o0:
	retl
	st	%o0, [%us]


	;;; _pfsgr(_dfaddr1, _dfaddr2) -> bool
DEF_C_LAB (_pfsgr)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
	inc	4, %us
	fcmpd	%f0, %f2
	sethi	%hi(C_LAB(true)), %o0	;;; need delay after fcmpd
	fbg,a	repl_o0; bset %lo(C_LAB(true)), %o0
	retl
	st	%r_false, [%us]

	;;; _pfsgreq(_dfaddr1, _dfaddr2) -> bool
DEF_C_LAB (_pfsgreq)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
	inc	4, %us
	fcmpd	%f0, %f2
	sethi	%hi(C_LAB(true)), %o0	;;; need delay here
	fbge,a	repl_o0; bset %lo(C_LAB(true)), %o0
	retl
	st	%r_false, [%us]

	;;; _pfeq(_dfaddr1, _dfaddr2) -> bool
DEF_C_LAB (_pfeq)
	ld	[%us], %o1		;;; _dfaddr2
	ld	[%us+4], %o0		;;; _dfaddr1
	ldd	[%o1], %f2		;;; load 2 into f2,f3
	ldd	[%o0], %f0		;;; load 1 into f0,f1
	inc	4, %us
	fcmpd	%f0, %f2
	sethi	%hi(C_LAB(true)), %o0	;;; need delay here
	fbe,a	repl_o0; bset %lo(C_LAB(true)), %o0
	retl
	st	%r_false, [%us]

	;;; _pfneg(_dfaddr) -> bool
	;;; test double float negative (genuinely, i.e. not -0.0)
DEF_C_LAB (_pfneg)
	ld	[%us], %o0		;;; _dfaddr
	ldd	[%o0], %o2		;;; load both parts
	tst	%o2			;;; sign bit set?
	bpos	1f			;;; false if positive
	sll	%o2, 1, %o2		;;; remove sign bit
	orcc	%o3, %o2, %g0		;;; any bits set in either part?
	bnz,a	repl_true; sethi %hi(C_LAB(true)), %o0	;;; true if so
1:	retl
	st	%r_false, [%us]		;;; else false

	;;; _pfzero(_dfaddr) -> bool
	;;; test double float zero (i.e. including -0.0)
DEF_C_LAB (_pfzero)
	ld	[%us], %o0		;;; _dfaddr
	ldd	[%o0], %o2		;;; load both parts
	sll	%o2, 1, %o2		;;; remove sign bit from ms part
	orcc	%o3, %o2, %g0		;;; any bits set?
	bz,a	repl_true; sethi %hi(C_LAB(true)), %o0	;;; true if not
	retl
	st	%r_false, [%us]		;;; else false


;;; --- CONVERSION -------------------------------------------------------

;;; ---- Routines to float sysints, decimals and ddecimals

	;;; _pf_sfloat_dec(decimal)
DEF_C_LAB (_pf_sfloat_dec)
	ld	[%us], %o1		;;; decimal
	bclr	1, %o1			;;; clear tag bit on decimal
	retl
	st	%o1, [%us]		;;; store result

	;;; _pf_dfloat_int(_int, _dfaddr)
DEF_C_LAB (_pf_dfloat_int)
	ld	[%us], %o0		;;; _dfaddr
	ld	[%us+4], %f0		;;; _int
	inc	8, %us
	fitod	%f0, %f0		;;; cvt to double float
	retl
	std	%f0, [%o0]		;;; store result

	;;; _pf_dfloat_dec(decimal, _dfaddr)
DEF_C_LAB (_pf_dfloat_dec)
	ld	[%us+4], %o1		;;; decimal
	ld	[%us], %o0		;;; _dfaddr
	bclr	1, %o1			;;; clear tag bit on decimal
	st	%o1, [%us+4]		;;; store single in mem
	ld	[%us+4], %f0		;;; then load into f0
	inc	8, %us
	fstod	%f0, %f0		;;; cvt single to double float
	retl
	std	%f0, [%o0]		;;; store result

	;;; _pf_dfloat_ddec(ddecimal, _dfaddr)
DEF_C_LAB (_pf_dfloat_ddec)
	ld	[%us], %o0		;;; _dfaddr
	ld	[%us+4], %o1		;;; ddecimal ptr
	inc	8, %us
	ld	[%o1+_DD_1], %o2	;;; ms part
	ld	[%o1+_DD_2], %o3	;;; ls part
	retl
	std	%o2, [%o0]		;;; store result


;;; ---- Routines to convert back from a double float

	;;; _pf_intof(_dfaddr) -> _dfaddr -> _int  or -> <false>
DEF_C_LAB (_pf_intof)
	ld	[%us], %o0		;;; _dfaddr
	ldd	[%o0], %f0		;;; load into float reg
	fdtoi	%f0, %f0		;;; cvt to int
	st	%fsr, [%us]		;;; get float status flags
	ld	[%us], %o1
	st	%f0, [%us]		;;; _int result
	btst	_:NVC, %o1		;;; NVC set ?
	bz,a	1f			;;; return result if not
	st	%o0, [%us-4]		;;; _dfaddr (non-false result)
	retl
	st	%r_false, [%us]		;;; else return false

1:	retl
	dec	4, %us


	;;; _pf_cvt_to_dec(_dfaddr) -> decimal or -> <false>
	;;; convert double to decimal (rounded to 21 bit mantissa)
DEF_C_LAB (_pf_cvt_to_dec)
	ld	[%us], %o0		;;; _dfaddr
	ldd	[%o0], %o2		;;; both parts in o2,o3
	srl	%o2, 20, %o2		;;; exponent down to bit 0
	btst	0x7ff, %o2		;;; exponent zero?
	bz	2f			;;; return 0 if so
	;;; set bits to force rounding of 22nd bit of mantissa in all
	;;; cases except where tied and 21st bit is 0 (even)
	sethi	%hi(0x40000000), %o1
	cmp	%o3, %o1
	be	1f			;;; tied and even, won't round
	sethi	%hi(0x38000000), %o1
	bset	%o1, %o3		;;; set 23rd, 24th, 25th bits
	st	%o3, [%o0+4]		;;; replace ls word

1:	ldd	[%o0], %f0		;;; load the double
	fdtos	%f0, %f0		;;; cvt and round to single
	st	%f0, [%us]		;;; get single on tos
	ld	[%us], %o0		;;; now in o0
	srl	%o0, 23, %o1		;;; exponent at bottom
	andcc	%o1, 0xff, %o1		;;; mask exponent
	bnz,a	3f
	cmp	%o1, 0xff		;;; else test exponent = 255? (overflow)
	;;; return 0
2:	mov	1, %o0			;;; return 0.0 for zero exponent
	retl
	st	%o0, [%us]

3:	bne,a	4f			;;; br if not exponent /= 255
	bclr	2, %o0			;;; clearing int tag bit
	retl
	st	%r_false, [%us]		;;; else return false

4:	bset	1, %o0			;;; set simple tag bit
	retl
	st	%o0, [%us]		;;; return it


	;;; _pf_cvt_to_ddec(_dfaddr, ddecimal)  (fill in given ddecimal)
DEF_C_LAB (_pf_cvt_to_ddec)
	ld	[%us+4], %o1		;;; _dfaddr
	ld	[%us], %o0		;;; the result ddecimal
	ldd	[%o1], %o2		;;; both parts
	inc	8, %us
	srl	%o2, 20, %o4		;;; exponent down to bit 0
	btst	0x7ff, %o4		;;; exponent non-zero?
	bnz,a	1f			;;; leave alone if so
	st	%o2, [%o0+_DD_1]	;;; store ms part
	clr	[%o0+_DD_1]		;;; else make zero
	clr	%o3
1:	retl
	st	%o3, [%o0+_DD_2]


	;;; _pfmodf(_fracaddr, _dfaddr)
	;;; frac part of _dfaddr into _fracaddr, int part back into _dfaddr
DEF_C_LAB (_pfmodf)
	save	%sp, -24*4, %sp		;;; need space for _modf to store i regs
	ld	[%us], %l0		;;; _dfaddr
	ld	[%us+4], %l1		;;; addr for frac
	mov	%l0, %o2		;;; _dfaddr receives int part
	ldd	[%l0], %o0		;;; parts into arg regs o0, o1
	inc	8, %us
	mov	%r_svb,   %i0		;;; save g regs across call
	mov	%r_false, %i1
	mov	%us,      %i2
	tst	%o0			;;; -ve ?
	bneg,a	2f			;;; br if so
	sethi	%hi(0x80000000), %o3
	call	_modf			;;; return frac part in f0
	nop
1:	std	%f0, [%l1]		;;; store frac part
	mov	%i0, %r_svb		;;; restore g regs
	mov	%i1, %r_false
	mov	%i2, %us
	ret
	restore

	;;; negative
2:	call	_modf
	bclr	%o3, %o0		;;; make positive
	ldub	[%l0], %o1		;;; _dfaddr with int part
	fnegs	%f0, %f0		;;; negate frac part
	bset	0x80, %o1		;;; make int part neg
	b	1b
	stb	%o1, [%l0]


;;; ---- Routines to extract/update sfloat and dfloat field values

	;;; _pf_extend_s_to_d(_dfaddr) -> _dfaddr or <false>
	;;; extend single at _dfaddr into double (where single could
	;;; be Inf or NaN)
DEF_C_LAB (_pf_extend_s_to_d)
	ld	[%us], %o0		;;; _dfaddr
	lduh	[%o0], %o1		;;; half containing exponent
	srl	%o1, 7, %o1		;;; shift down
	and	%o1, 0xff, %o1		;;; and out
	cmp	%o1, 0xff		;;; Inf or NaN?
	be	1f			;;; br if so
	ld	[%o0], %f0		;;; the single
	fstod	%f0, %f0		;;; convert to double
	retl
	std	%f0, [%o0]		;;; double back into _dfaddr
1:	retl
	st	%r_false, [%us]		;;; else return false

	;;; _pf_check_d(_dfaddr) -> _dfaddr or <false>
	;;; check double at _dfaddr (where double could be Inf or NaN)
DEF_C_LAB (_pf_check_d)
	ld	[%us], %o0		;;; _dfaddr
	lduh	[%o0], %o1		;;; half containing exponent
	srl	%o1, 4, %o1		;;; shift down
	and	%o1, 0x7ff, %o1		;;; and out
	cmp	%o1, 0x7ff		;;; Inf or NaN?
	be,a	1f			;;; br if so ...
	st	%r_false, [%us]		;;; ... returning false
1:	retl
	nop

	;;; _pf_round_d_to_s(_dfaddr) -> _dfaddr or <false>
	;;; round double to single back into _dfaddr
DEF_C_LAB (_pf_round_d_to_s)
	ld	[%us], %o0		;;; _dfaddr
	ldd	[%o0], %f0		;;; the double
	fdtos	%f0, %f0		;;; convert and round to single
	st	%fsr, [%us-4]		;;; get float status flags
	ld	[%us-4], %o1
	btst	_:OFC, %o1		;;; OFC set ?
	bz,a	1f			;;; OK if not
	st	%f0, [%o0]		;;; single back into _dfaddr
	st	%r_false, [%us]		;;; else return false
1:	retl
	nop


;;; --- GET/SET EXPONENT --------------------------------------------------

	;;; get and set exponent E of a double float, where E is
	;;; the number of bits needed for the integer part, e.g. 1 for 1.0,
	;;; 0 for 0.5, -1 for 0.05, etc.

	;;; _pf_expof(_dfaddr) -> _E
DEF_C_LAB (_pf_expof)
	ld	[%us], %o0		;;; _dfaddr
	lduh	[%o0], %o0		;;; half containing exponent
	srl	%o0, 4, %o0		;;; shift down
	and	%o0, 0x7ff, %o0		;;; and out
	dec	1022, %o0		;;; get _E
	retl
	st	%o0, [%us]		;;; return it

	;;; _E -> _pf_expof(_dfaddr) -> _dfaddr or -> <false>
	;;; ( <false> if _E too big/small )
DEF_C_LAB (-> _pf_expof)
	ld	[%us], %o0		;;; _dfaddr
	ld	[%us+4], %o1		;;; _E
	inc	4, %us
	inccc	1022, %o1		;;; now IEEE exponent
	ble	1f			;;; underflow if 0 or neg
	cmp	%o1, 0x7ff		;;; overflow (>= 2047) ?
	bge	1f			;;; br if so
	sll	%o1, 4, %o1		;;; align exponent
	lduh	[%o0], %o2		;;; first half of dfloat
	set	0x7ff0, %o3
	bclr	%o3, %o2		;;; clear current exponent
	bset	%o1, %o2		;;; set new
	sth	%o2, [%o0]		;;; put back
	retl
	st	%o0, [%us]
1:	retl
	st	%r_false, [%us]		;;; return false


/*************** end labels for wrapping structures **************************/

ASM_TEXT_SECTION
	.align	8
Ltext_end:
ASM_DATA_SECTION
	.align	8
Ldata_end:

/**************************************************************************/



/* --- Revision History ---------------------------------------------------
--- John Gibson, Oct  1 1997
	Now includes asm.ph
--- John Gibson, Mar 24 1995
	Made _pfmodf save g registers across call to C _modf
--- John Gibson, Feb 25 1995
	Removed _m*ath1/2 subroutines (library math functions now called
	via _extern)
--- John Gibson, Dec  6 1993
	In order to fix Solaris non-detection of overflow, rewrote
	_m*ath1 and _m*ath2 to just call routines in c_core.c
--- Robert John Duncan, Jun  1 1993
	Changed to use ASM_SECTION macros for changing section
--- Robert John Duncan, Jul 27 1992
	Changed to use -EXTERN_NAME-
--- John Gibson, Mar 25 1991
	_pf_extend_s_to_d changed to check for externally-generated Infs/NaNs
	and return boolean result; _pf_check_d added to do same for doubles.
--- John Gibson, Jan  7 1991
	Fixed bug in _pf_cvt_to_dec (wasn't testing for 0 exponent before
	trying to round).
--- John Gibson, Nov 29 1990
	Fixed problem in _pfdiv: the assembler inserts an `fmovs %f0, %f0'
	instruction after an fdivd (for no apparently good reason), which
	on Sparcstations clears the float status flags and disables checking
	of overflow, dividing by 0, etc.
--- Ian Rogers, Dec 15 1989
	Moved in RogerE's code for _pf_sfloat_dec
--- John Gibson, Aug 17 1989
	Replaced # EXEC ... # ENDEXEC with #_< ... >_#
--- John Gibson, Jan 15 1989
	Replaced use of UC_LAB etc for updater with -> before pathname
 */
