/* --- Copyright University of Sussex 1998. All rights reserved. ----------
 * File:            C.power/src/aarith.s
 * Purpose:
 * Author:          John Gibson, Mar  4 1998
 */

;;; ------------------- ARITHMETIC ROUTINES ------------------------------

;;;	***********************************************
;;;	****         NOTE ASSEMBLER BUG:          *****
;;;	****  (___reg) DOES NOT ASSEMBLE AS 0(___reg)   *****
;;;	***********************************************

#_<

#_INCLUDE 'asm.ph'

constant
	procedure $-Sys$-Array$-Sub_error
	;

lconstant macro (
	_PD_ARRAY_TABLE		= @@PD_ARRAY_TABLE,
	_PD_ARRAY_VECTOR	= @@PD_ARRAY_VECTOR,
	_PD_ARRAY_SUBSCR_PDR	= @@PD_ARRAY_SUBSCR_PDR,
	);


>_#

ASM_START_FILE

ASM_CODE_PSECT


;;; --- LOGICAL BIT ROUTINES ------------------------------------------------

	;;; _biset(______int1, ______int2) -> ______int3  (logical or)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_biset)
	ldW	rt1, 0(rusp)
	ldWu	rt0, _WOFFS(rusp)
	or	rt0, rt0, rt1
	stW	rt0, 0(rusp)
	blr


	;;; _biclear(______int1, ______int2) -> ______int3  (logical and not)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_biclear)
	ldW	rt1, 0(rusp)
	ldWu	rt0, _WOFFS(rusp)
	andc	rt0, rt0, rt1
	stW	rt0, 0(rusp)
	blr


	;;; _bimask(______int1, ______int2) -> ______int3  (logical and)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_bimask)
	ldW	rt1, 0(rusp)
	ldWu	rt0, _WOFFS(rusp)
	and	rt0, rt0, rt1
	stW	rt0, 0(rusp)
	blr


	;;; _bixor(______int1, ______int2) -> ______int3  (logical exclusive or)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_bixor)
	ldW	rt1, 0(rusp)
	ldWu	rt0, _WOFFS(rusp)
	xor	rt0, rt0, rt1
	stW	rt0, 0(rusp)
	blr


;;; --- MULTIPLY -----------------------------------------------------------

	;;; _pmult(____int1, ____int2) -> _______product
	;;; pop integer multiply
ASM_ALIGN_QUAD
DEF_C_LAB 4 (_pmult)
	ldW	rt7, 0(rusp)		;;; ____int2 (rt7 = R4)
	ldWu	rchain, _WOFFS(rusp)	;;; ____int1 (rchain = R3)
	srawi	rt7, rt7, _:WORD_SHIFT	;;; make ____int2 sysint
	rlwinm	rchain, rchain, 0, ~3	;;; clear tag bits on ____int1
	mflr	rt0			;;; save return
	bla	.__mull			;;; do the multiply
	mtlr	rt0			;;; restore return
	ori	rt7, rt7, 3		;;; set tag bits on result
	stW	rt7, 0(rusp)		;;; return it
	blr

	;;; _pmult_testovf(____int1, ____int2) -> (_______product, ____bool)
	;;; ____bool true if no overflow, false if overflow
ASM_ALIGN_QUAD
DEF_C_LAB (_pmult_testovf)
	ldW	rt7, 0(rusp)		;;; ____int2 (rt7 = R4)
	ldW	rchain, _WOFFS(rusp)	;;; ____int1 (rchain = R3)
	srawi	rt7, rt7, _:WORD_SHIFT	;;; make ____int2 sysint
	rlwinm	rchain, rchain, 0, ~3	;;; clear tag bits on ____int1
	mflr	rt0			;;; save return
	bla	.__mull			;;; do the multiply
	mtlr	rt0			;;; restore return
	ori	rt7, rt7, 3		;;; set tag bits on result ...
	stW	rt7, _WOFFS(rusp)	;;; ... and store it in any case

#_IF WORD_BITS==DOUBLE_BITS
#_ELSE
	;;; test product in 32-bit range
	srawi	rt0, rt7, 31		;;; sign extension of (lo) result part
	la	rt1, _TRUEOFFS(rfalse)	;;; get true for ____bool result
	cmplw	CR0, rchain, rt0	;;; sign extension = hi result part?
	stW	rt1, 0(rusp)		;;; set true return for ____bool
	beqlr+				;;; return if so
	stW	rfalse, 0(rusp)		;;; else return false
	blr
#_ENDIF


;;; --- DIVIDE -------------------------------------------------------------

	;;; _div(__________dividend, _________divisor) -> (___________remainder, __________quotient)
	;;; integer divide

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_div)
	ldW	rt7, 0(rusp)		;;; _________divisor (rt7 = R4)
	ldW	rchain, _WOFFS(rusp)	;;; __________dividend (rchain = R3)
	mflr	rt0			;;; save return
	bla	.__divss		;;; do the divide
	mtlr	rt0			;;; restore return
	stW	rt7, _WOFFS(rusp)	;;; ___________remainder
	stW	rchain, 0(rusp)		;;; __________quotient
	blr


	;;; _divq(__________dividend, _________divisor) -> __________quotient
	;;; integer divide, quotient only

ASM_ALIGN_QUAD
DEF_C_LAB 2 (_divq)
	ldW	rt7, 0(rusp)		;;; _________divisor (rt7 = R4)
	ldWu	rchain, _WOFFS(rusp)	;;; __________dividend (rchain = R3)
	mflr	rt0			;;; save return
	bla	.__quoss		;;; do the divide
	mtlr	rt0			;;; restore return
	stW	rchain, 0(rusp)		;;; __________quotient
	blr


	;;; _pdiv(________dividend, _______divisor) -> (_________remainder, ________quotient)
	;;; pop integer divide

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_pdiv)
	ldW	rt7, 0(rusp)		;;; _________divisor (rt7 = R4)
	ldW	rchain, _WOFFS(rusp)	;;; __________dividend (rchain = R3)
	srawi	rt7, rt7, _:WORD_SHIFT		;;; _______divisor -> sysint
	srawi	rchain, rchain, _:WORD_SHIFT	;;; ________dividend -> sysint
	mflr	rt0			;;; save return
	bla	.__divss		;;; do the divide
	mtlr	rt0			;;; restore return
	slwi	rt7, rt7, _:WORD_SHIFT
	ori	rt7, rt7, 3		;;; ___________remainder -> popint
	slwi	rchain, rchain, _:WORD_SHIFT
	ori	rchain, rchain, 3	;;; __________quotient -> popint
	stW	rt7, _WOFFS(rusp)	;;; ___________remainder
	stW	rchain, 0(rusp)		;;; __________quotient
	blr


;;; ------------------------------------------------------------------------

	;;; _pint_testovf(_____int) -> (____pint, true)  if okay
	;;;			-> false         if overflow
	;;; convert int to popint, with overflow test

ASM_ALIGN_QUAD
DEF_C_LAB (_pint_testovf)
	ldW	rt0, 0(rusp)		;;; _____int
	la	rt3, _TRUEOFFS(rfalse)	;;; anticipate true needed
	slwi	rt1, rt0, _:WORD_SHIFT
	ori	rt1, rt1, 3		;;; get ____pint result in rt1
	srawi	rt2, rt1, _:WORD_SHIFT	;;; reverse shift it into rt2
	cmplW	CR0, rt2, rt0		;;; overflow if not same as _____int
	bne-	Lb1			;;; branch if so
	stW	rt1, 0(rusp)		;;; return ____pint
	stWu	rt3, -_WOFFS(rusp)	;;; and true
	blr
Lb1:	stW	rfalse, 0(rusp)		;;; false if overflow
	blr


;;; ------------------------------------------------------------------------

	;;; _pshift_testovf(_____pint1, _______nbits) -> (_____pint2, true)  if okay
	;;;				   -> false          if overflow
	;;; shift popint left, with overflow test

ASM_ALIGN_QUAD
DEF_C_LAB (_pshift_testovf)
	ldW	rt0, _WOFFS(rusp)	;;; _____pint1
	ldW	rt1, 0(rusp)		;;; _______nbits
	la	rt4, _TRUEOFFS(rfalse)	;;; anticipate true needed
	rlwinm.	rt0, rt0, 0, ~3		;;; clear tag bits on _____pint1
	bz-	Lc1			;;; can't overflow if _____pint1 was 0
	cmplWi	CR0, rt1, 30		;;; shift >= 30 bits ?
	bge-	Lc2			;;; overflow if so
	slw	rt2, rt0, rt1		;;; else do the shift into rt2
#_IF WORD_BITS==DOUBLE_BITS
#_ENDIF
	sraw	rt3, rt2, rt1		;;; then reverse the shift
	cmplW	CR0, rt3, rt0		;;; overflow if not same as original
	bne-	Lc2			;;; branch if so
	ori	rt2, rt2, 3		;;; set tag bits on result
	stW	rt2, _WOFFS(rusp)	;;; return _____pint2
Lc1:	stW	rt4, 0(rusp)		;;; and true
	blr
	;;; overflow -- return false only
Lc2:	stWu	rfalse, _WOFFS(rusp)
	blr


;;; --- BIGINTEGER ARITHMETIC --------------------------------------------

	;;; _emul(______________multiplicand, ____________multiplier) -> (____lo, ____hi)
	;;; multiply two slices to get double slice result

ASM_ALIGN_QUAD
DEF_C_LAB (_emul)
DEF_C_LAB (_posword_emul)
	ldW	rt7, 0(rusp)		;;; ____________multiplier (rt7 = R4)
	ldW	rchain, _WOFFS(rusp)	;;; ______________multiplicand (rchain = R3)
	mflr	rt0			;;; save return
	bla	.__mull			;;; do the multiply into rchain, rt7
	mtlr	rt0			;;; restore return
	addc	R0, rt7, rt7		;;; get hi bit of lo part as carry
	adde	rchain, rchain, rchain	;;; add as lo bit in (hi part) << 1
	rlwinm	rt7, rt7, 0, 1, 31	;;; clear hi bit in lo part
	stW	rchain, 0(rusp)		;;; return hi part
	stW	rt7, _WOFFS(rusp)	;;; return lo part
	blr


	;;; _ediv(____hi, ____lo, _________divisor) -> (___________remainder, __________quotient)
	;;; divide double slice dividend by single

ASM_ALIGN_QUAD
DEF_C_LAB (_ediv)
	ldW	rt6, 0(rusp)		;;; _________divisor
	ldW	rt7, _WOFFS*2(rusp)	;;; ____hi
	ldWu	rchain, _WOFFS(rusp)	;;; ____lo
	mflr	rt0			;;; save return
	bl	div2by1			;;; uses rt4 - rt7, rchain
	mtlr	rt0			;;; save return
	stW	rt7, _WOFFS(rusp)	;;; return ___________remainder
	stW	rchain, 0(rusp)		;;; return __________quotient
	blr


	;;; _bgi_mult(_____val, _______saddr, ______slim, _______daddr) -> (_______carry, __________nextdest)
	;;; multiply a biginteger by a signed value into a destination bigint

ASM_ALIGN_QUAD
DEF_C_LAB (_bgi_mult)
	ldW	rt0, _WOFFS*3(rusp)	;;; multiplier _____val
	ldW	rt2, _WOFFS(rusp)	;;; source lim ______slim
	ldW	rt3, 0(rusp)		;;; destination addr _______daddr
	ldWu	rt1, _WOFFS*2(rusp)	;;; source addr _______saddr
	mflr	rt6			;;; save return
	li	rt4, 0			;;; zero carry slice

Le1:	lwz	rchain, 0(rt1)		;;; next source slice in rchain (= R3)
	la	rt1, 4(rt1)		;;; step on source
	mr	rt7, rt0		;;; with multiplier in rt7 (= R4)
	bla	.__mull			;;; mult into rchain, rt7
	;;; add in last carry slice
	srawi	rt5, rt4, 31		;;; get carry slice sign (0 or -1)
	addc	rt7, rt7, rt4		;;; add carry slice to product lo
	addze	rchain, rchain		;;; add carry bit to product hi
	add	rchain, rchain, rt5	;;; add carry slice sign to hi
	;;; move bit 31 of lo up to bit 0 of hi
	addc	R0, rt7, rt7		;;; get hi bit of lo part as carry
	rlwinm	rt7, rt7, 0, 1, 31	;;; clear hi bit in lo part
	adde	rt4, rchain, rchain	;;; add as lo bit in (hi part) << 1,

	stw	rt7, 0(rt3)		;;; store lo at next destination
	la	rt3, 4(rt3)		;;; step on destination
	cmplW	CR0, rt1, rt2		;;; compare source addr with lim
	blt+	Le1			;;; next source slice if more

	mtlr	rt6			;;; restore return
	stw	rt4, _WOFFS(rusp)	;;; return next carry slice
	stw	rt3, 0(rusp)		;;; and next destination
	blr


	;;; _bgi_mult_add(_____val, _______saddr, ______slim, ________sdaddr) -> (_______carry, __________nextdest)
	;;; multiply a biginteger by a signed value
	;;; and add into a destination bigint

ASM_ALIGN_QUAD
DEF_C_LAB (_bgi_mult_add)
	ldW	rt0, _WOFFS*3(rusp)	;;; multiplier _____val
	ldW	rt2, _WOFFS(rusp)	;;; source lim ______slim
	ldW	rt3, 0(rusp)		;;; destination addr _______daddr
	ldWu	rt1, _WOFFS*2(rusp)	;;; source addr _______saddr
	mflr	rt6			;;; save return
	li	rt4, 0			;;; zero carry slice

Lf1:	lwz	rchain, 0(rt1)		;;; next source slice in rchain (= R3)
	la	rt1, 4(rt1)		;;; step on source
	mr	rt7, rt0		;;; with multiplier in rt7 (= R4)
	bla	.__mull			;;; mult into rchain, rt7
	;;; add in last carry slice
	srawi	rt5, rt4, 31		;;; get carry slice sign (0 or -1)
	addc	rt7, rt7, rt4		;;; add carry slice to product lo
	lwz	rt4, 0(rt3)		;;; load next destination slice
	addze	rchain, rchain		;;; add carry bit to product hi
	add	rchain, rchain, rt5	;;; add carry slice sign to hi
	;;; add in destination slice
	srawi	rt5, rt4, 31		;;; get dst slice sign (0 or -1)
	addc	rt7, rt7, rt4		;;; add dst slice to product lo
	addze	rchain, rchain		;;; add carry bit to product hi
	add	rchain, rchain, rt5	;;; add dst slice sign to hi
	;;; move bit 31 of lo up to bit 0 of hi
	addc	R0, rt7, rt7		;;; get hi bit of lo part as carry
	rlwinm	rt7, rt7, 0, 1, 31	;;; clear hi bit in lo part
	adde	rt4, rchain, rchain	;;; add as lo bit in (hi part) << 1,

	stw	rt7, 0(rt3)		;;; store lo at next destination
	la	rt3, 4(rt3)		;;; step on destination
	cmplW	CR0, rt1, rt2		;;; compare source addr with lim
	blt+	Lf1			;;; next source slice if more

	mtlr	rt6			;;; restore return
	stw	rt4, _WOFFS(rusp)	;;; return next carry slice
	stw	rt3, 0(rusp)		;;; and next destination
	blr


	;;; _bgi_div(_________divisor, _______saddr, ______slim, ______dlim) -> ___________remainder
	;;; divide a biginteger by a signed value into a destination bigint

ASM_ALIGN_QUAD
DEF_C_LAB (_bgi_div)
	mflr	rt4			;;; save return ...
	mtctr	rt4			;;; ... in count reg
	ldW	rt1, _WOFFS*2(rusp)	;;; source addr _______saddr
	ldW	rt2, _WOFFS(rusp)	;;; source lim ______slim
	ldW	rt3, 0(rusp)		;;; destination lim ______dlim
	ldWu	rt0, _WOFFS*3(rusp)	;;; _________divisor

	;;; do first slice with __divss
	lwzu	rchain, -4(rt2)		;;; most sig slice (signed) into rchain
	mr	rt7, rt0		;;; divisor into rt7 (= R4)
	bla	.__divss		;;; quot in rchain, rem in rt7
	b	Lg2

	;;; loop for rest done with div2by1
Lg1:	lwzu	rchain, -4(rt2)		;;; next (+ve) slice into rchain is lo
	mr	rt6, rt0		;;; divisor into rt6
	bl	div2by1			;;; rem in rt7, quot in rchain
Lg2:	stwu	rchain, -4(rt3)		;;; store quot at next dest (can be -ve)
	cmplW	CR0, rt2, rt1		;;; reached source start?
	bgt+	Lg1			;;; loop if not

	stW	rt7, 0(rusp)		;;; return remainder
	bctr				;;; return


;;; --- COMPUTE ARRAY SUBSCRIPTS -----------------------------------------

	;;; _array_sub()
	;;; compute an array total subscript -- called inside an array
	;;; procedure

ASM_ALIGN_QUAD
DEF_C_LAB (_array_sub)
	la	rt5, _PD_ARRAY_TABLE(rpb) ;;; start of params
	ldW	rt4, 0(rt5)	   ;;; init total subscript to base subscript
	ldWu	rt3, _WOFFS(rt5)   ;;; length in first dimension
	mflr	rt6		   ;;; save return
	b	Lh3

Lh1:	ldW	rchain, 0(rusp)	   ;;; next dimension subscript from stack
	ldWu	rt1, _WOFFS(rt5)   ;;; dimension lower bound
	andi.	R0, rchain, 2	   ;;; subscript is popint?
	subfc	rchain, rt1, rchain ;;; subtract lower bound from subscript
	bz	Lh4		   ;;; error if subscript not popint
	cmplW	CR0, rchain, rt3   ;;; compare with length
	ldWu	rt7, _WOFFS(rt5)   ;;; dimension scaling factor
	bge	Lh4	   	   ;;; error if subscript >= length unsigned
	mr.	R0, rt7		   ;;; test scale factor
	la	rusp, _WOFFS(rusp) ;;; inc stack
	bz	Lh2		   ;;; no multiply needed if scale factor 0
	bla	.__mull		   ;;; mult rchain by rt7 into rt7
	mr	rchain, rt7
Lh2:	ldWu	rt3, _WOFFS(rt5)   ;;; length in next dimension
	add	rt4, rt4, rchain   ;;; add scaled subscript to total
Lh3:    mr.	R0, rt3		   ;;; test length
	bnz+	Lh1		   ;;; loop if next length nonzero

	;;; finished -- stack total subscript and arrayvector, and then
	;;; chain subscripting procedure
	ldW	rt0, _PD_ARRAY_VECTOR(rpb)
	stWu	rt4, -_WOFFS(rusp)	;;; stack total subscript
	ldW	rpb, _PD_ARRAY_SUBSCR_PDR(rpb)
	stWu	rt0, -_WOFFS(rusp)	;;; stack arrayvector
	ldW	rt0, 0(rpb)		;;; subscr procedure exec addr
	mtlr	rt6			;;; restore return
	mtctr	rt0
	bctr				;;; chain subscr procedure

	;;; subscript error (bad subscript still tos) -- call error procedure
Lh4:	ldW	rpb, _SVB_OFFS(Sys$-Array$-Sub_error)(rsvb)
	mtlr	rt6			;;; restore return
	b	XC_LAB(Sys$-Array$-Sub_error)	;;; chain Sub_error


;;; --- BIGINT DOUBLE-SLICE BY SLICE DIVIDE -------------------------------
;;; (where quotient is 31 bits or less)
;;; dividend hi slice in rt7, lo slice in rchain, divisor in rt6
;;; uses rt4 - rt7, rchain
;;; Returns remainder in rt7, quotient in rchain

div2by1:
	;;; get dividend +ve (save dividend sign in CR1)
	cmpwi	CR1, rt7, 0		;;; dividend -ve?
	bge	CR1, Lj1		;;; branch if dividend +ve
	;;; dividend -ve
	neg.	rchain, rchain		;;; negate dividend lo
	neg	rt7, rt7		;;; negate dividend hi
	bz	Lj1			;;; branch if lo is 0
	addi	rt7, rt7, -1		;;; else propagate -1 carry from lo

	;;; get divisor +ve (save divisor sign in CR2)
Lj1:	cmpwi	CR2, rt6, 0		;;; divisor -ve?
	bge	CR2, Lj2		;;; branch if divisor +ve
	neg.	rt6, rt6		;;; negate divisor
	bge	Lj2			;;; OK if divisor now +ve
	;;; largest -ve divisor (-2 ** 31 = bit 31)
	mr	rt5, rt7		;;; save dividend hi
	andc	rt7, rchain, rt6	;;; remainder is dividend lo clr bit 31
	mr	rchain, rt5		;;; quotient is dividend hi
	b	Lj3

Lj2:	add	rt5, rchain, rchain	;;; shift first sig lo bit (30) to top
	li	rchain, 0		;;; init 0 quotient in rchain

#_<
lvars n;
for n from 30 by -1 to 0 do
[
\t  addc   \t  rt5, rt5, rt5	\n	;;; shift next lo bit into carry
\t  adde   \t  rt7, rt7, rt7	\n	;;; shift rem hi left one and add carry
\t  cmplw  \t  CR0, rt7, rt6	\n	;;; rem >= divisor?
\t  blt+    \t  '$+12'		\n	;;; branch if rem < divisor
\t  subfc  \t  rt7, rt6, rt7	\n	;;; else reduce rem
%if n >= 16 then
    [\t  oris \t rchain, rchain, %'1<'><(n-16)% \n]	;;; and set quotient bit
else
    [\t  ori \t rchain, rchain, %'1<'><n% \n]	;;; and set quotient bit
endif.dl%
].dl
endfor
>_#

	;;; remainder in rt7, quotient in rchain -- sort out signs
Lj3:	blt-	CR1, Lj4		;;; branch if dividend was -ve
	bgelr+	CR2			;;; return if divisor was also +ve
	neg	rchain, rchain		;;; else negate quotient
	blr
Lj4:	neg	rt7, rt7		;;; negate remainder
	bltlr	CR2			;;; return if divisor was also -ve
	neg	rchain, rchain		;;; else negate quotient
	blr


ASM_END_FILE
