/* --- Copyright University of Sussex 1995. All rights reserved. ----------
 * File:	C.hppa/src/aarith.s
 * Purpose:	Arithmetic routines for HP PA-RISC 1.1
 * Author:	Julian Clinton, December 1992 (see revisions)
 */


#_<

#_INCLUDE 'declare.ph'

constant
	procedure Sys$-Array$-Sub_error
	;

lconstant macro (
	_PD_EXECUTE  		= @@PD_EXECUTE,
	_PD_ARRAY_TABLE		= @@PD_ARRAY_TABLE,
	_PD_ARRAY_VECTOR	= @@PD_ARRAY_VECTOR,
	_PD_ARRAY_SUBSCR_PDR	= @@PD_ARRAY_SUBSCR_PDR,
);

>_#

#_INCLUDE 'asm_macros.h'

/************************* wrapping structures ************************/

	.code
	.word	Lcode_end-Lcode_start, C_LAB(Sys$-objmod_pad_key)
Lcode_start
	.data
	.word	Ldata_end-Ldata_start, C_LAB(Sys$-objmod_pad_key)
Ldata_start


/**********************************************************************/


	.code


;;; === LOGICAL BIT ROUTINES ==========================================

;;; _BISET
;;; _BICLEAR
;;; _BIMASK
;;; _BIXOR
;;; 	Bit operations on machine integers

;;; Call:
;;;	_I _bitop _J -> _K

DEF_C_LAB 4 (_biset)

	ldwm		4(%usp), %arg1		;;; pop _J
	ldwm		4(%usp), %arg0		;;; pop _I
	or		%arg0, %arg1, %arg0	;;; I_ or _J -> %arg0
	RETE					;;; non-local return
	stwm		%arg0, -4(%usp)

DEF_C_LAB 4 (_biclear)

	ldwm		4(%usp), %arg1		;;; pop _J
	ldwm		4(%usp), %arg0		;;; pop _I
	andcm		%arg0, %arg1, %arg0	;;; _I and ~_J -> %arg0
	RETE					;;; non-local return
	stwm		%arg0, -4(%usp)

DEF_C_LAB 4 (_bimask)

	ldwm		4(%usp), %arg1		;;; pop _J
	ldwm		4(%usp), %arg0		;;; pop _I
	and		%arg0, %arg1, %arg0	;;; _I and _J -> %arg0
	RETE					;;; non-local return
	stwm		%arg0, -4(%usp)

DEF_C_LAB 4 (_bixor)

	ldwm		4(%usp), %arg1		;;; pop _J
	ldwm		4(%usp), %arg0		;;; pop _I
	xor		%arg0, %arg1, %arg0	;;; _I XOR _J -> %arg0
	RETE					;;; non-local return
	stwm		%arg0, -4(%usp)


;;; === MACHINE INTEGER ARITHMETIC ====================================

;;; _MULT
;;;     Signed 32-bit multiplication. NB: this uses the XMPYU
;;;     instruction which does *unsigned* multiplication, but that's OK
;;;     so long as it's just the bottom 32 bits of the result we're
;;;     interested in.

;;; Call:
;;;	_I _mult _J -> _K

DEF_C_LAB 4 (_mult)

	fldws,ma	4(%usp), %fr4L
	fldws,ma	4(%usp), %fr4R
	xmpyu		%fr4L, %fr4R, %fr5
	RETE
	fstws,mb	%fr5R, -4(%usp)


;;; DIV64:
;;;     64- by 32-bit signed division. Based on the algorithm given in
;;;     the Assembly Language Reference Manual, Chapter 5.

;;; Arguments:
;;;	%arg0	high word of dividend
;;;	%arg1	low word of dividend
;;;	%arg2	divisor

;;; Results:
;;;	%ret0	quotient
;;;	%ret1	remainder

;;; Register usage:
;;;	%r1	temporary
;;;	%rp	return link

div64
	;;; copy dividend (%arg1,%arg0) to (%ret1,%ret0), checking for < 0
	movb,>=		%arg1, %ret1, L$61
	copy		%arg0, %ret0

	;;; dividend is negative: make it absolute
	sub		0, %ret0, %ret0
	subb		0, %ret1, %ret1

L$61
	;;; clear carry and set the V-bit to the complement of the
	;;; divisor sign (check for zero divisor omitted)
	sub		0, %arg2, %r1
	ds		0, %r1, 0

	;;; 32-step non-restoring division
	add		%ret0, %ret0, %ret0	;;; shft MSB of quotient into carry
	ds		%ret1, %arg2, %ret1	;;; 1st divide step (check for
						;;; overflow omitted)
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 2nd divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 3rd divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 4th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 5th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 6th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 7th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 8th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 9th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 10th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 11th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 12th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 13th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 14th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 15th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 16th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 17th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 18th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 19th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 20th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 21st divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 22nd divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 23rd divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 24th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 25th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 26th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 27th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 28th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 29th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 30th divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 31st divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry
	ds		%ret1, %arg2, %ret1	;;; 32nd divide step
	addc		%ret0, %ret0, %ret0	;;; shift quotient with/into carry

	;;; test sign of remainder
	comb,>=,n	%ret1, 0, L$62

	;;; negative remainder: correct by adding in the absolute value
	;;; of the divisor
	add,<		%arg2, 0, 0
	add,tr		%ret1, %arg2, %ret1
	addl		%ret1, %r1, %ret1

L$62	;;; remainder now positive: set its sign to that of the
	;;; dividend and correct sign of quotient based on operand signs
	add,>=		%arg1, 0, 0
	sub		0, %ret1, %ret1
	xor,>=		%arg1, %arg2, 0
	sub		0, %ret0, %ret0

	;;; local return
	bv		(%rp)
	nop


;;; _DIV
;;;	Signed division.

;;; Call:
;;;	_I _div _J -> _QUOT -> _REM

DEF_C_LAB 4 (_div)

	ldwm		4(%usp), %arg2		;;; pop divisor
	ldwm		4(%usp), %arg0		;;; pop dividend
	bl		div64, %rp
	extrs		%arg0, 0, 1, %arg1	;;; sign-extend to 64 bits

	stwm		%ret1, -4(%usp)		;;; push remainder
	RETE
	stwm		%ret0, -4(%usp)		;;; push quotient


;;; _DIVQ
;;;	Signed division, quotient only

;;; Call:
;;;	_I _divq _J -> _QUOT

DEF_C_LAB 2 (_divq)

	ldwm		4(%usp), %arg2		;;; pop divisor
	ldwm		4(%usp), %arg0		;;; pop dividend
	bl		div64, %rp
	extrs		%arg0, 0, 1, %arg1	;;; sign-extend to 64 bits

	RETE
	stwm		%ret0, -4(%usp)		;;; push quotient


;;; _SHIFT
;;; 	Shift a machine integer _I by a signed quantity _J

;;; Call:
;;;	_shift(_I, _J) -> _K
;;;
;;; Register usage:
;;; 	%arg0	_I
;;; 	%arg1	_J

DEF_C_LAB (_shift)

	ldwm		4(%usp), %arg1		;;; _J
	ldwm		4(%usp), %arg0		;;; _I

	;;; Test the sign of the shift. If sign bit (bit0) is 1 then negative
	bb,<,n		%arg1, 0, L$4

	;;; Otherwise sign is positive: shift left and return
	subi		31, %arg1, %arg1	;;; 31 - _J -> _J
	mtsar		%arg1			;;; _J -> sar
	zvdep		%arg0, 32, %arg1
	RETE                           		;;; return
	stwm		%arg1, -4(%usp)		;;; push result (in delay slot)

L$4	;;; Sign is negative: shift right by the absolute amount and return

	;;; %arg1 is negative so make it positive by subtracting
	;;; it from 0. Then subtract from 31 to ensure correct bit
	;;; numbering (bit 31 is rightmost) and shift the integer right
	;;; Note: 31 - (0 - arg1) == 31 + arg1
	addi		31, %arg1, %arg1
	mtsar		%arg1
	vextrs		%arg0, 32, %arg1
	RETE                           		;;; return
	stwm		%arg1, -4(%usp)		;;; push result (in delay slot)


;;; === POP INTEGER ARITHMETIC ========================================

;;; _PMULT
;;;	POP integer multiply, with no overflow check (implements fi_*).
;;;	NB: see comments for _MULT above.

;;; Call:
;;;	I _pmult J -> K

;;; Register usage:
;;;	%arg0	I
;;;	%arg1	J
;;;	%fr4L/R	args _J and _I
;;;	%fr5	sysint result _K
;;;	%ret0	popint result K

DEF_C_LAB 4 (_pmult)

	ldwm		4(%usp), %arg1		;;; pop J
	ldwm		4(%usp), %arg0		;;; pop I

	;;; Clear popint bits from I and convert J to a system integer
	sub		%arg0, %pzero, %arg0
	extrs		%arg1, 29, 30, %arg1

	;;; multiply, and copy low word of result to %ret0
	stwm		%arg1, -4(%usp)		;;; push _J
	stwm		%arg0, -4(%usp)		;;; push _I
	fldws,ma	4(%usp), %fr4L		;;; pop _I
	fldws,ma	4(%usp), %fr4R		;;; pop _J
	xmpyu		%fr4L, %fr4R, %fr5
	fstws,mb	%fr5R, -4(%usp)		;;; push result

	;;; return result with popint bits restored
	ldwm		4(%usp), %ret0
	add		%ret0, %pzero, %ret0
	RETE
	stwm		%ret0, -4(%usp)


;;; _PMULT_TESTOVF
;;;	Multiply two simple pop integers with a test for overflow.
;;;	Returns a result, plus <true> if OK or <false> if overflow.

;;; Call:
;;;	_pmult_testovf(I, J) -> BOOL -> K

;;; Register usage:
;;; 	%arg0	I
;;; 	%arg1	J
;;; 	%ret0	(a) abs(_I)
;;;		(b) low word of result (K)
;;;	%ret1	(a) abs(_J)
;;;		(b) high word of result
;;; 	%t1	temporary for overflow check
;;;	%fr4/5	temporaries for multiplication

DEF_C_LAB (_pmult_testovf)

	ldwm		4(%usp), %arg1		;;; pop J
	ldw		0(%usp), %arg0		;;; load I (not popped)

	;;; convert J to a system integer and get its absolute value in
	;;; %ret1
	extrs,>=	%arg1, 29, 30, %ret1
	sub		0, %ret1, %ret1

	;;; clear the popint bits from I and get its absolute value in
	;;; %ret0
	sub,>=		%arg0, %pzero, %ret0
	sub		0, %ret0, %ret0

	;;; multiply the absolute values, returning 64-bit (positive)
	;;; result in (%ret1,%ret0)
	stwm		%ret0, -4(%usp)
	stwm		%ret1, -4(%usp)
	fldws,ma	4(%usp), %fr4L
	fldws,ma	4(%usp), %fr4R
	xmpyu		%fr4L, %fr4R, %fr5
	fstws,mb	%fr5R, -4(%usp)
	fstws,mb	%fr5L, -4(%usp)
	ldwm		4(%usp), %ret1
	ldwm		4(%usp), %ret0

	;;; if top 32 bits (%ret1) aren't zero, the multiply must have
	;;; overflowed
	comb,<>,n	%ret1, 0, L$7

	;;; if I & J have different signs, negate the result
	xor,>=		%arg0, %arg1, %t1
	sub		0, %ret0, %ret0

	;;; now check the sign agrees
	xor,>=		%ret0, %t1, 0
	b,n		L$7

	;;; add popint bits back into the result and push it
	add		%ret0, %pzero, %ret0
	stw		%ret0, 0(%usp)

	;;; no overflow -- return <true>
	LDA32		C_LAB(true), %t1
	RETE
	stwm		%t1, -4(%usp)

L$7	;;; overflow -- return <false>
	RETE
	stwm		%false, -4(%usp)


;;; _PDIV:
;;;	Divide two simple pop integers with no checking (implements fi_//)

;;; Call:
;;;	I _pdiv J -> QUOT -> REM

DEF_C_LAB 4 (_pdiv)

	ldwm		4(%usp), %arg2		;;; pop J
	ldwm		4(%usp), %arg0		;;; pop I

	;;; Convert operands to system integers and divide using div64
	extrs		%arg0, 29, 30, %arg0
	extrs		%arg0, 0, 1, %arg1	;;; sign-extend to 64 bits
	bl		div64, %rp
	extrs		%arg2, 29, 30, %arg2	;;; branch delay slot

	;;; Convert quotient and remainder back to popints and return
	;;; (with quotient on top)
	sh2add		%ret1, %pzero, %ret1
	sh2add		%ret0, %pzero, %ret0
	stwm		%ret1, -4(%usp)		;;; push remainder
	RETE
	stwm		%ret0, -4(%usp)		;;; push quotient


;;; _PINT_TESTOVF:
;;;	Convert machine integer to pop integer, checking for overflow.
;;;	Returns a popint plus <true> if OK, or just <false> if overflow.

;;; Call:
;;;	_pint_testovf(_I) -> <true> -> I
;;;	_pint_testovf(_I) -> <false>

;;; Register usage:
;;; 	%arg0	(a) argument _I
;;;		(b) result I
;;;	%t1	<true> if no overflow

DEF_C_LAB (_pint_testovf)

	ldwm		4(%usp), %arg0

	;;; convert to popint and branch on overflow
	sh2add,nsv	%arg0, %pzero, %arg0
	b,n		L$11

	;;; return popint and <true>
	stwm		%arg0, -4(%usp)
	LDA32   	C_LAB(true), %t1
	RETE
	stwm		%t1, -4(%usp)

L$11	;;; overflow -- return <false>
	RETE
	stwm		%false, -4(%usp)


;;; _PSHIFT_TESTOVF:
;;;	Left-shift a simple popint by a positive machine integer amount,
;;;	and test for overflow. Returns a popint result plus <true> if OK,
;;;	or just <false> for overflow.

;;; Call:
;;;	_pshift_testovf(I, _N) -> <true> -> J
;;;	_pshift_testovf(I, _N) -> <false>

;;; Register usage:
;;; 	%arg0	argument I
;;; 	%arg1	argument _N
;;; 	%ret0	result J
;;; 	%t1	(a) bit position for shift (copied to %sar)
;;;		(b) temporary for overflow check
;;;		(c) <true> if no overflow

DEF_C_LAB (_pshift_testovf)

	ldwm		4(%usp), %arg1		;;; _N
	ldw		(%usp), %arg0		;;; I (NB: left on stack)

	;;; clear popint bits from %arg0: if the result is zero, there's
	;;; no need to shift
	addib,=,n	-3, %arg0, L$12

	;;; compute bit position for the shift (31 - _N) in %t1: if it's
	;;; outside the range 1 .. 31 the shift will definitely overflow
	subi,>>		31, %arg1, %t1
	b,n		L$13

	;;; do the shift, result in %ret0
	mtsar		%t1
	zvdep		%arg0, 32, %ret0

	;;; check for overflow by shifting the result back right and
	;;; comparing with the original
	vextrs		%ret0, 32, %t1
	comb,<>,n	%t1, %arg0, L$13

	;;; reset popint bits in the result and return it with <true>
	add		%ret0, %pzero, %ret0
	stw		%ret0, (%usp)
L$12	LDA32		C_LAB(true), %t1
	RETE
	stwm		%t1, -4(%usp)

L$13	;;; overflow -- return <false>
	RETE
	stw		%false, (%usp)


;;; === BIGINTEGER ARITHMETIC =========================================

;;; _EMUL:
;;; _POSWORD_EMUL:
;;;	Multiply two biginteger slices to get a double length result.
;;;	A "slice" is a full word, but only the lower 31 bits are used;
;;;	the top bit is always 0.

;;; Call:
;;;	_emul(_MULTIPLICAND, _MULTIPLIER) -> _HIPART -> _LOPART

DEF_C_LAB (_emul)
DEF_C_LAB (_posword_emul)

	;;; convert arguments on stack to unsigned; original values left
	;;; in (%t1,%t2)
	ldw		0(%usp), %t2
	ldw		4(%usp), %t1
	movb,>=,n	%t2, %t4, L$21
	sub		0, %t2, %t4
	stw		%t4, 0(%usp)
L$21	movb,>=,n	%t1, %t3, L$22
	sub		0, %t1, %t3
	stw		%t3, 4(%usp)

L$22     ;;; multiply; result in (%ret1,%ret0)
	fldws,ma	4(%usp), %fr4L
	fldws,ma	4(%usp), %fr4R
	xmpyu		%fr4L, %fr4R, %fr5
	fstws,mb	%fr5L, -4(%usp)
	fstws,mb	%fr5R, -4(%usp)
	ldwm		4(%usp), %ret0
	ldwm		4(%usp), %ret1

	;;; adjust sign of result from signs of original arguments
	xor,<		%t1, %t2, 0
	b,n		L$23
	sub		0, %ret0, %ret0
	subb		0, %ret1, %ret1

L$23	;;; shift top bit of low part to bottom bit of high part
	extru		%ret0, 0, 1, %t1
	sh1add		%ret1, %t1, %ret1
	dep		0, 0, 1, %ret0

	;;; return
	stwm		%ret0, -4(%usp)
	RETE
	stwm		%ret1, -4(%usp)


;;; _EDIV:
;;;	Divide a two-slice dividend by a single divisor.

;;; Call:
;;;	_ediv(_HIPART, _LOPART, _DIVISOR) -> _QUOT -> _REM

;;; Register usage:
;;;	%arg0	low word of dividend
;;;	%arg1	high word of dividend
;;;	%arg2	divisor
;;;	%ret0	quotient
;;;	%ret1	remainder

;;; Uses:
;;;	div64 (defined below)

DEF_C_LAB (_ediv)

	ldwm		4(%usp), %arg2		;;; _DIVISOR
	ldwm		4(%usp), %arg0		;;; _LOPART
	ldwm		4(%usp), %arg1		;;; _HIPART

	;;; shift bottom bit of _HIPART into top of _LOPART to give true
	;;; 64-bit value and divide
	dep		%arg1, 0, 1, %arg0
	bl		div64, %rp
	extrs		%arg1, 30, 31, %arg1

	stwm		%ret1, -4(%usp)		;;; push remainder
	RETE
	stwm		%ret0, -4(%usp)		;;; push quotient


;;; _BGI_MULT:
;;;	Multiply a biginteger by a signed machine integer and store the
;;;	result in a destination bigint. Return a pointer to the next free
;;;	slice in the destination and the last carry slice.

;;; Call:
;;; 	_bgi_mult(_MULTIPLIER, _SRCADDR, _SRCLIM, _DSTADDR) -> _NEXT_DSTADDR
;;;							    -> _CARRY

;;; Register usage:
;;;	%arg0	_DSTADDR
;;;	%arg1	_SRCLIM
;;;	%arg2	_SRCADDR
;;;	%arg3	_MULTIPLIER
;;;	%fr4R	absolute multiplier
;;;	%t1	carry slice
;;;	%t2	low word result of multiplication
;;;	%t3	high word result of multiplication
;;;	%t4	(a) current slice
;;;		(b) temporary
;;;	%fr4/5	temporaries for multiplication

DEF_C_LAB (_bgi_mult)

	ldwm		4(%usp), %arg0		;;; _DSTADDR
	ldwm		4(%usp), %arg1		;;; _SRCLIM
	ldwm		4(%usp), %arg2		;;; _SRCADDR

	ldw		(%usp), %arg3		;;; _MULTIPLIER (not popped)

	;;; initialise the carry slice (%t1) to zero
	copy		0, %t1

	;;; primitive multiplication is unsigned, so copy the absolute
	;;; value of the multiplier to the FPU (original value is
	;;; preserved in %arg3)
	comb,>=		%arg3, 0, L$31
	ldw		(%arg2), %t4		;;; load first slice in delay slot
	sub		0, %arg3, %t3
	stw		%t3, (%usp)
L$31	fldws,ma	4(%usp), %fr4L

L$32	;;; Repeat

	;;; load absolute value of next slice to FPU (already loaded to %t4)
	;;; and multiply
	;;; NB: only the last slice can be negative, so this is optimised for
	;;; the more usual case
	fldws,ma	4(%arg2), %fr4R
	comb,>=		%t4, 0, L$33
	xmpyu		%fr4L, %fr4R, %fr5
	sub		0, %t4, %t3
	stwm		%t3, -4(%usp)
	fldws,ma	4(%usp), %fr4R
	xmpyu		%fr4L, %fr4R, %fr5

L$33	;;; copy 64-bit result to (%t3,%t2)
	fstws,mb	%fr5L, -4(%usp)
	fstws,mb	%fr5R, -4(%usp)
	ldwm		4(%usp), %t2
	ldwm		4(%usp), %t3

	;;; add the result to the previous carry slice: the result will
	;;; always be positive because of the unsigned multiply, but if
	;;; the signs of the original arguments were different, then the
	;;; result should really be negative and we subtract it from
	;;; the carry instead
	xor,<		%arg3, %t4, 0
	b		L$35
	extrs		%t1, 0, 1, %t4		;;; get sign of carry slice

	;;; subtract from carry
	sub		%t1, %t2, %t2		;;; low half
	b		L$36
	subb		%t4, %t3, %t3		;;; high half

L$35	;;; add to carry
	add		%t1, %t2, %t2		;;; low half
	addc		%t4, %t3, %t3		;;; high half

L$36	;;; shift top bit of low half result (%t2) into the bottom bit of
	;;; the high half (%t3); this becomes the next carry slice
	extru		%t2, 0, 1, %t4
	sh1add		%t3, %t4, %t1
	dep		0, 0, 1, %t2

	;;; store low result at next destination slice
	stwm		%t2, 4(%arg0)

	;;; Until %arg1 = %arg2, load next slice to %t4
	comb,<>,n	%arg1, %arg2, L$32
	ldw		(%arg2), %t4

	;;; return carry slice and next destination pointer
	stwm		%t1, -4(%usp)
	RETE
	stwm		%arg0, -4(%usp)


;;; _BGI_MULT_ADD:
;;;	Multiply a biginteger by a signed machine integer and add it into
;;;	a destination bigint. Returns pointer to the next free slice in the
;;;	destination, and the last carry slice.

;;; Call:
;;; 	_bgi_mult_add(_MULTIPLIER, _SRCADDR, _SRCLIM, _DSTADDR)
;;;						-> _NEXT_DSTADDR -> _CARRY

;;; Register usage:
;;;	%arg0	_DSTADDR
;;;	%arg1	_SRCLIM
;;;	%arg2	_SRCADDR
;;;	%arg3	_MULTIPLIER
;;;	%fr4R	absolute multiplier
;;;	%t1	carry slice
;;;	%t2	low word result of multiplication
;;;	%t3	high word result of multiplication
;;;	%t4	(a) current slice
;;;		(b) temporary
;;;	%fr4/5	temporaries for multiplication

DEF_C_LAB (_bgi_mult_add)

	ldwm		4(%usp), %arg0		;;; _DSTADDR
	ldwm		4(%usp), %arg1		;;; _SRCLIM
	ldwm		4(%usp), %arg2		;;; _SRCADDR

	ldw		(%usp), %arg3		;;; _MULTIPLIER (not popped)

	;;; initialise the carry slice (%t1) to zero
	copy		0, %t1

	;;; primitive multiplication is unsigned, so copy the absolute
	;;; value of the multiplier to the FPU (original value is
	;;; preserved in %arg3)
	comb,>=		%arg3, 0, L$41
	ldw		(%arg2), %t4		;;; load first slice in delay slot
	sub		0, %arg3, %t3
	stw		%t3, (%usp)
L$41	fldws,ma	4(%usp), %fr4L

L$42	;;; Repeat

	;;; load absolute value of next slice to FPU (already loaded to %t4)
	;;; and multiply
	;;; NB: only the last slice can be negative, so this is optimised for
	;;; the more usual case
	fldws,ma	4(%arg2), %fr4R
	comb,>=		%t4, 0, L$43
	xmpyu		%fr4L, %fr4R, %fr5
	sub		0, %t4, %t3
	stwm		%t3, -4(%usp)
	fldws,ma	4(%usp), %fr4R
	xmpyu		%fr4L, %fr4R, %fr5

L$43	;;; copy 64-bit result to (%t3,%t2)
	fstws,mb	%fr5L, -4(%usp)
	fstws,mb	%fr5R, -4(%usp)
	ldwm		4(%usp), %t2
	ldwm		4(%usp), %t3

	;;; add the result to the previous carry slice: the result will
	;;; always be positive because of the unsigned multiply, but if
	;;; the signs of the original arguments were different, then the
	;;; result should really be negative and we subtract it from
	;;; the carry instead
	xor,<		%arg3, %t4, 0
	b		L$45
	extrs		%t1, 0, 1, %t4		;;; get sign of carry slice

	;;; subtract from carry
	sub		%t1, %t2, %t2		;;; low half
	b		L$46
	subb		%t4, %t3, %t3		;;; high half

L$45	;;; add to carry
	add		%t1, %t2, %t2		;;; low half
	addc		%t4, %t3, %t3		;;; high half

L$46	;;; add in next destination slice
	ldw		(%arg0), %t1
	add		%t2, %t1, %t2		;;; add to low half
	extrs		%t1, 0, 1, %t4		;;; sign-extend
	addc		%t3, %t4, %t3		;;; add with carry to high half

	;;; shift top bit of low half result (%t2) into the bottom bit of
	;;; the high half (%t3); this becomes the next carry slice
	extru		%t2, 0, 1, %t4
	sh1add		%t3, %t4, %t1
	dep		0, 0, 1, %t2

	;;; store low result at next destination slice
	stwm		%t2, 4(%arg0)

	;;; Until %arg1 = %arg2, load next slice to %t4
	comb,<>,n	%arg1, %arg2, L$42
	ldw		(%arg2), %t4

	;;; return carry slice and next destination pointer
	stwm		%t1, -4(%usp)
	RETE
	stwm		%arg0, -4(%usp)


;;; _BGI_DIV:
;;;	Divide a biginteger by a signed machine integer into a destination
;;; 	bigint and return the last remainder.

;;; Call:
;;;	_bgi_div(_DIVISOR, _SRCADDR, _SRCLIM, _DSTLIM) -> _REM

;;; Register usage:
;;;	%arg0	bigint slice (low word of dividend for div64)
;;;	%arg1	high word of dividend for div64
;;;	%arg2	_DIVISOR
;;;	%ret0	quotient returned by div64
;;;	%ret1	remainder returned by div64
;;;	%r1	temporary used by div64
;;;	%rp	return link for div64
;;;	%t1	_SRCADDR
;;;	%t2	_SRCLIM
;;;	%t3	_DSTLIM

DEF_C_LAB (_bgi_div)

	ldwm		4(%usp), %t3		;;; _DSTLIM
	ldwm		4(%usp), %t2		;;; _SRCLIM
	ldwm		4(%usp), %t1		;;; _SRCADDR
	ldwm		4(%usp), %arg2		;;; _DIVISOR

	;;; load top slice to %arg0, sign-extend to (%arg1,%arg0) and
	;;; divide to initialise quotient and remainder
	ldws,mb		-4(%t2), %arg0
	bl		div64, %rp
	extrs		%arg0, 0, 1, %arg1

	;;; store first quotient to destination and quit if that's all
	comb,=		%t2, %t1, L$52
	stws,mb		%ret0, -4(%t3)

L$51	;;; loop to divide remaining slices

	;;; load next slice
	ldws,mb		-4(%t2), %arg0

	;;; combine with remainder from previous divide step
	;;; (bottom bit of remainder shifted into top bit of slice)
	;;; and divide
	extrs		%ret1, 30, 31, %arg1
	bl		div64, %rp
	dep		%ret1, 0, 1, %arg0

	;;; store quotient to destination and quit if done
	comb,<>		%t2, %t1, L$51
	stws,mb		%ret0, -4(%t3)

L$52	;;; return last remainder
	RETE
	stwm		%ret1, -4(%usp)


;;; === COMPUTE ARRAY SUBSCRIPTS ======================================

;;; _ARRAY_SUB:
;;;	computes the total subscript for a multi-dimensional array.

;;; Call:
;;;	_array_sub(/* INDEXES */) -> _SUBSCRIPT

;;; Arguments:
;;;     A set of indexes on the stack, one for each dimension. The array
;;;     procedure table contains additional parameters arranged as
;;;     follows:
;;;
;;;	    offset			;;; popint (initial subscript value)
;;;	    dimension-1			;;; sysint, base 0
;;;	    lower_bound-1		;;; popint
;;;	    scale_factor-1		;;; sysint
;;;		...
;;;	    dimension-n			;;; n >= 1
;;;	    lower_bound-n
;;;	    scale_factor-n
;;;	    0				;;; marks the end

;;; Register usage:
;;;	%ret0	accumulates the total subscript
;;; 	%t1	(1) pointer into procedure table
;;; 		(2) subscript procedure
;;;	%t2	(1) dimension
;;; 		(2) scale factor
;;; 		(3) array vector
;;;	%t3	(scaled) index
;;;	%t4	lower bound
;;;	%fr4	temporary for multiplication
;;;	%fr5	     ""      ""      ""

DEF_C_LAB (_array_sub)

	;;; initialise %t1 to point at the array parameters
	ldo		_PD_ARRAY_TABLE(%pb), %t1

	;;; initialise %ret0 with subscript offset
	ldwm		4(%t1), %ret0

	;;; load first dimension to %t2: may be zero already for a
	;;; 0-dimensional array
	ldwm		4(%t1), %t2
	comb,=,n	%t2, 0, L$73

	;;; load first index from stack to %t3
	ldwm		4(%usp), %t3

L$71	;;; Repeat

	;;; load lower bound to %t4
	ldwm		4(%t1), %t4

	;;; check index in %t3 is a pop integer
	bb,>=,n		%t3, 30, array_sub_error

	;;; subtract lower bound from index and check it for range
	;;; against the dimension in %t2 (both index and lower bound are
	;;; popints, the dimension isn't)
	sub		%t3, %t4, %t3
	comb,>>=,n	%t3, %t2, array_sub_error

	;;; load scale factor to %t2 and %fr4L
	ldw		(%t1), %t2
	fldws,ma	4(%t1), %fr4L

	;;; test value in %t2 for zero: if so, it really means 1 (!) so
	;;; we skip the multiplication
	comb,=,n	%t2, 0, L$72

	;;; scale the index in %t3
	stwm		%t3, -4(%usp)
	fldws,ma	4(%usp), %fr4R
	xmpyu		%fr4L, %fr4R, %fr5
	fstws,mb	%fr5R, -4(%usp)
	ldwm		4(%usp), %t3

L$72	;;; load next dimension to %t2
	ldwm		4(%t1), %t2

	;;; add index to the running total
	add		%ret0, %t3, %ret0

	;;; Until %t2 = 0
	comb,<>,n	%t2, 0, L$71

	;;; load next index in branch delay slot
	ldwm		4(%usp), %t3

L$73	;;; finished -- push total subscript to stack, push array vector
	;;; to stack, and then chain subscripting procedure

	ldw		_PD_ARRAY_SUBSCR_PDR(%pb), %t1
	ldw		_PD_ARRAY_VECTOR(%pb), %t2
	stwm		%ret0, -4(%usp)
	CHAIN		%t1
	stwm		%t2, -4(%usp)

array_sub_error

	;;; last index popped from the stack was an error (either not an
	;;; integer or out of range)
	CALLSYS		XC_LAB(weakref Sys$-Array$-Sub_error)
	addi		-4, %usp, %usp	;;; reveal the last index again

	;;; in case the error routine returns
	CHAINSYS	XC_LAB(setpop)
	nop

;;; _ARRAY_SUB_TEMPLATE:
;;;     template for calling _array_sub from user code (copied by
;;;     Array$-Cons in "array_cons.p")

DEF_C_LAB (Sys$- _array_sub_template)

	ldil		L'C_LAB(_array_sub), %r1
	ble		R'C_LAB(_array_sub)(%sr4, %r1)
	nop


	.code
	.import		C_LAB(Sys$-objmod_pad_key), data
	.import   	C_LAB(true), data
	.import		XC_LAB(weakref Sys$-Array$-Sub_error), data
	.import		XC_LAB(setpop), data


/***************** end labels for wrapping structures *****************/

	.code
	.align  8
Lcode_end
	.data
	.align  8
Ldata_end

/**********************************************************************/


/* --- Revision History ---------------------------------------------------
--- Integral Solutions Ltd, Aug 31 1995 (Julian Clinton)
	Removed ":" after L$73 in _array_sub
--- Robert John Duncan, Sep  1 1994
	Changes to _array_sub
 */
