/* --- Copyright University of Sussex 1994. All rights reserved. ----------
 * File:            C.alpha/src/aarith.s
 * Purpose:
 * Author:          John Gibson, Aug 31 1994
 */

;;; ------------------- ARITHMETIC ROUTINES ------------------------------

#_<

#_INCLUDE 'asm.ph'

constant
	procedure $-Sys$-Array$-Sub_error
	;

lconstant macro (
	_PD_ARRAY_TABLE		= @@PD_ARRAY_TABLE,
	_PD_ARRAY_VECTOR	= @@PD_ARRAY_VECTOR,
	_PD_ARRAY_SUBSCR_PDR	= @@PD_ARRAY_SUBSCR_PDR,
	);


>_#

ASM_START_FILE

ASM_CODE_PSECT


;;; --- LOGICAL BIT ROUTINES ------------------------------------------------

	;;; _biset(______int1, ______int2) -> ______int3  (logical or)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_biset)
	ldW	rt1, 0(rusp)
	ldW	rt0, _WOFFS(rusp)
	lda	rusp, _WOFFS(rusp)
	or	rt0, rt1, rt0
	stW	rt0, 0(rusp)
	ret	rzero, (rret)


	;;; _biclear(______int1, ______int2) -> ______int3  (logical and not)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_biclear)
	ldW	rt1, 0(rusp)
	ldW	rt0, _WOFFS(rusp)
	lda	rusp, _WOFFS(rusp)
	bic	rt0, rt1, rt0
	stW	rt0, 0(rusp)
	ret	rzero, (rret)


	;;; _bimask(______int1, ______int2) -> ______int3  (logical and)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_bimask)
	ldW	rt1, 0(rusp)
	ldW	rt0, _WOFFS(rusp)
	lda	rusp, _WOFFS(rusp)
	and	rt0, rt1, rt0
	stW	rt0, 0(rusp)
	ret	rzero, (rret)


	;;; _bixor(______int1, ______int2) -> ______int3  (logical exclusive or)

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_bixor)
	ldW	rt1, 0(rusp)
	ldW	rt0, _WOFFS(rusp)
	lda	rusp, _WOFFS(rusp)
	xor	rt0, rt1, rt0
	stW	rt0, 0(rusp)
	ret	rzero, (rret)


;;; --- MULTIPLY -----------------------------------------------------------

	;;; _pmult(____int1, ____int2) -> _______product
	;;; pop integer multiply
ASM_ALIGN_QUAD
DEF_C_LAB 4 (_pmult)
	ldW	rt1, 0(rusp)		;;; ____int2
	ldW	rt0, _WOFFS(rusp)	;;; ____int1
	sra	rt1, _:WORD_SHIFT, rt1 ;;; make ____int2 sysint
	bic	rt0, 3, rt0		;;; clear tag bits on other
	mulq	rt0, rt1, rt0		;;; do the multiply
	lda	rusp, _WOFFS(rusp)
	bis	rt0, 3, rt0		;;; set tag bits on result
	stW	rt0, 0(rusp)		;;; return it
	ret	rzero, (rret)

	;;; _pmult_testovf(____int1, ____int2) -> (_______product, ____bool)
	;;; ____bool true if no overflow, false if overflow
ASM_ALIGN_QUAD
DEF_C_LAB (_pmult_testovf)
	ldW	rt1, 0(rusp)		;;; ____int2
	ldW	rt0, _WOFFS(rusp)	;;; ____int1
	sra	rt1, _:WORD_SHIFT, rt1 ;;; make ____int2 sysint
	bic	rt0, 3, rt0		;;; clear tag bits on other
	mulq	rt0, rt1, rt4		;;; do the multiply
	lda	rt5, _TRUEOFFS(rfalse)	;;; get true for ____bool result
	bis	rt4, 3, rt4		;;; set tag bits on _______product ...
	stW	rt4, _WOFFS(rusp)	;;; ... and store it in any case

#_IF WORD_BITS==DOUBLE_BITS
	;;; can't overflow if both args in 32-bit range
	addl	rt0, rzero, rt2
	cmpeq	rt0, rt2, rt2
	addl	rt1, rzero, rt3
	cmpeq	rt1, rt3, rt3
	and	rt2, rt3, rt2
	blbs	rt2, !$1f		;;; both in 32-bit range, so OK
	;;; else test top 64 bits of product is sign extension of result
	umulh	rt0, rt1, rt3		;;; top 64 bits of unsigned 128-bit result
	mov	rt1, rt2
	cmovge	rt0, rzero, rt1
	cmovge	rt2, rzero, rt0
	subq	rt3, rt0, rt3
	subq	rt3, rt1, rt0		;;; = signed product
	sra	rt4, 63, rt4		;;; sign extension of result
#_ELSE
	;;; test product in 32-bit range
	addl	rt4, rzero, rt0
#_ENDIF
	cmpeq	rt0, rt4, rt0
	cmovlbc	rt0, rfalse, rt5	;;; replace true with false if not eq
!$1:	stW	rt5, 0(rusp)		;;; return it
	ret	rzero, (rret)


;;; --- DIVIDE -------------------------------------------------------------

	;;; _div(__________dividend, _________divisor) -> (___________remainder, __________quotient)
	;;; integer divide

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_div)
	ldW	rt0, _WOFFS(rusp)	;;; __________dividend
	ldW	rt1, 0(rusp)		;;; _________divisor
	bsr	rchain, divWbyW
	stW	rt0, _WOFFS(rusp)	;;; ___________remainder
	stW	rt1, 0(rusp)		;;; __________quotient
	ret	rzero, (rret)


	;;; _divq(__________dividend, _________divisor) -> __________quotient
	;;; integer divide, quotient only

ASM_ALIGN_QUAD
DEF_C_LAB 2 (_divq)
	ldW	rt0, _WOFFS(rusp)	;;; __________dividend
	ldW	rt1, 0(rusp)		;;; _________divisor
	lda	rusp, _WOFFS(rusp)
	bsr	rchain, divWbyW
	stW	rt1, 0(rusp)		;;; __________quotient
	ret	rzero, (rret)


	;;; _pdiv(________dividend, _______divisor) -> (_________remainder, ________quotient)
	;;; pop integer divide

ASM_ALIGN_QUAD
DEF_C_LAB 4 (_pdiv)
	ldW	rt0, _WOFFS(rusp)	;;; ________dividend
	ldW	rt1, 0(rusp)		;;; _______divisor
	sra	rt0, _:WORD_SHIFT, rt0 ;;; ________dividend -> sysint
	sra	rt1, _:WORD_SHIFT, rt1 ;;; _______divisor -> sysint
	bsr	rchain, divWbyW
	sWaddW	rt0, 3, rt0		;;; ___________remainder -> popint
	sWaddW	rt1, 3, rt1		;;; __________quotient -> popint
	stW	rt0, _WOFFS(rusp)	;;; _________remainder
	stW	rt1, 0(rusp)		;;; ________quotient
	ret	rzero, (rret)


;;; ------------------------------------------------------------------------

	;;; _pint_testovf(_____int) -> (____pint, true)  if okay
	;;;			-> false         if overflow
	;;; convert int to popint, with overflow test

ASM_ALIGN_QUAD
DEF_C_LAB (_pint_testovf)
	ldW	rt0, 0(rusp)		;;; _____int
	lda	rt3, _TRUEOFFS(rfalse)	;;; anticipate true needed
	sWaddW	rt0, 3, rt1		;;; get ____pint result
	sra	rt1, _:WORD_SHIFT, rt2 ;;; reverse shift
	cmpeq	rt2, rt0, rt2		;;; overflow if not same as original
	blbc	rt2, !$1f		;;; br if so
	stW	rt1, 0(rusp)		;;; return ____pint
	lda	rusp, -_WOFFS(rusp)	;;; push stack
	stW	rt3, 0(rusp)		;;; and true
	ret	rzero, (rret)
!$1:	stW	rfalse, 0(rusp)		;;; false if overflow
	ret	rzero, (rret)


;;; ------------------------------------------------------------------------

	;;; _pshift_testovf(_____pint1, _______nbits) -> (_____pint2, true)  if okay
	;;;				   -> false          if overflow
	;;; shift popint left, with overflow test

ASM_ALIGN_QUAD
DEF_C_LAB (_pshift_testovf)
	ldW	rt0, _WOFFS(rusp)	;;; _____pint1
	ldW	rt1, 0(rusp)		;;; _______nbits
	lda	rt4, _TRUEOFFS(rfalse)	;;; anticipate true needed
	bic	rt0, 3, rt0		;;; clear tag bits on _____pint1
	sll	rt0, rt1, rt2		;;; do the shift
	cmpule	rt1, 63, rt3		;;; shift <= 63 bits?
	cmovlbc	rt3, rzero, rt2		;;; if not, replace result with 0
#_IF WORD_BITS/==DOUBLE_BITS
	addl	rt2, rzero, rt2		;;; normalise 32-bit result
#_ENDIF
	sra	rt2, rt1, rt3		;;; reverse the shift
	cmpeq	rt3, rt0, rt1		;;; overflow if not same as original
	blbc	rt1, !$1f		;;; br if so
	bis	rt2, 3, rt2		;;; set tag bits on result
	stW	rt2, _WOFFS(rusp)	;;; return _____pint2
	stW	rt4, 0(rusp)		;;; and true
	ret	rzero, (rret)
	;;; overflow -- return false
!$1:	stW	rfalse, _WOFFS(rusp)
	lda	rusp, _WOFFS(rusp)
	ret	rzero, (rret)


;;; --- BIGINTEGER ARITHMETIC --------------------------------------------

	;;; _emul(______________multiplicand, ____________multiplier) -> (____lo, ____hi)
	;;; multiply two slices to get double slice result

ASM_ALIGN_QUAD
DEF_C_LAB (_emul)
DEF_C_LAB (_posword_emul)
	ldW	rt0, _WOFFS(rusp)	;;; ______________multiplicand
	ldW	rt1, 0(rusp)		;;; ____________multiplier
	mulq	rt0, rt1, rt0		;;; do the multiply
	ldah	rt2, -32768(rzero)	;;; ones in bits 31-63
	sra	rt0, 31, rt1		;;; ____hi result
	bic	rt0, rt2, rt0		;;; clear hi part = ____lo result
	stW	rt1, 0(rusp)		;;; return ____hi
	stW	rt0, _WOFFS(rusp)	;;; return ____lo
	ret	rzero, (rret)


	;;; _ediv(____hi, ____lo, _________divisor) -> (___________remainder, __________quotient)
	;;; divide double slice dividend by single

ASM_ALIGN_QUAD
DEF_C_LAB (_ediv)
	ldW	rt0, _WOFFS*2(rusp)	;;; ____hi
	ldW	rt2, _WOFFS(rusp)	;;; ____lo
	ldW	rt1, 0(rusp)		;;; _________divisor
	lda	rusp, _WOFFS(rusp)
	sll	rt0, 31, rt0		;;; combine dividend hi and lo slices
	or	rt0, rt2, rt0
	bsr	rchain, div64by64_q32	;;; quot in rt1, rem in rt0
	stW	rt0, _WOFFS(rusp)	;;; return ___________remainder
	stW	rt1, 0(rusp)		;;; return __________quotient
	ret	rzero, (rret)


	;;; _bgi_mult(_____val, _______saddr, ______slim, _______daddr) -> (_______carry, __________nextdest)
	;;; multiply a biginteger by a signed value into a destination bigint

ASM_ALIGN_QUAD
DEF_C_LAB (_bgi_mult)
	ldW	rt0, _WOFFS*3(rusp)	;;; multiplier _____val
	ldW	rt1, _WOFFS*2(rusp)	;;; source addr _______saddr
	ldW	rt2, _WOFFS(rusp)	;;; source lim ______slim
	ldW	rt3, 0(rusp)		;;; destination addr _______daddr
	lda	rusp, _WOFFS*2(rusp)
	clr	rt4			;;; zero carry slice
	ldah	rt6, -32768(rzero)	;;; ones in bits 31-63

!$1:	ldl	rt5, 0(rt1)		;;; next source slice in rt5
	lda	rt1, 4(rt1)		;;; step source
	mulq	rt5, rt0, rt5		;;; multiply source by _____val
	lda	rt3, 4(rt3)		;;; step destination
	addq	rt5, rt4, rt5		;;; add last carry
	sra	rt5, 31, rt4		;;; hi part is next carry
	bic	rt5, rt6, rt5		;;; clear hi part to get lo part
	stl	rt5, -4(rt3)		;;; store lo at last destination
	cmpult	rt1, rt2, rt5		;;; compare source addr with lim
	blbs	rt5, !$1b		;;; next source slice if more

	stW	rt4, _WOFFS(rusp)	;;; return next carry slice
	stW	rt3, 0(rusp)		;;; and next destination
	ret	rzero, (rret)


	;;; _bgi_mult_add(_____val, _______saddr, ______slim, ________sdaddr) -> (_______carry, __________nextdest)
	;;; multiply a biginteger by a signed value
	;;; and add into a destination bigint

ASM_ALIGN_QUAD
DEF_C_LAB (_bgi_mult_add)
	ldW	rt0, _WOFFS*3(rusp)	;;; multiplier _____val
	ldW	rt1, _WOFFS*2(rusp)	;;; source addr _______saddr
	ldW	rt2, _WOFFS(rusp)	;;; source lim ______slim
	ldW	rt3, 0(rusp)		;;; destination addr _______daddr
	lda	rusp, _WOFFS*2(rusp)
	clr	rt4			;;; zero carry slice
	ldah	rt6, -32768(rzero)	;;; ones in bits 31-63

!$1:	ldl	rt5, 0(rt1)		;;; next source slice into rt5
	lda	rt1, 4(rt1)		;;; step source
	mulq	rt5, rt0, rt5		;;; multiply source by _____val
	ldl	rchain, 0(rt3)		;;; next destination slice
	lda	rt3, 4(rt3)		;;; step destination
	addq	rt4, rchain, rt4	;;; add dest slice to last carry
	addq	rt5, rt4, rt5		;;; add last carry to product
	sra	rt5, 31, rt4		;;; hi part is next carry
	bic	rt5, rt6, rt5		;;; clear hi part to get lo part
	stl	rt5, -4(rt3)		;;; store lo at last destination
	cmpult	rt1, rt2, rt5		;;; compare source addr with lim
	blbs	rt5, !$1b		;;; next source slice if more

	stW	rt4, _WOFFS(rusp)	;;; return next carry slice
	stW	rt3, 0(rusp)		;;; and next destination
	ret	rzero, (rret)


	;;; _bgi_div(_________divisor, _______saddr, ______slim, ______dlim) -> ___________remainder
	;;; divide a biginteger by a signed value into a destination bigint

ASM_ALIGN_QUAD
DEF_C_LAB (_bgi_div)
	stW	rnpl0, -_WOFFS(rusp)	;;; save local regs
	stW	rnpl1, -_WOFFS*2(rusp)
	stW	rnpl2, -_WOFFS*3(rusp)
	stW	rnpl3, -_WOFFS*4(rusp)

	ldW	rnpl0, _WOFFS*3(rusp)	;;; _________divisor
	ldW	rnpl1, _WOFFS*2(rusp)	;;; source addr _______saddr
	ldW	rnpl2, _WOFFS(rusp)	;;; source lim ______slim
	ldW	rnpl3, 0(rusp)		;;; destination lim ______dlim

	;;; do first slice with div32by32
	ldl	rt0, -4(rnpl2)		;;; most sig slice (signed) into rt0
	mov	rnpl0, rt1		;;; divisor into rt1
	bsr	rchain, div32by32	;;; quot in rt1, rem in rt0
	br	!$2f

	;;; loop for rest done with div64by64_q32
!$1:	ldl	rt2, -4(rnpl2)		;;; next (+ve) slice into rt2 is lo
	mov	rnpl0, rt1		;;; divisor into rt1
	sll	rt0, 31, rt0		;;; combine dividend hi and lo slices
	or	rt0, rt2, rt0
	bsr	rchain, div64by64_q32	;;; quot in rt1, rem in rt0
!$2:	stl	rt1, -4(rnpl3)		;;; store quot at next dest (can be -ve)
	lda	rnpl2, -4(rnpl2)	;;; step back source
	lda	rnpl3, -4(rnpl3)	;;; step back destination
	cmpule	rnpl2, rnpl1, rt3	;;; reached source start?
	blbc	rt3, !$1b		;;; loop if not

	ldW	rnpl0, -_WOFFS(rusp)	;;; restore local regs
	ldW	rnpl1, -_WOFFS*2(rusp)
	ldW	rnpl2, -_WOFFS*3(rusp)
	ldW	rnpl3, -_WOFFS*4(rusp)
	lda	rusp, _WOFFS*3(rusp)	;;; remove args but 1
	stW	rt0, 0(rusp)		;;; return remainder
	ret	rzero, (rret)


;;; --- COMPUTE ARRAY SUBSCRIPTS -----------------------------------------

	;;; _array_sub()
	;;; compute an array total subscript -- called inside an array
	;;; procedure

ASM_ALIGN_QUAD
DEF_C_LAB (_array_sub)
	lda	rt5, _PD_ARRAY_TABLE+_WOFFS(rpb) ;;; start of params + _WOFFS
	ldW	rt4, -_WOFFS(rt5)  ;;; init total subscript to base subscript
	ldW	rt3, 0(rt5)	   ;;; length in first dimension
	br	!$3f

!$1:	ldW	rt2, 0(rusp)	   ;;; next dimension subscript from stack
	ldW	rt1, _WOFFS(rt5)   ;;; dimension lower bound
	and	rt2, 2, rt0	   ;;; subscript is popint?
	subq	rt2, rt1, rt2	   ;;; subtract lower bound from subscript
	beq	rt0, !$4f	   ;;; error if subscript not popint
	cmpult	rt2, rt3, rt0	   ;;; compare with length
	ldW	rt1, _WOFFS*2(rt5) ;;; dimension scaling factor
	beq	rt0, !$4f	   ;;; error if subscript >= length unsigned
	lda	rusp, _WOFFS(rusp) ;;; inc stack
	beq	rt1, !$2f	   ;;; no multiply needed if scale factor 0
	mulq	rt2, rt1, rt2	   ;;; multiply by scale factor
!$2:	lda	rt5, _WOFFS*3(rt5) ;;; step to next dimension params
	ldW	rt3, 0(rt5)	   ;;; length in next dimension
	addq	rt4, rt2, rt4	   ;;; add scaled subscript to total
!$3:	bne	rt3, !$1b	   ;;; loop if nonzero

	;;; finished -- stack total subscript and arrayvector, and then
	;;; chain subscripting procedure
	ldW	rt0, _PD_ARRAY_VECTOR(rpb)
	stW	rt4, -_WOFFS(rusp)	;;; subscript
	stW	rt0, -_WOFFS*2(rusp)	;;; arrayvector
	ldW	rpb, _PD_ARRAY_SUBSCR_PDR(rpb)
	ldW	rt0, 0(rpb)
	lda	rusp, -_WOFFS*2(rusp)	;;; decr stack
	jmp	rzero, (rt0)			;;; chain subscr procedure

	;;; subscript error (bad subscript still tos) -- call error procedure
!$4:	ldW	rpb, _SVB_OFFS(Sys$-Array$-Sub_error)(rsvb)
	ldW	rt0, 0(rpb)
	jmp	rzero, (rt0)		;;; chain Sub_error


;;; --- 32-BIT BY 32-BIT SIGNED DIVISION ROUTINE --------------------------
;;; Takes dividend in rt0, divisor in rt1.
;;; Produces quotient in rt1, remainder in rt0.
;;; Uses rt0 - rt5. Return in rchain.

ASM_ALIGN_QUAD
div32by32:
#_IF WORD_BITS/==DOUBLE_BITS
divWbyW:
#_ENDIF
	clr	rt2			;;; clear sign indicators
	bgt	rt1, !$1f		;;; br if divisor +ve
	negl	rt1, rt1		;;; negate it
	bis	rt2, 1, rt2		;;; set negate quot at end
	bgt	rt1, !$1f		;;; br if divisor now +ve
	beq	rt1, div_zero		;;; br if dividing by zero
	;;; largest -ve divisor (-2**31)
	cmpeq	rt1, rt0, rt1		;;; quot=1 if dividend same, 0 if not
	cmovlbs	rt1, rzero, rt0		;;; zero rem if quot=1
	ret	rzero, (rchain)

!$1:	bge	rt0, !$2f		;;; br if dividend >= 0
	negq	rt0, rt0		;;; negq allows greatest -ve dividend
	xor	rt2, 1, rt2		;;; invert negate quot at end
	subl	rt2, 2, rt2		;;; set negate rem at end

	;;; establish amount to shift quotient up to align top with dividend
!$2:	clr	rt3			;;; zero shift count

#_<
lvars n;
for n from 4 by -1 to 0 do
[
\t  addq    \t	rt3, %_int(1<<n)%, rt4 \n
\t  srl     \t	rt0, rt4, rt5	\n	;;; dividend >> tmp
\t  cmplt   \t	rt5, rt1, rt5	\n	;;; test that less than divisor
\t  cmovlbc \t	rt5, rt4, rt3	\n	;;; tmp -> shift count if not
].dl
endfor
>_#

	sll	rt1, 32, rt1		;;; divisor to top half of reg
	sll	rt0, 32, rt0		;;; dividend to top half of reg
	subq	rt1, 1, rt1		;;; quot bit in low half when subtracted
	sll	rt1, rt3, rt1		;;; shift divisor up by shift count
	;;; switch on (32 - shift count) * 3 instrs * 4 bytes
	mov	32, rt5
	subq	rt5, rt3, rt3		;;; J = 32 - shift
#_IF DEF VMS
    .begin_exact
#_ENDIF
	br	rt4, !$3f		;;; get 3f -- MUST BE 3 INSTRS AFTER
!$3:	s4subq	rt3, rt3, rt3		;;; J*3
	s4addq	rt3, rt4, rt3		;;; *4 and add to 3b
	jmp	rzero, (rt3)

#_<
repeat 31 times
[
\t  subq   \t	rt0, rt1, rt3	\n	;;; sub divsr from rem, add in quot bit
\t  srl    \t	rt1,  _1, rt1	\n	;;; shift divisor/quot bit down for next
\t  cmovge \t	rt3, rt3, rt0	\n	;;; replace rem for next if not neg
].dl
endrepeat,
>_#
	subq	rt0, rt1, rt3
	cmovge	rt3, rt3, rt0
#_IF DEF VMS
    .end_exact
#_ENDIF

	extll	rt0, 0, rt1		;;; quotient
	srl	rt0, 32, rt0		;;; remainder
	negq	rt1, rt4
	negq	rt0, rt3
	cmovlbs	rt2, rt4, rt1
	cmovlt	rt2, rt3, rt0
	ret	rzero, (rchain)

div_zero:
#_IF DEF VMS
	mov	 -2, r16		;;; SS$_INTDIV
	call_pal gentrap		;;; 170
#_ENDIF
	ret	rzero, (rchain)


;;; --- 64-BIT BY 64-BIT SIGNED DIVISION FOR 32-BIT QUOTIENT -----------------
;;; Takes dividend in rt0, divisor in rt1.
;;; Produces quotient in rt1, remainder in rt0.
;;; Uses rt0 - rt6. Return in rchain.

ASM_ALIGN_QUAD
div64by64_q32:
	clr	rt6			;;; clear sign indicators

	;;; get divisor +ve
	bgt	rt1, !$1f		;;; br if divisor +ve
	negq	rt1, rt1		;;; else negate it
	bis	rt6, 1, rt6		;;; set negate quot at end
	bgt	rt1, !$1f		;;; br if divisor now +ve
	beq	rt1, div_zero		;;; br if dividing by zero
	;;; largest -ve divisor (- 2**63)
	cmpeq	rt1, rt0, rt1		;;; quot=1 if dividend same, 0 if not
	cmovlbs	rt1, rzero, rt0		;;; zero rem if quot=1
	ret	rzero, (rchain)

	;;; get dividend +ve
!$1:	bge	rt0, !$2f		;;; br if dividend >= 0
	negq	rt0, rt0		;;; else negate it
	xor	rt6, 1, rt6		;;; invert negate quot at end
	subq	rt6, 2, rt6		;;; set negate rem at end

	;;; establish amount to shift quotient up to align top with dividend
!$2:	clr	rt3			;;; zero shift count

#_<
lvars n;
for n from 4 by -1 to 0 do
[
\t  addq    \t	rt3, %_int(1<<n)%, rt4 \n	;;; shift count + 2**N  -> tmp
\t  srl     \t	rt0, rt4, rt5	\n	;;; dividend >> tmp
\t  cmplt   \t	rt5, rt1, rt5	\n	;;; test that less than divisor
\t  cmovlbc \t	rt5, rt4, rt3	\n	;;; tmp -> shift count if not
].dl
endfor
>_#

	sll	rt1, rt3, rt1		;;; shift divisor up by shift count
	mov	1, rt4		;;; bit for quotient
	sll	rt4, rt3, rt4		;;; shift up by shift count
	;;; switch on (32 - shift count) * 6 instrs * 4 bytes
#_IF DEF VMS
    .begin_exact
#_ENDIF
	br	rt2, !$4f		;;; get 4f -- MUST BE 6 INSTRS AFTER
!$4:	mov	32, rt5
	subq	rt5, rt3, rt3		;;; J = 32 - shift
	s4subq	rt3, rt3, rt3		;;; J*3
	s8addq	rt3, rt2, rt5		;;; *8 and add to 4b
	clr	rt3			;;; clear quotient
	jmp	rzero, (rt5)

#_<
repeat 31 times
[
\t  subq   \t	rt0, rt1, rt5	\n	;;; sub divisor from dividend -> tmp1
\t  addq   \t	rt3, rt4, rt2	\n	;;; add quotient bit -> tmp2
\t  srl    \t	rt1,  _1, rt1	\n	;;; shift divisor for next
\t  srl    \t	rt4,  _1, rt4	\n	;;; shift quotient bit for next
\t  cmovge \t	rt5, rt5, rt0	\n	;;; tmp1 -> dividend if not neg
\t  cmovge \t	rt5, rt2, rt3	\n	;;; tmp2 -> quotient if not neg
].dl
endrepeat,
>_#
	subq	rt0, rt1, rt5
	addq	rt3, rt4, rt2
	cmovge	rt5, rt5, rt0
	cmovge	rt5, rt2, rt3
#_IF DEF VMS
    .end_exact
#_ENDIF

	negq	rt3, rt1		;;; quotient in rt3
	negq	rt0, rt2
	cmovlbc	rt6, rt3, rt1
	cmovlt	rt6, rt2, rt0
	ret	rzero, (rchain)


#_IF WORD_BITS==DOUBLE_BITS

;;; --- 64-BIT BY 64-BIT SIGNED DIVISION ROUTINE --------------------------
;;; Takes dividend in rt0, divisor in rt1.
;;; Produces quotient in rt1, remainder in rt0.
;;; Uses rt0 - rt6. Return in rchain.

ASM_ALIGN_QUAD
div64by64:
divWbyW:
	addl	rt1, rzero, rt2
	cmpeq	rt2, rt1, rt2
	blbc	rt2, div64by64_q32	;;; divisor > 32-bit -- quot at most 32-bit
	;;; divisor <= 32-bit
	addl	rt0, rzero, rt2
	cmpeq	rt2, rt0, rt2
	blbs	rt2, div32by32		;;; both <= 32-bit

	;;; dividend > 32-bit but divisor <= 32-bit -- quotient may be
	;;; > 32-bit
	lda	rsp, -8*4(rsp)
	stq	rt0,	8*0(rsp)	;;; ________dividend
	stq	rt1,	8*1(rsp)	;;; _______divisor
	stq	rchain, 8*2(rsp)

	sra	rt0, 32, rt0		;;; ________dividend hi half
	bsr	rchain, div32by32	;;; divide by divisor
	ldq	rt2, 8*0(rsp)		;;; ________dividend again
	sll	rt0, 32, rt0		;;; rem from hi/divisor to top
	extll	rt2, 0, rt2		;;; ________dividend lo half
	addq	rt0, rt2, rt0		;;; combine with hi rem
	stq	rt1, 8*3(rsp)		;;; save quot from hi/divisor
	ldq	rt1, 8*1(rsp)		;;; _______divisor again
	bsr	rchain, div64by64_q32	;;; (rem+lo)/divisor
	ldq	rt2, 8*3(rsp)		;;; hi quot
	ldq	rchain, 8*2(rsp)
	sll	rt2, 32, rt2		;;; hi quot to top
	addq	rt2, rt1, rt1		;;; add to lo quot

	;;; deal with case of -ve dividend producing +ve remainder
	ldq	rt2, 8*0(rsp)		;;; original ________dividend
	subq	rt0, 1, rt3		;;; rem-1 is neg unless rem > 0
	bic	rt2, rt3, rt2		;;; +ve unless ________dividend -ve and rem>0
	bge	rt2, !$1f		;;; OK if +ve
	;;; correct it
	ldq	rt2, 8*1(rsp)		;;; _______divisor
	subq    rt1, 1, rt4		;;; quot-1 -> tmp
	addq	rt0, rt2, rt3		;;; rem+divisor -> tmp
	addq    rt1, 1, rt1		;;; quot+1 -> quot
	subq	rt0, rt2, rt0		;;; rem-divisor -> rem
	cmovlt	rt2, rt4, rt1		;;; quot-1 -> quot if divisor -ve
	cmovlt	rt2, rt3, rt0		;;; rem+divisor -> rem if divisor -ve

!$1:	lda	rsp, 8*4(rsp)
	ret	rzero, (rchain)


#_ENDIF

ASM_END_FILE
