/* --- Copyright University of Sussex 1995. All rights reserved. ----------
 * File:	C.hppa/src/amove.s
 * Purpose:	Memory moves and compares for HP PA-RISC 1.1
 * Author:	Julian Clinton, November 1992 (see revisions)
 */


#_<

#_INCLUDE 'declare.ph'

>_#


#_INCLUDE 'asm_macros.h'

/************************* wrapping structures ************************/

	;;; Header words for the code section: the byte distance between
	;;; Lcode_start and Lcode_end (the latter is defined at the end of
	;;; this file), followed by the pad key symbol.
	;;; NOTE(review): presumably this lets the object module be treated
	;;; as a padding structure by the Poplog loader -- confirm.
	.code
	.word	Lcode_end-Lcode_start, C_LAB(Sys$-objmod_pad_key)
Lcode_start
	;;; Matching header words for the data section (Ldata_start up to
	;;; Ldata_end)
	.data
	.word	Ldata_end-Ldata_start, C_LAB(Sys$-objmod_pad_key)
Ldata_start

/**********************************************************************/


	.code


;;; === COMPARISONS ===================================================

;;; _BCMP:
;;;	Compare two byte regions of the same length, a byte at a time.
;;;	Pushes <true> if all bytes match (or the length is zero), and
;;;	<false> as soon as a differing pair of bytes is found.

;;; Call:
;;;	_bcmp(_BYTE_LENGTH, _SRC1, _SRC2) -> BOOL

;;; The arguments are popped from the user stack (%usp), last argument
;;; first, and the result is pushed back onto the user stack.

;;; Registers used:
;;;	%arg0	length of the comparison in bytes (counted down to zero)
;;;	%arg1	pointer to region 1 (advanced by one after each load)
;;;	%arg2	pointer to region 2 (advanced by one after each load)
;;;	%t1	(1) current byte of region 1
;;;		(2) if successful, result of <true>
;;;	%t2	current byte of region 2

DEF_C_LAB (_bcmp)

	ldwm		4(%usp), %arg2		;;; pop _SRC2
	ldwm		4(%usp), %arg1		;;; pop _SRC1
	ldwm		4(%usp), %arg0		;;; pop _BYTE_LENGTH

	;;; If length (%arg0) is zero, then return <true> immediately.
	;;; The second operand is general register 0, which reads as zero.
	;;; The branch is forward with nullify, so when it is taken the
	;;; load that follows is skipped.
	comb,=,n	%arg0, 0, L$2

	;;; Although this next instruction should be part of the loop code,
	;;; on subsequent iterations the equivalent load is executed in the
	;;; delay slot of the loop branch. This copy is therefore only run
	;;; on entry to the loop.
	ldbs,ma		1(%arg1), %t1

L$1	;;; Repeat (byte from region 1 is already in %t1)

	;;; Load the byte from region 2 to %t2.
	;;; If the bytes are different, return <false>; taking this forward
	;;; branch nullifies the count/loop instruction below.
	ldbs,ma		1(%arg2), %t2
	comb,<>,n	%t1, %t2, L$3

	;;; Else decrement length (in %arg0) and loop while it is non-zero.
	;;; The branch is backward with nullify: the delay-slot load of the
	;;; next region 1 byte runs only when the branch is taken, so no
	;;; byte beyond the end of region 1 is read.
	addib,<>,n	-1, %arg0, L$1
	ldbs,ma		1(%arg1), %t1		;;; branch delay slot

L$2	;;; Same: return <true>
	LDA32   	C_LAB(true), %t1	;;; %t1 will be <true>
	RETE
	stwm 		%t1, -4(%usp)		;;; push <true> on the user stack

L$3	;;; Different: return <false>
	RETE
	stwm		%false, -4(%usp)	;;; push <false> onto userstack


;;; _SCMP:
;;;	Compare two short (halfword) regions of the same length, a
;;;	halfword at a time. Pushes <true> if all halfwords match (or the
;;;	length is zero), and <false> at the first mismatch.
;;;	The count is reduced by 2 per halfword and the loop stops only
;;;	when it reaches exactly zero, so the byte length is assumed to
;;;	be a multiple of 2.

;;; Call:
;;;	_scmp(_BYTE_LENGTH, _SRC1, _SRC2) -> BOOL

;;; The arguments are popped from the user stack (%usp), last argument
;;; first, and the result is pushed back onto the user stack.

;;; Registers used:
;;;	%arg0	length of the comparison in bytes (counted down to zero)
;;;	%arg1	pointer to region 1 (advanced by two after each load)
;;;	%arg2	pointer to region 2 (advanced by two after each load)
;;;	%t1	(1) current halfword of region 1
;;;		(2) if successful, result of <true>
;;;	%t2	current halfword of region 2

DEF_C_LAB (_scmp)

	ldwm		4(%usp), %arg2		;;; pop _SRC2
	ldwm		4(%usp), %arg1		;;; pop _SRC1
	ldwm		4(%usp), %arg0		;;; pop _BYTE_LENGTH

	;;; If length (%arg0) is zero, then return <true> immediately
	;;; (taking the branch nullifies the load that follows)
	comb,=,n	%arg0, 0, L$12

	;;; First halfword of region 1: only run on entry to the loop,
	;;; later loads happen in the delay slot of the loop branch
	ldhs,ma		2(%arg1), %t1

L$11	;;; Repeat (halfword from region 1 is already in %t1)

	;;; Load the halfword from region 2 to %t2.
	;;; If the halfwords are different, return <false>
	ldhs,ma		2(%arg2), %t2
	comb,<>,n	%t1, %t2, L$13

	;;; Else decrement length (in %arg0) and loop while it is non-zero;
	;;; the next region 1 halfword is loaded only if the branch is taken
	addib,<>,n	-2, %arg0, L$11
	ldhs,ma		2(%arg1), %t1		;;; branch delay slot

L$12	;;; Same: return <true>
	LDA32   	C_LAB(true), %t1	;;; %t1 will be <true>
	RETE
	stwm		%t1, -4(%usp)		;;; push <true> on the user stack

L$13	;;; Different: return <false>
	RETE
	stwm		%false, -4(%usp)	;;; push <false> onto userstack


;;; _ICMP:
;;; _CMP:
;;;	Compare two int/word regions of the same length, a word at a
;;;	time. Both names label the same code. Pushes <true> if all words
;;;	match (or the length is zero), and <false> at the first mismatch.
;;;	The count is reduced by 4 per word and the loop stops only when
;;;	it reaches exactly zero, so the byte length is assumed to be a
;;;	multiple of 4.

;;; Call:
;;;	_cmp(_BYTE_LENGTH, _SRC1, _SRC2) -> BOOL

;;; The arguments are popped from the user stack (%usp), last argument
;;; first, and the result is pushed back onto the user stack.

;;; Registers used:
;;;	 %arg0	length of the comparison in bytes (counted down to zero)
;;;	 %arg1	pointer to region 1 (advanced by four after each load)
;;;	 %arg2	pointer to region 2 (advanced by four after each load)
;;;	 %t1	(1) current word of region 1
;;;		(2) if successful, result of <true>
;;;	 %t2	current word of region 2

DEF_C_LAB (_icmp)
DEF_C_LAB (_cmp)

	ldwm		4(%usp), %arg2		;;; pop _SRC2
	ldwm		4(%usp), %arg1		;;; pop _SRC1
	ldwm		4(%usp), %arg0		;;; pop _BYTE_LENGTH

	;;; If length (%arg0) is zero, then return <true> immediately
	;;; (taking the branch nullifies the load that follows)
	comb,=,n	%arg0, 0, L$22

	;;; First word of region 1: only run on entry to the loop,
	;;; later loads happen in the delay slot of the loop branch
	ldwm		4(%arg1), %t1

L$21	;;; Repeat (word from region 1 is already in %t1)

	;;; Load the word from region 2 to %t2.
	;;; If the words are different, return <false>
	ldwm		4(%arg2), %t2
	comb,<>,n	%t1, %t2, L$23

	;;; Else decrement length (in %arg0) and loop while it is non-zero;
	;;; the next region 1 word is loaded only if the branch is taken
	addib,<>,n	-4, %arg0, L$21
	ldwm		4(%arg1), %t1		;;; branch delay slot

L$22	;;; Same: return <true>
	LDA32   	C_LAB(true), %t1	;;; %t1 will be <true>
	RETE
	stwm		%t1, -4(%usp)		;;; push <true> on the user stack

L$23	;;; Different: return <false>
	RETE
	stwm		%false, -4(%usp)	;;; push <false> onto userstack



;;; === MOVES =========================================================

;;; _MOVEQ:
;;;     Quick word move. The move is done from the lowest word first, so
;;;     if the source and destination regions overlap, the direction of
;;;     the move must be downwards (destination below source) to
;;;     preserve correctness. Returns a pointer to the next destination
;;;     word, i.e. the destination address plus the byte length.
;;;     The count is reduced by 4 per word and the loop stops only when
;;;     it reaches exactly zero, so the byte length is assumed to be a
;;;     multiple of 4.

;;; Call:
;;;	_moveq(_BYTE_LENGTH, _SRCADDR, _DSTADDR) -> _NEXT_DSTADDR;

;;; The arguments are popped from the user stack (%usp), last argument
;;; first, and the result is pushed back onto the user stack.

;;; Registers used:
;;;	 %arg0	length of the move in bytes (counted down to zero)
;;;	 %arg1	source pointer (advanced by four after each load)
;;;	 %arg2	destination pointer (advanced by four after each store)
;;;	 %t1	current word

DEF_C_LAB (_moveq)

	ldwm		4(%usp), %arg2		;;; pop _DSTADDR
	ldwm		4(%usp), %arg1		;;; pop _SRCADDR
	ldwm		4(%usp), %arg0		;;; pop _BYTE_LENGTH

	;;; Quit if the source and destination addresses are the same
	;;; or if the byte length is zero. Each branch is forward with
	;;; nullify, so taking it skips the instruction that follows it.
	comb,=,n	%arg1, %arg2, L$33
	comb,=,n	%arg0, 0, L$32

	;;; First source word: only run on entry to the loop, later loads
	;;; happen in the delay slot of the loop branch
	ldwm		4(%arg1), %t1

L$31	;;; Repeat (source word is already in %t1)

	;;; Store the word to the destination and advance %arg2
	stwm		%t1, 4(%arg2)

	;;; Decrement length (in %arg0) and loop while it is non-zero;
	;;; the next source word is loaded only if the branch is taken
	addib,<>,n	-4, %arg0, L$31
	ldwm		4(%arg1), %t1		;;; branch delay slot


L$32	;;; Return the next destination address: %arg2 has been advanced
	;;; past the last word stored (or is unchanged for a zero length)
	RETE
	stwm		%arg2, -4(%usp)

L$33	;;; Source and destination the same: nothing is copied, so
	;;; compute the next destination address (destination + length)
	;;; and return it
	add		%arg2, %arg0, %arg2
	RETE
	stwm		%arg2, -4(%usp)


;;; _BMOVE:
;;; _SMOVE:
;;; _IMOVE:
;;; _DMOVE:
;;; _MOVE:
;;;     General purpose moves for bytes, halfwords and words. Cope with
;;;     any alignment and with moves in both directions. Return address
;;;     of the next destination (destination address plus byte length).
;;;     _bmove and _smove enter through movbytes, which itself switches
;;;     to movwords when addresses and length are all word aligned;
;;;     _imove, _dmove and _move enter movwords directly.

;;; Call:
;;;	_move(_BYTE_LENGTH, _SRCADDR, _DSTADDR) -> _NEXT_DSTADDR;

;;; Uses:
;;; 	movbytes, movwords (defined below)

;;; Registers used:
;;;	%arg0	number of bytes to be moved
;;;	%arg1	pointer to source
;;;	%arg2	pointer to destination
;;;	%t1	used to calculate next destination

DEF_C_LAB (_bmove)
DEF_C_LAB (_smove)

	;;; Set register arguments for -movbytes- by popping them from the
	;;; user stack, last argument first
	;;;
	ldwm		4(%usp), %arg2		;;; pop _DSTADDR
	ldwm		4(%usp), %arg1		;;; pop _SRCADDR
	ldwm		4(%usp), %arg0		;;; pop _BYTE_LENGTH

	;;; Push the next destination address as the return value. This is
	;;; done before the move because movbytes returns straight to the
	;;; caller of this routine.
	add		%arg2, %arg0, %t1
	stwm		%t1, -4(%usp)		;;; push result

	;;; Return if source and destination addresses are the same,
	;;; otherwise do the move. When the branch is taken, the nullify
	;;; completer cancels the return instruction that follows it.
	comb,<>,n	%arg1, %arg2, movbytes	;;; forward branch so nullify
	RETE
	nop

;;; Word-aligned entry points of the general purpose move: all three
;;; names label the same code, which pops
;;;	(_BYTE_LENGTH, _SRCADDR, _DSTADDR)
;;; from the user stack, pushes the next destination address and then
;;; lets -movwords- do the copy.

DEF_C_LAB (_imove)
DEF_C_LAB (_dmove)
DEF_C_LAB (_move)

	;;; Set register arguments for -movwords- by popping them from the
	;;; user stack, last argument first
	;;;
	ldwm		4(%usp), %arg2		;;; pop _DSTADDR
	ldwm		4(%usp), %arg1		;;; pop _SRCADDR
	ldwm		4(%usp), %arg0		;;; pop _BYTE_LENGTH

	;;; Push the next destination address as the return value. This is
	;;; done before the move because movwords returns straight to the
	;;; caller of this routine.
	add		%arg2, %arg0, %t1
	stwm		%t1, -4(%usp)		;;; push result

	;;; Return if source and destination addresses are the same,
	;;; otherwise do the move. When the branch is taken, the nullify
	;;; completer cancels the return instruction that follows it.
	comb,<>,n	%arg1, %arg2, movwords	;;; forward branch so nullify
	RETE
	nop


;;; MOVBYTES
;;; MOVWORDS
;;;     General purpose memory move. Not callable from Pop, as they take
;;;     their arguments in registers rather than on the stack (but
;;;     cf. _bmove, _smove and _move above). MOVWORDS is for use
;;;     when the move is word-aligned; MOVBYTES checks the alignment
;;;     itself and branches to MOVWORDS whenever it can.
;;;     Overlapping regions are handled by choosing the copy direction
;;;     from the relative position of source and destination.

;;; Arguments:
;;;	%arg0   length of move in bytes
;;;	%arg1   source address
;;;	%arg2   destination address

;;; Results:
;;;	none (all three argument registers are modified)

;;; Other registers used:
;;;	%t1	(1) result of bitwise OR of all the arguments
;;;		(2) byte/word being moved

movbytes

	;;; OR together the bits of the source address, destination address
	;;; and byte count to test for word alignment
	or		%arg1, %arg2, %t1
	or		%arg0, %t1, %t1

	;;; If neither of the bottom two bits is set, the move is word
	;;; aligned (both addresses and the length are multiples of 4),
	;;; so move a word at a time.
	extru		%t1,31,2,%t1		;;; extract two right bits
	comb,=,n	%t1, 0, movwords	;;; if 0 then branch

	;;; Otherwise, move a byte at a time
	;;; If length is zero there's nothing to move, so exit
	comb,=,n	%arg0, 0, L$44

	;;; Test the direction of the move (unsigned address comparison):
	;;; if source (%arg1) > destination (%arg2), the move is downward;
	;;; otherwise, the move is upward, so branch
	;;;
	comb,<<=,n	%arg1, %arg2, L$42


	;;; Downward move: copy from the lowest address first

	;;; First source byte: only run on entry to the loop, later loads
	;;; happen in the delay slot of the loop branch
	;;;
	ldbs,ma		1(%arg1), %t1

L$41	;;; Repeat

	;;; Move a byte from source (%arg1) to destination (%arg2) via %t1
	stbs,ma		%t1, 1(%arg2)

	;;; Decrement length (in %arg0) and loop while it is non-zero;
	;;; the next byte is loaded only if the branch is taken
	addib,<>,n	-1, %arg0, L$41
	ldbs,ma		1(%arg1), %t1		;;; branch delay slot

	;;; Possible non-local return
	RETE
	nop

L$42	;;; Upward move: copy from the highest address first.
	;;; Adjust pointers to point above the last byte in each region
	add		%arg1, %arg0, %arg1
	add		%arg2, %arg0, %arg2

	;;; First source byte (pointer is decremented before the load):
	;;; only run on entry to the loop
	;;;
	ldbs,mb		-1(%arg1), %t1

L$43	;;; Repeat

	;;; Move a byte from source (%arg1) to destination (%arg2) via %t1;
	;;; the destination pointer is decremented before the store
	stbs,mb		%t1, -1(%arg2)

	;;; Decrement length (in %arg0) and loop while it is non-zero;
	;;; the next byte is loaded only if the branch is taken
	addib,<>,n	-1, %arg0, L$43
	ldbs,mb		-1(%arg1), %t1		;;; branch delay slot

L$44	;;; Possible non-local return
	RETE
	nop


;;; MOVWORDS: word-at-a-time memory move taking its arguments in
;;; registers:
;;;	%arg0   length of move in bytes
;;;	%arg1   source address
;;;	%arg2   destination address
;;; %t1 holds the word being moved. All three argument registers are
;;; modified. The count is reduced by 4 per word and the loops stop only
;;; when it reaches exactly zero, so the length is assumed to be a
;;; multiple of 4.

movwords

	;;; movwords is also called from "aprocess.s"
	.export		movwords, code

	;;; If length is zero there's nothing to move, so exit
	comb,=,n	%arg0, 0, L$54

	;;; Test the direction of the move (unsigned address comparison):
	;;; if source (%arg1) > destination (%arg2), the move is downward;
	;;; otherwise, the move is upward, so branch
	;;;
	comb,<<=,n	%arg1, %arg2, L$52

	;;; Downward move: copy from the lowest address first

	;;; First source word: only run on entry to the loop, later loads
	;;; happen in the delay slot of the loop branch
	;;;
	ldwm		4(%arg1), %t1

L$51	;;; Repeat

	;;; Move a word from source (%arg1) to destination (%arg2) via %t1
	stwm		%t1, 4(%arg2)

	;;; Decrement length (in %arg0) and loop while it is non-zero;
	;;; the next word is loaded only if the branch is taken
	addib,<>,n	-4, %arg0, L$51
	ldwm		4(%arg1), %t1		;;; branch delay slot

	;;; Possible non-local return
	RETE
	nop


L$52	;;; Upward move: copy from the highest address first.
	;;; Adjust pointers to point above the last word in each region
	add		%arg1, %arg0, %arg1
	add		%arg2, %arg0, %arg2

	;;; First source word (with a negative displacement the pointer is
	;;; decremented before the load): only run on entry to the loop
	;;;
	ldwm		-4(%arg1), %t1

L$53	;;; Repeat

	;;; Move a word from source (%arg1) to destination (%arg2) via %t1;
	;;; the destination pointer is decremented before the store
	stwm		%t1, -4(%arg2)

	;;; Decrement length (in %arg0) and loop while it is non-zero;
	;;; the next word is loaded only if the branch is taken
	addib,<>,n	-4, %arg0, L$53
	ldwm		-4(%arg1), %t1		;;; branch delay slot

L$54	;;; Possible non-local return
	RETE
	nop



;;; === FILLING =======================================================

;;; _BFILL:
;;;	Fill a region of bytes with a given byte. Nothing is returned.

;;; Call:
;;;	_bfill(_BYTE, _BYTE_LENGTH, _DSTADDR);

;;; The arguments are popped from the user stack (%usp), last argument
;;; first.

;;; Registers used:
;;;	%arg0	the byte (stored with a byte store, so only its low-order
;;;		byte reaches memory)
;;;	%arg1	length of the region in bytes (counted down to zero)
;;;	%arg2	destination pointer (advanced by one after each store)

DEF_C_LAB (_bfill)

	ldwm		4(%usp), %arg2		;;; pop _DSTADDR
	ldwm		4(%usp), %arg1		;;; pop _BYTE_LENGTH
	ldwm		4(%usp), %arg0		;;; pop _BYTE

	;;; If length (%arg1) is zero, then return immediately
	;;; (taking the branch nullifies the store that follows)
	comb,=,n	%arg1, 0, L$62

	;;; First store: only run on entry to the loop, later stores
	;;; happen in the delay slot of the loop branch
	stbs,ma		%arg0, 1(%arg2)

L$61	;;; Repeat

	;;; Decrement length (in %arg1) and loop while it is non-zero;
	;;; the next byte is stored only if the branch is taken
	addib,<>,n	-1, %arg1, L$61
	stbs,ma		%arg0, 1(%arg2)		;;; branch delay slot

L$62	;;; Return
	RETE
	nop


;;; _IFILL:
;;; _FILL:
;;;	Fill a region of int/words with a given word. Both names label
;;;	the same code. Nothing is returned.
;;;	The count is reduced by 4 per word and the loop stops only when
;;;	it reaches exactly zero, so the byte length is assumed to be a
;;;	multiple of 4.

;;; Call:
;;;	_fill(_LONG, _BYTE_LENGTH, _DSTADDR);

;;; The arguments are popped from the user stack (%usp), last argument
;;; first.

;;; Registers used:
;;;	%arg0	the word
;;;	%arg1	length of the region in bytes (counted down to zero)
;;;	%arg2	destination pointer (advanced by four after each store)

DEF_C_LAB (_ifill)
DEF_C_LAB (_fill)

	ldwm		4(%usp), %arg2		;;; pop _DSTADDR
	ldwm		4(%usp), %arg1		;;; pop _BYTE_LENGTH
	ldwm		4(%usp), %arg0		;;; pop _LONG

	;;; If length (%arg1) is zero, then return immediately
	;;; (taking the branch nullifies the store that follows)
	comb,=,n	%arg1, 0, L$72

	;;; First store: only run on entry to the loop, later stores
	;;; happen in the delay slot of the loop branch
	stwm		%arg0, 4(%arg2)

L$71	;;; Repeat

	;;; Decrement length (in %arg1) and loop while it is non-zero;
	;;; the next word is stored only if the branch is taken
	addib,<>,n	-4, %arg1, L$71
	stwm		%arg0, 4(%arg2)		;;; branch delay slot

L$72	;;; Return
	RETE
	nop



;;; === RELOCATING POPLOG MEMORY REGIONS ==============================

;;; _MOVE_USERSTACK:
;;;     Shift the whole user stack by _BYTE_OFFS bytes (in either
;;;     direction), updating both the user stack pointer and _USERHI
;;;     to match. The copy itself is done by movwords.

;;; Call:
;;;	_move_userstack(_BYTE_OFFS)

;;; Registers used:
;;;	%t1	_BYTE_OFFS, popped from the user stack
;;;	%t2	_USERHI: the old value, then the relocated value
;;;	%arg0	byte length of the stack (old _USERHI - old %usp)
;;;	%arg1	source pointer: %usp before the shift
;;;	%arg2	destination pointer: %usp after the shift

DEF_C_LAB (_move_userstack)

	;;; Pop the shift amount and fetch the current top limit of the stack
	ldwm		4(%usp), %t1
	ldw		_SVB_OFFS(_userhi)(%svb), %t2

	;;; The source is the stack as it stands after the pop; its length
	;;; is everything from there up to _USERHI
	copy		%usp, %arg1
	sub		%t2, %arg1, %arg0

	;;; Relocate _USERHI by the shift and work out where the stack is
	;;; going, then write the new _USERHI back
	add		%t1, %t2, %t2
	add		%t1, %arg1, %arg2
	stw		%t2, _SVB_OFFS(_userhi)(%svb)

	;;; Do the move:
	;;; movwords expects %arg0 = byte length, %arg1 = source,
	;;; %arg2 = destination, and returns to our caller when done.
	;;; The new stack pointer is installed in the branch delay slot.
	b		movwords
	copy		%arg2, %usp



;;; _MOVE_CALLSTACK:
;;;     Relocate the top part of the callstack between the stack pointer
;;;     %sp and the _LIMIT address. The size of the relocation is
;;;     _BYTE_OFFS which may be positive or negative. The size is
;;;     computed as if the stack grows down, so for the HP we have to
;;;     invert the sign: i.e., positive means move *down* and negative
;;;     means move *up*. Also, _LIMIT will always be less than or equal
;;;     to %sp, so we use that as the starting address.
;;;     The copy itself is done by movwords, which returns to the caller
;;;     of this routine.

;;; Call:
;;;	_move_callstack(_BYTE_OFFS, _LIMIT);

;;; Registers used:
;;;	%arg0	the size of the move (stack length, %sp - _LIMIT)
;;;	%arg1	source pointer (_LIMIT)
;;;	%arg2	destination pointer (_LIMIT - _BYTE_OFFS)
;;;	%t1	_BYTE_OFFS

DEF_C_LAB (_move_callstack)

	;;; Pop the arguments from the user stack, last argument first
	ldwm		4(%usp), %arg1	;;; _LIMIT
	ldwm		4(%usp), %t1	;;; _BYTE_OFFS

	;;; Compute size of move in %arg0 (sp - _LIMIT); this must use the
	;;; stack pointer before it is adjusted below
	sub		%sp, %arg1, %arg0

	;;; Compute destination address in %arg2 (_LIMIT - _BYTE_OFFS)
	sub		%arg1, %t1, %arg2

	;;; Do the move:
	;;; movwords expects %arg0 = byte length, %arg1 = source,
	;;; %arg2 = destination
	;;; Compute new stack pointer (sp - _BYTE_OFFS) in branch delay slot
	b		movwords
	sub		%sp, %t1, %sp


;;; === BITFIELD OPERATIONS ===========================================

;;; _BFIELD:
;;; _SBFIELD:
;;;     Extract a (signed) bitfield. Not called directly from POP, but
;;;     used to implement M_MOVE(s)bit and I_PUSH_FIELD instructions.
;;;     The field may lie within one word or straddle two consecutive
;;;     words. Within a word the code left-justifies the field by
;;;     shifting left by the bit offset, i.e. bit offsets are counted
;;;     from the most significant end of the word.

;;; Arguments:
;;; 	%arg0	structure address
;;; 	%arg1	bit offset of field within structure
;;; 	%arg2	field width in bits (W)

;;; Results:
;;; 	%ret0	the extracted bitfield, right-justified and zero- or
;;;		sign-extended as appropriate

;;; Other registers:
;;;	%ret1	second word from structure (when field crosses a word boundary)
;;;	%t1	temporary
;;; All three argument registers and the shift amount register are
;;; overwritten.


DEF_C_LAB(_bfield)

	;;; address of low word containing field in %arg0:
	;;; (bit offset / 32) words on from the structure address
	extru		%arg1, 26, 27, %t1
	sh2add 		%t1, %arg0, %arg0

	;;; bit offset of field within word to %arg1 (bit offset mod 32)
	extru		%arg1, 31, 5, %arg1

	;;; 32 - W to %arg2
	subi		32, %arg2, %arg2

	;;; load low word and check whether that is sufficient to contain
	;;; the complete field (bit offset <= 32-W); branch if it is not.
	;;; The branch does not nullify, so the load in its delay slot is
	;;; executed on both paths.
	comb,>>		%arg1, %arg2, L$80
	ldw		(%arg0), %ret0

	;;; left justify field in %ret0 to clear upper bits
	;;; (shift left by the bit offset)
	subi		31, %arg1, %t1
	mtsar		%t1
	zvdep		%ret0, 32, %ret0

	;;; right justify to get final value: shift right by 32-W with
	;;; zeros coming in from the left; the shift is done in the delay
	;;; slot of the return
	mtsar		%arg2
	RETE
	vshd		0, %ret0, %ret0

L$80	;;; overspill -- load second word to %ret1
	ldw		4(%arg0), %ret1

	;;; merge low bits from %ret0 with high bits of %ret1 back to
	;;; %ret0: shift the double word %ret0:%ret1 right by (32 - bit
	;;; offset), which leaves the field left-justified in %ret0
	subi		32, %arg1, %t1
	mtsar		%t1
	vshd		%ret0, %ret1, %ret0

	;;; right justify to get final value: shift right by 32-W with
	;;; zeros coming in from the left; the shift is done in the delay
	;;; slot of the return
	mtsar		%arg2
	RETE
	vshd		0, %ret0, %ret0

;;; Signed bitfield extraction. Takes the structure address in %arg0,
;;; the bit offset of the field in %arg1 and the field width W in %arg2;
;;; returns the field in %ret0, right-justified and sign-extended from
;;; its leftmost bit. Uses %ret1 and %t1 as temporaries and overwrites
;;; the argument registers and the shift amount register.

DEF_C_LAB(_sbfield)

	;;; address of low word containing field in %arg0:
	;;; (bit offset / 32) words on from the structure address
	extru		%arg1, 26, 27, %t1
	sh2add 		%t1, %arg0, %arg0

	;;; bit offset of field within word to %arg1 (bit offset mod 32)
	extru		%arg1, 31, 5, %arg1

	;;; 32 - W to %arg2
	subi		32, %arg2, %arg2

	;;; load low word and check whether that is sufficient to contain
	;;; the complete field (bit offset <= 32-W); branch if it is not.
	;;; The branch does not nullify, so the load in its delay slot is
	;;; executed on both paths.
	comb,>>		%arg1, %arg2, L$81
	ldw		(%arg0), %ret0

	;;; left justify field in %ret0 to clear upper bits
	;;; (shift left by the bit offset)
	subi		31, %arg1, %t1
	mtsar		%t1
	zvdep		%ret0, 32, %ret0

	;;; right justify (with sign-extension) to get final value:
	;;; %t1 becomes all copies of the leftmost bit of the field, and is
	;;; shifted in from the left as %ret0 moves right by 32-W; the
	;;; shift is done in the delay slot of the return
	extrs		%ret0, 0, 1, %t1
	mtsar		%arg2
	RETE
	vshd		%t1, %ret0, %ret0

L$81	;;; overspill -- load second word to %ret1
	ldw		4(%arg0), %ret1

	;;; merge low bits from %ret0 with high bits of %ret1 back to
	;;; %ret0: shift the double word %ret0:%ret1 right by (32 - bit
	;;; offset), which leaves the field left-justified in %ret0
	subi		32, %arg1, %t1
	mtsar		%t1
	vshd		%ret0, %ret1, %ret0

	;;; right justify (with sign-extension) to get final value:
	;;; %t1 becomes all copies of the leftmost bit of the field, and is
	;;; shifted in from the left as %ret0 moves right by 32-W; the
	;;; shift is done in the delay slot of the return
	extrs		%ret0, 0, 1, %t1
	mtsar		%arg2
	RETE
	vshd		%t1, %ret0, %ret0


;;; _UBFIELD:
;;;     Update a bitfield within a structure. Not called directly from
;;;     POP but used to implement M_UPDbit and I_POP_FIELD instructions.
;;;     Only the W low-order bits of the new value are used; the other
;;;     bits of the affected word(s) are preserved. The field may lie
;;;     within one word or straddle two consecutive words.

;;; Arguments:
;;; 	%arg0	the new value as a system integer
;;; 	%arg1	structure address
;;; 	%arg2	bit offset of field within structure
;;; 	%arg3	field width in bits (W)

;;; Results:
;;; 	none

;;; Other registers used:
;;;	%ret0	low word of structure containing the field
;;;	%ret1	second word of structure (when field crosses a word boundary)
;;; 	%t[1-3]	temporaries
;;; All four argument registers and the shift amount register are
;;; overwritten.

DEF_C_LAB(_ubfield)

	;;; address of low word containing field in %arg1:
	;;; (bit offset / 32) words on from the structure address
	extru		%arg2, 26, 27, %t1
	sh2add 		%t1, %arg1, %arg1

	;;; bit offset of field within word to %arg2 (bit offset mod 32)
	extru		%arg2, 31, 5, %arg2

	;;; 31 - offset to %arg2
	;;; W - 1 to %arg3
	subi		31, %arg2, %arg2
	addi		-1, %arg3, %arg3

	;;; load low word
	ldw		(%arg1), %ret0

	;;; left justify new value in %arg0 with a corresponding all-1s mask
	;;; in %t1 (both shifted left by 32-W, which discards any bits of
	;;; the new value above the field width)
	mtsar		%arg3
	zvdepi		-1, 32, %t1
	zvdep		%arg0, 32, %arg0

	;;; position field and mask according to offset: shift both right
	;;; by the bit offset, giving the part that belongs in the low
	;;; word in %t2 (mask) and %t3 (value)
	mtsar		%arg2
	vextru		%t1, 32, %t2
	vextru		%arg0, 32, %t3

	;;; merge in with the existing low word value, and check whether
	;;; the field fits in this word (31-offset >= W-1); branch to the
	;;; overspill code if it does not. The branch does not nullify, so
	;;; the final merge step in its delay slot is executed on both
	;;; paths.
	andcm		%ret0, %t2, %ret0
	comb,<<		%arg2, %arg3, L$82
	or		%ret0, %t3, %ret0

	;;; store back and return (the store is in the delay slot of the
	;;; return)
	RETE
	stw		%ret0, (%arg1)

L$82	;;; overspill -- load next word to %ret1
	ldw		4(%arg1), %ret1

	;;; position field and mask for next word: %arg2 becomes
	;;; offset - 1, so the deposits shift the left-justified mask and
	;;; value left by (32 - offset), leaving only the bits that
	;;; spilled out of the low word
	subi		30, %arg2, %arg2
	mtsar		%arg2
	zvdep		%t1, 32, %t2
	zvdep		%arg0, 32, %t3

	;;; merge in with existing value
	andcm		%ret1, %t2, %ret1
	or		%ret1, %t3, %ret1

	;;; store both words back and return (the second store is in the
	;;; delay slot of the return)
	stw		%ret0, (%arg1)
	RETE
	stw		%ret1, 4(%arg1)


	;;; External data symbols referenced by this file: the pad key used
	;;; in the wrapping structure headers, and the <true> object pushed
	;;; by the comparison routines
	.code
	.import		C_LAB(Sys$-objmod_pad_key), data
	.import   	C_LAB(true), data


/***************** end labels for wrapping structures *****************/

	;;; Each section is padded to an 8-byte boundary before its end
	;;; label, so the lengths computed in the headers at the top of the
	;;; file (end label minus start label) include that padding
	.code
	.align  8
Lcode_end
	.data
	.align  8
Ldata_end

/**********************************************************************/


/* --- Revision History ---------------------------------------------------
--- John Gibson, Apr  6 1995
	Added _icmp, _imove, _dmove, _ifill as equivalent to word versions
 */
