/* --- Copyright University of Sussex 1996. All rights reserved. ----------
 > File:			C.hppa/src/ass.p
 > Purpose:			Run-time code generator for HP PA-RISC 1.1
 > Author:			Rob Duncan & Simon Nichols, Feb  5 1993 (see revisions)
 */

#_INCLUDE 'declare.ph'
#_INCLUDE 'vmdefs.ph'
#_INCLUDE 'external.ph'

global constant

	;;; Assembly code subroutines referenced at run-time

	_popenter,
	_popuenter,
	_popuncenter,
	_checkall,
	_bfield,
	_sbfield,
	_ubfield,
	_setstklen,
	_setstklen_diff,
;

global vars
	pop_debugging,		;;; <false> if optimisation pass wanted
	_trap,				;;; interrupt flag
;

section $-Sys$-Vm;

constant procedure (

	;;; VM interface

	Code_pass,
	Drop_I_code,
	Get_procedure,
	Is_register,
	Trans_structure,
);

vars
	asm_clist,			;;; list of I-code to be assembled
	asm_instr,			;;; the current I-code instruction
	asm_struct_list,	;;; list of items to go in the structure table
	_asm_pass,			;;; assembly pass counter - false when dropping code
	_asm_drop_ptr,		;;; pointer to drop code at
	_asm_code_offset,	;;; offset into executable code (in bytes)
	_Nreg,				;;; number of registers
	_Npopreg,			;;; number of pop registers
	_Nlocals,			;;; number of dynamic locals
	_Npopstkvars,		;;; number of pop on-stack lvars
	_Nstkvars,			;;; total number of on-stack lvars
;

endsection;

lvars
	_framesize,			;;; stack frame size in bytes
	_regmask,			;;; mask for register save/restore
	_strsize,			;;; structure table size in bytes
;


;;; -----------------------------------------------------------------------

section $-Sys$-Vm;

/*
 *	Registers
 */

lconstant

	;;; Register names

	_R_0		=  _0,
	_R_1		=  _1,
	_R_2		=  _2,
	_R_NPOP4	=  _3,
	_R_NPOP3	=  _4,
	_R_NPOP2	=  _5,
	_R_NPOP1	=  _6,
	_R_NPOP0	=  _7,
	_R_POP5		=  _8,
	_R_POP4		=  _9,
	_R_POP3		= _10,
	_R_POP2		= _11,
	_R_POP1		= _12,
	_R_POP0		= _13,
	_R_PZERO	= _14,
	_R_FALSE	= _15,
	_R_SVB		= _16,
	_R_PB		= _17,
	_R_USP		= _18,
	_R_T4		= _19,
	_R_T3		= _20,
	_R_T2		= _21,
	_R_T1		= _22,
	_R_ARG3		= _23,
	_R_ARG2		= _24,
	_R_ARG1		= _25,
	_R_ARG0		= _26,
	_R_DP		= _27,
	_R_RET0		= _28,
	_R_RET1		= _29,
	_R_SP		= _30,
	_R_31		= _31,

	_R_SR0		=  _0,
	_R_SR4		=  _4,
	_R_SR5		=  _5,

	_R_SAR		= _11,

;

;;; Register identifiers used by the VM

protected register constant

	arg_reg_0	 = _pint(_R_ARG0) << 1,
	arg_reg_1	 = _pint(_R_ARG1) << 1,
	arg_reg_2	 = _pint(_R_ARG2) << 1,

	pop_reg_0	 = _pint(_R_POP0) << 1 || 1,
	pop_reg_1	 = _pint(_R_POP1) << 1 || 1,
	pop_reg_2	 = _pint(_R_POP2) << 1 || 1,
	pop_reg_3	 = _pint(_R_POP3) << 1 || 1,
	pop_reg_4	 = _pint(_R_POP4) << 1 || 1,
	pop_reg_5	 = _pint(_R_POP5) << 1 || 1,

	nonpop_reg_0 = _pint(_R_NPOP0) << 1,
	nonpop_reg_1 = _pint(_R_NPOP1) << 1,
	nonpop_reg_2 = _pint(_R_NPOP2) << 1,
	nonpop_reg_3 = _pint(_R_NPOP3) << 1,
	nonpop_reg_4 = _pint(_R_NPOP4) << 1,

	chain_reg	 = _pint(_R_1) << 1,

;

;;; Register lvars

lconstant
	_N_POP_REGS	 = _6,
	_N_NPOP_REGS = _5,
;

constant
	asm_pop_registers = [% [],
		ident pop_reg_0,
		ident pop_reg_1,
		ident pop_reg_2,
		ident pop_reg_3,
		ident pop_reg_4,
		ident pop_reg_5,
	%],
	asm_nonpop_registers = [% [],
		ident nonpop_reg_0,
		ident nonpop_reg_1,
		ident nonpop_reg_2,
		ident nonpop_reg_3,
		ident nonpop_reg_4,
	%],
;

;;; Is_address_reg:
;;;		all registers are address registers

define Is_address_reg() with_nargs 1;
	Is_register();
enddefine;

;;; tmp_reg:
;;;     allocates a new scratch register. We never bother to check for
;;;     registers in use, because there are so many available.

lconstant TMP_REGS = [% _R_T1, _R_T2, _R_T3, _R_T4, _R_2, _R_RET0, _R_RET1 %];

lvars _tmp_regs = [];

define lconstant tmp_reg();
	if _tmp_regs == [] then
		TMP_REGS -> _tmp_regs;
	endif;
	fast_destpair(_tmp_regs) -> _tmp_regs;
enddefine;


/*
 *	Instruction Formats
 */

define:inline lconstant OPCODE(x, y);
	(_int((x) << (y)))
enddefine;

lconstant

	;;; Major opcodes

	_ARITH		= OPCODE(16:02, 26),
	_LDIL		= OPCODE(16:08, 26),
	_ADDIL		= OPCODE(16:0A, 26),
	_LDO		= OPCODE(16:0D, 26),
	_LDB		= OPCODE(16:10, 26),
	_LDH		= OPCODE(16:11, 26),
	_LDW		= OPCODE(16:12, 26),
	_LDWM		= OPCODE(16:13, 26),
	_STB		= OPCODE(16:18, 26),
	_STH		= OPCODE(16:19, 26),
	_STW		= OPCODE(16:1A, 26),
	_STWM		= OPCODE(16:1B, 26),
	_COMBT		= OPCODE(16:20, 26),
	_COMIBT		= OPCODE(16:21, 26),
	_COMBF		= OPCODE(16:22, 26),
	_COMIBF		= OPCODE(16:23, 26),
	_SUBI		= OPCODE(16:25, 26),
	_ADDI		= OPCODE(16:2D, 26),
	_EXTRACT	= OPCODE(16:34, 26),
	_DEPOSIT	= OPCODE(16:35, 26),
	_BE			= OPCODE(16:38, 26),
	_BLE		= OPCODE(16:39, 26),
	_BRANCH		= OPCODE(16:3A, 26),

	;;; System opcodes

	_LDSID		= OPCODE(16:85,  5),
	_MTSP		= OPCODE(16:C1,  5),

	;;; Arith/Log opcodes

	_OR			= OPCODE(16:12,  5),
	_SUB		= OPCODE(16:20,  5),
	_ADD		= OPCODE(16:30,  5),
	_SH1ADD		= OPCODE(16:32,  5),
	_SH2ADD	    = OPCODE(16:34,  5),
	_SH3ADD		= OPCODE(16:36,  5),

	;;; Extract/Deposit opcodes

	_EXTRU		= OPCODE(16:06, 10),
	_EXTRS		= OPCODE(16:07, 10),
	_ZDEP		= OPCODE(16:02, 10),
	_DEP		= OPCODE(16:03, 10),

	;;; Branch opcodes

	_BL			= OPCODE(16:00, 13),
	_BLR		= OPCODE(16:02, 13),
	_BV			= OPCODE(16:06, 13),

	;;; Bit masks for conditional branches

	_COMB_I		= OPCODE(16:01, 26),	;;; Imm/Reg
	_COMB_F		= OPCODE(16:02, 26),	;;; F/T

	;;; Compare conditions

	_cond_EQ	= _1,
	_cond_LT	= _2,
	_cond_LE	= _3,
	_cond_ULT	= _4,
	_cond_ULE	= _5,
	_cond_SV	= _6,
	_cond_OD	= _7,

;

	;;; LDIL, ADDIL
define lconstant LongImm(_instr, _i, _r) -> _instr;
	lvars _instr, _i, _r;
	_shift(_r, _21) _biset _instr -> _instr;
	;;; nasty encoding for 21-bit immediate
	_shift(_i _bimask _16:000003,  _12) _biset _instr -> _instr;
	_shift(_i _bimask _16:00007C,  _14) _biset _instr -> _instr;
	_shift(_i _bimask _16:000180,   _7) _biset _instr -> _instr;
	_shift(_i _bimask _16:0FFE00,  _-8) _biset _instr -> _instr;
	if _neg(_i) then _instr _biset _1 -> _instr endif;
enddefine;

	;;; LDO, LDB, LDH, LDW, LDWM
define lconstant Load(_instr, _d, _b, _t) -> _instr;
	lvars _instr, _d, _b, _t;
	_shift(_b, _21) _biset _instr -> _instr;
	_shift(_t, _16) _biset _instr -> _instr;
	_shift(_d _bimask _16:1FFF, _1) _biset _instr -> _instr;
	if _neg(_d) then _instr _biset _1 -> _instr endif;
enddefine;

	;;; Load(_LDWM, _4, _R_USP, _t)
define lconstant Pop(_t) -> _instr;
	lvars _t, _instr = _16:4E400008;
	_shift(_t, _16) _biset _instr -> _instr;
enddefine;

	;;; STB, STH, STW, STWM
define lconstant Store(_instr, _r, _d, _b) -> _instr;
	lvars _instr, _r, _d, _b;
	_shift(_b, _21) _biset _instr -> _instr;
	_shift(_r, _16) _biset _instr -> _instr;
	_shift(_d _bimask _16:1FFF, _1) _biset _instr -> _instr;
	if _neg(_d) then _instr _biset _1 -> _instr endif;
enddefine;

	;;; Store(_STWM, _r, _-4, _R_USP)
define lconstant Push(_r) -> _instr;
	lvars _r, _instr = _16:6E403FF9;
	_shift(_r, _16) _biset _instr -> _instr;
enddefine;

	;;; ADDI, SUBI
define lconstant ArithImm(_instr, _i, _r, _t) -> _instr;
	lvars _instr, _i, _r, _t;
	_shift(_r, _21) _biset _instr -> _instr;
	_shift(_t, _16) _biset _instr -> _instr;
	_shift(_i _bimask _16:3FF, _1) _biset _instr -> _instr;
	if _neg(_i) then _instr _biset _1 -> _instr endif;
enddefine;

	;;; OR, SUB, ADD, SH?ADD
define lconstant ArithLog(_instr, _r1, _r2, _t) -> _instr;
	lvars _instr, _r1, _r2, _t;
	_shift(_r2, _21) _biset _instr -> _instr;
	_shift(_r1, _16) _biset _instr -> _instr;
	_t _biset _instr -> _instr;
	_ARITH _biset _instr -> _instr;
enddefine;

	;;; ArithLog(_OR, _r, _0, _t)
define lconstant Copy(_r, _t) -> _instr;
	lvars _r, _t, _instr = _16:08000240;
	_shift(_r, _16) _biset _instr -> _instr;
	_t _biset _instr -> _instr;
enddefine;

lconstant
	;;; Copy(_0, _0)
	Nop = _16:08000240,
;
	;;; EXTRU, EXTRS
define lconstant Extract(_instr, _r, _p, _len, _t) -> _instr;
	lvars _instr, _r, _p, _len, _t;
	_shift(_r, _21) _biset _instr -> _instr;
	_shift(_t, _16) _biset _instr -> _instr;
	_shift(_p,  _5) _biset _instr -> _instr;
	(_32 _sub _len) _biset _instr -> _instr;
	_EXTRACT _biset _instr -> _instr;
enddefine;

	;;; ZDEP
define lconstant Deposit(_instr, _r, _p, _len, _t) -> _instr;
	lvars _instr, _r, _p, _len, _t;
	_shift(_t, _21) _biset _instr -> _instr;
	_shift(_r, _16) _biset _instr -> _instr;
	_shift(_31 _sub _p,  _5) _biset _instr -> _instr;
	(_32 _sub _len) _biset _instr -> _instr;
	_DEPOSIT _biset _instr -> _instr;
enddefine;

	;;; BL
define lconstant Branch(_instr, n, _wd, _t) -> _instr;
	lvars n, _instr, _wd, _t, _instr;
	_shift(_t, _21) _biset _instr -> _instr;
	;;; displacement (_wd) is given in bytes: conversion to words is
	;;; hidden in the encoding (the bottom two bits are simply ignored).
	;;; The -8 accounts for the 8 bytes added when the instruction is
	;;; executed.
	_wd _sub _8 -> _wd;
	_shift(_wd _bimask _16:00FFC,   _1) _biset _instr -> _instr;
	_shift(_wd _bimask _16:01000, _-10) _biset _instr -> _instr;
	_shift(_wd _bimask _16:3E000,   _3) _biset _instr -> _instr;
	if _neg(_wd) then _instr _biset _1 -> _instr endif;
	;;; nullify bit
	if n then _instr _biset _2 -> _instr endif;
	_BRANCH _biset _instr -> _instr;
enddefine;

	;;; BLR, BV
define lconstant RegBranch(_instr, n, _x, _r) -> _instr;
	lvars n, _instr, _x, _r, _instr;
	_shift(_r, _21) _biset _instr -> _instr;
	_shift(_x, _16) _biset _instr -> _instr;
	;;; nullify bit
	if n then _instr _biset _2 -> _instr endif;
	_BRANCH _biset _instr -> _instr;
enddefine;

	;;; COMBT, COMIBT, COMBF, COMIBF
define lconstant CondBranch(_instr, _c, n, _r1, _r2, _wd) -> _instr;
	lvars n, _instr, _c, _r1, _r2, _wd;
	_shift(_r2, _21) _biset _instr -> _instr;
	;;; _r1 may be register or immediate depending on opcode
	if _instr _bitst _COMB_I then
		_shift(_r1 _bimask _16:F, _17) _biset _instr -> _instr;
		if _neg(_r1) then _instr _biset _16:10000 -> _instr endif;
	else
		_shift(_r1, _16) _biset _instr -> _instr;
	endif;
	_shift(_c,  _13) _biset _instr -> _instr;
	;;; _wd is in bytes (as above)
	_wd _sub _8 -> _wd;
	_shift(_wd _bimask _16:0FFC,   _1) _biset _instr -> _instr;
	_shift(_wd _bimask _16:1000, _-10) _biset _instr -> _instr;
	if _neg(_wd) then _instr _biset _1 -> _instr endif;
	;;; nullify bit
	if n then _instr _biset _2 -> _instr endif;
enddefine;

	;;; BE, BLE
define lconstant ExternalBranch(_instr, n, _wd, _s, _b) -> _instr;
	lvars n, _instr, _wd, _s, _b;
	_shift(_b, _21) _biset _instr -> _instr;
	;;; encode 3-bit space register
	_shift(_s _bimask _16:3, _14) _biset _instr -> _instr;
	if _s _sgreq _4 then _instr _biset _16:2000 -> _instr endif;
	;;; same encoding for displacement as for local Branch
	_shift(_wd _bimask _16:00FFC,   _1) _biset _instr -> _instr;
	_shift(_wd _bimask _16:01000, _-10) _biset _instr -> _instr;
	_shift(_wd _bimask _16:3E000,   _3) _biset _instr -> _instr;
	if _neg(_wd) then _instr _biset _1 -> _instr endif;
	;;; nullify bit
	if n then _instr _biset _2 -> _instr endif;
enddefine;

	;;; LDSID
define lconstant Ldsid(_r, _t) -> _instr;
	lvars _r, _t, _instr = _LDSID;
	_shift(_r, _21) _biset _instr -> _instr;
	_t _biset _instr -> _instr;
enddefine;

	;;; MTSP
define lconstant Mtsp(_r, _s) -> _instr;
	lvars _r, _s, _instr = _MTSP;
	_shift(_r, _16) _biset _instr -> _instr;
	;;; encode 3-bit space register
	_shift(_s _bimask _16:3, _14) _biset _instr -> _instr;
	if _s _sgreq _4 then _instr _biset _16:2000 -> _instr endif;
enddefine;


/*
 *	Operand Types
 */

	;;; General operand (intvec)
struct ASM_OPERAND {
	int		OPND_ACCESS;
	full	OPND_ARGS[];
};

lconstant

	;;; Access modes for operands (OPND_ACCESS)

	_ACC_REGISTER	 = _1,
	_ACC_IMMEDIATE	 = _2,
	_ACC_BASED		 = _3,
	_ACC_BASED_IDVAL = _4,
	_ACC_ABS_IDVAL	 = _5,
	_ACC_PUSH		 = _6,
	_ACC_POP		 = _7,

	;;; Special operands

	-_USP = consintvec(#| _pint(_ACC_PUSH) |#),
	USP_+ = consintvec(#| _pint(_ACC_POP) |#),
	i_USP = consintvec(#| _pint(_ACC_BASED), _pint(_R_USP), 0 |#),

	;;; Reusable operands

	SRC_1 = writeable initintvec(3),
	SRC_2 = writeable initintvec(3),
	DST   = writeable initintvec(3),

	;;; Flag for operand access

	DEFER  = true,
	DIRECT = false,

;

;;; IS_IMM_N:
;;;		<true> if argument fits in N bits

define:inline lconstant IS_IMM_5(i);
	(_int(-16:10) _slteq (i) and (i) _slt _int(16:10))
enddefine;

define:inline lconstant IS_IMM_11(i);
	(_int(-16:400) _slteq (i) and (i) _slt _int(16:400))
enddefine;

define:inline lconstant IS_IMM_14(i);
	(_int(-16:2000) _slteq (i) and (i) _slt _int(16:2000))
enddefine;

;;; L_PART:
;;;		returns the high 21 bits of a value (L'i)

define:inline lconstant L_PART(i);
	_shift(i, _int(-11))
enddefine;

;;; R_PART:
;;;		returns the low 11 bits of a value (R'i)

define:inline lconstant R_PART(i);
	((i) _bimask _int(16:7FF))
enddefine;

;;; SREG:
;;;     returns the space register associated with an address (code or
;;;     data) determined by the 2 high order bits

define:inline lconstant SREG(addr);
	(if (addr) _bitst _int(16:40000000) then _R_SR5 else _R_SR4 endif)
enddefine;


/*
 *	Code Planting
 */

lvars
	_arg_reg_3 = false,
		;;; last value loaded to %arg3 (used for caching the left-hand
		;;; parts of long addresses)
	_last_instr = false,
		;;; last instruction planted
;

;;; END_BLOCK:
;;;		called at the end of each basic block (i.e. after a label or
;;;		jump).

define:inline lconstant END_BLOCK();
	;;; contents of this register are no longer reliable
	false -> _arg_reg_3;
	;;; last instruction planted may not be the last one executed
	false -> _last_instr;
enddefine;

;;; Drop:
;;;		plant a word in the instruction stream.

define lconstant Drop(_word);
	lvars _word;
	unless _asm_pass then
		;;; only output data on the last pass
		_word -> _asm_drop_ptr!(w)++ -> _asm_drop_ptr;
	endunless;
	_word -> _last_instr;
	@@(w){_asm_code_offset}++ -> _asm_code_offset;
enddefine;

;;; unDrop:
;;;		undo the last drop

define unDrop() -> _word;
	lvars _word = _last_instr;
	if _word then
		unless _asm_pass then
			_asm_drop_ptr--@(w) -> _asm_drop_ptr;
		endunless;
		false -> _last_instr;
		--@@(w){_asm_code_offset} -> _asm_code_offset;
	endif;
enddefine;

constant procedure Drop_l = Drop;		;;; for external references

;;; based_addr:
;;;     for an arbitrary input value, returns a base register and 14-bit
;;;     displacement, such that adding the two together will recreate
;;;     the value. The register %arg3 is used to remember the last long
;;;     address computed ("genproc.p" uses all four argument registers
;;;     for this, but at run-time the VM has access to %arg[0-2] so we
;;;     can't depend on their values).

define lconstant based_addr(_val);
	lvars _val, _disp;

	define lconstant close_to(_addr, _val);
		lvars _addr, _val;
		_val _sub _addr -> _val;
		if IS_IMM_14(_val) then _val else false endif;
	enddefine;

	;;; quick check against known register values
	if _val == _0 then
		(_0, _R_0);
	elseif _val == 0 then
		(_0, _R_PZERO);
	elseif _val == false then
		(_0, _R_FALSE);

	;;; then for closeness
	elseif IS_IMM_14(_val) then
		(_val, _R_0);
	elseif close_to(false, _val) ->> _disp then
		(_disp, _R_FALSE);
	elseif close_to(_special_var_block, _val) ->> _disp then
		(_disp, _R_SVB);

	;;; use %arg3 to hold upper 21 bits of the value
	else
		R_PART(_val) -> _disp;
		L_PART(_val) -> _val;
		unless _arg_reg_3 and _val == _arg_reg_3 then
			_val -> _arg_reg_3;
			Drop(LongImm(_LDIL, _val, _R_ARG3));
		endunless;
		(_disp, _R_ARG3);
	endif;
enddefine;

;;; Drop_load, Drop_store:
;;;     plant a general LOAD/STORE instruction; needs two instructions
;;;     if the displacement is > 14 bits.

define lconstant Drop_load(_op, _disp, _src, _dst);
	lvars _op, _disp, _src, _dst;
	if IS_IMM_14(_disp) then
		Drop(Load(_op, _disp, _src, _dst));
	else
		Drop(LongImm(_ADDIL, L_PART(_disp), _src));
		Drop(Load(_op, R_PART(_disp), _R_1, _dst));
	endif;
enddefine;

define lconstant Drop_store(_op, _src, _disp, _dst);
	lvars _op, _src, _disp, _dst;
	if IS_IMM_14(_disp) then
		Drop(Store(_op, _src, _disp, _dst));
	else
		Drop(LongImm(_ADDIL, L_PART(_disp), _dst));
		Drop(Store(_op, _src, R_PART(_disp), _R_1));
	endif;
enddefine;

;;; Drop_branch:
;;;     plant an unconditional branch: _target is an offset relative to
;;;     the procedure start, and is assumed to be within range of a
;;;     single branch instruction. _instr is an instruction to plant in
;;;     the delay slot, or <false> meaning nullify.

define lconstant Drop_branch(_target, _instr);
	lvars _target, _instr;
	_target _sub _asm_code_offset -> _target;
	if _instr then
		Drop(Branch(_BL, false, _target, _0));
		Drop(_instr);
	else
		Drop(Branch(_BL, true, _target, _0));
	endif;
	;;; branch ends a basic block
	END_BLOCK();
enddefine;

;;; Drop_long_branch:
;;;     plant an unconditional branch to an arbitrary target using two
;;;     instructions. Relies on the assumption that code is being
;;;     planted in the space addressed by %sr5 (data). _instr is an
;;;     instruction to plant in the delay slot, or <false> meaning
;;;     nullify.

define lconstant Drop_long_branch(_target, _instr);
	lvars _target, _instr;
	@@PD_TABLE{_strsize _add _target} -> _target;
	Drop(LongImm(_ADDIL, L_PART(_target), _R_PB));
	if _instr then
		Drop(ExternalBranch(_BE, false, R_PART(_target), _R_SR5, _R_1));
		Drop(_instr);
	else
		Drop(ExternalBranch(_BE, true, R_PART(_target), _R_SR5, _R_1));
	endif;
	;;; branch ends a basic block
	END_BLOCK();
enddefine;

;;; Drop_call:
;;;     plant a branch to a fixed address routine. _instr is an
;;;     instruction to plant in the delay slot, or <false> meaning
;;;     nullify.

define lconstant Drop_call(_op, _addr, _instr);
	lvars _op, _addr, _instr, (_disp, _base) = based_addr(_addr);
	if _instr then
		Drop(ExternalBranch(_op, false, _disp, SREG(_addr), _base));
		Drop(_instr);
	else
		Drop(ExternalBranch(_op, true, _disp, SREG(_addr), _base));
	endif;
	;;; branch ends a basic block
	END_BLOCK();
enddefine;


;;; -- MOVE Instructions --------------------------------------------------

;;; Get_opnd:
;;;		encodes an argument of a MOVE -type instruction.

define lconstant Get_opnd(_i, defer, _opnd) -> _opnd;
	lvars arg, defer, _i, _opnd;
	asm_instr!INST_ARGS[_i] -> arg;
	;;; replace structure with offset or reg ident on first pass
	Trans_structure(arg) ->> arg -> asm_instr!INST_ARGS[_i];
	if issimple(arg) then
		;;; offset
		_int(arg) -> arg;
		if _neg(arg) then
			;;; stack lvar: -arg- is the offset ________computed ___for _a
			;;; ________________downward-growing _____stack, negated and shifted left 1; the
			;;; bottom bit indicates whether access is via an ident,
			;;; requiring a double indirection if -defer- is <true>
			unless arg _bitst _1 then false -> defer endunless;
			;;; set operand arguments for based address
			_R_SP -> _opnd!OPND_ARGS[_0];
			;;; HP stack grows up, so leave the offset as negative, but
			;;; subtract 4 to account for the stack pointer pointing one
			;;; word beyond the frame
			_shift(arg, _-1) _sub _4 -> _opnd!OPND_ARGS[_1];
		else
			;;; literal table entry:
			;;; -arg- is its offset
			_R_PB -> _opnd!OPND_ARGS[_0];
			@@PD_TABLE{arg} -> _opnd!OPND_ARGS[_1];
		endif;
		;;; access mode is based address, with extra indirection if deferred
		if defer then
			_ACC_BASED_IDVAL -> _opnd!OPND_ACCESS;
		else
			_ACC_BASED -> _opnd!OPND_ACCESS;
		endif;
	elseif Is_register(arg) ->> _i then
		unless defer then
			mishap(0, 'SYSTEM ERROR IN Get_opnd (register used as immediate operand)');
		endunless;
		_ACC_REGISTER -> _opnd!OPND_ACCESS;
		_int(_i) -> _opnd!OPND_ARGS[_0];
	elseif defer then
		;;; idval of absolute address
		_ACC_ABS_IDVAL -> _opnd!OPND_ACCESS;
		arg -> _opnd!OPND_ARGS[_0];
	else
		;;; immediate
		_ACC_IMMEDIATE -> _opnd!OPND_ACCESS;
		arg -> _opnd!OPND_ARGS[_0];
	endif;
enddefine;

;;; Get_imm:
;;;		encodes an immediate operand

define lconstant Get_imm(_i, _opnd) -> _opnd;
	lvars _i, _opnd;
	_ACC_IMMEDIATE -> _opnd!OPND_ACCESS;
	asm_instr!INST_ARGS[_i] -> _opnd!OPND_ARGS[_0];
enddefine;

;;; Get_dst:
;;;     encodes the destination of a MOVE-type instruction (i.e.
;;;     argument 2 if present, or a stack push if not).

define lconstant Get_dst();
	if asm_instr!V_LENGTH == _2 then
		;;; destination is a stack push
		-_USP;
	else
		Get_opnd(_1, DEFER, DST);
	endif;
enddefine;

;;; Do_load:
;;;     load a general operand to a register: if _reg is given, use it;
;;;     otherwise any register will do.

define lconstant Do_load(_src, _reg) -> _reg;
	lvars _src, _reg, _disp;
	go_on _pint(_src!OPND_ACCESS) to
		REGISTER IMMEDIATE BASED BASED_IDVAL ABS_IDVAL PUSH POP
	else
		ERROR
	;

	REGISTER:
		_src!OPND_ARGS[_0] -> _src;
		unless _reg then
			_src -> _reg;
		elseunless _src == _reg then
			Drop(Copy(_src, _reg));
		endunless;
		return;

	IMMEDIATE:
		based_addr(_src!OPND_ARGS[_0]) -> (_disp, _src);
		unless _disp == _0 and _src /== _R_ARG3 then
			unless _reg then tmp_reg() -> _reg endunless;
			Drop(Load(_LDO, _disp, _src, _reg));
		elseunless _reg then
			_src -> _reg;
		elseunless _src == _reg then
			Drop(Copy(_src, _reg));
		endunless;
		return;

	BASED:
		unless _reg then tmp_reg() -> _reg endunless;
		Drop_load(_LDW, _src!OPND_ARGS[_1], _src!OPND_ARGS[_0], _reg);
		return;

	BASED_IDVAL:
		;;; as BASED, but with extra indirection through ID_VALOF
		unless _reg then tmp_reg() -> _reg endunless;
		Drop_load(_LDW, _src!OPND_ARGS[_1], _src!OPND_ARGS[_0], _reg);
		Drop(Load(_LDW, @@ID_VALOF, _reg, _reg));
		return;

	ABS_IDVAL:
		unless _reg then tmp_reg() -> _reg endunless;
		Drop(Load(_LDW, based_addr(_src!OPND_ARGS[_0]@ID_VALOF), _reg));
		return;

	POP:
		unless _reg then tmp_reg() -> _reg endunless;
		Drop(Pop(_reg));
		return;

	PUSH:
	ERROR:
		mishap(_pint(_src!OPND_ACCESS), 1,
			'SYSTEM ERROR IN Do_load (illegal operand access mode)');
enddefine;

;;; Do_store:
;;;		update a general destination operand from a register.

define lconstant Do_store(_reg, _dst);
	lvars _reg, _dst, _disp;
	go_on _pint(_dst!OPND_ACCESS) to
		REGISTER IMMEDIATE BASED BASED_IDVAL ABS_IDVAL PUSH POP
	else
		ERROR
	;

	REGISTER:
		_dst!OPND_ARGS[_0] -> _dst;
		unless _reg == _dst then
			Drop(Copy(_reg, _dst));
		endunless;
		return;

	BASED:
		Drop_store(_STW, _reg, _dst!OPND_ARGS[_1], _dst!OPND_ARGS[_0]);
		return;

	BASED_IDVAL:
		Drop_load(_LDW, _dst!OPND_ARGS[_1], _dst!OPND_ARGS[_0],
			tmp_reg() ->> _dst);
		Drop(Store(_STW, _reg, @@ID_VALOF, _dst));
		return;

	ABS_IDVAL:
		Drop(Store(_STW, _reg, based_addr(_dst!OPND_ARGS[_0]@ID_VALOF)));
		return;

	PUSH:
		Drop(Push(_reg));
		return;

	IMMEDIATE:
	POP:
	ERROR:
		mishap(_pint(_dst!OPND_ACCESS), 1,
			'SYSTEM ERROR IN Do_store (illegal operand access mode)');
enddefine;

;;; Do_move:
;;;		general move operation

define lconstant Do_move(_src, _dst);
	lvars _src, _dst;
	if _dst!OPND_ACCESS == _ACC_REGISTER then
		Do_load(_src, _dst!OPND_ARGS[_0]) -> ;
	else
		Do_store(Do_load(_src, false), _dst);
	endif;
enddefine;

;;; I_POP dst:
;;;		pop from stack to identifier -dst-

define I_POP();
	Do_move(USP_+, Get_opnd(_0, DEFER, DST));
enddefine;

;;; I_POPQ dst:
;;;		pop identifier from stack into call-stack location

define I_POPQ();
	Do_move(USP_+, Get_opnd(_0, DIRECT, DST));
enddefine;

;;; I_STORE dst:
;;;		copy top of stack to identifier -dst-

define I_STORE();
	Do_move(i_USP, Get_opnd(_0, DEFER, DST));
enddefine;

;;; I_MOVE src [dst]:
;;;		move from identifier -src- to -dst-

define I_MOVE();
	Do_move(Get_opnd(_0, DEFER, SRC_1), Get_dst());
enddefine;

;;; I_MOVEQ src [dst]:
;;;		move literal -src- to -dst-

define I_MOVEQ();
	Do_move(Get_opnd(_0, DIRECT, SRC_1), Get_dst());
enddefine;

;;; I_MOVES:
;;;		copy top of stack

define I_MOVES =
	Do_move(% i_USP, -_USP %);
enddefine;

;;; I_MOVENUM n [dst]:
;;;		copy number -n- to -dst-

define I_MOVENUM();
	Do_move(Get_imm(_0, SRC_1), Get_dst());
enddefine;

;;; I_MOVEADDR addr [dst]:
;;;		copy literal -address- to -dst- (same as I_MOVENUM)

constant procedure (
	I_MOVEADDR = I_MOVENUM,
);

;;; I_MOVE_CALLER_RETURN src dst move_p:
;;;     move to/from return-address field in caller's stack frame using
;;;     -move_p- (I_MOVE or I_MOVEADDR)

define I_MOVE_CALLER_RETURN();
	fast_chain(asm_instr!INST_ARGS[_2]);
enddefine;

;;; I_PUSH_UINT n:
;;;		push -n- as (unsigned) system integer

define I_PUSH_UINT();
	lvars (_disp, _reg) = based_addr(Pint_->_uint(asm_instr!INST_ARGS[_0], _-1));
	unless _zero(_disp) then
		Drop(Load(_LDO, _disp, _reg, tmp_reg() ->> _reg));
	endunless;
	Drop(Push(_reg));
enddefine;

;;; I_ERASE:
;;;		erase an item from the stack

define I_ERASE();
	Drop(Pop(_R_0));
enddefine;

;;; I_SWAP i j:
;;;		swap i'th and j'th items on the stack

define I_SWAP();
	lvars _i, _j, _reg_1, _reg_2;
	@@(w)[_int(asm_instr!INST_ARGS[_0])] -> _i;
	@@(w)[_int(asm_instr!INST_ARGS[_1])] -> _j;
	Drop_load(_LDW, _i, _R_USP, tmp_reg() ->> _reg_1);
	Drop_load(_LDW, _j, _R_USP, tmp_reg() ->> _reg_2);
	Drop_store(_STW, _reg_1, _j, _R_USP);
	Drop_store(_STW, _reg_2, _i, _R_USP);
enddefine;


;;; -- Field Access Instructions ------------------------------------------

;;; field_push_op, field_pop_op:
;;;		opcodes for pushing and popping standard-sized fields

define lconstant field_push_op =
	list_assoc_val(%[%
		t_BYTE,				_LDB,
		t_SHORT,			_LDH,
		t_WORD,				_LDW,
		t_BYTE ||t_SIGNED,	_LDB,
		t_SHORT||t_SIGNED,	_LDH,
		t_WORD ||t_SIGNED,	_LDW,
	%]%);
enddefine;

define lconstant field_pop_op =
	list_assoc_val(%[%
		t_BYTE,				_STB,
		t_SHORT,			_STH,
		t_WORD,				_STW,
		t_BYTE ||t_SIGNED,	_STB,
		t_SHORT||t_SIGNED,	_STH,
		t_WORD ||t_SIGNED,	_STW,
	%]%);
enddefine;

;;; field_shamt:
;;;		amount of shift needed to convert subscript to offset

define lconstant field_shamt =
	list_assoc_val(%[%
		t_BYTE,				-2,
		t_SHORT,			-1,
		t_WORD,				 0,
		t_DOUBLE,			 1,
	%]%);
enddefine;

;;; field_offs:
;;;		adjustment to be added to shifted subscript to account for base 1
;;;		indexing and residual popint bits

define lconstant field_offs =
	list_assoc_val(%[%
		t_BYTE,				-1,
		t_SHORT,			-3,
		t_WORD,				-7,
		t_DOUBLE,		   -14,
	%]%);
enddefine;

;;; Do_mult:
;;;     multiply a source register by an immediate value (_size > 0) and
;;;     place the result in _dst (/== _src). Uses the same algorithm as
;;;     "genproc.p" to expand the multiplication into a series of shifts,
;;;     adds and subtracts.

define lconstant Do_mult(_src, _size, _dst) -> _reg;
	lvars _src, _size, _dst, _reg;

	define lconstant mult(_i, _reg, _src, _dst) -> _reg;
		lvars subtract, _i, _reg, _src, _dst, _n = _0;
		returnif(_i == _1);
		if _i _bimask _2:111 == _2:111 then
			_i _add _1 -> _i;
			true -> subtract;
		else
			_i _sub _1 -> _i;
			false -> subtract;
		endif;
		repeat
			_shift(_i, _-1) -> _i;
			_n _add _1 -> _n;
			quitif(_i _bitst _1);
		endrepeat;
		mult(_i, _reg, _src, _dst) -> _reg;
		if subtract then
			Drop(Deposit(_ZDEP, _reg, _31 _sub _n, _32 _sub _n, _dst));
			Drop(ArithLog(_SUB, _dst, _src, _dst));
		elseif _n == _1 then
			Drop(ArithLog(_SH1ADD, _reg, _src, _dst));
		elseif _n == _2 then
			Drop(ArithLog(_SH2ADD, _reg, _src, _dst));
		elseif _n == _3 then
			Drop(ArithLog(_SH3ADD, _reg, _src, _dst));
		else
			Drop(Deposit(_ZDEP, _reg, _31 _sub _n, _32 _sub _n, _dst));
			Drop(ArithLog(_ADD, _dst, _src, _dst));
		endif;
		_dst -> _reg;
	enddefine;

	if _size _bimask _2:111 == _2:111 then
		mult(_shift(_size, _-1), _src, _src, _dst) -> _reg;
		Drop(ArithLog(_SH1ADD, _reg, _src, _dst ->> _reg));
	else
		lvars _n = _0;
		until _size _bitst _1 do
			_shift(_size, _-1) -> _size;
			_n _add _1 -> _n;
		enduntil;
		mult(_size, _src, _src, _dst) -> _reg;
		unless _zero(_n) then
			Drop(Deposit(_ZDEP, _reg, _31 _sub _n, _32 _sub _n, _dst ->> _reg));
		endunless;
	endif;
enddefine;

;;;	Do_exptr_deref:
;;;		deref an external pointer -n- times

define lconstant Do_exptr_deref(n, _src, _dst) -> _src;
	lvars n, _src, _dst;
	fast_repeat n times
		Drop(Load(_LDW, _0, _src, _dst));
		_dst -> _src;
	endrepeat;
enddefine;

;;; Do_bitfield:
;;;     access/update bitfield from I_PUSH_FIELD/I_POP_FIELD. Simple
;;;     cases are coded in-line, otherwise uses the bitfield routines
;;;     from "amove.s":
;;;			_bfield(_base, _offs, _size) -> _n		/* unsigned */
;;;			_sbfield(_base, _offs, _size) -> _n		/* signed */
;;;			_ubfield(_n, _base, _offs, _size)		/* update */

define lconstant Do_bitfield(type, _size, base, offs, exptr, upd);
	lvars type, base, offs, exptr, upd, _size, _breg, _reg, _p, _n;
	;;; Get structure base address in %arg[01]
	if upd then _R_ARG1 else _R_ARG0 endif -> _breg;
	if not(base) then
		;;; structure is on top of stack
		Drop(Pop(_breg));
	elseif Is_register(Trans_structure(base)) ->> _reg then
		;;; structure is already in a register lvar
		Drop(Copy(_int(_reg), _breg));
	else
		mishap(0, 'SYSTEM ERROR IN Do_bitfield (bad structure)');
	endif;
	;;; if it's an external structure, get the real address
	if exptr then
		Do_exptr_deref(exptr, _breg, _breg) -> ;
	endif;
	;;; interpret offset
	_int(_size) -> _size;
	if isinteger(offs) then
		_int(offs) -> _n;
		(_n _add _size _sub _1) _bimask _16:1F -> _p;
		if _p _add _1 _sgreq _size then
			;;; field lies within a single, known word -- plant inline code
			_shift(_n, _-3) _biclear _3 -> _n;
			Drop_load(_LDW, _n, _breg, _R_ARG2);
			if upd then
				Drop(Pop(_R_ARG0));
				Drop(Deposit(_DEP, _R_ARG0, _p, _size, _R_ARG2));
				Drop_store(_STW, _R_ARG2, _n, _breg);
			else
				Drop(Extract(
					if type == t_BIT then _EXTRU else _EXTRS endif,
					_R_ARG2, _p, _size, _R_RET0));
			endif;
			;;; OK
			return;
		endif;
		;;; need a subroutine call -- load offset to %arg[12]
		Drop(Load(_LDO, based_addr(_n), _breg _sub _1));
	else
		if not(offs) then
			;;; vector index on stack
			Drop(Pop(_breg _sub _1 ->> _reg));
		elseif Is_register(Trans_structure(offs)) ->> _reg then
			;;; vector index in register
			_int(_reg) -> _reg;
		else
			mishap(0, 'SYSTEM ERROR IN Do_bitfield (bad offset)');
		endif;
		;;; convert to bit offset in %arg[12]
		Drop(Extract(_EXTRS, _reg, _29, _30, _R_RET0 ->> _reg));
		Do_mult(_reg, _size, _breg _sub _1) -> _reg;
		Drop(ArithImm(_ADDI, @@V_BYTES _sub _size, _reg, _breg _sub _1));
	endif;
	;;; postpone load of _size argument to the delay slot
	Load(_LDO, _size, _R_0, _breg _sub _2) -> _size;
	;;; call subroutine
	if upd then
		Drop(Pop(_R_ARG0));
		Drop_call(_BLE, _ubfield, _size);
	elseif type == t_BIT then
		Drop_call(_BLE, _bfield, _size);
	else
		Drop_call(_BLE, _sbfield, _size);
	endif;
enddefine;

;;; Do_field:
;;;		set registers for non-bitfield access/update (from I_PUSH_FIELD/
;;;		I_POP_FIELD/I_PUSH_FIELD_ADDR). Temporary registers:
;;;			%arg0	new value (when updating)
;;;			%arg1	structure base address
;;;			%arg2	structure offset (when subscripting)
;;;			%ret0	scratch (for address computation); result from access

define lconstant Do_field(type, size, base, offs, exptr, upd);
	lvars type, size, base, offs, exptr, upd, _breg, _ireg, _n;
	;;; Get structure base address in a register
	if not(base) then
		;;; Structure is on top of stack
		Drop(Pop(_R_ARG1 ->> _breg));
	elseif Is_register(Trans_structure(base)) ->> _breg then
		;;; Structure is already in a register lvar
		_int(_breg) -> _breg;
	else
		mishap(0, 'SYSTEM ERROR IN Do_field (bad structure)');
	endif;
	;;; If it's an external structure, get the real address
	if exptr then
		Do_exptr_deref(exptr, _breg, _R_ARG1) -> _breg;
	endif;
	;;; Interpret offset:
	if isinteger(offs) then
		;;; Constant offset for record-type structure:
		;;; convert bit offset to bytes
		##(b){_int(offs)|1} -> _n;
		;;; index reg not needed
		false -> _ireg;
	elseif not(offs) then
		;;; Vector index on stack:
		Drop(Pop(_R_ARG2 ->> _ireg));
	elseif Is_register(Trans_structure(offs)) ->> _ireg then
		;;; Vector index in register lvar:
		_int(_ireg) -> _ireg;
	else
		mishap(0, 'SYSTEM ERROR IN Do_field (bad offset)');
	endif;
	;;; Pop new value from stack if updating
	if upd then
		Drop(Pop(_R_ARG0));
	endif;
	if _ireg then
		type fi_&& t_BASE_TYPE -> type;
		field_shamt(type) -> _n;
		;;; size is in units of the base type
		if size == 1 then
			;;; subscript can be scaled by shifting
			_int(_n) -> _n;
			if _n _slt _0 then
				;;; shift right
				Drop(Extract(_EXTRS, _ireg, _31 _add _n, _32 _add _n,
					_R_ARG2 ->> _ireg));
			elseif _n _sgr _0 then
				;;; shift left
				Drop(Deposit(_ZDEP, _ireg, _31 _sub _n, _32 _sub _n,
					_R_ARG2 ->> _ireg));
			endif;
			;;; Compute displacement to account for pointer offset,
			;;; base 1 indexing and the popint bits
			@@V_BYTES _add _int(field_offs(type)) -> _n;
		else
			;;; pushing address of compound structure: need to multiply
			_int(size fi_<< (_n fi_+ 2)) -> _n;
			Drop(Extract(_EXTRS, _ireg, _29, _30, _R_RET0 ->> _ireg));
			Do_mult(_ireg, _n, _R_ARG2) -> _ireg;
			;;; displacemment accounts for pointer offset and base 1 indexing
			@@V_BYTES _sub _n -> _n;
		endif;
		;;; Add offset to base
		Drop(ArithLog(_ADD, _ireg, _breg, _R_ARG1 ->> _breg));
	endif;
	if upd then
		;;; return arguments for Store
		(_R_ARG0, _n, _breg);
	else
		;;; return arguments for Load
		(_n, _breg, _R_RET0);
	endif;
enddefine;

;;; I_PUSH_FIELD type size base offs cvtpint exptr:
;;;		extract a field from a structure

define I_PUSH_FIELD();
	lvars (, type, size, base, offs, cvtpint, exptr) = explode(asm_instr);
	if type fi_&& t_BASE_TYPE == t_BIT then
		Do_bitfield(type, size, base, offs, exptr, false);
	else
		Drop_load(field_push_op(type),
			Do_field(type, 1, base, offs, exptr, false));
		unless type fi_&& t_SIGNED == 0 then
			;;; explicit sign-extension required
			type fi_&& t_BASE_TYPE -> type;
			if type == t_BYTE then
				;;; Drop(Extract(_EXTRS, _R_RET0, _31, _8, _R_RET0));
				Drop(_16:D39C1FF8);
			elseif type == t_SHORT then
				;;; Drop(Extract(_EXTRS, _R_RET0, _31, _16, _R_RET0));
				Drop(_16:D39C1FF0);
			endif;
		endunless;
	endif;
	;;; Result now in %ret0
	if cvtpint then
		;;; Convert to popint
		;;; Drop(ArithLog(_SH2ADD, _R_RET0, _R_PZERO, _R_RET0));
		Drop(_16:09DC069C);
	endif;
	;;; Stack the result
	;;; Drop(Push(_R_RET0));
	Drop(_16:6E5C3FF9);
enddefine;

;;; I_POP_FIELD type size base offs exptr:
;;;		update a field in a structure

define I_POP_FIELD();
	lvars (, type, size, base, offs, exptr) = explode(asm_instr);
	if type fi_&& t_BASE_TYPE == t_BIT then
		Do_bitfield(type, size, base, offs, exptr, true);
	else
		Drop_store(field_pop_op(type),
			Do_field(type, 1, base, offs, exptr, true));
	endif;
enddefine;

;;; I_PUSH_FIELD_ADDR type size base offs exptr:
;;;     push the address of a field in a structure (may be non-standard
;;;     size)

define I_PUSH_FIELD_ADDR();
	lvars (, type, size, base, offs, exptr) = explode(asm_instr);
	Drop_load(_LDO, Do_field(type, size, base, offs, exptr, false));
	;;; Drop(Push(_R_RET0));
	Drop(_16:6E5C3FF9);
enddefine;


;;; -- Fast Field Access --------------------------------------------------

;;; Get_fsrc:
;;;     get operand of a "fast_" procedure or optimised instruction. The
;;;     operand is argument _i of the current instruction. It could be a
;;;     MOVE instruction (representing a push not yet done) or <false>
;;;     if the item is already on the stack.

define lconstant Get_fsrc(_i, _opnd);
	lvars	op, _i, _opnd;
	dlocal	asm_instr;
	if asm_instr!INST_ARGS[_i] ->> asm_instr then
		;;; source specified by a MOVE instruction
		if (asm_instr!INST_OP ->> op) == I_MOVENUM or op == I_MOVEADDR then
			;;; immediate data
			Get_imm(_0, _opnd);
		else
			;;; general move:
			;;; op == I_MOVE implies DEFER, op == I_MOVEQ implies DIRECT
			Get_opnd(_0, op == I_MOVE, _opnd);
		endif;
	else
		;;; on stack
		USP_+;
	endif;
enddefine;

;;; Get_fdst:
;;;     get destination operand of a "fast_" procedure. This should be a
;;;     push on the user stack, but it can be optimised away if there's
;;;     an immediately-following POP instruction.

define lconstant Get_fdst();
	lvars	instr;
	dlocal	asm_instr;
	;;; look out for a following POP instruction
	fast_front(fast_back(asm_clist)) -> instr;
	if isvector(instr) and instr!INST_OP == I_POP then
		;;; erase the instruction
		fast_back(asm_clist) -> asm_clist;
		;;; return the destination of the POP
		instr -> asm_instr;
		Get_opnd(_0, DEFER, DST);
	else
		;;; return a stack push
		-_USP;
	endif;
enddefine;

;;;	Do_fastsubv:
;;;		compute the operand of a FASTSUBV instruction.

define lconstant Do_fastsubv(_opnd) -> _opnd;
	lvars _opnd, _breg, _ireg;
	;;; Structure in _breg
	Do_load(Get_fsrc(_1, SRC_1), false) -> _breg;
	;;; Popint subscript from stack to _ireg
	Drop(Pop(tmp_reg() ->> _ireg));
	;;; Add base to index
	Drop(ArithLog(_ADD, _breg, _ireg, _ireg));
	;;; Return operand for element access:
	_ACC_BASED -> _opnd!OPND_ACCESS;
	_ireg -> _opnd!OPND_ARGS[_0];
	;;; arg 0 is offset to first vector element (as popint) -- subtract
	;;; popint 1 to account for base 1 subscript and popint bits
	_int(asm_instr!INST_ARGS[_0]) _sub 1 ->  _opnd!OPND_ARGS[_1];
enddefine;

;;; I_FASTFIELD offs src [dst]:
;;;		move a full field from src+offs to dst

define I_FASTFIELD();
	lvars _reg, _offs, _tmp;
	Do_load(Get_fsrc(_1, SRC_1), false) -> _reg;
	tmp_reg() -> _tmp;
	if asm_instr!INST_ARGS[_0] ->> _offs then
		_int(_offs) -> _offs;
	else
		;;; <false> means -fast_destpair-
		Drop(Load(_LDW, @@P_FRONT, _reg, _tmp));
		Drop(Push(_tmp));
		@@P_BACK -> _offs;
	endif;
	Drop_load(_LDW, _offs, _reg, _tmp);
	Do_store(_tmp, Get_fdst());
enddefine;

;;; I_UFASTFIELD offs dst:
;;;		update a full field at dst+offs from the stack

define I_UFASTFIELD();
	lvars _reg, _tmp;
	Do_load(Get_fsrc(_1, SRC_1), false) -> _reg;
	Drop(Pop(tmp_reg() ->> _tmp));
	Drop_store(_STW, _tmp, _int(asm_instr!INST_ARGS[_0]), _reg);
enddefine;

;;; I_FASTSUBV src:
;;;		fast_subscrv((), src)

define I_FASTSUBV();
	Do_move(Do_fastsubv(SRC_1), Get_fdst());
enddefine;

;;; I_UFASTSUBV dst:
;;;		() -> fast_subscrv((), dst)

define I_UFASTSUBV();
	Do_move(USP_+, Do_fastsubv(DST));
enddefine;


;;; -- Fast Integer Arithmetic --------------------------------------------

;;; Do_fast_+- :
;;;		implements
;;;			_src_2 fi_OP _src_1 -> _dst
;;;		where OP is "+" if -adding- and "-" otherwise.

define lconstant Do_fast_+-(adding, _src_1, _src_2, _dst);
	lvars adding, _src_1, _src_2, _dst, _reg, _imm = false;
	if _src_1!OPND_ACCESS == _ACC_IMMEDIATE then
		_src_1!OPND_ARGS[_0] _sub _3 -> _imm;
		if adding then
			Do_load(_src_2, false) -> _src_2;
		elseif _imm /== _-16:80000000 then
			;;; safe to implement subtract-immediate as add-negation
			_negate(_imm) -> _imm;
			true -> adding;
			Do_load(_src_2, false) -> _src_2;
		else
			;;; negation would overflow
			false -> _imm;
		endif;
	elseif _src_2!OPND_ACCESS == _ACC_IMMEDIATE then
		if adding then
			_src_2!OPND_ARGS[_0] _sub _3 -> _imm;
			Do_load(_src_1, false) -> _src_2;
		else
			;;; subtract from immediate
			_src_2!OPND_ARGS[_0] -> _imm;
			if _imm _add _3 _sgr _imm then
				;;; addition doesn't overflow
				_imm _add _3 -> _imm;
				Do_load(_src_1, false) -> _src_2;
			else
				false -> _imm;
			endif;
		endif;
	endif;
	if _dst!OPND_ACCESS == _ACC_REGISTER then
		_dst!OPND_ARGS[_0] -> _reg;
	else
		tmp_reg() -> _reg;
	endif;
	if _imm then
		if IS_IMM_11(_imm) then
			Drop(ArithImm(if adding then _ADDI else _SUBI endif,
				_imm, _src_2, _reg));
		elseif adding then
			if IS_IMM_14(_imm) then
				Drop(Load(_LDO, _imm, _src_2, _reg));
			else
				Drop(LongImm(_ADDIL, L_PART(_imm), _src_2));
				Drop(Load(_LDO, R_PART(_imm), _R_1, _reg));
			endif;
		else
			Drop(Load(_LDO, based_addr(_imm), tmp_reg() ->> _src_1));
			Drop(ArithLog(_SUB, _src_1, _src_2, _reg));
		endif;
	else
		Do_load(_src_1, false) -> _src_1;
		Do_load(_src_2, false) -> _src_2;
		Drop(ArithLog(_SUB, _src_1, _R_PZERO, tmp_reg() ->> _src_1));
		Drop(ArithLog(if adding then _ADD else _SUB endif, _src_2, _src_1,
			_reg));
	endif;
	unless _dst!OPND_ACCESS == _ACC_REGISTER then
		Do_store(_reg, _dst);
	endunless;
enddefine;

;;; I_FAST_+-_2 adding src dst:
;;;		dst fi_OP src -> dst

define I_FAST_+-_2();
	lvars _dst = Get_fsrc(_2, DST);
	if _dst == USP_+ then i_USP -> _dst endif;
	Do_fast_+-(
		asm_instr!INST_ARGS[_0],
		Get_fsrc(_1, SRC_1),
		_dst, _dst);
enddefine;

;;; I_FAST_+-_3 adding src1 src2 dst:
;;;		src2 fi_OP src1 -> dst

define I_FAST_+-_3();
	Do_fast_+-(
		asm_instr!INST_ARGS[_0],
		Get_fsrc(_1, SRC_1),
		Get_fsrc(_2, SRC_2),
		if asm_instr!INST_ARGS[_3] then
			Get_opnd(_3, DEFER, DST)
		else
			-_USP
		endif);
enddefine;


;;; -- Branches and Tests -------------------------------------------------

;;; is_target_N:
;;;     <true> if a branch target can be encoded in the N-bit word
;;;     displacement field of the current branch instruction (i.e. the
;;;     actual value encoded will be: (target-asm_code_offset-8)>>2).
;;;     For a backward branch the displacement is known for definite,
;;;     but for a forward branch it could increase -- in principle, up
;;;     to twice the size, so we halve the allowed range accordingly.

define lconstant is_target_12(_target);
	lvars _target;
	if _neg(_target) then
		;;; forward branch on first pass, not yet known: assume OK
		true;
	else
		_target _sub _asm_code_offset -> _target;
		_-16:1FF8 _slteq _target and _target _slt _16:1008;
	endif;
enddefine;

define lconstant is_target_17(_target);
	lvars _target;
	if _neg(_target) then
		;;; forward branch on first pass, not yet known: assume OK
		true;
	else
		_target _sub _asm_code_offset -> _target;
		_-16:3FFF8 _slteq _target and _target _slt _16:20008;
	endif;
enddefine;

;;; I_LABEL:
;;;		called from -Code_pass- when a label is encountered in -asm_clist-

define I_LABEL();
	;;; set front of the label pair to the popint offset from the code start
	_pint(_asm_code_offset) -> fast_front(asm_clist);
	;;; label ends a basic block
	END_BLOCK();
enddefine;

;;; I_BR_std:
;;;		plant a branch instruction of a known size (8 bytes).

define I_BR_std(_br_offset, _arg);
	lvars _br_offset, _arg;
	if is_target_17(_br_offset) then
		Drop_branch(_br_offset, Nop);
	else
		mishap(0, 'SYSTEM ERROR IN I_BR_std (target out of range)');
	endif;
enddefine;

;;; I_BR_long:
;;;     plant an unconditional branch to a target outside the range of a
;;;     single branch instruction

define lconstant I_BR_long(_br_offset, _arg);
	lvars _br_offset, _arg;
	Drop_long_branch(_br_offset, false);
enddefine;

;;; I_BR:
;;;		plant an unconditional branch

define I_BR(_br_offset, _arg);
	lvars _br_offset, _arg;
	if _asm_pass == 1 and not(is_target_17(_br_offset)) then
		;;; update the instruction to drop a long branch on subequent passes
		I_BR_long -> asm_instr!INST_ARGS[_arg];
		;;; and do it now
		I_BR_long(_br_offset, _arg);
	else
		;;; if there's a valid last instruction, pull it down into the
		;;; delay slot
		Drop_branch(_br_offset, unDrop());
	endif;
enddefine;

;;; I_BRCOND_long:
;;;     plants a conditional branch to a target outside the range of the
;;;     given instruction (_op + _cond). Works by inverting the sense of
;;;     the test to branch around an unconditional jump to the target.

define lconstant I_BRCOND_long(_op, _cond, _r1, _r2, is_so, _br_offset, _arg);
	lvars is_so, _op, _cond, _r1, _r2, _br_offset, _arg;
	if is_so then
		;;; negate the test
		_op _bixor _COMB_F -> _op;
	endif;
	Drop(CondBranch(_op, _cond, true, _r1, _r2, _8));
	if is_target_17(_br_offset) then
		Drop_branch(_br_offset, false);
	else
		;;; need a long branch, which means a 12-byte offset
		unDrop() -> ;
		Drop(CondBranch(_op, _cond, true, _r1, _r2, _12));
		Drop_long_branch(_br_offset, false);
	endif;
enddefine;

;;; I_BRCOND:
;;;		standard conditional branch, used as argument to I_IF_opt etc.

define I_BRCOND(_op, _cond, _r1, _r2, is_so, _br_offset, _arg);
	lvars is_so, _op, _cond, _r1, _r2, _br_offset, _arg;
	if _asm_pass == 1 and not(is_target_12(_br_offset)) then
		;;; update the instruction to drop a long branch on subequent passes
		I_BRCOND_long -> asm_instr!INST_ARGS[_arg];
		;;; and do it now
		I_BRCOND_long(_op, _cond, _r1, _r2, is_so, _br_offset, _arg);
	else
		unless is_so then
			;;; negate the test
			_op _bixor _COMB_F -> _op;
		endunless;
/*		This attempt to nullify forward branches doesn't work
		if _nonneg(_br_offset)	;;; i.e. has been defined
		and _br_offset _slteq _asm_code_offset@(b){_8}
		then
			;;; backward branch
			Drop(CondBranch(_op, _cond, false, _r1, _r2,
				_br_offset _sub _asm_code_offset));
			Drop(Nop);
		else
			;;; forward branch -- can safely nullify
			Drop(CondBranch(_op, _cond, true, _r1, _r2,
				_br_offset _sub _asm_code_offset));
		endif;
*/
		Drop(CondBranch(_op, _cond, false, _r1, _r2,
			_br_offset _sub _asm_code_offset));
		Drop(Nop);
	endif;
	;;; branch ends a basic block
	END_BLOCK();
enddefine;

;;; Drop_if:
;;;		call the branch-dropping routine from an argument of a test
;;;		instruction (I_IF_opt or I_BOOL_opt). Currently, this will
;;;		always be I_BRCOND or Drop_long_cond_branch.

define lconstant Drop_if(/* _op, _cond, _r1, _r2, */ instr) with_nargs 5;
	lvars	instr;
	dlocal	asm_instr = instr;
	fast_apply(/* _op, _cond, _r1, _r2, */
		instr!INST_ARGS[_1],					;;; is_so
		_int(fast_front(instr!INST_ARGS[_0])),	;;; _br_offset
		_2,										;;; _arg
		instr!INST_ARGS[_2]);					;;; I_BRCOND etc.
enddefine;

;;; I_IF_opt target is_so branch_p src:
;;;     the standard test instruction: branches if the source operand is
;;;     non-false. (If -is_so- is <false>, the sense of the test is
;;;     reversed.)

define I_IF_opt();
	Drop_if(_COMBF, _cond_EQ, Do_load(Get_fsrc(_3, SRC_1), false), _R_FALSE,
		asm_instr);
enddefine;

;;; I_BOOL_opt target is_so branch_p src:
;;;		like I_IF_opt, but used for sysAND and sysOR: if the branch is
;;;		taken, the operand should be left on the stack.

define I_BOOL_opt();
	lvars _src = Get_fsrc(_3, SRC_1);
	if _src == USP_+ then
		;;; copy top of stack to reg
		Drop(Load(_LDW, _0, _R_USP, tmp_reg() ->> _src));
	else
		;;; copy src to reg and push
		Drop(Push(Do_load(_src, false) ->> _src));
	endif;
	Drop_if(_COMBF, _cond_EQ, _src, _R_FALSE, asm_instr);
	;;; If the branch wasn't taken, remove the item from the stack again
	Drop(Load(_LDO, _4, _R_USP, _R_USP));
enddefine;

;;; I_IF_CMP subr src1 src2 instr:
;;;     -subr- is a comparison subroutine - _eq, _slt etc. - meant to be
;;;     applied to -src1- and -src2-, but optimised to an inline test.
;;;     The two operands are "fsrc"-type operands, i.e. either MOVE
;;;     instructions or <false> for top-of-stack. The last argument is
;;;     an I_IF_opt instruction for testing the result of the
;;;     comparison. The sense of the test is: subr(opd2, opd1)

define lconstant cond_op =
	list_assoc_val(%[%
		nonop _eq,		conspair(_COMBT, _cond_EQ),
		nonop _neq,		conspair(_COMBF, _cond_EQ),
		nonop _slt,		conspair(_COMBT, _cond_LT),
		nonop _slteq,	conspair(_COMBT, _cond_LE),
		nonop _sgr,		conspair(_COMBF, _cond_LE),
		nonop _sgreq,	conspair(_COMBF, _cond_LT),
	%]%);
enddefine;

define I_IF_CMP();
	lvars _op, _cond, _src_1, _src_2;
	fast_destpair(cond_op(asm_instr!INST_ARGS[_0])) -> (_op, _cond);
	Get_fsrc(_1, SRC_1) -> _src_1;
	Get_fsrc(_2, SRC_2) -> _src_2;
	if _src_2!OPND_ACCESS == _ACC_IMMEDIATE
	and IS_IMM_5(_src_2!OPND_ARGS[_0])
	then
		;;; do immediate comparison
		_op _biset _COMB_I -> _op;
		(Do_load(_src_1, false), _src_2!OPND_ARGS[_0]) -> (_src_1, _src_2);
	elseif _src_1!OPND_ACCESS == _ACC_IMMEDIATE
	and IS_IMM_5(_src_1!OPND_ARGS[_0])
	then
		;;; do immediate comparison with operands reversed
		_op _biset _COMB_I -> _op;
		(Do_load(_src_2, false), _src_1!OPND_ARGS[_0]) -> (_src_1, _src_2);
		;;; invert sense of test
		unless _cond == _cond_EQ then
			_op _bixor _COMB_F -> _op;
			if _cond == _cond_LT then _cond_LE else _cond_LT endif -> _cond;
		endunless;
	else
		Do_load(_src_1, false) -> _src_1;
		Do_load(_src_2, false) -> _src_2;
	endif;
	Drop_if(_op, _cond, _src_2, _src_1, asm_instr!INST_ARGS[_3]);
enddefine;

;;; I_SWITCH lablist elselab src:
;;;		computed goto.

define I_SWITCH();
	lvars lablist, elselab, _ncases, _src, _reg, _tmp, _offs;

	asm_instr!INST_ARGS[_0] -> lablist;
	asm_instr!INST_ARGS[_1] -> elselab;
	_int(listlength(lablist)) -> _ncases;

	;;; Load the operand to a register
	Do_load(Get_fsrc(_2, SRC_1), false) -> _src;
	;;; Convert to sysint
	Drop(Extract(_EXTRS, _src, _29, _30, tmp_reg() ->> _reg));

	;;; Compare the argument with the number of cases and skip the
	;;; switch if out of range (a branch of 12 bytes takes us to the
	;;; first entry in the table, which will then jump to the else case)
	if IS_IMM_5(_ncases) then
		Drop(CondBranch(_COMIBT, _cond_ULT, true, _ncases, _reg, _12));
	else
		Drop(Load(_LDO, based_addr(_ncases), tmp_reg() ->> _tmp));
		Drop(CondBranch(_COMBT, _cond_ULT, true, _tmp, _reg, _12));
	endif;

	;;; Do the switch
	Drop(RegBranch(_BLR, false, _reg, _R_0));
	Drop(Nop);

	;;; Plant the jump table: each entry must be 2 words long. First
	;;; entry is for the 0 (error) case -- jumps to the end of the
	;;; table.
	_asm_code_offset _add _shift(_ncases _add _1, _3) -> _offs;
	repeat
		if is_target_17(_offs) then
			Drop_branch(_offs, Nop);
		else
			Drop_long_branch(_offs, false);
		endif;
		quitif(lablist == []);
		_int(fast_front(fast_destpair(lablist) -> lablist)) -> _offs;
	endrepeat;

	;;; After the table: if there was no explicit "else" case, push the
	;;; original argument back on the stack for a following error
	unless elselab then
		Drop(Push(_src));
	endunless;
enddefine;

;;; I_PLOG_IFNOT_ATOM fail_label instr:
;;;     planted after a call to -prolog_unify_atom- from "aprolog.s":
;;;     %ret0 will be zero if unification succeeded, non-zero otherwise.
;;;     The last argument is an I_BRCOND instruction to plant a jump if
;;;     the unification failed.

define I_PLOG_IFNOT_ATOM();
	fast_apply(
		_COMBF, _cond_EQ,
		_R_RET0, _R_0,
		true,										;;; is_so
		_int(fast_front(asm_instr!INST_ARGS[_0])),	;;; _br_offset
		_1, asm_instr!INST_ARGS[_1]);				;;; I_BRCOND
enddefine;

;;; I_PLOG_TERM_SWITCH fail_label instr1 var_label instr2:
;;;     planted after a call to -prolog_pair(term)_switch- from
;;;     "aprolog.s". %ret0 will be set as follows:
;;;			0	matching term
;;;		  > 0	variable
;;;		  < 0	failure

define I_PLOG_TERM_SWITCH();
	;;; Branch to -var_label- if item was a variable
	fast_apply(
		_COMBF, _cond_LE,
		_R_RET0, _R_0,
		true,
		_int(fast_front(asm_instr!INST_ARGS[_2])),
		_3, asm_instr!INST_ARGS[_3]);
	;;; Branch to -fail_label- if item didn't match
	fast_apply(
		_COMBT, _cond_LT,
		_R_RET0, _R_0,
		true,
		_int(fast_front(asm_instr!INST_ARGS[_0])),
		_1, asm_instr!INST_ARGS[_1]);
	;;; Fall through if item matched
enddefine;


;;; -- Procedure Call and Return ------------------------------------------

;;; Do_call:
;;;		call a pop procedure

define lconstant Do_call(on_stack, defer, _routine);
	lvars on_stack, defer, _routine, _src, _reg;
	if on_stack then
		USP_+;
	else
		Get_opnd(_0, defer, SRC_1);
	endif -> _src;
	if _routine then
		;;; Call via checking routine: get procedure in %arg0
		Do_load(_src, _R_ARG0) -> ;
		Drop_call(_BLE, _routine, Nop);
	else
		;;; Call direct: get execute address in %arg0
		Do_load(_src, false) -> _src;
		Drop(Load(_LDW, @@PD_EXECUTE, _src, _R_ARG0));
		Drop(Ldsid(_src, tmp_reg() ->> _reg));
		Drop(Mtsp(_reg, _R_SR0));
		Drop(ExternalBranch(_BLE, false, _0, _R_SR0, _R_ARG0));
		Drop(Nop);
		;;; branch ends a basic block
		END_BLOCK();
	endif;
enddefine;

define I_CALL	 = Do_call(% false, DEFER,  _popenter    %) enddefine;
define I_CALLQ	 = Do_call(% false, DIRECT, _popenter    %) enddefine;
define I_CALLP	 = Do_call(% false, DEFER,   false       %) enddefine;
define I_CALLPQ	 = Do_call(% false, DIRECT,  false       %) enddefine;
define I_CALLS	 = Do_call(% true,  DIRECT, _popenter    %) enddefine;
define I_CALLPS	 = Do_call(% true,  DIRECT,  false       %) enddefine;
define I_UCALL	 = Do_call(% false, DEFER,  _popuenter   %) enddefine;
define I_UCALLQ	 = Do_call(% false, DIRECT, _popuenter   %) enddefine;
define I_UCALLP	 = Do_call(% false, DEFER,  _popuncenter %) enddefine;
define I_UCALLPQ = Do_call(% false, DIRECT, _popuncenter %) enddefine;
define I_UCALLS	 = Do_call(% true,  DIRECT, _popuenter   %) enddefine;
define I_UCALLPS = Do_call(% true,  DIRECT, _popuncenter %) enddefine;

;;; I_CALLABS addr:
;;;		call a pop procedure at a fixed address (e.g. system procedure)

define I_CALLABS();
	lvars _addr = asm_instr!INST_ARGS[_0];
	Drop_call(_BLE, _addr!PD_EXECUTE, Nop);
enddefine;

;;; I_CHAIN_REG reg:
;;; 	Chain a procedure from a register

define I_CHAIN_REG();
	lvars _src = _int(Is_register(asm_instr!INST_ARGS[_0])), _reg;
	Drop(Load(_LDW, @@PD_EXECUTE, _src, _R_ARG0));
	Drop(Ldsid(_src, tmp_reg() ->> _reg));
	Drop(Mtsp(_reg, _R_SR0));
	Drop(ExternalBranch(_BE, false, _0, _R_SR0, _R_ARG0));
	Drop(Nop);
	;;; branch ends a basic block
	END_BLOCK();
enddefine;

;;; I_CALLSUB subr:
;;;		call a subroutine

define I_CALLSUB();
	Drop_call(_BLE, asm_instr!INST_ARGS[_0], Nop);
enddefine;

;;; I_CHAINSUB subr:
;;;		chain a subroutine

define I_CHAINSUB();
	Drop_call(_BE, asm_instr!INST_ARGS[_0], Nop);
enddefine;

;;; I_CALLSUB_REG subr:
;;;		like I_CALLSUB, except that the routine adress may be in a register

define I_CALLSUB_REG();
	lvars _src = fast_front(asm_instr!INST_ARGS[_0]), _reg;
	if Is_register(_src) ->> _reg then
		Drop(ExternalBranch(_BLE, false, _0, _R_SR4, _int(_reg)));
		Drop(Nop);
		;;; branch ends a basic block
		END_BLOCK();
	else
		Drop_call(_BLE, _src, Nop);
	endif;
enddefine;

;;; I_RETURN:
;;;		return from procedure call

define I_RETURN();
	lvars _reg;
	Drop(Ldsid(_R_31, tmp_reg() ->> _reg));
	Drop(Mtsp(_reg, _R_SR0));
	Drop(ExternalBranch(_BE, false, _0, _R_SR0, _R_31));
	Drop(Nop);
	;;; branch ends a basic block
	END_BLOCK();
enddefine;


;;; -- Miscellaneous ------------------------------------------------------

;;; I_STACKLENGTH:
;;;		push the length of the user stack

define I_STACKLENGTH();
	lvars _reg;
	Drop(Load(_LDW, based_addr(ident _userhi), tmp_reg() ->> _reg));
	Drop(ArithLog(_SUB, _reg, _R_USP, _reg));
	Drop(ArithLog(_ADD, _reg, _R_PZERO, _reg));
	Drop(Push(_reg));
enddefine;

;;; I_SETSTACKLENGTH saved_stacklength nresults:
;;;		check that the stacklength is equal to the saved stacklength plus
;;;		an expected number of results. If not, call -setstklen_diff- to fix.
;;;		This does the same job as -setstklen- from "alisp.s", but avoids the
;;;		subroutine call when the number of results is known in advance.

lvars _i_setstklen_offs = _0; 	;;; size of call to _setstklen_diff

define I_SETSTACKLENGTH();
	lvars _nresults, _reg;
	if asm_instr!INST_ARGS[_1] ->> _nresults then
		;;; Load stack base (_userhi) to %arg0
		Drop(Load(_LDW, based_addr(ident _userhi), _R_ARG0));
		;;; Load saved stacklength to a register
		Do_load(Get_fsrc(_0, SRC_1), false) -> _reg;
		;;; Adjust stack base for number of results and popint bits
		;;; (in _nresults and saved stacklength)
		Drop(ArithImm(_ADDI, _6 _sub _nresults, _R_ARG0, _R_ARG0));
		;;; Subtract saved stacklength to get desired stack pointer
		Drop(ArithLog(_SUB, _R_ARG0, _reg, _R_ARG0));
		;;; Compare desired stack pointer with actual: if not the same,
		;;; call _setstklen_diff to fix
		lvars _offs = _asm_code_offset;
		Drop(CondBranch(_COMBT, _cond_EQ, true, _R_ARG0, _R_USP,
			_i_setstklen_offs));
		Drop_call(_BLE, _setstklen_diff, Nop);
		@@(b){_asm_code_offset, _offs} -> _i_setstklen_offs;
	else
		;;; both stacklength and nresults are on the stack:
		;;; call _setstklen
		Drop_call(_BLE, _setstklen, Nop);
	endif;
enddefine;

;;; I_LISP_TRUE:
;;;		replace <false> result on top of stack by nil.

define I_LISP_TRUE();
	lvars _reg, _disp, _base;
	Drop(Load(_LDW, _0, _R_USP, tmp_reg() ->> _reg));
	based_addr(nil) -> (_disp, _base);	;;; may emit one instruction
	Drop(CondBranch(_COMBF, _cond_EQ, true, _reg, _R_FALSE, _12));
	Drop(Load(_LDO, _disp, _base, _reg));
	Drop(Store(_STW, _reg, _0, _R_USP));
	;;; branch ends a basic block
	END_BLOCK();
enddefine;

;;; I_CHECK:
;;;		plant checks on backward jumps.

lvars _i_check_offs = _0;	;;; size of call to _checkall

define I_CHECK();
	lvars _reg_1, _reg_2;
	Drop(Load(_LDW, based_addr(ident _trap), tmp_reg() ->> _reg_1));
	Drop(Load(_LDW, based_addr(ident _userlim), tmp_reg() ->> _reg_2));
	;;; Call checkall if _trap is set
	Drop(CondBranch(_COMBF, _cond_EQ, true, _reg_1, _R_0, _8));
	;;; Don't call checkall if _userlim <= %usp
	lvars _offs = _asm_code_offset;
	Drop(CondBranch(_COMBT, _cond_LE, true, _reg_2, _R_USP, _i_check_offs));
	Drop_call(_BLE, _checkall, Nop);
	@@(b){_asm_code_offset, _offs} -> _i_check_offs;
enddefine;


;;; -- Procedure Entry and Exit -------------------------------------------

define I_CREATE_SF();

	;;; Compute procedure address in %pb: uses BL to get a pointer to
	;;; the third word of the code, then adjusts for the procedure
	;;; header length (plus 3 for the two privilege bits set in the
	;;; return address)
	lvars _offs = _negate(@@PD_TABLE{_strsize _add _11});
	Drop(Branch(_BL, false, _8, _R_PB));
	if IS_IMM_14(_offs) then
		Drop(Load(_LDO, _offs, _R_PB, _R_PB));
	else
		Drop(LongImm(_ADDIL, L_PART(_offs), _R_PB));
		Drop(Load(_LDO, R_PART(_offs), _R_1, _R_PB));
	endif;

	;;; Allocate the stack frame and save the return address
	Drop(Store(_STWM, _R_31, _framesize, _R_SP));

	;;; Compute SP offset to start of frame
	lvars _offs = _4 _sub _framesize;

	;;; Save register lvars
	lvars _reg = _R_NPOP4 _add (_regmask _bimask _16:FF);
	while _reg _slteq _R_NPOP0 do
		Drop(Store(_STW, _reg, _offs, _R_SP));
		_offs@(w)[_1] -> _offs;
		_reg _add _1 -> _reg;
	endwhile;
	_R_POP5 _add _shift(_regmask, _-8) -> _reg;
	while _reg _slteq _R_POP0 do
		Drop(Store(_STW, _reg, _offs, _R_SP));
		_offs@(w)[_1] -> _offs;
		_reg _add _1 -> _reg;
	endwhile;

	;;; Save dynamic locals
	lvars _i = _0, _instr = false;
	if _Nlocals _bitst _1 then
		;;; Odd number of locals: do the first one specially, pulling
		;;; down the previous instruction to separate the loads
		unDrop() -> _instr;
		Drop(Load(_LDW, @@PD_TABLE, _R_PB, _R_T1));
		Drop(_instr);
		Drop(Load(_LDW, @@ID_VALOF, _R_T1, _R_T1));
		;;; Keep the store for later
		Store(_STW, _R_T1, _offs, _R_SP) -> _instr;
		_offs@(w)[_1] -> _offs;
		_1 -> _i;
	endif;
	until _i == _Nlocals then
		;;; Do remaining saves pairwise to reduce pipeline stalls
		Drop(Load(_LDW, @@PD_TABLE[_i], _R_PB, _R_T2));
		Drop(Load(_LDW, @@PD_TABLE[_i _add _1], _R_PB, _R_T3));
		Drop(Load(_LDW, @@ID_VALOF, _R_T2, _R_T2));
		Drop(Load(_LDW, @@ID_VALOF, _R_T3, _R_T3));
		Drop(Store(_STW, _R_T2, _offs@(w)[_0], _R_SP));
		Drop(Store(_STW, _R_T3, _offs@(w)[_1], _R_SP));
		_offs@(w)[_2] -> _offs;
		_i _add _2 -> _i;
	enduntil;

	;;; Initialise pop on-stack lvars to zero
	fast_repeat _pint(_Npopstkvars) times
		Drop(Store(_STW, _R_PZERO, _offs, _R_SP));
		_offs@(w)[_1] -> _offs;
	endrepeat;

	;;; Save the procedure base register
	Drop(Store(_STW, _R_PB, _-4, _R_SP));

	;;; Complete first dlocal save
	if _instr then Drop(_instr) endif;
enddefine;

define I_UNWIND_SF();

	;;; Compute SP offset to start of dlocal save area
	lvars _offs = @@(w)-[_Nstkvars _add _1];

	;;; Restore dynamic locals:
	lvars _i = _Nlocals, _instr = false;
	while _i _sgr _1 do
		_i _sub _2 -> _i;
		_offs@(w)-[_2] -> _offs;
		Drop(Load(_LDW, @@PD_TABLE[_i _add _1], _R_PB, _R_T1));
		Drop(Load(_LDW, _offs@(w)[_1], _R_SP, _R_T2));
		Drop(Load(_LDW, @@PD_TABLE[_i], _R_PB, _R_T3));
		Drop(Load(_LDW, _offs@(w)[_0], _R_SP, _R_T4));
		Drop(Store(_STW, _R_T2, @@ID_VALOF, _R_T1));
		Drop(Store(_STW, _R_T4, @@ID_VALOF, _R_T3));
	endwhile;
	if _i == _1 then
		_offs@(w)-[_1] -> _offs;
		Drop(Load(_LDW, @@PD_TABLE, _R_PB, _R_T1));
		Drop(Load(_LDW, _offs, _R_SP, _R_T2));
		;;; Keep this for later
		Store(_STW, _R_T2, @@ID_VALOF, _R_T1) -> _instr;
	endif;

	;;; Restore register lvars
	lvars _reg = _R_POP0;
	_R_POP5 _add _shift(_regmask, _-8) -> _i;
	while _reg _sgreq _i do
		_offs@(w)-[_1] -> _offs;
		Drop(Load(_LDW, _offs, _R_SP, _reg));
		_reg _sub _1 -> _reg;
	endwhile;
	_R_NPOP0 -> _reg;
	_R_NPOP4 _add (_regmask _bimask _16:FF) -> _i;
	while _reg _sgreq _i do
		_offs@(w)-[_1] -> _offs;
		Drop(Load(_LDW, _offs, _R_SP, _reg));
		_reg _sub _1 -> _reg;
	endwhile;

	;;; Restore return address and deallocate the stack frame
	Drop(Load(_LDWM, _negate(_framesize), _R_SP, _R_31));

	;;; Complete last dlocal restore
	if _instr then Drop(_instr) endif;

	;;; Restore caller's procedure base
	Drop(Load(_LDW, _-4, _R_SP, _R_PB));
enddefine;


;;; -- The Assembler ------------------------------------------------------

#_IF DEF I_DEBUG

section;
global constant procedure (initintvec, isclosed);
endsection;

lvars
	logdev	= false,
	logbuff	= writeable initintvec(1),
;

define lconstant Dump_pdr(pdr, codesize);
	lvars pdr, codesize, n;
	;;; Open I-code log file
	if not(logdev) or isclosed(logdev) then
		unless syscreate('ICODE.LOG', 1, true) ->> logdev then
			mishap(0, 'CANNOT OPEN I-CODE LOG FILE' <> sysiomessage());
		endunless;
	endif;
	;;; Dump procedure code
	if codesize fi_> datalength(logbuff) then
		initintvec(codesize) -> logbuff;
	endif;
	4*codesize ->> codesize -> logbuff(1);
	syswrite(logdev, logbuff, 4);
	_moveq(_int(codesize), pdr@PD_TABLE{_strsize}, logbuff@V_WORDS) -> ;
	syswrite(logdev, logbuff, codesize);
	sysflush(logdev);
enddefine;

#_ENDIF

;;; Do_consprocedure:
;;;		outputs machine code into a procedure record from pop assembler
;;;		input

define Do_consprocedure(codelist, reg_locals) -> pdr;
	lvars	codelist, reg_locals, pdr, _code_offset, _size;
	dlocal	_asm_drop_ptr, _asm_pass, _framesize, _regmask, _strsize = _0;

	define lconstant Do_code_pass(/* _asm_pass, codelist */) with_nargs 2;
		dlocal
			_arg_reg_3	= false,
			_last_instr	= false,
			_tmp_regs	= [],
		;
		Code_pass(/* _asm_pass, codelist */);
	enddefine;

	;;; Compute stack frame size (the 2 is for return address + owner)
	@@(w)[_Nreg _add _Nlocals _add _Nstkvars _add _2] -> _framesize;

	;;; Register mask records the number of pop and non-pop registers
	;;; *not* saved
	_shift(_N_POP_REGS _sub _Npopreg, _8)
	_biset (_N_NPOP_REGS _add _Npopreg _sub _Nreg) -> _regmask;

	;;; Pass 1 -- calculate instruction offsets
	Do_code_pass(0, codelist) -> _code_offset;
	@@(w)[_int(listlength(asm_struct_list))] -> _strsize;

	;;; Pass 2 -- adjust branch offsets for big procedures
	if ##(w){_code_offset} _gr _16:7FF then
		Do_code_pass(1, codelist) -> _code_offset;
	endif;

	;;; Pass 3 -- optimisations
	;;; Not made use of (yet)
	;;; unless pop_debugging then
	;;;		Do_code_pass(2, codelist) -> _code_offset;
	;;;	endunless;

	;;; Now calculate total size of procedure and allocate store for it.
	;;; The procedure record will be returned with the header and structure
	;;;	table already filled in, and with _asm_drop_ptr pointing to the
	;;;	start of the executable code section.
	@@PD_TABLE{_strsize _add _code_offset | b.r} _sub @@POPBASE -> _size;
	Get_procedure(_size, _regmask) -> pdr;

	;;; Final pass -- plants the code
	Do_code_pass(false, codelist) -> ;
#_IF DEF I_DEBUG
	;;; write to log file
	Dump_pdr(pdr, _pint(##(w){_code_offset | b.r}));
#_ENDIF
enddefine;

endsection;		/* $-Sys$-Vm */


/* --- Revision History ---------------------------------------------------
--- Robert Duncan, Oct 22 1996
		Fixed for recent change to I-code label offsets: offsets are < 0
		until explicitly assigned by I_LABEL
 */
