/* --- Copyright University of Sussex 1994. All rights reserved. ----------
 > File:			C.hppa/src/syscomp/genproc.p
 > Purpose:			Compiles M-code to assembly code for HP PA-RISC 1.1
 > Author:			Robert Duncan & Simon Nichols, Jan 11 1993 (see revisions)
 */


#_INCLUDE 'common.ph'

section $-Popas$-M_trans;

global constant procedure (
	perm_const_opnd,
);

global vars
	current_pdr_label,
	current_pdr_exec_label,
	false_immediate,
;


	/****************************************************************
	* This file is contained entirely in section Genproc, and must  *
	*   define all the exports to section M_trans listed below.     *
	****************************************************************/

section Genproc =>

		/*	M-opcode Procedures */

		M_ADD
		M_ASH
		M_BIC
		M_BIM
		M_BIS
		M_BIT
		M_BRANCH
		M_BRANCH_std
		M_BRANCH_ON
		M_BRANCH_ON_INT
		M_CALL
		M_CALL_WITH_RETURN
		M_CALLSUB
		M_CHAIN
		M_CHAINSUB
		M_CLOSURE
		M_CMP
		M_CMPKEY
		M_CREATE_SF
		M_END
		M_ERASE
		M_LABEL
		M_LOGCOM
		M_MOVE
		M_MOVEb
		M_MOVEbit
		M_MOVEs
		M_MOVEsb
		M_MOVEsbit
		M_MOVEss
		M_MULT
		M_NEG
		M_PADD
		M_PADD_TEST
		M_PCMP
		M_PLOG_IFNOT_ATOM
		M_PLOG_TERM_SWITCH
		M_PSUB
		M_PSUB_TEST
		M_PTR_ADD_OFFS
		M_PTR_CMP
		M_PTR_SUB
		M_PTR_SUB_OFFS
		M_RETURN
		M_SETSTKLEN
		M_SUB
		M_TEST
		M_UNWIND_SF
		M_UPDb
		M_UPDbit
		M_UPDs

		/*	Registers */

		SP
		USP
		USP_+
		-_USP
		i_USP
;;;		i_USP_+					;;; optional
;;;		ii_USP					;;; optional
		WK_ADDR_REG_1
		WK_ADDR_REG_2
		WK_REG
		CHAIN_REG
		SPECIAL_VAR_BLOCK_REG	;;; optional

		/*	Register Lists */

		nonpop_registers
		pop_registers

		/*	Register Procedures */

		reglabel
		regnumber
		autoidreg

		/*	M-code Mapping of Subroutines */

		mc_inline_conditions_list
		mc_inline_procs_list

		/*	Procedures Needed by -m_optimise- */

		cvt_pop_subscript
		can_defer_opnd
		pdr_index_opnd

		/*	Procedure to Generate Code */

		mc_code_generator
;


;;; -- REGISTERS ----------------------------------------------------------

;;; Local register names:
;;;	(see "asmout.p" for definition of -asm_reg_name-)

lconstant

	R_0		  = asm_reg_name(1),		;;; Hardware zero
	R_1		  = asm_reg_name(2),		;;; ADDIL/CHAIN_REG
	R_2		  = asm_reg_name(3),		;;; Scratch
	R_NPOP4	  = asm_reg_name(4),		;;; Non-pop lvar
	R_NPOP3	  = asm_reg_name(5),		;;;		""
	R_NPOP2	  = asm_reg_name(6),		;;;		""
	R_NPOP1	  = asm_reg_name(7),		;;;		""
	R_NPOP0	  = asm_reg_name(8),		;;;		""
	R_POP5	  = asm_reg_name(9),		;;;	Pop lvar
	R_POP4	  = asm_reg_name(10),		;;;		""
	R_POP3	  = asm_reg_name(11),		;;;		""
	R_POP2	  = asm_reg_name(12),		;;;		""
	R_POP1	  = asm_reg_name(13),		;;;		""
	R_POP0	  = asm_reg_name(14),		;;;		""
	R_PZERO	  = asm_reg_name(15),		;;;	Permanent pop zero (3)
	R_FALSE	  = asm_reg_name(16),		;;; Permanent false
	R_SVB	  = asm_reg_name(17),		;;; SPECIAL_VAR_BLOCK_REG
	R_PB	  = asm_reg_name(18),		;;; Procedure base
	R_USP	  = asm_reg_name(19),		;;; User stack pointer
	R_19	  = asm_reg_name(20),		;;; Scratch
	R_T3	  = asm_reg_name(21),		;;; WK_ADDR_REG_2
	R_T2	  = asm_reg_name(22),		;;; WK_ADDR_REG_1
	R_T1	  = asm_reg_name(23),		;;; WK_REG
	R_ARG3	  = asm_reg_name(24),		;;; Subroutine argument
	R_ARG2	  = asm_reg_name(25),		;;;		""
	R_ARG1	  = asm_reg_name(26),		;;;		""
	R_ARG0	  = asm_reg_name(27),		;;;		""
	R_DP	  = asm_reg_name(28),		;;;	Not used (global data pointer)
	R_RET0	  = asm_reg_name(29),		;;; Scratch (subroutine result)
	R_RET1	  = asm_reg_name(30),		;;;		""
	R_SP	  = asm_reg_name(31),		;;; Stack pointer
	R_31	  = asm_reg_name(32),		;;; Poplog (external) return address

	SR_0	  = asm_reg_name(33),		;;; Scratch space reg.
	SR_CODE	  = asm_reg_name(34),		;;; Code space ID
	SR_DATA	  = asm_reg_name(35),		;;; Data space ID

	FR_8	  = asm_reg_name(36),		;;; Scratch float register
	FR_8R	  = asm_reg_name(37),		;;; Right half of %fr8
	FR_8L	  = asm_reg_name(38),		;;; Left half of %fr8

	POP_REGS  = [^R_POP0 ^R_POP1 ^R_POP2 ^R_POP3 ^R_POP4 ^R_POP5],
	NPOP_REGS = [^R_NPOP0 ^R_NPOP1 ^R_NPOP2 ^R_NPOP3 ^R_NPOP4],
	ARG_REGS  = [^R_ARG0 ^R_ARG1 ^R_ARG2 ^R_ARG3],
	TMP_REGS  = [^R_2 ^R_19 ^R_RET0 ^R_RET1],

;

;;; Exported register names:

constant
	WK_REG					= R_T1,
	WK_ADDR_REG_1			= R_T2,
	WK_ADDR_REG_2			= R_T3,
	SP						= R_SP,
	USP						= R_USP,
	CHAIN_REG				= R_1,
	SPECIAL_VAR_BLOCK_REG	= R_SVB,
;

;;; regnumber:
;;;		maps register names -> numbers

define regnumber =
	newproperty([%
		lvars i;
		for i to length(asm_reg_name) do
			[% asm_reg_name(i), i-1 %];
		endfor;
	%], 64, false, "perm");
enddefine;

;;; reglabel:
;;;		maps register numbers -> names

define reglabel =
	newproperty([%
		lvars i;
		for i to length(asm_reg_name) do
			[% i-1, asm_reg_name(i) %];
		endfor;
	%], 64, false, "perm");
enddefine;

;;; isreg:
;;;		<true> if operand is a register name

lconstant macro isreg = "isword";

;;; autoidreg:
;;;     <true> if operand is a register supporting auto-increment/
;;;     decrement addressing

define autoidreg =
	isreg(%%);
enddefine;

;;; tmp_reg:
;;;     allocates a new scratch register. Registers are used in rotation
;;;     to help increase the independence between instructions.
;;;     -regs_in_use- is a list of scratch registers temporarily
;;;     reserved for particular uses. There should never be more than
;;;     one or two of these, so we assume that there's always some
;;;     register free.

lvars tmp_regs = [], regs_in_use = [];

define lconstant tmp_reg() -> reg;
	lvars reg;
	repeat
		if tmp_regs == [] then
			TMP_REGS -> tmp_regs;
		endif;
		fast_destpair(tmp_regs) -> (reg, tmp_regs);
		returnunless(fast_lmember(reg, regs_in_use));
	endrepeat;
enddefine;

;;; Register lvars:

constant
	pop_registers	 = [] :: maplist(POP_REGS, regnumber),
	nonpop_registers = [] :: maplist(NPOP_REGS, regnumber),
;

;;; Special register operands:

constant
	USP_+	= {^R_USP ^true},
	-_USP	= {^R_USP ^false},
	i_USP	= {^R_USP 0},
;


;;; -- OTHER OPERAND TYPES ------------------------------------------------

;;; isimm:
;;;		<true> if operand is an immediate value (integer or symbol)

define lconstant isimm(opnd);
	lvars opnd;
	isintegral(opnd) or isref(opnd);
enddefine;

;;; isimm5, isimm11, isimm14:
;;;     <true> if operand is a signed immediate value known to fit in N
;;;     bits

define lconstant isimm5(opnd);
	lvars opnd;
	isinteger(opnd) and integer_length(opnd) fi_< 5;
enddefine;

define lconstant isimm11(opnd);
	lvars opnd;
	isinteger(opnd) and integer_length(opnd) fi_< 11;
enddefine;

define lconstant isimm14(opnd);
	lvars opnd;
	isinteger(opnd) and integer_length(opnd) fi_< 14;
enddefine;

;;; is_bitfield:
;;;		<true> if the operand is an integer value which can be encoded
;;;		as a bit-field, i.e. one whose binary representation consists of
;;;		a sequence of 1 bits followed by a sequence of 0 bits.

define lconstant is_bitfield(opnd);
	lvars opnd;
	isintegral(opnd) and integer_length(opnd) fi_<= 32 and
	(	opnd < 0 and integer_bitcount(opnd) ==
						integer_leastbit(opnd)
		or
		opnd > 0 and integer_bitcount(opnd) ==
						(integer_length(opnd) fi_- integer_leastbit(opnd))
	);
enddefine;

;;; bitfield:
;;;     returns the bitfield encoding (p, len) for an operand for which
;;;     -is_bitfield- has returned <true>. -len- is the number of 1 bits
;;;     in the operand and -p- is the bit number of the rightmost 1 bit
;;;     (remembering that in the HP-PA, bit 31 is the least
;;;     significant).

define lconstant bitfield(opnd) -> (p, len);
	lvars opnd, p, len;
	31 fi_- integer_leastbit(opnd) -> p;
	if opnd > 0 then
		integer_bitcount(opnd) -> len;
	else
		;;; -integer_bitcount- will return the number of 0 bits
		32 fi_- integer_bitcount(opnd) -> len;
	endif;
enddefine;

;;; isregind:
;;;		<true> if operand is register indirect (based address)

lconstant macro isregind = "isvector";

;;; issym:
;;;		<true> if operand is a symbol (absolute address)

lconstant macro issym = "isstring";

;;; wof:
;;;		convert word offset to bytes

define lconstant wof =
	nonop fi_*(% WORD_OFFS %);
enddefine;

;;; field_@@:
;;;		returns the byte-offset of a word-length field

define lconstant field_@@ =
	field_## <> wof;
enddefine;

;;; can_defer_opnd:
;;;     constructs a field-access operand from an address operand and an
;;;     offset. Returns <false> if no suitable operand exists.

define can_defer_opnd(opnd, offs, upd);
	lvars opnd, offs, upd;
	if isreg(opnd) then
		{^opnd ^offs};
	elseif isref(opnd) then
		;;; immediate label
		if offs == 0 then
			fast_cont(opnd);
		elseif offs > 0 then
			fast_cont(opnd) <> '+' >< offs;
		else
			fast_cont(opnd) >< offs;
		endif;
	else
		false;
	endif;
enddefine;

;;; pdr_index_opnd:
;;;     constructs an operand for accessing values from the current
;;;     procedure header (not used in closures, so R_PB will hold the
;;;     procedure address)

define pdr_index_opnd(fld_index);
	lvars fld_index;
	{^R_PB ^(wof(fld_index))};
enddefine;


;;; -- CODE GENERATION ----------------------------------------------------

lconstant
	NOP = {nop},
		;;; no-op instruction, planted after a jump
	NOP_fixed = {nop},
		;;; like NOP, but not to be deleted or relocated
;

lvars
	m_instr,
		;;; current M-code instruction
	codelist,
		;;; list of generated assembly code
	codelen,
		;;; length of the code list
;

;;; plant:
;;;		adds an instruction to the code list. Code is assembled backwards
;;;		initially, then reversed by the improvement pass.

define lconstant plant();
	conspair((), codelist) -> codelist;
	codelen fi_+ 1 -> codelen;
enddefine;

;;; plant_label:
;;;     adds a label to the instruction list (without changing the code
;;;     length)

define lconstant plant_label(lab);
	lvars lab;
	conspair({\.label ^lab}, codelist) -> codelist;
enddefine;

;;; UNIMPLEMENTED:
;;;		report an unimplemented instruction

define lconstant UNIMPLEMENTED();
	mishap(pdprops(m_instr(1)), 1, 'UNIMPLEMENTED M-OPCODE');
enddefine;


/*
 *	Data movement
 */

;;; L_&_R:
;;;     construct expressions denoting the left-hand (high) and
;;;     right-hand (low) parts of a symbol or large integer.

define lconstant L_&_R(x);
	lvars x, l_x, r_x;
	if isintegral(x) then
		(x >> 11, x && 16:7FF);
	else
		;;; symbol, either as immediate or absolute
		asm_uselab(if isref(x) then fast_cont(x) else x endif);
	endif;
enddefine;

;;; gen_ldil:
;;;     load the left-hand (high) part of an address to a register using
;;;     an LDIL instruction. The argument registers (%argN) which are
;;;     otherwise little used, are used as a cache to avoid repeated
;;;     loads when the same address is referenced twice.

lvars ldil_regs = [], ldil_cache = [];

define lconstant gen_ldil(l_addr) -> reg;
	lvars l_addr, reg, reg_free = false;

	lconstant procedure list_assoc_val = $-lisp$-list_assoc;

	fast_for reg in ARG_REGS do
		unless fast_lmember(reg, regs_in_use) then
			returnif(list_assoc_val(reg, ldil_cache) = l_addr);
			true -> reg_free;
		endunless;
	endfor;
	;;; this address not cached
	if reg_free then
		;;; there's at least one arg register free; use the next one and
		;;; remember its contents
		repeat
			if ldil_regs == [] then
				ARG_REGS -> ldil_regs;
			endif;
			fast_destpair(ldil_regs) -> (reg, ldil_regs);
			quitunless(fast_lmember(reg, regs_in_use));
		endrepeat;
		conspair(reg, conspair(l_addr, ldil_cache)) -> ldil_cache;
	else
		tmp_reg() -> reg;
	endif;
	plant({ldil ^l_addr ^reg});
enddefine;

;;; gen_addil:
;;;     add the left-hand (high) part of an address to a register using
;;;     an ADDIL instruction. The result is always written to register
;;;     %r1, so we have to check that's not in use (shouldn't be ...)

define lconstant gen_addil(l_addr, reg);
	lvars l_addr, reg;
	if fast_lmember(R_1, regs_in_use) then
		mishap(0, 'SYSTEM ERROR (conflicting use of register 1)');
	endif;
	plant({addil ^l_addr ^reg});
enddefine;

;;; gen_load:
;;;		load an arbitrary word-length operand to a register.
;;;		If -reg- is <false>, any register will do.

define lconstant gen_load(src, reg) -> reg;
	lvars src, reg;
	if src == 0 then
		R_0 -> src;
	elseif src == #_< popint(0) >_# then
		R_PZERO -> src;
	elseif src = false_immediate then
		R_FALSE -> src;
	endif;
	if isreg(src) then
		unless reg then
			src -> reg;
		elseunless src == reg then
			plant({copy ^src ^reg});
		endunless;
	elseif isimm(src) then
		if isimm14(src) then
			;;; ldi src, reg
			unless reg then tmp_reg() -> reg endunless;
			plant({ldi ^src ^reg});
		elseif is_bitfield(src) then
			;;; zdepi -1, p, len, reg
			unless reg then tmp_reg() -> reg endunless;
			plant({zdepi -1 ^(bitfield(src)) ^reg});
		else
			lvars (l_src, r_src) = L_&_R(src);
			if r_src == 0 then
				if reg then
					;;; load the value direct to the destination, bypassing
					;;; the LDIL cache
					plant({ldil ^l_src ^reg});
				else
					gen_ldil(l_src) -> reg;
				endif;
			else
				;;; ldil L'src, tmp
				;;; ldo  R'src(tmp), reg
				if l_src == 0 then R_0 else gen_ldil(l_src) endif -> l_src;
				unless reg then tmp_reg() -> reg endunless;
				plant({ldo {^l_src ^r_src} ^reg});
			endif;
		endif;
	elseif isregind(src) then
		lvars (base, disp) = explode(src);
		unless reg then tmp_reg() -> reg endunless;
		if disp == true then
			;;; post-increment:
			;;; ldwm 4(base), reg
			plant({ldwm {^base 4} ^reg});
		elseif disp == false then
			;;; pre-decrement:
			;;; ldwm -4(base), reg
			plant({ldwm {^base -4} ^reg});
		elseif isimm14(disp) then
			;;; ldw disp(base), reg
			plant({ldw ^src ^reg});
		else
			;;;	addil L'disp, base
			;;; ldw   R'disp(%r1), reg
			lvars (l_disp, r_disp) = L_&_R(disp);
			gen_addil(l_disp, base);
			plant({ldw {^R_1 ^r_disp} ^reg});
		endif;
	else
		;;; ldil L'src, tmp
		;;; ldw  R'src(tmp), reg
		lvars (l_src, r_src) = L_&_R(src);
		gen_ldil(l_src) -> l_src;
		unless reg then tmp_reg() -> reg endunless;
		plant({ldw {^l_src ^r_src} ^reg});
	endif;
enddefine;

;;; gen_store:
;;;     store a word value from a register to a register/memory
;;;     destination.

define lconstant gen_store(reg, dst);
	lvars reg, dst;
	if isreg(dst) then
		unless reg == dst then
			;;; copy reg, dst
			plant({copy ^reg ^dst});
		endunless;
	elseif isregind(dst) then
		lvars (base, disp) = explode(dst);
		if disp == true then
			;;; post-increment:
			;;; stwm reg, 4(base)
			plant({stwm ^reg {^base 4}});
		elseif disp == false then
			;;; pre-decrement:
			;;; stwm reg, -4(base)
			plant({stwm ^reg {^base -4}});
		elseif isimm14(disp) then
			;;; stwm reg, disp(base)
			plant({stw ^reg ^dst});
		else
			;;; addil L'disp, base
			;;; stw   reg, R'disp(%r1)
			lvars (l_disp, r_disp) = L_&_R(disp);
			gen_addil(l_disp, base);
			plant({stw ^reg {^R_1 ^r_disp}});
		endif;
	elseif issym(dst) then
		;;; ldil L'dst, tmp
		;;; stw  reg, R'dst(tmp)
		lvars (l_dst, r_dst) = L_&_R(dst);
		gen_ldil(l_dst) -> l_dst;
		plant({stw ^reg {^l_dst ^r_dst}});
	else
		;;; immediate
		mishap(dst, 1, 'SYSTEM ERROR (illegal destination operand)');
	endif;
enddefine;

;;; gen_load_s:
;;;     load a word-, half- or byte-sized operand from memory to
;;;     register. If -reg- is <false>, any register will do.

define lconstant gen_load_s(src, reg, opcode, signed) -> reg;
	lvars src, reg, opcode, signed;

	define lconstant datum_size(opcode);
		lvars opcode;
		if opcode == "ldb" then
			1;
		elseif opcode == "ldh" then
			2;
		else
			4;
		endif;
	enddefine;

	if isregind(src) then
		lvars (base, disp) = explode(src);
		unless reg then tmp_reg() -> reg endunless;
		if disp == true then
			;;; post-increment:
			;;; ldXs,ma n(base), reg
			plant({^(opcode <> "s\,ma") {^base ^(datum_size(opcode))} ^reg});
		elseif disp == false then
			;;; pre-decrement:
			;;; ldXs,mb -n(base), reg
			plant({^(opcode <> "s\,mb") {^base ^(-datum_size(opcode))} ^reg});
		elseif isimm14(disp) then
			;;; ldX disp(base), reg
			plant({^opcode ^src ^reg});
		else
			;;;	addil L'disp, base
			;;; ldX   R'disp(%r1), reg
			lvars (l_disp, r_disp) = L_&_R(disp);
			gen_addil(l_disp, base);
			plant({^opcode {^R_1 ^r_disp} ^reg});
		endif;
	elseif issym(src) then
		;;; ldil L'src, tmp
		;;; ldX  R'src(tmp), reg
		lvars (l_src, r_src) = L_&_R(src);
		gen_ldil(l_src) -> l_src;
		unless reg then tmp_reg() -> reg endunless;
		plant({^opcode {^l_src ^r_src} ^reg});
	else
		mishap(src, 1, 'SYSTEM ERROR (illegal source operand)');
	endif;
	if signed then
		;;; need explicit sign extension for non-word fields
		if opcode == "ldb" then
			plant({extrs ^reg 31 8 ^reg});
		elseif opcode == "ldh" then
			plant({extrs ^reg 31 16 ^reg});
		endif;
	endif;
enddefine;

;;; gen_store_s:
;;;     store a word-, half- or byte-sized operand from a register to
;;;     memory.

define lconstant gen_store_s(reg, dst, opcode);
	lvars reg, dst, opcode;

	define lconstant datum_size(opcode);
		lvars opcode;
		if opcode == "stb" then
			1;
		elseif opcode == "sth" then
			2;
		else
			4;
		endif;
	enddefine;

	if isregind(dst) then
		lvars (base, disp) = explode(dst);
		if disp == true then
			;;; post-increment:
			;;; stXs,ma reg, n(base)
			plant({^(opcode <> "s\,ma") ^reg {^base ^(datum_size(opcode))}});
		elseif disp == false then
			;;; pre-decrement:
			;;; stXs,mb reg, -n(base)
			plant({^(opcode <> "s\,mb") ^reg {^base ^(-datum_size(opcode))}});
		elseif isimm14(disp) then
			;;; stX reg, disp(base)
			plant({^opcode ^reg ^dst});
		else
			;;;	addil L'disp, base
			;;; stX   reg, R'disp(%r1)
			lvars (l_disp, r_disp) = L_&_R(disp);
			gen_addil(l_disp, base);
			plant({^opcode ^reg {^R_1 ^r_disp}});
		endif;
	elseif issym(dst) then
		;;; ldil L'dst, tmp
		;;; stX  reg, R'dst(tmp)
		lvars (l_dst, r_dst) = L_&_R(dst);
		gen_ldil(l_dst) -> l_dst;
		plant({^opcode ^reg {^l_dst ^r_dst}});
	else
		mishap(dst, 1, 'SYSTEM ERROR (illegal destination operand)');
	endif;
enddefine;

;;; gen_load_f:
;;;		load a word-sized operand into a floating-point coprocessor
;;;		register -reg-.

define lconstant gen_load_f(src, reg) -> reg;
	lvars src, reg;
	if isreg(src) or isimm(src) then
		;;; FP coprocessor can only load memory operands
		gen_store(src, -_USP);
		USP_+ -> src;
	endif;
	if isregind(src) then
		lvars (base, disp) = explode(src);
		if disp == true then
			plant({fldws\,ma {^base 4} ^reg});
		elseif disp == false then
			plant({fldws\,mb {^base -4} ^reg});
		elseif isimm5(disp) then
			plant({fldws ^src ^reg});
		else
			lvars index = gen_load(disp >> 2, false);
			;;; NB: false displacement in the operand marks this as an
			;;; indexed load
			plant({fldwx\,s {^base ^index ^false} ^reg});
		endif;
	else
		;;; issym(src)
		lvars base = gen_load(consref(src), false);
		plant({fldws {^base 0} ^reg});
	endif;
enddefine;

;;; gen_store_f:
;;;     store a word-sized operand from a floating-point coprocessor
;;;     register to -dst-.

define lconstant gen_store_f(reg, dst);
	lvars reg, dst, tmp = false;
	if isreg(dst) then
		;;; FP coprocessor can only store to memory
		(-_USP, dst) -> (dst, tmp);
	elseif isimm(dst) then
		mishap(dst, 1, 'SYSTEM ERROR (illegal destination operand)');
	endif;
	if isregind(dst) then
		lvars (base, disp) = explode(dst);
		if disp == true then
			plant({fstws\,ma ^reg {^base 4}});
		elseif disp == false then
			plant({fstws\,mb ^reg {^base -4}});
		elseif isimm5(disp) then
			plant({fstws ^reg ^dst});
		else
			lvars index = gen_load(disp >> 2, false);
			;;; NB: false displacement in the operand marks this as an
			;;; indexed store
			plant({fstwx\,s ^reg {^base ^index ^false}});
		endif;
	else
		;;; issym(dst)
		lvars base = gen_load(consref(dst), false);
		plant({fstws ^reg {^base 0}});
	endif;
	if tmp then gen_load(USP_+, tmp) -> endif;
enddefine;

;;; M_MOVE src dst:
;;;		move an arbitrary word-length operand from -src- to -dst-

define M_MOVE();
	lvars (, src, dst) = explode(m_instr);
	unless src = dst or src == USP_+ and dst == -_USP then
		gen_store(gen_load(src, isreg(dst) and dst), dst);
	endunless;
enddefine;

;;; M_MOVEs src dst:
;;;		move an unsigned, half-word field from memory -src- to -dst-.

define M_MOVEs();
	lvars (, src, dst) = explode(m_instr);
	gen_store(gen_load_s(src, isreg(dst) and dst, "ldh", false), dst);
enddefine;

;;; M_MOVEss src dst:
;;;		move a signed, half-word field from memory -src- to -dst-.

define M_MOVEss();
	lvars (, src, dst) = explode(m_instr);
	gen_store(gen_load_s(src, isreg(dst) and dst, "ldh", true), dst);
enddefine;

;;; M_MOVEb src dst:
;;;		move an unsigned, byte field from memory -src- to -dst-.

define M_MOVEb();
	lvars (, src, dst) = explode(m_instr);
	gen_store(gen_load_s(src, isreg(dst) and dst, "ldb", false), dst);
enddefine;

;;; M_MOVEsb src dst:
;;;		move a signed, byte field from memory -src- to -dst-.

define M_MOVEsb();
	lvars (, src, dst) = explode(m_instr);
	gen_store(gen_load_s(src, isreg(dst) and dst, "ldb", true), dst);
enddefine;

;;; M_UPDs src dst:
;;;		update a half-word field at memory -dst- from -src-

define M_UPDs();
	lvars (, src, dst) = explode(m_instr);
	gen_store_s(gen_load(src, false), dst, "sth");
enddefine;

;;; M_UPDb src dst:
;;;		update a byte field at memory -dst- from -src-

define M_UPDb();
	lvars (, src, dst) = explode(m_instr);
	gen_store_s(gen_load(src, false), dst, "stb");
enddefine;


/*
 *	Basic Arithmetic
 */

;;; gen_add:
;;;     add -src1- to -src2- as system integers and place the result in
;;;     -dst-.

define lconstant gen_add(src1, src2, dst);
	lvars src1, src2, dst, tmp;
	if isimm14(src1) then
		gen_load(src2, false) -> src2;
	elseif isimm14(src2) then
		;;; put the immediate value first
		(gen_load(src1, false), src2) -> (src2, src1);
	else
		gen_load(src1, false) -> src1;
		gen_load(src2, false) -> src2;
	endif;
	if isreg(dst) then dst else tmp_reg() endif -> tmp;
	if isimm11(src1) then
		plant({addi ^src1 ^src2 ^tmp});
	elseif isimm14(src1) then
		plant({ldo {^src2 ^src1} ^tmp});
	else
		plant({add ^src1 ^src2 ^tmp});
	endif;
	unless tmp == dst then gen_store(tmp, dst) endunless;
enddefine;

;;; gen_sub:
;;;		subtract -src1- from -src2- and place the result in -dst-

define lconstant gen_sub(src1, src2, dst);
	lvars src1, src2, dst;
	if isimm14(src1) then
		;;; add its negation
		gen_add(src2, -src1, dst);
	else
		gen_load(src1, false) -> src1;
		unless isimm11(src2) then
			gen_load(src2, false) -> src2;
		endunless;
		lvars tmp = if isreg(dst) then dst else tmp_reg() endif;
		if isimm11(src2) then
			plant({subi ^src2 ^src1 ^tmp});
		else
			plant({sub ^src2 ^src1 ^tmp});
		endif;
		unless tmp == dst then gen_store(tmp, dst) endunless;
	endif;
enddefine;

;;; gen_padd:
;;;     add -src1- to -src2- as pop integers and place the result in
;;;     -dst-.

define lconstant gen_padd(src1, src2, dst);
	lvars src1, src2, dst;
	if isintegral(src1) then
		src1 - 3 -> src1;
	elseif isintegral(src2) then
		src2 - 3 -> src2;
	else
		gen_sub(3, src1, tmp_reg() ->> src1);
	endif;
	gen_add(src1, src2, dst);
enddefine;

;;; gen_psub:
;;;     subtract -src1- from -src2- as pop integers and place the result
;;;     in -dst-.

define lconstant gen_psub(src1, src2, dst);
	lvars src1, src2, dst;
	if isintegral(src1) then
		src1 - 3 -> src1;
	elseif isintegral(src2)
	and (src2 < 0 or integer_length(src2 + 3) fi_<= 31)
	then
		;;; adding 3 won't overflow
		src2 + 3 -> src2;
	else
		gen_sub(3, src1, tmp_reg() ->> src1);
	endif;
	gen_sub(src1, src2, dst);
enddefine;

;;; gen_mult:
;;;		multiply -src1- by -src2- and place the result in -dst-.

define lconstant gen_mult(src1, src2, dst);
	lvars src1, src2, dst;

		;;; transform multiplication by a constant into a sequence of
		;;; shifts, additions and subtractions
	define lconstant gen_mult_imm(i, src, dst) -> reg;
		lvars i, src, dst, reg;

		returnif(i == 0)(R_0 -> reg);

			;;; do partial multiplication
			;;; precondition: i && 1 == 1
		define lconstant mult(i, reg, src, tmp, dst) -> reg;
			lvars i, reg, src, tmp, dst;
			returnif(i == 1);
			lvars subtract;
			if i && 2:111 == 2:111 then
				i + 1 -> i;
				true -> subtract;
			else
				i - 1 -> i;
				false -> subtract;
			endif;
			lvars n = 0;
			repeat
				i >> 1 -> i;
				n fi_+ 1 -> n;
				quitif(i &&/=_0 1);
			endrepeat;
			mult(i, reg, src, tmp, tmp) -> reg;
			if subtract then
				plant({zdep ^reg ^(31 fi_- n) ^(32 fi_- n) ^tmp});
				plant({sub ^tmp ^src ^dst});
			elseif n == 1 then
				plant({sh1add ^reg ^src ^dst});
			elseif n == 2 then
				plant({sh2add ^reg ^src ^dst});
			elseif n == 3 then
				plant({sh3add ^reg ^src ^dst});
			else
				plant({zdep ^reg ^(31 fi_- n) ^(32 fi_- n) ^tmp});
				plant({add ^tmp ^src ^dst});
			endif;
			dst -> reg;
		enddefine;

		lvars tmp, negated = false;
		if dst then
			if dst == src then tmp_reg() else dst endif -> tmp;
		else
			tmp_reg() ->> dst -> tmp;
		endif;
		if i < 0 then
			;;; NB: this does work for the most -ve number
			negate(i) -> i;
			true -> negated;
		endif;
		if i && 2:111 == 2:111 then
			;;; avoid subtraction as the last instruction in case of
			;;; overflow
			mult(i >> 1, src, src, tmp, tmp) -> reg;
			plant({sh1add ^reg ^src ^(dst ->> reg)});
		else
			lvars n = 0;
			while i &&=_0 1 do
				i >> 1 -> i;
				n fi_+ 1 -> n;
			endwhile;
			mult(i, src, src, tmp, dst) -> reg;
			unless n == 0 then
				plant({zdep ^reg ^(31 fi_- n) ^(32 fi_- n) ^(dst ->> reg)});
			endunless;
		endif;
		if negated then
			plant({sub 0 ^reg ^(dst ->> reg)});
		endif;
	enddefine;

	if isintegral(src1) then
		gen_load(src2, false) -> src2;
		gen_store(gen_mult_imm(src1, src2, isreg(dst) and dst), dst);
	elseif isintegral(src2) then
		gen_load(src1, false) -> src1;
		gen_store(gen_mult_imm(src2, src1, isreg(dst) and dst), dst);
	else
		gen_load_f(src1, FR_8L) -> src1;
		gen_load_f(src2, FR_8R) -> src2;
		plant({xmpyu ^src1 ^src2 ^FR_8});
		gen_store_f(FR_8R, dst);
	endif;
enddefine;

;;; gen_bis:
;;;     set the bits of -src2- which are set in -src1- and place the
;;;     result in -dst-. (Logical Inclusive OR)

define lconstant gen_bis(src1, src2, dst);
	lvars src1, src2, dst, tmp;
	if is_bitfield(src1) then
		gen_load(src2, if isreg(dst) then dst else tmp_reg() endif) -> tmp;
		plant({depi -1 ^(bitfield(src1)) ^tmp});
	elseif is_bitfield(src2) then
		gen_load(src1, if isreg(dst) then dst else tmp_reg() endif) -> tmp;
		plant({depi -1 ^(bitfield(src2)) ^tmp});
	else
		gen_load(src1, false) -> src1;
		gen_load(src2, false) -> src2;
		if isreg(dst) then dst else tmp_reg() endif -> tmp;
		plant({or ^src1 ^src2 ^tmp});
	endif;
	unless tmp == dst then gen_store(tmp, dst) endunless;
enddefine;

;;; gen_bim:
;;;		clear the bits of -src2- which are clear in -src1- and place the
;;;		result in -dst-. (Logical AND)

define lconstant gen_bim(src1, src2, dst);
	lvars src1, src2, dst, tmp;
	if is_bitfield(src1) and integer_leastbit(src1) == 0 then
		;;; extract the corresponding field from -src2-
		gen_load(src2, false) -> src2;
	elseif is_bitfield(src2) and integer_leastbit(src2) == 0 then
		;;; extract the corresponding field from -src1-
		(src2, gen_load(src1, false)) -> (src1, src2);
	else
		gen_load(src1, false) -> src1;
		gen_load(src2, false) -> src2;
	endif;
	if isreg(dst) then dst else tmp_reg() endif -> tmp;
	if isintegral(src1) then
		;;; right-adjusted bitfield
		plant({extru ^src2 ^(bitfield(src1)) ^tmp});
	else
		plant({and ^src1 ^src2 ^tmp});
	endif;
	unless tmp == dst then gen_store(tmp, dst) endunless;
enddefine;

;;; gen_bic:
;;;     clear the bits of -src2- which are set in -src1- and place the
;;;     result in -dst-.

define lconstant gen_bic(src1, src2, dst);
	lvars src1, src2, dst, tmp;
	if is_bitfield(src1) then
		;;; zero the corresponding field in -src2-
		gen_load(src2, if isreg(dst) then dst else tmp_reg() endif) -> tmp;
		plant({dep ^R_0 ^(bitfield(src1)) ^tmp});
	else
		gen_load(src1, false) -> src1;
		gen_load(src2, false) -> src2;
		if isreg(dst) then dst else tmp_reg() endif -> tmp;
		plant({andcm ^src2 ^src1 ^tmp});
	endif;
	unless tmp == dst then gen_store(tmp, dst) endunless;
enddefine;

;;; gen_shift:
;;;     shift -src2- by an amount specified by -src1- and place the
;;;     result in -dst-. A positive value of -src1- means shift left; a
;;;     negative value means (arithmetic) shift right.

define lconstant gen_shift(src1, src2, dst);
	lvars src1, src2, dst, tmp1, tmp2;
	if isinteger(src1) then
		gen_load(src2, false) -> src2;
		if isreg(dst) then dst else tmp_reg() endif -> tmp1;
		if src1 fi_< 0 then
			;;; shift right
			;;; extrs src2, p, len, tmp1
			plant({extrs ^src2 ^(31+src1) ^(32+src1) ^tmp1});
		else
			;;; shift left:
			;;; zdep src2, p, len, tmp1
			plant({zdep ^src2 ^(31-src1) ^(32-src1) ^tmp1});
		endif;
	else
		lvars lab1 = genlab(), lab2 = genlab();
		gen_load(src1, false) -> src1;
		gen_load(src2, false) -> src2;
		tmp_reg() -> tmp2;
		if isreg(dst) then dst else tmp_reg() endif -> tmp1;
		;;; test sign bit of -src1-:
		;;; bb,< src1, 0, L$1
		plant({bb\,\< ^src1 0 ^lab1});
		plant(NOP);
		;;; shift left:
		gen_sub(src1, 31, tmp2);
		plant({mtsar ^tmp2});
		plant({zvdep ^src2 32 ^tmp1});
		plant({b ^lab2});
		plant(NOP);
		plant_label(lab1);
		;;; shift right:
		gen_add(src1, 31, tmp2);
		plant({mtsar ^tmp2});
		plant({vextrs ^src2 32 ^tmp1});
		plant_label(lab2);
	endif;
	unless tmp1 == dst then gen_store(tmp1, dst) endunless;
enddefine;

;;; M_ADD src1 src2 dst:
;;;		add -src1- to -src2- and place the result in -dst-

define M_ADD();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_add(src1, src2, dst);
enddefine;

;;; M_SUB src1 src2 dst:
;;;		subtract -src1- from -src2- and place the result in -dst-

define M_SUB();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_sub(src1, src2, dst);
enddefine;

;;; M_MULT src1 src2 dst:
;;;		multiply -src1- by -src2- and place the result in -dst-.

define M_MULT();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_mult(src1, src2, dst);
enddefine;

;;; M_NEG src dst:
;;;		negate the value at -src- and place the result in -dst-

define M_NEG();
	lvars (, src, dst) = explode(m_instr);
	;;; no negate instruction, so do subtraction from 0
	gen_sub(src, R_0, dst);
enddefine;

;;; M_PADD src1 src2 dst:
;;;     add popint -src1- to popint -src2- and place the result in -dst-

define M_PADD();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_padd(src1, src2, dst);
enddefine;

;;; M_PSUB src1 src2 dst:
;;;     subtract popint -src1- from popint -src2- and place the result
;;;     in -dst-.

define M_PSUB();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_psub(src1, src2, dst);
enddefine;

;;; M_BIS src1 src2 dst:
;;;     set the bits of -src2- which are set in -src1- and place the
;;;     result in -dst-.

define M_BIS();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_bis(src1, src2, dst);
enddefine;

;;; M_BIM src1 src2 dst:
;;;     clear the bits of -src2- which are clear in -src1- and place the
;;;     result in -dst-.

define M_BIM();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_bim(src1, src2, dst);
enddefine;

;;; M_BIC src1 src2 dst:
;;;     clear the bits of -src2- which are set in -src1- and place the
;;;     result in -dst-.

define M_BIC();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_bic(src1, src2, dst);
enddefine;

;;; M_LOGCOM src dst:
;;;		place the logical complement of -src- into -dst-.

define M_LOGCOM();
	lvars (, src, dst) = explode(m_instr);
	gen_sub(src, -1, dst);
enddefine;

;;; M_ASH src1 src2 dst:
;;;     shift -src2- by an amount specified by -src1- and place the
;;;     result in -dst-.

define M_ASH();
	lvars (, src1, src2, dst) = explode(m_instr);
	gen_shift(src1, src2, dst);
enddefine;

;;; M_PTR_ADD_OFFS type offs base dst:
;;;     add offset to base pointer of a given type (same as integer
;;;     addition)

define M_PTR_ADD_OFFS();
	lvars (, /*type*/, offs, base, dst) = explode(m_instr);
	gen_add(offs, base, dst);
enddefine;

;;; M_PTR_SUB_OFFS type offs base dst:
;;;     subtract offset from base pointer of a given type (same as
;;;     integer subtraction)

define M_PTR_SUB_OFFS();
	lvars (, /*type*/, offs, base, dst) = explode(m_instr);
	gen_sub(offs, base, dst);
enddefine;

;;; M_PTR_SUB type src1 src2 dst:
;;;     subtract pointer -src1- from pointer -src2- and place resulting
;;;     offset in -dst- (same as integer subtraction)

define M_PTR_SUB();
	lvars (, /*type*/, src1, src2, dst) = explode(m_instr);
	gen_sub(src1, src2, dst);
enddefine;


/*
 *	Branches and Tests
 */

;;; condition:
;;;		converts a pop condition to an assembler condition

define lconstant condition =
	newproperty([
		[EQ		=]
		[NEQ	<>]
		[LT		<]
		[LEQ	<=]
		[GT		>]
		[GEQ	>=]
		[ULT	<<]
		[ULEQ	<<=]
		[UGT	>>]
		[UGEQ	>>=]
		[POS	>=]
		[NEG	<]
		[OVF	sv]
		[NOVF	nsv]
	], 16, false, "perm");
enddefine;

;;; commute_cond:
;;;     alternative condition to use when the order of arguments is
;;;     reversed

define lconstant commute_cond =
	newproperty([
		[=		=]
		[<>		<>]
		[<		>]
		[<=		>=]
		[>		<]
		[>=		<=]
		[<<		>>]
		[<<=	>>=]
		[>>		<<]
		[>>=	<<=]
		[sv		sv]
		[nsv	nsv]
	], 16, false, "perm");
enddefine;

;;; negate_cond:
;;;     alternative condition to use when the sense of the test is
;;;     reversed

define lconstant negate_cond =
	newproperty([
		[=		<>]
		[<>		=]
		[<		>=]
		[<=		>]
		[>		<=]
		[>=		<]
		[<<		>>=]
		[<<=	>>]
		[>>		<<=]
		[>>=	<<]
		[sv		nsv]
		[nsv	sv]
	], 16, false, "perm");
enddefine;

;;; gen_cmp:
;;;		test the result of computing (src1-src2) and branch to -lab- on
;;;		condition.

define lconstant gen_cmp(src1, src2, cond, lab);
	lvars src1, src2, cond, lab;
	if isimm5(src1) then
		gen_load(src2, false) -> src2;
		plant({^("comib\," <> cond) ^src1 ^src2 ^lab});
	elseif isimm5(src2) then
		commute_cond(cond) -> cond;
		gen_load(src1, false) -> src1;
		plant({^("comib\," <> cond) ^src2 ^src1 ^lab});
	else
		gen_load(src1, false) -> src1;
		gen_load(src2, false) -> src2;
		plant({^("comb\," <> cond) ^src1 ^src2 ^lab});
	endif;
	plant(NOP);
enddefine;

;;; gen_switch:
;;;     computed goto. Branches to a label selected by the value of
;;;     register -index- or falls through if the index is out of range.

define lconstant gen_switch(index, labs);
	lvars index, labs, lab, continue = genlab();
	;;; test for out of range:
	;;; the unsigned comparison copes with a negative index as well
	gen_cmp(index, length(labs), ">>", continue);
	;;; BLR has the effect of a branch-indexed
	plant({blr ^index ^R_0});
	plant(NOP);
	;;; plant the table:
	;;; each entry must be exactly 8 bytes for the BLR;
	;;; fall-through case added for index = 0
	fast_for lab in [^continue ^^labs] do
		plant({b ^lab});
		plant(NOP_fixed);
	endfor;
	plant_label(continue);
	;;; label invalidates the address cache
	[] -> ldil_cache;
enddefine;

;;; gen_bit:
;;;     test the bits of -src- specified by -mask- and branch if
;;;     condition is satisfied. NB: assumes the condition is always
;;;     equal/not-equal to zero.

define lconstant gen_bit(mask, src, cond, lab);
	lvars mask, src, cond, lab, tmp;
	unless cond == "=" or cond == "<>" then
		mishap(cond, 1, 'SYSTEM ERROR (unsupported bit test)');
	endunless;
	if is_bitfield(mask) then
		;;; bit field test -- can optimise
		lvars (p, len) = bitfield(mask);
		if len == 1 then
			;;; single bit -- can be done in one instruction
			;;; bb,cond src, p, lab
			if cond == "=" then ">=" else "<" endif -> cond;
			gen_load(src, false) -> src;
			plant({^("bb\," <> cond) ^src ^p ^lab});
		else
			;;; extract bit-field and test result
			gen_load(src, false) -> src;
			tmp_reg() -> tmp;
			plant({extru ^src ^p ^len ^tmp});
			plant({^("comb\," <> cond) ^tmp ^R_0 ^lab});
		endif;
	else
		;;; generate AND to a temporary register and test the result
		gen_bim(mask, src, tmp_reg() ->> tmp);
		plant({^("comb\," <> cond) ^tmp ^R_0 ^lab});
	endif;
	plant(NOP);
enddefine;

;;; gen_padd_test:
;;;     push the result of adding popint -src1- to popint -src2-. Branch
;;;     to -lab- if the result satisfies a condition.

define lconstant gen_padd_test(src1, src2, cond, lab);
	lvars src1, src2, cond, lab, tmp = tmp_reg();
	;;; remove popint bits from one operand
	if isintegral(src1) then
		src1 - 3 -> src1;
	elseif isintegral(src2) then
		;;; put the immediate value first
		(src1, src2 - 3) -> (src2, src1);
	else
		gen_sub(3, src1, tmp);
		;;; swapping the operands here can avoid an unnecessary
		;;; register copy below
		(tmp, src2) -> (src2, src1);
	endif;
	if isimm5(src1) then
		;;; addib,cond src1, tmp, lab
		gen_load(src2, tmp) -> ;
		plant({^("addib\," <> cond) ^src1 ^tmp ^lab});
	else
		;;; addb,cond src1, tmp, lab
		gen_load(src1, false) -> src1;
		gen_load(src2, tmp) -> ;
		plant({^("addb\," <> cond) ^src1 ^tmp ^lab});
	endif;
	;;; push result in the branch delay slot:
	;;; stwm tmp, -4(%usp)
	gen_store(tmp, -_USP);
	;;; plant a dummy label to ensure the push doesn't get relocated in
	;;; the improvement pass
	plant_label(genlab());
enddefine;

;;; gen_psub_test:
;;;     push the result of subtracting popint -src1- from popint -src2-.
;;;     Branch to -lab- if the result satisfies a condition.

define lconstant gen_psub_test(src1, src2, cond, lab);
	lvars src1, src2, cond, lab;
	if isintegral(src1)
	and (src1 >= 0 or integer_length(3 - src1) fi_<= 31)
	then
		;;; simulate subtract-and-branch with an add-and-branch of
		;;; the negation (NB: 6-src1 negates src1 as a popint)
		gen_padd_test(6-src1, src2, cond, lab);
	else
		;;; remove popint bits from src1
		lvars tmp = tmp_reg();
		if isintegral(src2)
		and (src2 < 0 or integer_length(src2 + 3) fi_<= 31)
		then
			;;; src2 + 3 won't overflow
			src2 + 3 -> src2;
		else
			gen_sub(3, src1, tmp ->> src1);
		endif;
		;;; there's no subtract-and-branch instruction, so we have to
		;;; use a subtract-and-nullify with a negated condition,
		;;; followed by an unconditional branch
		negate_cond(cond) -> cond;
		if isimm11(src2) then
			gen_load(src1, false) -> src1;
			plant({^("subi\," <> cond) ^src2 ^src1 ^tmp});
		else
			gen_load(src1, false) -> src1;
			gen_load(src2, false) -> src2;
			plant({^("sub\," <> cond) ^src2 ^src1 ^tmp});
		endif;
		plant({b ^lab});
		;;; push result in the branch delay slot:
		gen_store(tmp, -_USP);
		;;; plant a dummy label to ensure the push doesn't get relocated
		;;; in the improvement pass
		plant_label(genlab());
	endif;
enddefine;

;;; M_LABEL lab:
;;;		plant a label.

define M_LABEL();
	lvars (, lab) = explode(m_instr);
	plant_label(lab);
	;;; label invalidates the address cache
	[] -> ldil_cache;
enddefine;

;;; M_BRANCH lab:
;;;		unconditional branch.

define M_BRANCH();
	lvars (, lab) = explode(m_instr);
	plant({b ^lab});
	plant(NOP);
enddefine;

;;; M_BRANCH_std lab:
;;;     like M_BRANCH, but the generated code has to be of a fixed size.
;;;     This is guaranteed by using NOP_fixed which won't be deleted or
;;;     changed, giving a code size of 8 bytes.

define M_BRANCH_std();
	lvars (, lab) = explode(m_instr);
	plant({b ^lab});
	plant(NOP_fixed);
enddefine;

;;; M_BRANCH_ON src labels default:
;;;		go_on a pop integer. If -default- is true, then a default case
;;;		follows.

define M_BRANCH_ON();
	lvars (, src, labs, default) = explode(m_instr), index;
	;;; convert -src- to system integer
	gen_load(src, false) -> src;
	gen_shift(-2, src, tmp_reg() ->> index);
	gen_switch(index, labs);
	unless default then
		;;; error case follows, expecting -src- on the stack
		gen_store(src, -_USP);
	endunless;
enddefine;

;;; M_BRANCH_ON_INT src labels:
;;;		go_on a system integer.

define M_BRANCH_ON_INT();
	lvars (, src, labs) = explode(m_instr);
	gen_switch(gen_load(src, false), labs);
enddefine;

;;; M_CMP src1 src2 cond lab:
;;;     subtract -src1- from -src2- and branch to lab if the result
;;;     satisifies -cond-.

define M_CMP();
	lvars (, src1, src2, cond, lab) = explode(m_instr);
	gen_cmp(src1, src2, condition(cond), lab);
enddefine;

;;; M_PCMP src1 src2 cond lab:
;;;     subtract popint -src1- from popint -src2- and branch to lab if
;;;     the result satisifies -cond-. (Same as M_CMP)

define M_PCMP();
	lvars (, src1, src2, cond, lab) = explode(m_instr);
	gen_cmp(src1, src2, condition(cond), lab);
enddefine;

;;; M_PTR_CMP type src1 src2 cond lab:
;;;     subtract pointer -src1- from pointer -src2- and branch to lab if
;;;     the result satisifies -cond-. (Same as M_CMP)

define M_PTR_CMP();
	lvars (, /*type*/, src1, src2, cond, lab) = explode(m_instr);
	gen_cmp(src1, src2, condition(cond), lab);
enddefine;

;;; M_TEST src cond lab:
;;;		test -src- and branch to -lab- if condition is satisified.

define M_TEST();
	lvars (, src, cond, lab) = explode(m_instr);
	gen_cmp(src, R_0, condition(cond), lab);
enddefine;

;;; M_BIT mask src cond lab:
;;;     test the bits of -src- specified by -mask- and branch if
;;;     condition is satisfied.

define M_BIT();
	lvars (, mask, src, cond, lab) = explode(m_instr);
	gen_bit(mask, src, condition(cond), lab);
enddefine;

;;; M_PADD_TEST src1 src2 cond lab:
;;;     add popint -src1- to popint -src2-, push the result and branch to
;;;		-lab- on condition

define M_PADD_TEST();
	lvars (, src1, src2, cond, lab) = explode(m_instr);
	gen_padd_test(src1, src2, condition(cond), lab);
enddefine;

;;; M_PSUB_TEST src1 src2 cond lab:
;;;     subtract popint -src1- from popint -src2-, push the result and
;;;     branch to -lab- on condition

define M_PSUB_TEST();
	lvars (, src1, src2, cond, lab) = explode(m_instr);
	gen_psub_test(src1, src2, condition(cond), lab);
enddefine;

;;; M_CMPKEY key src cond lab:
;;;     test the key of compound item -src-. Test may be for a specific
;;;     key or for the key flags, in which case -key- is an integer
;;;     mask. Branch if the condition is satisified (can only be equal
;;;     or not equal).

define M_CMPKEY();
	lconstant KEY = field_@@("KEY"), K_FLAGS = field_@@("K_FLAGS");
	lvars (, key, src, cond, lab) = explode(m_instr), lab1;
	condition(cond) -> cond;
	gen_load(src, false) -> src;
	;;; test for issimple(src)
	if cond == "=" then genlab() else lab endif -> lab1;
	plant({bb\,\< ^src 31 ^lab1});
	plant(NOP);
	;;; not simple -- get the key and test
	gen_load({^src ^KEY}, false) -> src;
	if isinteger(key) then
		;;; test the flags
		gen_bit(key, {^src ^K_FLAGS}, negate_cond(cond), lab);
	else
		;;; compare the key
		gen_cmp(key, src, cond, lab);
	endif;
	unless lab == lab1 then
		plant_label(lab1);
	endunless;
enddefine;


/*
 *	Procedure Call and Return
 */

;;; gen_pop_call:
;;;		call a pop procedure. If -link- is true, then save return address
;;;		in %r31.

define lconstant gen_pop_call(link, pdr);
	lconstant PD_EXECUTE = field_@@("PD_EXECUTE");
	lvars link, pdr, target, opcode = if link then "ble" else "be" endif;
	if isimm(pdr) then
		;;; system procedure:
		execlabof(cont(pdr), true) -> target;
		;;; ldil  L'target, reg
		;;; ldsid (reg), tmp
		;;; mtsp  tmp, %sr0
		;;; ble   R'target(%sr0, reg)
		lvars reg, (l_target, r_target) = L_&_R(target);
		gen_ldil(l_target) -> l_target;
		if asm_labtype(target) ->> reg then
			;;; target space is known (procedure must be defined in the
			;;; current file)
			if reg == "code" then SR_CODE else SR_DATA endif -> reg;
		else
			;;; must compute the target space at run time (in %sr0)
			tmp_reg() -> reg;
			plant({ldsid {^l_target 0} ^reg});
			plant({mtsp ^reg ^SR_0});
			SR_0 -> reg;
		endif;
		{^l_target ^reg ^r_target} -> target;
	else
		gen_load(pdr, false) -> pdr;
		;;; ldw   PD_EXECUTE(%pdr), tmp1
		;;; ldsid (%pdr), tmp2
		;;; mtsp  tmp2, %sr0
		;;; ble   (%sr0, tmp1)
		lvars tmp1 = gen_load({^pdr ^PD_EXECUTE}, false), tmp2 = tmp_reg();
		plant({ldsid {^pdr 0} ^tmp2});
		plant({mtsp ^tmp2 ^SR_0});
		{^tmp1 ^SR_0 0} -> target;
	endif;
	plant({^opcode ^target});
	plant(NOP);
	;;; call invalidates the address cache
	[] -> ldil_cache;
enddefine;

;;; gen_nonpop_call:
;;;     call a non-pop subroutine with -n- arguments (n <= 4; operands
;;;     taken from the stack). If -link- is true, then save return
;;;     address in %r31.

define lconstant gen_nonpop_call(n, link, subr);
	lvars	n, link, subr;
	dlocal	regs_in_use;
	;;; load arguments to registers
	while n fi_> 0 do
		lvars reg = ARG_REGS(n);
		gen_load((), reg) -> ;
		conspair(reg, regs_in_use) -> regs_in_use;
		n fi_- 1 -> n;
	endwhile;
	;;; a subroutine will always be in the code space
	lvars target, opcode = if link then "ble" else "be" endif;
	if isimm(subr) then
		;;; ldil L'subr, reg
		;;; ble  R'subr(%sr4, reg)
		lvars (l_subr, r_subr) = L_&_R(subr);
		gen_ldil(l_subr) -> l_subr;
		{^l_subr ^SR_CODE ^r_subr} -> target;
	else
		gen_load(subr, false) -> subr;
		{^subr ^SR_CODE 0} -> target;
	endif;
	plant({^opcode ^target});
	plant(NOP);
	;;; call invalidates the address cache
	[] -> ldil_cache;
enddefine;

;;; M_CALL pdr:
;;;		call a pop procedure

define M_CALL();
	lvars (, pdr) = explode(m_instr);
	gen_pop_call(true, pdr);
enddefine;

;;; M_CHAIN pdr:
;;;		chain a pop procedure

define M_CHAIN();
	lvars (, pdr) = explode(m_instr);
	gen_pop_call(false, pdr);
enddefine;

;;; M_CALL_WITH_RETURN pdr addr:
;;;		call a pop procedure, but with a given return address

define M_CALL_WITH_RETURN();
	lvars (, pdr, addr) = explode(m_instr);
	gen_load(addr, R_31) -> ;
	gen_pop_call(false, pdr);
enddefine;

;;; M_CALLSUB subr [arg ...]:
;;;		call a subroutine, passing any arguments in registers.

define M_CALLSUB();
	lvars subr = m_instr(2), n = datalength(m_instr), i;
	;;; push argument operands
	fast_for i from n by -1 to 3 do
		f_subv(i, m_instr);
	endfor;
	gen_nonpop_call(n fi_- 2, true, subr);
enddefine;

;;; M_CHAINSUB subr:
;;;		chain a subroutine (no arguments)

define M_CHAINSUB();
	lvars (, subr) = explode(m_instr);
	gen_nonpop_call(0, false, subr);
enddefine;

;;; M_RETURN:
;;;     return from a pop procedure. Return address is in %r31, but may
;;;     point to any space.

define M_RETURN();
	lvars tmp = tmp_reg();
	plant({ldsid {^R_31 0} ^tmp});
	plant({mtsp  ^tmp ^SR_0});
	plant({be    {^R_31 ^SR_0 0}});
	plant(NOP);
enddefine;


/*
 *	Procedure Entry and Exit
 */

lblock;

;;; These values have to be remembered bewteen creating and restoring
;;; a stack frame:
lvars
	frame_length,
		;;; size of the frame in bytes
	reg_locals,
		;;; register numbers used for register lvars
	n_stkvars,
		;;; number of on-stack lvars
	dlocal_labs,
		;;; names of dlocal identifiers
;

;;; M_CREATE_SF reg_locals n_pop_regs n_stkvars n_pop_stkvars dlocal_labs
;;;				reg_spec_id:
;;;		create a stack frame on entry to a procedure.
;;;			reg_locals	  is a list of register lvars;
;;;			n_pop_regs	  is the number of pop register lvars;
;;;			n_stkvars	  is the total number of on-stack lvars;
;;;			n_pop_stkvars is the number of those which hold pop values;
;;;			dlocal_labs   is a list of the procedure's dlocal identifiers;
;;;			reg_spec_id   is an identifier which should be updated with the
;;;						  procedure's register mask.

define M_CREATE_SF();
	lvars n_pop_regs, n_pop_stkvars, reg_spec_id, offs, item;

	explode(m_instr) -> (, reg_locals, n_pop_regs, n_stkvars, n_pop_stkvars,
						   dlocal_labs, reg_spec_id);

	;;; Compute the size of the stack frame
	wof(listlength(reg_locals) + listlength(dlocal_labs) + n_stkvars + 2)
		-> frame_length;

	;;; Compute the register mask: this is
	;;;		(n << 8) || m
	;;; where n is the number of pop registers *not* saved and m is the
	;;; number of non-pop registers not saved (see "aprocess.s")
	((#_< listlength(POP_REGS) >_# fi_- n_pop_regs) fi_<< 8) fi_||
	(#_< listlength(NPOP_REGS) >_# fi_+ n_pop_regs fi_- listlength(reg_locals))
		-> idval(reg_spec_id);

	;;; Set the procedure base register for this procedure
	gen_load(consref(current_pdr_label), R_PB) -> ;

	;;; Allocate the stack frame and save the return address
	plant({stwm ^R_31 {^R_SP ^frame_length}});
	4 fi_- frame_length -> offs;

	;;; Save register lvars in order:
	;;;		..., %npop1, %npop0, ..., %pop1, %pop0
	fast_for item in syssort(reg_locals, nonop fi_<) do
		gen_store(reglabel(item), {^R_SP ^offs});
		offs fi_+ 4 -> offs;
	endfor;

	;;; Save dynamic locals
	unless dlocal_labs == [] then
		;;; interleaving the loads and stores can help avoid pipline
		;;; stalls
		lvars tmp1, tmp2;
		gen_load(front(dlocal_labs), false) -> tmp1;
		fast_for item in back(dlocal_labs) do
			gen_load(item, false) -> tmp2;
			gen_store(tmp1, {^R_SP ^offs});
			offs fi_+ 4 -> offs;
			tmp2 -> tmp1;
		endfor;
		gen_store(tmp1, {^R_SP ^offs});
		offs fi_+ 4 -> offs;
	endunless;

	;;; Initialise pop on-stack lvars
	fast_repeat n_pop_stkvars times
		gen_store(R_PZERO, {^R_SP ^offs});
		offs fi_+ 4 -> offs;
	endrepeat;

	;;; Save the procedure base register
	gen_store(R_PB, {^R_SP -4});
enddefine;

;;; M_UNWIND_SF:
;;;     remove the stack frame created by a previous call to
;;;     M_CREATE_SF. The essential parameters of the frame are saved in
;;;     global variables from that call. The generated code must not use
;;;     CHAIN_REG.

define M_UNWIND_SF();
	lvars	offs, item;
	dlocal	regs_in_use = [^CHAIN_REG];

	;;; Compute SP offset to start of dlocal save area
	-wof(n_stkvars+1) -> offs;

	;;; Restore dynamic locals
	unless dlocal_labs == [] then
		lvars tmp1, tmp2;
		offs fi_- 4 -> offs;
		gen_load({^R_SP ^offs}, false) -> tmp1;
		fast_for item in rev(back(dlocal_labs)) do
			offs fi_- 4 -> offs;
			gen_load({^R_SP ^offs}, false) -> tmp2;
			gen_store(tmp1, item);
			tmp2 -> tmp1;
		endfor;
		gen_store(tmp1, front(dlocal_labs));
	endunless;

	;;; Restore register lvars in order:
	;;;		%pop0, %pop1, ..., %npop0, %npop1, ...
	fast_for item in syssort(reg_locals, nonop fi_>) do
		offs fi_- 4 -> offs;
		gen_load({^R_SP ^offs}, reglabel(item)) -> ;
	endfor;

	;;; Restore return address and deallocate the stack frame
	plant({ldwm {^R_SP ^(-frame_length)} ^R_31});

	;;; Restore caller's procedure base
	gen_load({^R_SP -4}, R_PB) -> ;
enddefine;

endlblock;

;;; M_END:
;;;		end a procedure

define M_END();
enddefine;


/*
 *	Special instructions
 */

;;; M_CLOSURE pdpart frozvals:
;;;     closure code: push -frozvals- and chain the -pdpart-. If
;;;     -pdpart- is <false>, then this is a writeable or copyable
;;;     closure in which case the -pdpart- has to be extracted from the
;;;     closure record at run-time.

define M_CLOSURE();
	lconstant PD_CLOS_FROZVALS = field_@@("PD_CLOS_FROZVALS"),
			  PD_CLOS_PDPART = field_@@("PD_CLOS_PDPART");
	lvars	(, frozvals, pdr) = explode(m_instr), base, nfroz;
	dlocal	regs_in_use;
	listlength(frozvals) -> nfroz;
	;;; get the closure address in a register:
	;;; this uses a standard trick of doing a short (8-byte)
	;;; branch-and-link which deposits the program counter value in a
	;;; register; this is then adjusted to point to the start of the
	;;; record (the '-3' in the adjustment is to account for the
	;;; protection bits which will be set in the return address). Doing
	;;; it this way makes the closure fully relocatable.
	tmp_reg() -> base;
	conspair(base, regs_in_use) -> regs_in_use;
	plant({bl '.+8' ^base});
	gen_sub(PD_CLOS_FROZVALS + wof(nfroz) + 8 + 3, base, base);
	if nfroz fi_> 16 then
		;;; call via -Sys$-Exec_closure-
		gen_store(base, -_USP);
		gen_pop_call(false, perm_const_opnd([Sys Exec_closure]));
	else
		;;; push the frozvals:
		unless nfroz == 0 then
			;;; interleaving the loads and stores can help avoid
			;;; pipeline stalls
			lvars tmp1 = tmp_reg(), tmp2 = tmp_reg();
			PD_CLOS_FROZVALS -> i;
			while nfroz fi_> 1 do
				gen_load({^base ^i}, tmp1) -> ;
				gen_load({^base ^(i+4)}, tmp2) -> ;
				gen_store(tmp1, -_USP);
				gen_store(tmp2, -_USP);
				nfroz fi_- 2 -> nfroz;
				i fi_+ 8 -> i;
			endwhile;
			if nfroz fi_> 0 then
				gen_load({^base ^i}, tmp1) -> ;
				gen_store(tmp1, -_USP);
			endif;
		endunless;
		unless pdr then
			;;; extract procedure address from the closure record
			{^base ^PD_CLOS_PDPART} -> pdr;
		endunless;
		;;; chain the pdpart
		gen_pop_call(false, pdr);
	endif;
enddefine;

;;; gen_bfield:
;;;     extract a bit field from structure -base- at bit offset -offs-
;;;     extending for -len- bits, placing the result in -dst-. Values
;;;     may be signed or unsigned.

define lconstant gen_bfield(base, offs, len, dst, signed);
	lvars base, offs, len, dst, signed;
	;;; try to optimise to an inline bitfield extraction
	if isinteger(offs) and isinteger(len) then
		lvars disp, p;
		;;; byte displacement of word containing the start of the field
		(offs fi_>> 3) fi_&&~~ 3 -> disp;
		;;; rightmost bit of the field
		(offs fi_&& 31) fi_+ len fi_- 1 -> p;
		if p fi_<= 31 then
			;;; field fits completely within the word
			lvars opcode, opnd, tmp;
			if signed then "extrs" else "extru" endif -> opcode;
			unless can_defer_opnd(base, disp, false) ->> opnd then
				gen_load(base, false) -> base;
				{^base ^disp} -> opnd;
			endunless;
			gen_load(opnd, false) -> opnd;
			if isreg(dst) then dst else tmp_reg() endif -> tmp;
			plant({^opcode ^opnd ^p ^len ^tmp});
			unless tmp == dst then gen_store(tmp, dst) endunless;
			return;
		endif;
	endif;
	;;; plant call to bitfield access subroutine
	lvars subr = symlabel(if signed then "\^_sbfield" else "\^_bfield" endif);
	gen_nonpop_call(base, offs, len, 3, true, consref(subr));
	gen_store(R_RET0, dst);
enddefine;

;;; M_MOVEbit size offs base dst:
;;;     Move an unsigned bitfield from structure -base- to -dst-. Field
;;;     is at bit pisition -offs- from -base- and has a length of -size-
;;;     bits.

define M_MOVEbit();
	lvars (, size, offs, base, dst) = explode(m_instr);
	gen_bfield(base, offs, size, dst, false);
enddefine;

;;; M_MOVEsbit size offs src dst:
;;;		Move a signed bitfield.

define M_MOVEsbit();
	lvars (, size, offs, base, dst) = explode(m_instr);
	gen_bfield(base, offs, size, dst, true);
enddefine;

;;; M_UPDbit size offs dst src:
;;;		Update a bitfield.

define M_UPDbit();
	lvars (, size, offs, base, src) = explode(m_instr);
	gen_nonpop_call(src, base, offs, size, 4, true,
		consref(symlabel("\^_ubfield")));
enddefine;

;;; M_PLOG_IFNOT_ATOM lab:
;;;     follows subroutine call to -prolog_unify_atom- (from
;;;     "aprolog.s"). Register %ret0 will be zero if the unification
;;;     failed.

define M_PLOG_IFNOT_ATOM();
	lvars (, lab) = explode(m_instr);
	plant({comb\,\<\> ^R_RET0 ^R_0 ^lab});
	plant(NOP);
enddefine;

;;; M_PLOG_TERM_SWITCH lab1 lab2 dst:
;;;     follows subroutine call to -prolog_{pair,term}_switch- (from
;;;     "aprolog.s") which sets %ret0 according to:
;;;			> 0			prologvar    	(goto lab2)
;;;			= 0			match OK		(store arg. to -dst-)
;;;			< 0			match failed	(goto lab1)

define M_PLOG_TERM_SWITCH();
	lvars (, lab1, lab2, dst) = explode(m_instr);
	plant({comb\,\< ^R_RET0 ^R_0 ^lab1});
	plant(NOP);
	plant({comb\,\> ^R_RET0 ^R_0 ^lab2});
	plant(NOP);
	;;; fall through, dereferenced arg. left in %arg0
	gen_store(R_ARG0, dst);
enddefine;

;;; M_SETSTKLEN offs sl:
;;;		optimise call to subroutine -setstklen- when -offs- is a known
;;;		integer value. -sl- is the saved stacklength as a pop integer;
;;;		-offs- is the expected adjustment.

define M_SETSTKLEN();
	lvars	(, offs, sl) = explode(m_instr), lab;
	dlocal	regs_in_use = [^R_ARG0];
	;;; compute desired stack pointer value in %arg0:
	;;; (offs-3) removes the popint bits from saved stacklength
	gen_sub(sl, identlabel("\^_userhi"), R_ARG0);
	gen_sub(offs-3, R_ARG0, R_ARG0);
	;;; if current USP is not the same, call _setstklen_diff to fix
	gen_cmp(USP, R_ARG0, "=", genlab() ->> lab);
	gen_nonpop_call(0, true, consref(symlabel("\^_setstklen_diff")));
	plant_label(lab);
enddefine;

;;; M_ERASE src:
;;;		discard -src-.

define M_ERASE();
	lvars (, src) = explode(m_instr);
	if isregind(src) then gen_load(src, false) -> endif;
enddefine;


/*
 *	Generate assembly code
 */

;;; generate:
;;;		maps a list of M-code instructions to assembly code.

define lconstant generate(m_codelist) -> (codelist, codelen);
	lvars	m_codelist;
	dlocal	m_instr, codelist = [], codelen = 0,
			tmp_regs = [], regs_in_use = [], ldil_regs = [], ldil_cache = [];
	plant_label(current_pdr_exec_label);
	for m_instr in m_codelist do
#_IF DEF M_DEBUG
		;;; add a comment for each instruction
		{; ^^m_instr} :: codelist -> codelist;
#_ENDIF
		;;; M-opcode is a procedure: apply it
		fast_subscrv(1, m_instr)();
	endfor;
enddefine;


;;; -- CODE IMPROVEMENT ---------------------------------------------------

define lconstant improve(oldcode, oldlen) -> (newcode, newlen);
	lvars	oldcode, oldlen, newcode = [], newlen = oldlen,
			offs = oldlen, n_added = 0;

	;;; try to replace a no-op instruction at the front of -code-
	;;; by moving down some instruction from earlier in the code stream
	define lconstant replace_NOP(code);
		lvars code, last_p, instr, instrs, reads, writes, opcode, rs, ws;

		;;; read/write dependencies for all instruction types generated
		;;; in this file
		define lconstant depends =
			newproperty([
				[add		{add	r	r	w		}]
				[addb		{addb	r	rw	-		}]
				[addi		{addi	-	r	w		}]
				[addib		{addib	-	rw	-		}]
				[addil		{addil	-	r			}]	/****/
				[and		{and	r	r	w		}]
				[andcm		{andcm	r	r	w		}]
				[b			{b		-				}]
				[bb			{bb		r	-	-		}]
				[be			{be		r				}]
				[bl			{bl		r	w			}]
				[ble		{ble	r				}]	/****/
				[blr		{blr	r	w			}]
				[comb		{comb	r	r	-		}]
				[comib		{comib	-	r	-		}]
				[copy		{copy	r	w			}]
				[dep		{dep	r	-	-	w	}]
				[depi		{depi	-	-	-	w	}]
				[extrs		{extrs	r	-	-	w	}]
				[extru		{extru	r	-	-	w	}]
				[fldws		{fldws	r	w			}]
				[fldws\,ma	{fldws	rm	w			}]
				[fldws\,mb	{fldws	rm	w			}]
				[fldwx\,s	{fldwx	r	w			}]
				[fstws		{fstws	r	w			}]
				[fstws\,ma	{fstws	r	wm			}]
				[fstws\,mb	{fstws	r	wm			}]
				[fstwx\,s	{fstwx	r	w			}]
				[ldb		{ldb	r	w			}]
				[ldbs\,ma	{ldbs	rm	w			}]
				[ldbs\,mb	{ldbs	rm	w			}]
				[ldh		{ldh	r	w			}]
				[ldhs\,ma	{ldhs	rm	w			}]
				[ldhs\,mb	{ldhs	rm	w			}]
				[ldi		{ldi	-	w			}]
				[ldil		{ldil	-	w			}]
				[ldo		{ldo	r	w			}]
				[ldsid		{ldsid	r	w			}]
				[ldw		{ldw	r	w			}]
				[ldwm		{ldwm	rm	w			}]
				[ldws\,ma	{ldws	rm	w			}]
				[ldws\,mb	{ldws	rm	w			}]
				[mtsar		{mtsar	r				}]	/****/
				[mtsp		{mtsp	r	w			}]
				[or			{or		r	r	w		}]
				[stb		{stb	r	w			}]
				[stbs\,ma	{stbs	r	wm			}]
				[stbs\,mb	{stbs	r	wm			}]
				[sth		{sth	r	w			}]
				[sths\,ma	{sths	r	wm			}]
				[sths\,mb	{sths	r	wm			}]
				[stw		{stw	r	w			}]
				[stwm		{stwm	r	wm			}]
				[stws\,ma	{stws	r	wm			}]
				[stws\,mb	{stws	r	wm			}]
				[sub		{sub	r	r	w		}]
				[subi		{subi	-	r	w		}]
				[vextrs		{vextrs	r	-	w		}]	/****/
				[xmpyu		{xmpyu	r	r	w		}]
				[zdep		{zdep	r	-	-	w	}]
				[zdepi		{zdepi	-	-	-	w	}]
				[zvdep		{zvdep	r	-	w		}]	/****/
			], 64, false, "perm");
		enddefine;

		;;; insert an item into a set
		define lconstant insert(item, list) -> list;
			lvars item, list;
			unless fast_lmember(item, list) then
				conspair(item, list) -> list;
			endunless;
		enddefine;

		;;; compute the operands read and written by an instruction
		define lconstant reads_&_writes(instr) -> (rs, ws);
			lvars instr, rs = [], ws = [], opcode, template, i, n;
			f_subv(1, instr) -> opcode;
			unless depends(opcode) ->> template then
				if locchar(`,`, 1, opcode) ->> i then
					subword(1, i-1, opcode) -> opcode;
				endif;
				unless depends(opcode) ->> template then
					mishap(opcode, 1, 'SYSTEM ERROR (unrecognised opcode)');
				endunless;
			endunless;
			datalength(template) -> n;
			unless n == datalength(instr) then
				mishap(opcode, 1, 'SYSTEM ERROR (malformed instruction)');
			endunless;
			fast_for i from 2 to n do
				lvars flags = f_subv(i, template);
				unless flags == "-" then
					lvars opnd = f_subv(i, instr);
					if isreg(opnd) then
						if flags == "r" or flags == "rw" then
							insert(opnd, rs) -> rs;
						endif;
						if opnd /== R_0 and (flags == "w" or flags == "rw")
						then
							insert(opnd, ws) -> ws;
						endif;
					elseif isregind(opnd) then
						lvars reg = f_subv(1, opnd);
						;;; base register is always read
						insert(reg, rs) -> rs;
						if flags == "rm" or flags == "wm" then
							;;; base register is also updated
							insert(reg, ws) -> ws;
						endif;
						if datalength(opnd) == 3 then
							;;; index or space register
							insert(f_subv(2, opnd), rs) -> rs;
						endif;
						unless opcode == "ldo" or opcode == "ldsid" then
							;;; true memory access
							if reg == R_USP then
								;;; treat all user stack accesses alike
								i_USP -> opnd;
							elseif reg == R_SP then
								;;; call stack locations can be uniquely
								;;; identified by their displacements
								f_subv(2, opnd) -> opnd;
							else
								;;; other memory operands can't be
								;;; distinguished reliably, so treat
								;;; them all alike
								"mem" -> opnd;
							endif;
							if flags == "r" or flags == "rm" then
								insert(opnd, rs) -> rs;
							else
								insert(opnd, ws) -> ws;
							endif;
						endunless;
					endif;
				endunless;
			endfor;
			;;; special cases for hidden operands
			if opcode == "addil" then
				insert(R_1, ws) -> ws;
			elseif opcode == "ble" then
				insert(R_31, ws) -> ws;
			elseif opcode == "mtsar" then
				insert("sar", ws) -> ws;
			elseif opcode == "zvdep" or opcode == "vextrs" then
				insert("sar", rs) -> rs;
			endif;
		enddefine;

		define lconstant independent(rs1, ws1, rs2, ws2);
			lvars item, rs1, ws1, rs2, ws2;
			fast_for item in rs1 do
				returnif(fast_lmember(item, ws2))(false);
			endfor;
			fast_for item in ws1 do
				returnif(fast_lmember(item, rs2) or fast_lmember(item, ws2))
					(false);
			endfor;
			true;
		enddefine;

		define lconstant merge(rs1, ws1, rs2, ws2) -> (rs2, ws2);
			lvars item, rs1, ws1, rs2, ws2;
			fast_for item in ws1 do
				insert(item, ws2) -> ws2;
				unless item == "mem" then
					fast_ncdelete(item, rs2, nonop ==, 1) -> rs2;
				endunless;
			endfor;
			fast_for item in rs1 do
				insert(item, rs2) -> rs2;
			endfor;
		enddefine;

		fast_back(code) -> last_p;
		fast_destpair(last_p) -> (instr, instrs);
		reads_&_writes(instr) -> (reads, writes);
		until instrs == [] do
			fast_front(instrs) -> instr;
			f_subv(1, instr) -> opcode;
			if opcode == "\.label" or opcode == "nop" then
				quitloop;
			elseif opcode /== ";" then
				reads_&_writes(instr) -> (rs, ws);
				if independent(rs, ws, reads, writes) then
					;;; -instr- can be moved
					fast_back(instrs) -> fast_back(last_p);
					instr -> fast_front(code);
					return(true);
				else
					merge(rs, ws, reads, writes) -> (reads, writes);
				endif;
			endif;
			instrs -> last_p;
			fast_back(instrs) -> instrs;
		enduntil;
		false;
	enddefine;	/* replace_NOP */

	;;; opcodes for condtional branches
	lconstant BRANCH_OPS = [bb comb comib addb addib];

	;;; record the word offsets of local labels
	define lconstant label_offset =
		newproperty([], 64, false, "tmparg");
	enddefine;

	;;; see whether the no-op at the front of -oldcode- can be deleted
	;;; by setting the nullify bit in the preceding branch instruction.
	;;; -newcode- is the code following
	define lconstant nullify_branch(oldcode, newcode) -> nullify;
		lvars oldcode, newcode, nullify = false, instr, opcode, tmp;
		fast_front(fast_back(oldcode)) -> instr;
		f_subv(1, instr) -> opcode;
		if opcode == "b" or opcode == "be" then
			;;; unconditional branch can always be nullified, provided
			;;; there's at least one instruction following
			until newcode == [] do
				f_subv(1, fast_front(newcode)) -> tmp;
				quitunless(tmp == "\.label" or tmp == ";");
				fast_back(newcode) -> newcode;
			enduntil;
			unless newcode == [] then
				opcode <> "\,n" -> f_subv(1, instr);
				true -> nullify;
			endunless;
		elseif locchar(`,`, 1, opcode) ->> tmp then
			subword(1, tmp-1, opcode) -> tmp;
			if fast_lmember(tmp, BRANCH_OPS) then
				;;; conditional branch can be nullified provided that
				;;; the target is forward (in which case it'll have been
				;;; recorded in the label_offset table)
				if label_offset(f_subv(datalength(instr), instr)) then
					opcode <> "\,n" -> f_subv(1, instr);
					true -> nullify;
				endif;
			endif;
		endif;
	enddefine;

	;;; check that the target of a conditional branch is in range,
	;;; and expand it to two instructions if not
	define lconstant check_branch(opcode, instr);
		lvars opcode, instr, i, root, lab, disp;
		;;; a conditional branch op is of the form: root,cond
		locchar(`,`, 1, opcode) -> i;
		subword(1, i-1, opcode) -> root;
		returnunless(fast_lmember(root, BRANCH_OPS))(false);
		;;; branch target is always the last operand
		f_subv(datalength(instr), instr) -> lab;
		;;; compute its distance from the current instruction
		if label_offset(lab) ->> disp then
			;;; forward branch: -n_added- accounts for any extra
			;;; instructions added since the label was seen
			disp fi_+ n_added fi_- offs -> disp;
		else
			;;; backward branch: don't know the distance exactly, so
			;;; assume the worst case of the current offset plus 50%
			;;; (allowing for the extreme case where all preceding
			;;; instructions are conditional branches with no-ops and
			;;; all the branches expand to two instructions).
			negate((offs fi_* 3 fi_+ 1) fi_>> 1) -> disp;
		endif;
		;;; the displacement can't exceed 12-bit signed
		returnif(integer_length(disp) fi_< 12)(false);
		;;; replace the conditional branch with two instructions:
		;;; some operation with a conditional nullify, followed
		;;; by a branch to the target, e.g.
		;;;		comb,= r1,r2,t ==> sub,<> r1,r2,0 ! b t
		;;; (note the negation of the condition)
#_IF DEF M_DEBUG
		printf(';;; \tReplacing opcode: %p\n', [^opcode]);
#_ENDIF
		lvars cond = allbutfirst(i, opcode), branch_op = "b";
		if locchar(`,`, 1, cond) ->> i then
			;;; includes the nullify bit
			subword(1, i-1, cond) -> cond;
			"b\,n" -> branch_op;
		endif;
		negate_cond(cond) -> cond;
		if root == "bb" then
			;;; cond can only be < or >=
			{%	if cond == ">=" then "'extru,='" else "'extru,<>'" endif,
				f_subv(2, instr), f_subv(3, instr), 1, R_0
			%} -> instr;
		elseif root == "comb" then
			"sub\," <> cond -> f_subv(1, instr);
			R_0 -> f_subv(4, instr);
		elseif root == "comib" then
			"subi\," <> cond -> f_subv(1, instr);
			R_0 -> f_subv(4, instr);
		elseif root == "addb" then
			"add\," <> cond -> f_subv(1, instr);
			f_subv(3, instr) -> f_subv(4, instr);
		elseif root == "addib" then
			"addi\," <> cond -> f_subv(1, instr);
			f_subv(3, instr) -> f_subv(4, instr);
		else
			mishap(opcode, 1, 'SYSTEM ERROR (unrecognised branch opcode)');
		endif;
		;;; return new -instr- plus the following branch
		({^branch_op ^lab}, instr);
	enddefine;	/* check_branch */

	lvars opcode, instr, tmp, big_procedure;

	clearproperty(label_offset);

	;;; if the word length of the code exceeds 12 bits,
	;;; some conditional branches may have to be expanded
	integer_length(oldlen) fi_>= 12 -> big_procedure;
#_IF DEF M_DEBUG
	if big_procedure then
		printf(';;; NB: big procedure: %p\n', [^current_pdr_label]);
	endif;
#_ENDIF
	until oldcode == [] do
		fast_front(oldcode) -> instr;
		f_subv(1, instr) -> opcode;
		if opcode == "\.label" then
			;;; record the label's word offset
			offs fi_- n_added -> label_offset(f_subv(2, instr));
		elseif opcode /== ";" then
			;;; -offs- is the word offset of the current instruction
			offs fi_- 1 -> offs;
			if opcode == "nop" then
				unless instr == NOP_fixed then
					;;; try to remove no-ops
					if replace_NOP(oldcode) then
						;;; one instruction has been deleted
						offs fi_- 1 -> offs;
						newlen fi_- 1 -> newlen;
					elseif nullify_branch(oldcode, newcode) then
						;;; branch has been nullified, so delete the no-op
						fast_back(oldcode) -> oldcode;
						newlen fi_- 1 -> newlen;
						nextloop;
					endif;
				endunless;
			elseif big_procedure
			and locchar(`,`, 1, opcode)
			and (check_branch(opcode, instr) ->> tmp)
			then
				;;; conditional branch has had to be replaced by a test ...
				tmp -> fast_front(oldcode);
				;;; ... with an extra instruction left on the stack
				conspair((), newcode) -> newcode;
				n_added fi_+ 1 -> n_added;
			endif;
		endif;
		;;; move the instruction pair from old to new
		(oldcode, fast_back(oldcode)) -> (tmp, oldcode);
		(newcode, tmp) -> (fast_back(tmp), newcode);
	enduntil;
	newlen fi_+ n_added -> newlen;
enddefine;


;;; -- CODE OUTPUT --------------------------------------------------------

;;; outopnd:
;;;		writes out an assembly code operand.

define lconstant outopnd(opnd);
	lvars opnd;
	if isref(opnd) then
		fast_cont(opnd), '%p';
	elseif isregind(opnd) then
		lvars (disp, n) = destvector(opnd);
		if n == 2 then
			if disp == 0 then '(%p)' else disp, '%p(%p)' endif;
		elseif disp then
			;;; space register + offset
			if disp == 0 then '(%p,%p)' else disp, '%p(%p,%p)' endif;
		else
			;;; base register + index (no displacement)
			'%p(%p)';
		endif;
	else
		opnd, '%p';
	endif;
	asmf_printf();
enddefine;

;;; outinstr:
;;;		writes out an assembly code instruction.

define lconstant outinstr(instr);
	lvars instr, i, n = datalength(instr), opcode = f_subv(1, instr);
	if opcode == "\.label" then
		outlab(f_subv(2, instr));
	elseif opcode == ";" then
		pdprops(f_subv(2, instr)) -> opcode;
		asmf_printf(opcode, ';\t%p\t');
		if datalength(opcode) fi_< 8 then asmf_charout(`\t`) endif;
		unless n == 2 then
			asmf_printf(f_subv(3, instr), '%p');
			fast_for i from 4 to n do
				asmf_printf(f_subv(i, instr), ' %p');
			endfor;
		endunless;
		asmf_charout(`\n`);
	else
		asmf_printf(opcode, '\t%p\t');
		if datalength(opcode) fi_< 8 then asmf_charout(`\t`) endif;
		unless n == 1 then
			outopnd(f_subv(2, instr));
			fast_for i from 3 to n do
				asmf_charout(`,`);
				outopnd(f_subv(i, instr));
			endfor;
		endunless;
		asmf_charout(`\n`);
	endif;
enddefine;

define mc_code_generator(codelist, hdr_len) -> (gencode, pdr_len);
	lvars codelist, hdr_len, procedure gencode, pdr_len, code_len;
#_IF DEF M_DEBUG
	dlocal pop_mishap_doing_lim = false;
#_ENDIF

	;;; translate to assembler
	improve(generate(codelist)) -> (codelist, code_len);

	;;; construct code output procedure
	applist(% codelist, outinstr %) -> gencode;

	;;; compute procedure size (the HP assembler is incapable of doing
	;;; this for itself)
	code_len fi_+ hdr_len -> pdr_len;
enddefine;


;;; -- OTHER DEFINITIONS NEEDED BY "m_trans.p" ----------------------------


/*
 *	M-code tables for machine-dependent in-line subroutines.
 *	These are added to the corresponding properties in m_trans.p
 */

constant

	mc_inline_procs_list = [
		[ \^_ptr_to_offs	[{^M_ERASE ^USP_+}]]
		[ \^_offs_to_ptr	[{^M_ERASE ^USP_+}]]
		[ \^_int			[{^M_ASH -2 ^USP_+ ^ -_USP}]]
		[ \^_pint			[{^M_ASH  2 ^USP_+ ^ -_USP}
							 {^M_ADD 3 ^USP_+ ^ -_USP}]]
		[ \^_por			[{^M_BIS ^USP_+ ^USP_+ ^ -_USP}]]
		[ \^_pand			[{^M_BIM ^USP_+ ^USP_+ ^ -_USP}]]
		[ \^_mksimple		[{^M_ADD 1 ^USP_+ ^ -_USP}]]
		[ \^_mkcompound		[{^M_SUB 1 ^USP_+ ^ -_USP}]]
		[ \^_mksimple2		[{^M_ADD 3 ^USP_+ ^ -_USP}]]
		[ \^_mkcompound2	[{^M_SUB 3 ^USP_+ ^ -_USP}]]
	],

	mc_inline_conditions_list = [
		[ \^_iscompound		{^M_BIT  2:01   ^USP_+ EQ  ?}]
		[ \^_issimple		{^M_BIT  2:01   ^USP_+ NEQ ?}]
		[ \^_issimple2		{^M_BIT	 2:10	^USP_+ NEQ ?}]
		[ \^_isinteger		{^M_BIT  2:10   ^USP_+ NEQ ?}]
		[ \^_isaddress		{^M_BIT  2:11   ^USP_+ EQ  ?}]
	],
;


	/*	Procedure to convert a pop integer subscript to an appropriate offset
		for the data type being accessed, used by OP_SUBV in m_trans.p to
		compile code for fast_subscrv, vectorclass field accesses, etc.
			-scale- is the scale for the data type involved; the results are
		the M-code instructions (if any) necessary to convert the subscript
		on top of the stack to an offset, plus a constant correction to be
		added.
	*/
define cvt_pop_subscript(scale);
	lvars pow, scale;
	if is_power2(scale) ->> pow then
		;;; pow-2 accounts for popint being shifted left 2
		unless (pow-2 ->> pow) == 0 then
			{^M_ASH ^pow ^USP_+ ^ -_USP}
		endunless,
		-(popint(0) << pow)		;;; additive correction to remove popint bits
	else
		;;; just convert to sysint and multiply
		{^M_ASH -2 ^USP_+ ^ -_USP},		;;; _int()
		{^M_MULT ^scale ^USP_+ ^ -_USP},
		0 						;;; no correction necessary
	endif
enddefine;


endsection;		/* Genproc */

endsection;		/* $-Popas$-M_trans */



/* --- Revision History ---------------------------------------------------
--- John Gibson, Aug 31 1994
		Removed M_AR*RAY_SUB (no longer necessary)
 */
