/* --- Copyright University of Sussex 1993. All rights reserved. ----------
 > File:            C.all/lib/objectclass/lib/oc_regexp.p
 > Purpose:         Objectclass file
 > Author:          Steve Knight, HP Labs, 1992-1993
 > Documentation:   HELP OBJECTCLASS
 > Related Files:
 */
/*
	This is an early version of Jon Meyer's regular expression
	matching code.  If is provdied so that people with LIB REGEXP
	will be able to use the ObjectClass browsing facilities.
	The only change to the code was to isolate it in a different
	section in order to prevent the identifiers from escaping.
*/

/* --- Copyright University of Sussex 1992. All rights reserved. ----------
 > File:            C.all/???/regexp_compile.p
 > Purpose:         Regular expression matcher
 > Author:          Jonathan Meyer, July 10 1992
 > Documentation:   REF *REGEXP
 > Related Files:   SRC *VED_REGEXP_SEARCH.P LIB *MATCH_WORDSWITH
*/
compile_mode :pop11 +strict;

;;; ------ REGULAR EXPRESSION MATCHING IN STRINGS ---------------------

weak global vars
	vedindentstep
  ;

weak global constant
	procedure (
		vedatitemstart,
		vedatitemend,
		vedusedsize,
	  ),
  ;

;;; -------------------------------------------------------------------

section $-objectclass;

section regexp =>
			regexp_escape_char, regexp_escape_mode, regexp_compile,
			regexp_break_count, regexp_subexp_count, regexp_subexp,
			regexp_delimeter, regexp_anchored, isregexp,
		;

global vars
	regexp_escape_char  = `@`,
	regexp_escape_mode  = `L`,
  ;

;;; This code is very strongly related to the code in /usr/include/regexp.h
;;; on System V.3.2 and SunOS.

lconstant
	macro (
		;;; output opcodes generated by the compiler
		CBRA    = 2,
		CCHR    = 4,
		CDOT    = 8,
		CCL     = 12,
		CXCL    = 16,
		CDOL    = 20,
		CCEOF   = 22,
		CKET    = 24,
		CBRC    = 28,
		CLET    = 30,
		CBACK   = 36,
		NCCL    = 40,
		CCASE   = 44,
		NCCASE  = 46,

		STAR    = 01,
		RNGE    = 03,

		;;; maximum of 9 bracketed expressions
		NBRA    = 9,

		;;; Bits for flags argument to regexp_compile
		F_IGNORE_CASE       = 2:1e0,    ;;; case sensitivity
		F_NOT_EMBEDDED      = 2:1e1,    ;;; non-embedded " and ` searches
		F_DETECT_BREAK      = 2:1e2,    ;;; allow long regular expressions
		F_USE_VEDCHARTYPE   = 2:1e3,    ;;; vedatitemstart/end for word bounds
		F_USE_VEDINDENTSTEP = 2:1e4,    ;;; expect tabs to be padded VED style

		;;; Indexes to closures on Regexp_search
		CLOS_PROPS          = 1,    ;;; list of properties about regexp
		CLOS_BRASLIST       = 2,
		CLOS_BRAELIST       = 3,
		CLOS_BRAVLIST       = 4,
		CLOS_NBREAKS        = 5,
		CLOS_LENGTH         = 12, ;;; number of frozen arguments

		;;; Contents of CLOS_PROPS field of Regexp_search closure -
		;;; These things aren't used by Regexp_search but may be useful to
		;;; user. See regexp_delimeter, regexp_subexp_count, regexp_anchored.
		PROPS_DELIM         = 1,
		PROPS_NBRA          = 2,
		PROPS_ANCHORED      = 3,

		;;; under POPC this should be set to "Consclos_protect"
		CONSCLOSURE         = "consclosure",
	  ),

	bittab                  = consstring( 1, 2, 4, 8, 16, 32, 64, 128, 8 ),

	;;; codes that are recognised by the itemiser.
	backslashcodes          = 'nbtsre^(GS[{\\',
  ;

;;; ---- REGULAR EXPRESSION SEARCHING ---------------------------------------

;;; VED TABS: Jump_over_tab(STR, COLUMN, GOING_FORWARD) -> COLUMN
;;;   Takes an index into the string str. Adjusts the index to
;;;   skip over any tab pad characters. If GOING_FORWARD, jumps to nearest
;;;   non-pad location to right of COLUMN, otherwise jumps to the nearest
;;;   non-pad loction to left of COLUMN.

define lconstant Jump_over_tab(str, strp, forward) -> strp;
	lvars str, strp, forward, indent = weakref vedindentstep;

	;;; These came from SRC vdtabs.p

	define lconstant Tab_size_at(col);
		lvars col;
		(indent fi_- ((col fi_- 1) fi_rem indent))
	enddefine;

	define lconstant Col_is_inside_tab(col, string);
		lvars string, col;
		not(col fi_<= 1 or col fi_> datalength(string)
			or fast_subscrs(col, string) /== `\t`
			or fast_subscrs(col fi_- 1, string) /== `\t`
			or (col fi_- 1) fi_rem indent == 0
		)
	enddefine;

	;;; find the start of a tab in string backwards from col
	define lconstant Find_tab_start(col, string);
		lvars string, col, lim;
		col fi_+ Tab_size_at(col) fi_- indent -> lim;
		while col fi_> lim  do
			col fi_- 1 -> col;
			returnif(fast_subscrs(col, string) /== `\t`) (col fi_+ 1)
		endwhile;
		lim;
	enddefine;

	if Col_is_inside_tab(strp, str) then
		;;; string has hard tabs in it and strp is in the middle of a tab.
		if forward then
			strp fi_+ Tab_size_at(strp) -> strp;
		else
			Find_tab_start(strp, str) -> strp;
		endif;
	endif;
enddefine; /* Jump_over_tab */

;;; DECLARE -goto- LABELS AS LVARS TO STOP THE COMPILER FROM AUTOLOADING THEM
lvars do_ccl, do_ccl_star, do_ccl_range, star;

;;; Macros for iterating over the compiled regular expression.
define :inline lconstant R(n); ;;; ep[n]
	(fast_subscrs(regexp fi_+ (n), regexstr))
enddefine;
define lconstant macro *_R; ;;; *ep
	[(fast_subscrs(regexp, regexstr))].dl
enddefine;
define lconstant macro R_+= n; ;;; ep += n
	lvars n;
	"regexp", "fi_+", n, "->", "regexp"
enddefine;
define lconstant macro R_++; ;;; *r++
	[(*_R, R_+= 1)].dl
enddefine;
;;; Macros for iterating over the string that is being searched.
define lconstant macro CHECKTAB;
	[(if jumptabs then Jump_over_tab(str, strp, true) -> strp endif)].dl
enddefine;
define lconstant macro CHECKTAB_BACK;
	[(if jumptabs then Jump_over_tab(str, strp, false) -> strp endif)].dl
enddefine;
define lconstant macro *_S; ;;; *lp
	[(strp fi_<= strsize and fast_apply(fast_subscrs(strp, str),filter_p)
	  or termin)].dl
enddefine;
define lconstant macro S_+= n; ;;; lp += n
	lvars n;
	[(strp fi_+ (].dl, n, [) -> strp, CHECKTAB)].dl
enddefine;
define lconstant macro S_-= n; ;;; lp -= n
	lvars n;
	[(strp fi_- (].dl, n, [) -> strp, CHECKTAB_BACK)].dl
enddefine;
define lconstant macro S_++; ;;; *lp++
	[ ( *_S, S_+= 1 ) ].dl
enddefine;
define :inline lconstant ISTHERE(c);
	(R(c fi_>> 3) &&/=_0 fast_subscrs(c fi_&& 7 fi_+ 1, bittab))
enddefine;
define :inline lconstant WHITESPACE(c);
	;;; this is the definition used by the `C` regexp.h
	not(isalphacode(c) or c == `_` or isnumbercode(c))
enddefine;

;;; used to make locchar and skipchar case-insensitive if necessary.
define lconstant Case(proc, c, n, str, ignorecase);
	lvars proc, c, n, str, ignorecase, l1, l2;
	if ignorecase and isalphacode(c) then
		if proc == skipchar then
			;;; must implement this one ourselves
			datalength(str) -> l2;
			unless n fi_>= 1 then mishap(n,1, 'INTEGER >= 1 NEEDED') endunless;
			while n fi_<= l2 and uppertolower(fast_subscrs(n, str)) == c do
				n fi_+ 1 -> n;
			endwhile;
			(n fi_<= l2 and n);
		else
			;;; apply the search to both upper and lower case characters
			fast_apply(c, n, str, proc) -> l1;
			fast_apply(lowertoupper(c), n, str, proc) -> l2;
			if l1 and l2 then
				;;; select the nearest/farthest match depending on proc
				proc == locchar and fi_min(l1, l2) or fi_max(l1, l2)
			else
				;;; select either character
				l1 or l2
			endif;
		endif;
	else
		;;; can use the routine as it stands
		fast_chain(c, n, str, proc);
	endif;
enddefine;

define lconstant Regexp_search( strp, str, strsize, backward,
								props, braslist, braelist, bravlist,
								nlines, linedata, regexstr, circf, firstchar,
								ignorecase, usevedindentstep, usevedchartype )
					-> (loc1, loc2);
	lvars
		c,
		;;; user specified arguments:
		strp, str, strsize,     ;;; string to match against
		backward,               ;;; true if we are searching right-to-left

		;;; these are part of the Regexp_search closure made by regexp_compile
		props                   ;;; unused - see regexp_delimeter, etc.
		braslist, braelist,     ;;; start and end of bracketed expressions
		bravlist,               ;;; vector of strings which brackets matched
		nlines,                 ;;; number of lines in regexp
		linedata,               ;;; data which is local to each line
		regexstr,               ;;; compiled regular expression
		circf,                  ;;; true if constrained to start of line
		firstchar,              ;;; first character in expression
		ignorecase,             ;;; true if we ignore case
		usevedindentstep,       ;;; true if search should adjust for VED tabs
		usevedchartype,         ;;; true if word boundaries use vedchartype

		;;; results:
		loc1, loc2,             ;;; return locations

		;;; working variables:
		jumptabs,               ;;; true if usevedindentstep and string has tab
		filter_p        = ignorecase and uppertolower or identfn,
	  ;

	;;; Succeeds if substring(start, n, str2) = substring(strp, n, str).
	define lconstant Strncmp(start, n, str2, strp);
		lvars str2, start, strp, n, bsize, jump, endpos;
		;;; Handles VED tabs if necessary.
		if str2 == str then
			jumptabs -> jump;
			strsize -> bsize;
		else
			jumptabs or (usevedindentstep and locchar(`\t`, 1, str2)) -> jump;
			if usevedindentstep then
				weakref vedusedsize(str2)
			else
				datalength(str)
			endif -> bsize;
		endif;
		start fi_+ n -> endpos;
		if not(jump) and strp fi_+ n fi_-1 fi_> strsize then
			return(false);
		endif;
		until start fi_>= endpos do
			returnunless(fast_apply(fast_subscrs(start,str2), filter_p)
				== fast_apply(fast_subscrs(strp,str), filter_p))(false);
			start fi_+1 -> start; strp fi_+1 -> strp;
			if jump then
				;;; adjust to cope with any tab padding characters in the str.
				Jump_over_tab(str2, start, true) -> start;
				Jump_over_tab(str, strp, true) -> strp;
				if strp fi_> strsize then
					return(false);
				endif;
			endif;
		enduntil;
		strp;
	enddefine; /* Strncmp */

	;;; this is the main procedure for iterating over the regular expression.
	define lconstant Advance(strp, regexp);
		lvars
			strp, regexp,           ;;; index pointers into string/regexp.
			savestrp,               ;;; used to store start of * expressions.
			neg,                    ;;; true for negative expressions.
			bbeg, bnchars, bstr,    ;;; used for @n back references.
			low, size,              ;;; holds range for character classes.
			c, tmp,
		  ;
		define lconstant Getrange(offs);
			lvars offs;
			R(offs) -> low;
			R(offs fi_+ 1) -> size;
			size == 255 and 20000 or size fi_- low -> size;
		enddefine;

		fast_repeat;
			false -> neg;
			R_++ -> c;

			if c == CCHR then
				if R_++ == S_++ then
					nextloop;
				endif;
				return(false);

			elseif c == CDOT then
				if S_++ /== termin then
					nextloop;
				endif;
				return(false);

			elseif c == CDOL then
				if *_S == termin then
					nextloop;
				endif;
				return(false);

			elseif c == CCEOF then
				strp -> loc2;
				return(true);

			elseif c == CXCL then
				S_++ -> c;
				if c /== termin and ISTHERE(c) then
					R_+= 32;
					nextloop;
				endif;
				return(false);

			elseif c == NCCL then
				true -> neg;
				goto do_ccl;

			elseif c == CCL then
			do_ccl:
				S_++ -> c;
				if c /== termin and ((c &&=_0 8:200 and ISTHERE(c)) /== neg)
				then
					R_+= 16;
					nextloop;
				endif;
				return(false);

			elseif c == CBRA then
				R_++ -> c;
				strp -> subscrintvec(c, braslist);
				str -> bravlist(c);
				nextloop;

			elseif c == CKET then
				R_++ -> c;
				strp -> braelist(c);
				nextloop;

			elseif c == #_< CCHR || RNGE >_# then
				R_++ -> c;
				Getrange(0);
				fast_repeat low times
					if S_++ /== c then
						return(false);
					endif;
				endfast_repeat;
				strp -> savestrp;
				until (size fi_-1 ->> size) == -1 do
					if S_++ /== c then
						quitloop;
					endif;
				enduntil;
				if size == -1 then
					S_+= 1;
				endif;
				R_+= 2;
				goto star;

			elseif c == #_< CDOT || RNGE >_#  then
				Getrange(0);
				fast_repeat low times
					if S_++ == termin then
						return(false);
					endif;
				endfast_repeat;
				strp -> savestrp;
				until (size fi_-1 ->> size) == -1 do
					if S_++ == termin then
						quitloop;
					endif;
				enduntil;
				if size == -1 then
					S_+= 1;
				endif;
				R_+= 2;
				goto star;

			elseif c == #_< CXCL || RNGE >_# then
				Getrange(32);
				fast_repeat low times
					S_++ -> c;
					if c == termin or not(ISTHERE(c)) then
						return(false);
					endif;
				endfast_repeat;
				strp -> savestrp;
				until (size fi_-1 ->> size) == -1 do
					S_++ -> c;
					if c == termin or not(ISTHERE(c)) then
						quitloop;
					endif;
				enduntil;
				if size == -1 then
					S_+= 1;
				endif;
				R_+= 34;        ;;; 32 + 2
				goto star;

			elseif c == #_< NCCL || RNGE >_# then
				true -> neg;
				goto do_ccl_rnge;

			elseif c == #_< CCL || RNGE >_# then
			do_ccl_rnge:
				Getrange(16);
				fast_repeat low times
					S_++ -> c;
					if c == termin or (((c &&/=_0 8:200) or not(ISTHERE(c)))
												/== neg) then
						return(false);
					endif;
				endfast_repeat;
				strp -> savestrp;
				until (size fi_-1 ->> size) == -1 do
					S_++ -> c;
					if c == termin or (((c &&/=_0 8:200) or not(ISTHERE(c)))
												/== neg) then
						quitloop;
					endif;
				enduntil;
				if size fi_< 0 then
					S_+= 1;
				endif;
				R_+= 18;        ;;; 16 + 2
				goto star;

			elseif c == #_< CBACK || RNGE >_# then
				;;; This is greatly complicated by VED tabs. A 5 character
				;;; piece of text in one part of a string may be any number of
				;;; characters in another part of the string.
				R_++ -> c;
				subscrintvec(c, braslist) -> bbeg;
				subscrintvec(c, braelist) fi_- bbeg -> bnchars;
				subscrv(c, bravlist) -> bstr;
				Getrange(0);
				R_+= 2;
				fast_repeat low times
					unless Strncmp(bbeg, bnchars, bstr, strp) ->> strp then
						return(false);
					endunless;
				endfast_repeat;
				strp -> savestrp;
				;;; build a list of possible matches.
				[^savestrp] -> tmp;
				until (size fi_-1 ->> size) == -1 do
					if Strncmp(bbeg, bnchars, bstr, strp) ->> strp then
						strp :: tmp -> tmp;
					else
						quitloop;
					endif;
				enduntil;
				until tmp == [] do
					sys_grbg_destpair(tmp) -> tmp -> strp;
					if Advance(strp, regexp) then
						sys_grbg_list(tmp);
						return(true);
					endif;
				enduntil;
				return(false);

			elseif c == CBACK then
				R_++ -> c;
				subscrintvec(c, braslist) -> bbeg;
				subscrintvec(c, braelist) fi_- bbeg -> bnchars;
				subscrv(c, bravlist) -> bstr;

				nextif(Strncmp(bbeg, bnchars, bstr, strp) ->> strp);
				return(false);

			elseif c == #_< CBACK || STAR >_# then
				;;; see also comment on CBACK || RNGE above.
				R_++ -> c;
				subscrintvec(c, braslist) -> bbeg;
				subscrintvec(c, braelist) fi_- bbeg -> bnchars;
				subscrv(c, bravlist) -> bstr;
				strp -> savestrp;

				;;; build a list of possible matches.
				[^savestrp] -> tmp;
				while Strncmp(bbeg, bnchars, bstr, strp) ->> strp do
					strp :: tmp -> tmp;
				endwhile;
				until tmp == [] do
					sys_grbg_destpair(tmp) -> tmp -> strp;
					if Advance(strp, regexp) then
						sys_grbg_list(tmp);
						return(true);
					endif;
				enduntil;
				return(false);

			elseif c == #_< CDOT || STAR >_# then
				strp -> savestrp;
				strsize fi_+ 2 -> strp; ;;; one after the end of the string
				goto star;

			elseif c == #_< CCHR || STAR >_# then
				strp -> savestrp;
				*_R -> c;
				unless Case(skipchar, c, strp, str, ignorecase) ->> strp then
					strsize fi_+ 1 -> strp;
				endunless;
				S_+= 1; ;;; one char after the final match
				R_+= 1;
				goto star;

			elseif c == #_< CXCL || STAR >_# then
				strp -> savestrp;
				fast_repeat;
					S_++ -> c;
					quitunless(c /== termin and ISTHERE(c));
				endfast_repeat;
				R_+= 32;
				goto star;

			elseif c == #_< NCCL || STAR >_# then
				true -> neg;
				goto do_ccl_star;

			elseif c == #_< CCL || STAR >_# then
			do_ccl_star:
				strp -> savestrp;
				fast_repeat
					S_++ -> c;
					quitif(c == termin or
						((c &&=_0 8:200) and ISTHERE(c)) == neg);
				endfast_repeat;
				R_+= 16;
				;;; FALLTHROUGH to star

			star:
				S_-= 1;
				if strp == savestrp then
					nextloop;
				endif;

				*_R -> c;
				if c == CCHR or c == CBACK then
					;;; fast search for the first character.
					if c == CCHR then
						R(1)
					else
						R(1) -> c;
						subscrs(subscrintvec(c, braslist),
								subscrv(c, bravlist))
					endif -> c;
					while (Case(locchar_back, c, strp, str, ignorecase)
											->> strp)
								and (CHECKTAB_BACK, strp fi_>= savestrp) do
						if Advance(strp, regexp) then
							return(true);
						endif;
						strp fi_-1 -> strp;
					endwhile;
					return(false);
				endif;

				fast_repeat;
					if Advance(strp, regexp) then
						return(true);
					endif;
					quitunless(strp fi_> savestrp);
					S_-= 1;
				endfast_repeat;
				return(false);

			elseif c == CBRC then
				;;; constrain to start of item
				*_S -> c;
				returnif(c == termin)(false); ;;; items cannot start here
				if strp == 1 then  ;;; maybe this should be strp == loc1 ?
					;;; first character of string must be the start of an item
					nextloop;
				elseif usevedchartype then
					if weakref vedatitemstart(strp, str, strsize) then
						nextloop;
					endif;
				else
					if not(WHITESPACE(c)) then
						;;; get the last character
						S_-= 1; S_++ -> c;
						if WHITESPACE(c) then
							nextloop;
						endif;
					endif;
				endif;
				return(false);

			elseif c == CLET then
				;;; constrain to end of item
				*_S -> c;
				if c == termin then
					nextloop;
				endif;
				if usevedchartype then
					if strp /== 1 then
						S_-= 1;
						if weakref vedatitemend(strp, str, strsize fi_+1) then
							S_+= 1;
							nextloop;
						endif;
					endif;
				else
					*_S -> c;
					if WHITESPACE(c) then
						nextloop;
					endif;
				endif;
				return(false);

			elseif c == CCASE then
				false -> ignorecase;
				identfn -> filter_p;
				nextloop;

			elseif c == NCCASE then
				true -> ignorecase;
				uppertolower -> filter_p;
				nextloop;

			else
				;;; this should never happen but just in case ...
				mishap(c, 1, 'INVALID REGULAR EXPRESSION');
			endif;

		endfast_repeat;
	enddefine; /* Advance */

	;;; Do_search - calls Advance to perform a search
	define lconstant Do_search;
		lvars regexp, minlength, anchor, startstrp, strlen;

		;;; PROCESS ARGUMENTS - strp, str, strsize,
		if str.isword then
			fast_word_string(str) -> str;
		elseunless str.isstring then
			mishap(str,1,'STRING NEEDED')
		endif;
		if usevedindentstep then
			weakref vedusedsize(str)
		else
			datalength(str)
		endif -> strlen;
		unless strlen == 0 then
			unless strp.isinteger and strp fi_>= 1 and strp fi_<= strlen then
				mishap(strp, 1,
					'INTEGER >= 1 AND <= ' sys_>< strlen sys_>< ' NEEDED');
			endunless;
		elseunless strp == 1 then
			mishap(strp, str, strlen, 3,
				   'SUBSCRIPT EXCEEDS STRING LENGTH');
		endunless;
		if strsize == false then
			strlen -> strsize;
		else
			unless strsize.isinteger and strsize fi_>= 0
			and strsize fi_<= strlen fi_- strp fi_+ 1 then
				mishap(strsize, 1,
					'INTEGER >= 0 and <= ' sys_>< (strlen - strp + 1)
					sys_>< ' NEEDED');
			endunless;
			strsize fi_+ strp fi_- 1 -> strsize;
		endif;
		if backward == 0 then
			;;; can never match
			return(false ->> loc1 -> loc2);
		elseif backward and not(circf) then
			strp -> startstrp;
			if backward.isinteger then
				;;; backward specifies where to start searching
				unless backward fi_>= strp fi_-1 then
					mishap(backward, 1,
						'INTEGER >= ' sys_>< (strp -1) sys_>< ' NEEDED');
				endunless;
				fi_min(backward, strsize fi_+1) -> strp;
			else
				strsize fi_+ 1 -> strp;
			endif;
		endif;
		;;; set per-line data
		explode(fast_destpair(linedata) -> linedata) -> (regexp, minlength);

		;;; quick check to see if we have enough characters.
		returnif(strsize fi_< minlength)(false ->> loc1 -> loc2);

		define lconstant Set_jumptabs;
			;;; see if we're using vedindentstep and string has hard tab in it.
			if usevedindentstep and locchar(`\t`, 1, str) ->> jumptabs then
				if backward then
					Jump_over_tab(str, strp, false) -> strp;
					Jump_over_tab(str, startstrp, false) -> startstrp;
				else
					Jump_over_tab(str, strp, true) -> strp;
				endif;
			endif;
		enddefine;

		;;; PERFORM SEARCH
		;;; anchored left search (same for left-to-right and right-to-left).
		if circf then
			Set_jumptabs();
			if strp == 1 and Advance(strp, regexp) then
				strp -> loc1;
				loc2 fi_- loc1 -> loc2;
				return;
			endif;

		;;; fast search when we know the first character.
		elseif firstchar then
			firstchar -> c;
			if backward then
				;;; fast right-to-left search.
				if (Case(locchar_back, c, strp, str, ignorecase) ->> strp) then
					Set_jumptabs();
					repeat;
						if Advance(strp, regexp) then
							strp -> loc1;
							loc2 fi_- loc1 -> loc2;
							return;
						endif;
						strp fi_- 1 -> strp;
					quitunless(
						(Case(locchar_back, c, strp, str, ignorecase) ->> strp)
						and (CHECKTAB_BACK, strp fi_>= startstrp)
					  );
					endrepeat;
				endif;
			else
				;;; fast left-to-right search.
				if (Case(locchar, c, strp, str, ignorecase) ->> strp) then
					Set_jumptabs();
					repeat;
						if Advance(strp, regexp) then
							strp -> loc1;
							loc2 fi_- loc1 -> loc2;
							return;
						endif;
						strp fi_+ 1 -> strp;
					quitunless(
						(Case(locchar, c, strp, str, ignorecase) ->> strp)
						and (CHECKTAB, strp fi_<= strsize)
					  );
					endrepeat;
				endif;
			endif;

		;;; right-to-left searching - regular algorithm.
		elseif backward then
			Set_jumptabs();
			while strp fi_>= startstrp do
				if Advance(strp, regexp) then
					strp -> loc1;
					loc2 fi_- loc1 -> loc2;
					return;
				endif;
				S_-= 1;
			endwhile;

		;;; left-to-right searching - regular algorithm.
		else
			Set_jumptabs();
			fast_repeat;
				if Advance(strp, regexp) then
					strp -> loc1;
					loc2 fi_- loc1 -> loc2;
					return;
				endif;
				quitif(S_++ == termin);
			endfast_repeat;
		endif;

		return(false ->> loc1 -> loc2); ;;; FAIL
	enddefine; /* Do_search */

	;;; multi-line matching
	define lconstant Do_long_search();
		lvars startmatch, savestrsize, lines;
		;;; massage arguments:
		;;; strp, str, strsize -> (STRP, STR1, STR2, ..., STRSIZE)
		strsize -> savestrsize;
		conslist(strp, str, nlines) -> lines; ;;; list of STR1 STR2 ...
		;;; FIRST LINE
		;;; set strp, str and strsize
		-> strp; ;;; off the stack.
		sys_grbg_destpair(lines) -> lines -> str;
		false -> strsize;
		;;; do search
		Do_search();
		returnunless(loc1 ->> startmatch)(sys_grbg_list(lines));
		true -> circf; ;;; all other lines are constrained to start
		backward and true -> backward; ;;; ignore any integer backward value
		;;; MIDDLE LINES
		nlines fi_-2 -> nlines;
		fast_repeat nlines times
			;;; set strp, str and strsize
			1 -> strp;
			sys_grbg_destpair(lines) -> lines -> str;
			false -> strsize;
			;;; do search
			Do_search();
			returnunless(loc1)(sys_grbg_list(lines));
		endfast_repeat;
		;;; LAST LINE
		;;; set strp, str and strsize
		1 -> strp;
		sys_grbg_destpair(lines) -> lines -> str;
		savestrsize -> strsize;
		;;; do search
		Do_search();
		if loc2 then startmatch -> loc1 endif;
	enddefine; /* Do_long_search */

	if nlines == 1 then
		Do_search()
	else
		Do_long_search()
	endif;

enddefine; /* Regexp_search - phew. */

;;; ----- REGULAR EXPRESSION COMPILER -------------------------------------

;;; procedure returned if a regular expression cannot be compiled
define lconstant No_regexp with_props '<invalid regular expression>';
	mishap(0, 'INVALID REGULAR EXPRESSION');
enddefine;

;;; DECLARE -goto- LABELS AS LVARS TO STOP THE COMPILER FROM AUTOLOADING THEM
lvars dollar, dot, defchar, nlim;

;;; this compiles a single regular expression
define global regexp_compile(inputstr) -> regexp_p -> err;
	lvars
		;;; inputs
		inputstr,               ;;; the regular expression
		eofc,                   ;;; delimeter character for string
		flags,                  ;;; case sensitivity, etc.
		escc,                   ;;; escape character
		allowunescaped,         ;;; start in 'unliteral' mode?

		;;; outputs
		regexp_p    = No_regexp,;;; the resulting regexp procedure
		err         = false,    ;;; error message

		;;; input string and output string management variables
		inputp = 1,             ;;; index in inputstr
		inputlen,               ;;; length of inputstr
		outputstr   = inits(32),;;; compiled regular expression
		outputp     = 1,        ;;; index in outputstr
		outputlen   = 32,       ;;; length of outputstr (it can grow)
		lastp       = false,    ;;; last position in outputstr

		;;; state variables for () expressions
		bracket     = initintvec(NBRA),
		bracketp    = 1,        ;;; index to bracketp
		nbrackets   = 0,        ;;; total number of () expressions
		closed      = 0,        ;;; test for balance of ) brackets

		;;; working variables
		detectbreak,            ;;; true if ^ after $ is a delimeter
		casemakesdifference,    ;;; true if inputstr has an alpha char
		startoutp   = outputp,  ;;; where this expression starts
		circf       = false,    ;;; true if string starts with @^
		dollar      = false,    ;;; true if string contains a @$
		linedata    = [],       ;;; per-line data
		nlines      = 1,        ;;; number of lines
		iflag,                  ;;; used for non-ascii chars in [] brackets
		commacount,             ;;; used in handling of {} expressions
		neg,                    ;;; -true- for negatives in [] expressions
		notembedded,            ;;; true if it is an implicit @< @> search
		minlength = 0,          ;;; minimum length for matching string
		firstchar = undef,      ;;; first character of expression
		procedure filter_p = identfn, ;;; filter for case sensitivity
		c, lc, i, cclcnt, nextc,
	  ;

	lconstant Error_map = {
			;;; ERROR     MEANING
			/*1*/        'Regular expression too long'
			/*2*/        '@( @) imbalance'
			/*3*/        'Illegal or missing delimiter'
			/*4*/        '@[ @] imbalance'
			/*5*/        'Too many @( '
			/*6*/        'Bad number'
			/*7*/        'Range endpoint too large'
			/*8*/        'More than 2 numbers  given  in @{ @}'
			/*9*/        '} expected after @'
			/*10*/       'First number exceeds second in @{ @}'
			/*11*/       '`@ digit\' out of range'
			/*12*/       'Badly placed @z'
			/*13*/       'More than one * or @{ @} on same item'
		};

	;;; grows the output buffer
	define lconstant Try_grow;
		lvars growbuf, growlen;
		inits(outputlen + 32 ->> growlen) -> growbuf;
		outputstr -> substring(1, outputlen, growbuf);
		growbuf -> outputstr; growlen -> outputlen;
		true;
	enddefine;
	define lconstant Get_char() -> c; lvars c;
		;;; get the next character in the input, advance to next location.
		;;; sets nextc to a one-character lookahead.
		inputp > inputlen and termin or subscrs(inputp, inputstr) -> c;
		inputp + 1 -> inputp;
		inputp > inputlen and termin or subscrs(inputp, inputstr) -> nextc;
		filter_p(c) -> c; filter_p(nextc) -> nextc;
	enddefine;
	define lconstant Output_byte(c); lvars c;
		;;;write -c- to the current location, advance to next location
		c -> subscrs(outputp, outputstr);
		outputp +1 -> outputp
	enddefine;

	;;; PROCESS ARGUMENTS
	undef ->> (escc, allowunescaped); 0 -> flags; false -> eofc;
	;;; HANDLE OPTIONAL ARGUMENTS
	unless inputstr.isstring or inputstr.isword then
		inputstr -> (inputstr, flags);
	endunless;
	unless inputstr.isstring or inputstr.isword then
		(inputstr, flags) -> (inputstr, flags, eofc);
	endunless;
	unless inputstr.isstring or inputstr.isword then
		(inputstr, flags, eofc) -> (inputstr, flags, eofc, escc);
	endunless;
	unless inputstr.isstring or inputstr.isword then
		(inputstr, flags, eofc, escc)
				-> (inputstr, flags, eofc, escc, allowunescaped);
	endunless;

	if inputstr.isword then
		fast_word_string(inputstr) -> inputstr
	elseunless inputstr.isstring then
		mishap(inputstr, 1, 'WORD or STRING NEEDED');
	endif;
	datalength(inputstr) -> inputlen;
	if escc == undef then regexp_escape_char -> escc endif;
	if allowunescaped == undef then regexp_escape_mode -> allowunescaped endif;
	allowunescaped == `u` or allowunescaped == `U` -> allowunescaped;

	;;; PROCESS FLAGS
	uppertolower(inputstr) /== lowertoupper(inputstr) -> casemakesdifference;
	if flags &&/=_0 F_IGNORE_CASE then
		if casemakesdifference then
			;;; ignore case.
			uppertolower -> filter_p;
		else
			;;; case makes no difference anyway.
			flags &&~~ F_IGNORE_CASE -> flags;
		endif;
	endif;
	flags &&/=_0 F_NOT_EMBEDDED -> notembedded;
	flags &&/=_0 F_DETECT_BREAK -> detectbreak;
	;;; test that the VED procedures are available.
	if flags &&/=_0 F_USE_VEDINDENTSTEP then
		unless testdef vedindentstep then
			mishap(0, 'VEDINDENTSTEP NOT LOADED');
		endunless;
		unless testdef vedusedsize then
			mishap(0, 'VEDUSEDSIZE NOT LOADED');
		endunless;
	endif;
	if flags &&/=_0 F_USE_VEDCHARTYPE then
		unless testdef vedatitemstart then
			mishap(0, 'VEDATITEMSTART NOT LOADED');
		endunless;
		unless testdef vedatitemend then
			mishap(0, 'VEDATITEMEND NOT LOADED');
		endunless;
	endif;

	;;; test for special first characters.
	Get_char() -> c;
	uppertolower(nextc) -> lc;
	while c == escc and lc /== termin and strmember(lc, 'luic') do
		if lc == `u` then
			true -> allowunescaped;
		elseif lc == `l` then
			false -> allowunescaped;
		elseif lc == `i` and casemakesdifference then
			uppertolower -> filter_p;
			flags || F_IGNORE_CASE -> flags;
		elseif lc == `c` and casemakesdifference then
			identfn -> filter_p;
			flags &&~~ F_IGNORE_CASE -> flags;
		endif;
		Get_char() -> /*luic*/;
		Get_char() -> c;
		uppertolower(nextc) -> lc;
	endwhile;
	if allowunescaped and c == `^` then
		true -> circf;
	elseif c == escc and (nextc == `a` or nextc == `A`
	or (not(allowunescaped) and nextc == `^`)) then
		;;; @a or @A maps onto ^
		true -> circf;
		inputp + 1 -> inputp;
	else
		inputp - 1 -> inputp;
	endif;

	;;; deal with non-embedded searches
	if notembedded then Output_byte(CBRC); endif;

	;;; COMPILE REGULAR EXPRESSION
	repeat;
		if outputlen - outputp < 0 then
			returnunless(Try_grow())(Error_map(1) -> err);
		endif;
		Get_char() -> c;
		unless (allowunescaped and c == `*`) or (c == escc and
		(nextc == `{` or (not(allowunescaped) and nextc == `*`))) then
			outputp -> lastp;
		endunless;

		if c == eofc or c == termin or c == `\n` or detectbreak == 1 then
			;;; Exit condition.
			;;; we have reached the end of the regular expression.

			if bracketp /== 1 then
				return(Error_map(2) -> err); ;;; ERROR
			endif;

			;;; deal with implicit non-embedded searches.
			if notembedded then
				if outputlen - outputp < 1 then
					returnunless(Try_grow())(Error_map(1) -> err);
				endif;
				Output_byte(CLET);
			endif;

			;;; mark the end of the regular expression.
			Output_byte(CCEOF);

			if firstchar == undef then false -> firstchar endif;
			{^startoutp ^minlength} :: linedata -> linedata;

			if detectbreak == 1 then
				;;; we hit a line break - start a new line
				0 -> minlength;
				outputp -> startoutp;
				false -> lastp;
				true -> detectbreak;
				nlines + 1 -> nlines;
				nextloop;
			endif;

			;;; return a closure on Regexp_search
			CONSCLOSURE(
				Regexp_search,
				{% c == eofc and inputp - 1, nbrackets, dollar or circf %},
				if nbrackets == 0 then
					;;; no point allocating vectors for brackets.
					false, false, false,
				else
					;;; should be writeable to cope with locking.
					writeable initintvec(nbrackets),
					writeable initintvec(nbrackets),
					writeable initv(nbrackets),
				endif,
				nlines,
				rev(linedata),
				outputstr,
				circf,
				firstchar,
				flags &&/=_0 F_IGNORE_CASE,
				flags &&/=_0 F_USE_VEDINDENTSTEP,
				flags &&/=_0 F_USE_VEDCHARTYPE,
				CLOS_LENGTH
			  ) -> regexp_p;
			substring(1, inputp - 2, inputstr) -> pdprops(regexp_p);
			;;; pdnargs is  nlines + strp + strsize + back or nlines + 3
			nlines + 3 -> pdnargs(regexp_p);
			return(); ;;; SUCCESS

		elseif allowunescaped and c == `.` then ;;; single character match, also @?
		dot:
			if allowunescaped == undef then false -> allowunescaped endif;
			Output_byte(CDOT);
			;;; increase minlength by 1
			minlength + 1 -> minlength;
			if firstchar == undef then false -> firstchar endif;
			nextloop;

		elseif allowunescaped and c == `*` then ;;; multi character match
			if allowunescaped == undef then false -> allowunescaped endif;
			if lastp == false or (subscrs(lastp, outputstr) ->> i) == CBRA or
					i == CKET or i == CBRC or i == CLET  then
				goto defchar;
			endif;
			if i &&/=_0 (STAR || RNGE) then
				return(Error_map(13) -> err); ;;; ERROR
			endif;
			if i /== CBACK then
				;;; because the last character is repeated zero or more times,
				;;; we must discount it from minlength
				minlength - 1 -> minlength
			endif;
			i || STAR -> subscrs(lastp, outputstr);
			if firstchar == undef or lastp == 1 then false -> firstchar endif;
			nextloop;

		elseif allowunescaped and c == `$` then ;;; end of line, also @z
		dollar:
			if allowunescaped == undef then false -> allowunescaped endif;
			if detectbreak then
				;;; check for $@a or $^
				if nextc == escc then
					Get_char() -> ;
					if nextc == `a` or nextc == `A`
					or (not(allowunescaped) and nextc == `^`) then
						1 -> detectbreak;
					else
						;;; undo the last Get_char
						inputp - 1 -> inputp;
					endif;
				elseif allowunescaped and nextc == `^` then
					1 -> detectbreak;
				endif;
			endif;
			unless nextc == eofc or nextc == `\n` or nextc == termin
			or detectbreak == 1 then
				goto defchar; ;;; return(Error_map(12) -> err); ;;; ERROR ?
			endunless;
			true -> dollar;
			Output_byte(CDOL);
			nextloop;

		elseif allowunescaped and c == `[` then ;;; character class
			if allowunescaped == undef then false -> allowunescaped endif;
			if outputlen - outputp < 16 then
				returnunless(Try_grow())(Error_map(1) -> err);
			endif;
			;;; increase minlength by 1
			minlength - 1 -> minlength;
			if firstchar == undef then false -> firstchar endif;
			Output_byte(CCL);
			for i from 0 to 15 do
				0 -> subscrs(outputp + i, outputstr);
			endfor;
			false -> neg;
			if (Get_char() ->> c) == `^` then
				true -> neg;
				Get_char() -> c;
			endif;
			0 -> lc;
			true -> iflag;

			;;; this is used to place a character in a character range.
			define lconstant Place(c); lvars c;
				subscrs(outputp + (c >> 3), outputstr)
				|| subscrs(c && 7 + 1, bittab)
				-> subscrs(outputp + (c >> 3), outputstr)
			enddefine;

			repeat;
				if c == termin or (c && 8:377 ->> c) == `\n` then
					return(Error_map(4) -> err);    ;;; ERROR
				endif;
				if (c &&/=_0 8:200) and iflag then
					false -> iflag;
					if outputlen - outputp < 31 then
						returnunless(Try_grow())(Error_map(1) -> err);
					endif;
					CXCL -> subscrs(outputp - 1, outputstr);
					for i from 16 to 31 do
						0 -> subscrs(outputp + i, outputstr);
					endfor;
				endif;
				if c == `-` and lc /== 0 then
					Get_char() -> c;
					if allowunescaped and c == `]` or (not(allowunescaped) and
					c == escc and nextc == `]`) then
						Place(`-`);
						quitloop;
					elseif c == termin then
						return(Error_map(4) -> err); ;;; ERROR
					endif;
					if (c &&/=_0 8:200) and iflag then
						false -> iflag;
						if outputlen - outputp < 31 then
							returnunless(Try_grow())(Error_map(1) -> err);
						endif;
						CXCL -> subscrs(outputp - 1, outputstr);
						for i from 16 to 31 do
							0 -> subscrs(outputp + i, outputstr);
						endfor;
					endif;
					while(lc < c ) do
						Place(lc);
						lc +1 -> lc;
					endwhile;
				endif;
				c -> lc;
				Place(c);
				Get_char() -> c;
				quitif(allowunescaped and c == `]` or (not(allowunescaped) and
					c == escc and nextc == `]`));
			endrepeat;
			unless allowunescaped then Get_char() -> endunless; ;;; remove ]
			if neg then
				if not(iflag) then
					for cclcnt from 0 to 31 do
						subscrs(outputp + cclcnt, outputstr) ||/& 8:377
							-> subscrs(outputp + cclcnt, outputstr);
					endfor;
					subscrs(outputp, outputstr) && 8:376
						-> subscrs(outputp, outputstr);
				else
					NCCL -> subscrs(outputp - 1, outputstr);
					;;; make nulls match so test fails.
					subscrs(outputp, outputstr) || 8:1
						-> subscrs(outputp, outputstr);
				endif;
			endif;
			(iflag and 16 or 32) + outputp -> outputp;
			nextloop;

		elseif c == escc then ;;; ie. `@'
			Get_char() -> c;
			uppertolower(c) -> lc;

			if c == termin or c == escc then
				;;; @@ maps to @, as does @ at the end of the string
				if c /== escc then
					inputp - 1 -> inputp;
				endif;
				escc -> c;
				goto defchar;

			elseif not(allowunescaped) and locchar(c, 1, '.*$[') then
				;;; make . * $ and [ temporarily special
				undef -> allowunescaped;
				inputp - 1 -> inputp;
				nextloop;

			elseif lc == `z` then
				;;; @z or @Z maps onto $
				goto dollar;

			elseif c == `?` then
				;;; @? maps onto .
				goto dot;

			elseif lc == `u` then
				;;; @u turns on unliteral mode - ie. turns off literal mode.
				true -> allowunescaped;

			elseif lc == `l` then
				;;; @l turns on literal mode - ie. turns off unliteral mode.
				false -> allowunescaped;

			elseif lc == `c` then
				unless casemakesdifference and filter_p == uppertolower then
					Output_byte(CCASE);
					identfn -> filter_p;
				endunless;
				nextloop;

			elseif lc == `i` then
				unless casemakesdifference and filter_p == identfn then
					Output_byte(NCCASE);
					uppertolower -> filter_p;
				endunless;
				nextloop;

			elseif c == `<` then    ;;; @< constrains to start of word
				Output_byte(CBRC);
				nextloop;

			elseif c == `>` then
				;;; @> constrains to end of word
				Output_byte(CLET);
				nextloop;

			elseif c == `(` then    ;;; @( marks start of sub-expression
				if nbrackets >= NBRA then
					return(Error_map(5) -> err); ;;; ERROR
				endif;
				if outputlen - outputp < 1 then
					returnunless(Try_grow())(Error_map(1) -> err);
				endif;
				nbrackets + 1 -> nbrackets;
				nbrackets -> bracket(bracketp);
				bracketp + 1 -> bracketp;
				Output_byte(CBRA);
				Output_byte(nbrackets);
				nextloop;

			elseif c == `)` then    ;;; @) marks end of sub-expression
				if bracketp <= 1 then
					return(Error_map(2) -> err); ;;; ERROR
				endif;
				if outputlen - outputp < 1 then
					returnunless(Try_grow())(Error_map(1) -> err);
				endif;
				Output_byte(CKET);
				Output_byte(bracket(bracketp -1 ->> bracketp));
				closed +1 -> closed;
				nextloop;

			elseif c >= `1` and c <= `9` then
				;;; @n where n is a digit refers back to a sub-expression
				if (c - `0` ->> c) > closed then
					return(Error_map(11) -> err); ;;; ERROR
				endif;
				if outputlen - outputp < 1 then
					returnunless(Try_grow())(Error_map(1) -> err);
				endif;
				Output_byte(CBACK);
				Output_byte(c);
				nextloop;

			elseif c == `{` then
				;;; @{ @} places constraint on number of matches
				if lastp == false then
					goto defchar;
				endif;
				subscrs(lastp, outputstr) -> lc;
				if lc &&/=_0 (RNGE || STAR) then
					return(Error_map(13) -> err); ;;; ERROR
				endif;
				lc || RNGE -> subscrs(lastp, outputstr);
				0 -> commacount;
			nlim:
				Get_char() -> c;
				0 -> i;
				repeat;
					if c /== termin and `0` <= c and c <= `9` then
						10 * i + c - `0` -> i;
					else
						;;; not a number
						return(Error_map(6) -> err); ;;; ERROR
					endif;
					Get_char() -> c;
					quitif(c == escc or c == `,`);
				endrepeat;
				if i >= 255 then
					return(Error_map(7) -> err); ;;; ERROR
				elseif commacount == 0 and i == 0 and lastp == 1 then
					;;; must clear firstchar if low is 0.
					false -> firstchar;
				endif;
				Output_byte(i);
				if commacount == 0 and lc /== CBACK then
					;;; must modify minlength
					minlength - 1 + i -> minlength
				endif;
				if c == `,` then
					if commacount /== 0 then
						return(Error_map(8) -> err); ;;; ERROR
					endif;
					commacount +1 -> commacount;
					if (Get_char() ->> c) == escc then
						if outputlen - outputp < 0 then
							returnunless(Try_grow())(Error_map(1) -> err);
						endif;
						Output_byte(255);
					else
						inputp - 1 -> inputp;
						goto nlim;
						;;; get 2`nd number.
					endif;
				endif;
				if Get_char() /== `}` then
					return(Error_map(9) -> err); ;;;ERROR
				endif;
				if commacount == 0 then ;;; one number.
					if outputlen - outputp < 0 then
						returnunless(Try_grow())(Error_map(1) -> err);
					endif;
					Output_byte(i);
				elseif (subscrs(outputp - 1, outputstr) && 8:377)
						< (subscrs(outputp - 2, outputstr) && 8:377) then
					return(Error_map(10) -> err); ;;; ERROR
				endif;
				nextloop;

			elseif c == `\n` then
				return(Error_map(3) -> err); ;;; ERROR

			elseif escc == `\\` and strmember(c, 'nbtsre') ->> lc then
				subscrs(lc, '\n\b\t\s\r\e') -> c;
				;;; FALLTHROUGH TO DEFCHAR

			elseif escc == `\\` and c == `^` then
				if isalphacode(nextc) or nextc == `@` or nextc == `_` then
					lowertoupper(Get_char()) - `@  -> c;
				elseif nextc == `?` then
					erase(Get_char()); `\^?` -> c;
				endif;
				;;; FALLTHROUGH TO DEFCHAR
			endif;
			goto defchar;

		;;; DEFAULT ACTION
		else
		defchar:
			outputp -> lastp;
			if outputlen - outputp < 1 then
				returnunless(Try_grow())(Error_map(1) -> err);
			endif;
			Output_byte(CCHR);
			Output_byte(c);
			;;; increase minlength by 1
			minlength + 1 -> minlength;
			if firstchar == undef then c -> firstchar endif;
		endif;
	endrepeat;
enddefine; /* regexp_compile */

;;; ---- REGULAR EXPRESSION UTILITIES --------------------------------------

define global isregexp(r);
	lvars r;
	r.isclosure and r.pdpart == Regexp_search and r.datalength == CLOS_LENGTH
enddefine;

define lconstant Check_regexp(r) -> r;
	lvars r;
	unless r.isregexp then
		mishap(r, 1, 'REGULAR-EXPRESSION SEARCH PROCEDURE NEEDED');
	endunless;
enddefine;

;;; Accessors for the PROPS field of Regexp_search:

;;; the location of the delimeter in the string
define global regexp_delimeter(r);
	lvars r;
	subscrv(PROPS_DELIM, frozval(CLOS_PROPS, Check_regexp(r)));
enddefine;

;;; the number of sub expressions
define global regexp_subexp_count(r);
	lvars r;
	subscrv(PROPS_NBRA, frozval(CLOS_PROPS, Check_regexp(r)));
enddefine;

;;; true if the string contains an @a or @z
define global regexp_anchored(r);
	lvars r;
	subscrv(PROPS_ANCHORED, frozval(CLOS_PROPS, Check_regexp(r)));
enddefine;

;;; returns string and location in string for @( @) matches
define global regexp_subexp(n, r);  /* -> (n, len, string) */
	lvars n, r, maxn, loc1;
	regexp_subexp_count(r) -> maxn;
	unless n.isinteger and n >= 1 and n <= maxn then
		mishap(n, 1, 'INVALID SUB-EXPRESSION INDEX');
	endunless;
	subscrintvec(n, frozval(CLOS_BRASLIST, r)) -> loc1;
	;;; stack the results
	loc1,
	subscrintvec(n, frozval(CLOS_BRAELIST, r)) - loc1,
	subscrv(n, frozval(CLOS_BRAVLIST, r))
enddefine;

define global regexp_break_count(r);
	lvars r;
	frozval(CLOS_NBREAKS, Check_regexp(r)) - 1;
enddefine;

endsection;     /* $-objectclass */
endsection;     /* $-regexp */

/* --- Revision History ---------------------------------------------------
--- Jonathan Meyer, Nov 12 1992
		Fixed bug when regexp_escape_char is `\\` and a `\` appears at the
		end of the string.
--- Jonathan Meyer, Nov 11 1992
		Removed Ret*reat - backward searches no longer find the
		longest rightmost match, but instead find the first rightmost
		match. Made occurances of locchar/skipchar use the Case procedure so
		that they can be case insensitive if necessary.
--- Jonathan Meyer, Nov 4 1992
		Made Regexp_search accept an integer for -backward- to specify
		where the backward search starts from. Made regexp_compile take
		its arguments optionally.
--- Jonathan Meyer, Oct 30 1992
		Added @i and @c switches. Added regexp_anchored, and PROPS_ fields.
--- Jonathan Meyer, Oct 28 1992
		Added Set_jumptabs. Avoided calling Set_jumptabs before doing the
		firstchar test. This is a good thing since Set_jumptabs may take
		some time (it looks in the string for a tab) and is wasted effort
		if the string cannot match in the first place.
--- Jonathan Meyer, Oct 26 1992
		Cleared firstchar in * and @{0 if lastp is 1. Improved calculation of
		minlength in * and @{. Made Regexp_search use vedusedsize to determine
		a strings length if it is handling VED tabs.
--- Jonathan Meyer, Oct 23 1992
		Added the CBACK || RNGE which is missing in the `C' version.
		Made the Advance procedure mishap if it finds a code in the compiled
		regexpstr that it doesn't know.
--- Jonathan Meyer, Oct 22 1992
		Added test when compiling regular expression to see if calling
		uppertolower makes any difference - no point converting
		lots of things to lower case if the regular expression contains
		no alpha characters.
--- Jonathan Meyer, Oct 20 1992
		Added the `unliteral' and `literal' mode switch for old-stlye VED
		search pattern support, and regexp_escape_mode.
--- Jonathan Meyer, Oct 8 1992
		Added fast check using locchar on firstchar to quickly reject
		strings that can't match. Changed CCHR || STAR to use locchar,
		and also a few other places switched to locchar/skipchar.
--- Jonathan Meyer, Oct 5 1992
		Mods to ensure that it can be compiled with POPC as part of the system.
		Changed to use weakref/testdef for VED procedures.
--- Jonathan Meyer, Sep 19 1992
		Added minlength to quickly reject strings that are too short.
--- Jonathan Meyer, Sep 18 1992
		Support for VED hard tabs added and F_USE_VEDINDENTSTEP.
--- Jonathan Meyer, Sep 12 1992
		Extended to cope with @z@a line breaks and long lines. Added
		regexp_break_count, F_DETECT_BREAK.
--- Jonathan Meyer, Aug 4 1992
		Changed to use vedatitemend/vedatitemstart to detect words. Added
		F_NOT_EMBEDDED.
*/
