[Date Prev] [Date Next] [Thread Prev] [Thread Next] Date Index Thread Index Search archive:
Date:Mon Dec 1 15:17:09 1999 
Subject:Extensible vectors 
From:"Stephen F. K. Leach" 
Volume-ID:991201.01 

Thanks to everyone who responded to my posting about extensible vectors,
especially Brian Logan whose superb response prompted me to write two
new and very different interpretations.  I have added these to the
(imminent) Global Open Source Poplog Library - which will be appearing
on http://www.poplog.org/ in the early hours of the morning.

The first implementation "lib buffers" is pretty much what people have
come to expect.  A buffer is implemented as a procedure which wraps an
arbitrary vectorclass (vector, string, intvec, etc).  This is
optimised for access/update of the individual elements.  The second
implementation I call "accumulators" and is optimised for gathering
large amounts of input and then turning it into a compact data
structure.  Slightly peculiar but nice.

There are 6 files included below separated by '============':
     source code                     documentation
         buffers.p                       buffers
         new_char_accumlator.p           new_char_accumulator
         new_item_accumulator.p          new_item_accumulator

I hope this will be useful.  I will be posting again just as soon
as I make the GOSPL available.

Steve

===============================================================

;;; buffers.p

compile_mode :pop11 +strict;

section;


;;; Error messages.
lconstant
     non_neg_msg = 'Non-negative integer needed',
     pos_int_msg = 'Positive integer needed';

defclass lconstant store {
     store_vector,
     store_used      : pint,
     store_capacity  : pint,
     store_default,
     store_buffer
};

define lconstant fast_buffer_store( buffer );
     buffer.pdprops.back
enddefine;

define lconstant buffer_store( item );
     lvars s;
     if
         item.isprocedure and
         item.pdprops.ispair and
         isstore( item.pdprops.back ->> s )
     then
         s
     else
         mishap( item, 1, 'Buffer required' )
     endif
enddefine;

define lconstant ensure( k, store );
     lvars vec = store.store_vector;
     max( k, store.store_capacity * 2 + 8 ) -> k;
     ;;; [ expanding from % store.store_capacity % to ^k ( ^k was 
suggested ) ] =>
     lvars newvec = class_init( datakey( store.store_vector ) )( k );
     move_subvector( 1, vec, 1, newvec, vec.datalength );
     k -> store.store_capacity;
     newvec -> store.store_vector;
enddefine;

define lconstant buffer_diagnosis( num, store );
     lvars buf = store.store_buffer;
     if num.isinteger then
         if num < 1 then
             mishap( num, buf, 2, 'Index out of range (too small)' )
         elseif num > store.store_used then
             mishap( num, buf, 2, 'Index out of range (too big)' )
         else
             mishap( 0, 'Internal error' )
         endif
     else
         mishap( num, buf, 2, 'Invalid index for buffer (integer needed)' )
     endif
enddefine;

define new_buffer_hint( key, size, default ) -> buffer;

     key or vector_key -> key;
     lvars store =
         consstore(
             class_init( key )( fi_check( size, 0, false ) ),
             0,
             size,
             default,
             _           ;;; will become a self-pointer.
         );
     lvars props = conspair( key.class_dataword <> "_buffer", store );

     define check();
         sysLVARS( "num", 0 );
         sysPOP( "num" );
         sysPUSH( "num" );
         sysCALL( "isinteger" );
         sysIFNOT( "ouch" );
         sysPUSHQ( store );
         sysFIELD( 2, class_field_spec( store_key ), false, false );
         sysPUSH( "num" );
         sysCALL( "fi_<" );
         sysIFSO( "ouch" );
         sysPUSH( "num" );
         sysPUSHQ( 1 );
         sysCALL( "fi_<" );
         sysIFSO( "ouch" );
     enddefine;

     define push();
         sysPUSH( "num" );
         sysPUSHQ( store );
         sysFIELD( 1, class_field_spec( store_key ), false, false );
     enddefine;

     define diagnosis();
         sysGOTO( "done" );
         sysLABEL( "ouch" );
         sysPUSH( "num" );
         sysPUSHQ( store );
         sysCALLQ( buffer_diagnosis );
         sysLABEL( "done" );
     enddefine;

     procedure();
         sysPROCEDURE( props, 1 );
         check();
         push();
         sysFIELD( false, conspair( class_field_spec( key ), _ ), 
false, false );
         diagnosis();
         sysPUSHQ( sysENDPROCEDURE() );
         sysEXECUTE();
     endprocedure.sysCOMPILE ->> buffer -> store.store_buffer;

     procedure();
         sysPROCEDURE( false, 2 );
         check();
         push();
         sysUFIELD( false, conspair( class_field_spec( key ), _ ), 
false, false );
         diagnosis();
         sysPUSHQ( sysENDPROCEDURE() );
         sysEXECUTE();
     endprocedure.sysCOMPILE -> buffer.updater;

enddefine;

define new_buffer( key, default );
     new_buffer_hint( key, 0, default )
enddefine;

define app_buffer( buffer, procedure proc );
     lvars store = buffer.buffer_store;
     lvars vec = store.store_vector;
     lvars subscr = class_fast_subscr( datakey( vec ) );
     lvars i;
     fast_for i from 1 to store.store_used do
         proc( fast_apply( i, vec, subscr ) )
     endfor;
enddefine;


define buffer_insert( item, num, buffer );

     unless isinteger( num ) and num fi_>= 1 do
         mishap( num, 1, pos_int_msg )
     endunless;

     lvars store = buffer.buffer_store;
     lvars capacity = store.store_capacity;
     lvars used = store.store_used;
     lvars vec = store.store_vector;

     lvars used1 = used fi_+ 1;              ;;; safe because used <= capacity
     lvars newused = max( used1, num );      ;;; and capacity is a datalength

     if newused fi_>= capacity then
         newused -> store.store_used;
         if num fi_<= used then
             move_subvector(
                 num, vec,
                 num fi_+ 1, vec,            ;;; safe because num <= used
                 used1 fi_- num              ;;; safe because both > 0
             )
         else
             set_subvector(
                 store.store_default,
                 used1,
                 vec,
                 num fi_- used1              ;;; safe because both > 0
             )
         endif;
         item -> vec( num )
     else
         ensure( num, store );
         chain( item, num, buffer, buffer_insert )
     endif
enddefine;


;;; We need a key lemma here for fast integer subtraction :-
;;; Lemma:
;;;     for all integers A >= 0 and B >= 0,
;;;         A - B == A fi_- B

;;; We also make use of an absolutely key assumption - that datalength
;;; always returns a results SMALLER than the pop_max_int.  This
;;; assumption lets us write datalength( x ) fi_+ 1 safely.  But does
;;; Poplog enforce this?  If it doesn't it would be a bug!  Note that
;;; we CANNOT make the same requirement of length (because of dynamic
;;; lists).

define buffer_insert_n( n, num, buffer );
     ;;; [ insert ^n ^num ^buffer ] =>

     ;;; Sneaky code to avoid lots of work for inserting 0 items.
     unless isinteger( n ) and n fi_> 0 do
         returnif( n == 0 );     ;;; ALERT!  Efficiency hack!
         mishap( num, 1, 'Number of items to insert must be a 
non-negative integer' )
     endunless;

     ;;; ASSERT: isinteger( n ) and n >= 1.

     unless isinteger( num ) and num fi_>= 1 do
         mishap( num, 1, 'Insertion point must be a non-negative integer' )
     endunless;

     lvars store = buffer.buffer_store;
     lvars used = store.store_used;

     ;;; This will be the length of active portion of the buffer.
     lvars newused = max( num fi_- 1, used ) + n;
     unless isinteger( newused ) do
         mishap( n, 1, 'Attempt to overflow buffer overflow trapped 
(too many items)' )
     endunless;

     ;;; This is the index of the last item inserted into the list.  Typically
     ;;; this is not the same as newused.
     lvars newfinal = num + ( n fi_- 1 );               ;;; not safe, use +
     unless isinteger( newfinal ) do
         mishap( n, num, 2, 'Attempt to overflow buffer (insertion 
point too big)' )
     endunless;

     lvars capacity = store.store_capacity;
     lvars used = store.store_used;
     lvars vec = store.store_vector;

     lvars used1 = used fi_+ 1;       ;;; safe because used <= 
capacity & capacity is a datalength.
     if newused fi_<= capacity then
         if num fi_<= used then
             ;;; We will have to shift items out of the way.
             move_subvector(
                 num, vec,
                 num fi_+ n, vec,
                 used1 fi_- num      ;;; safe because used1 and num are >= 0
             );
         else
             ;;; [ extending vector, woah! ] =>
             set_subvector(
                 store.store_default,
                 used1,
                 vec,
                 num fi_- used1      ;;; safe because used1 and num are >= 0
             );
         endif;
     else
         ensure( newused, store );
         chain( n, num, buffer, buffer_insert_n )
     endif;

     newused -> store.store_used;
     lvars usubscr = updater( class_fast_subscr( datakey( vec ) ) );
     lvars i;
     fast_for i from newfinal by -1 to num do
         fast_apply( (), i, vec, usubscr )
     endfor;
enddefine;

define buffer_length( buffer );
     buffer.buffer_store.store_used
enddefine;

define buffer_pop( buffer );
     lvars store = buffer.buffer_store;
     lvars used = store.store_used;
     if used fi_>= 1 then
         lvars vec = store.store_vector;
         vec( used );                        ;;; push item onto stack.
         used fi_- 1 -> store.store_used;
     else
         mishap( buffer, 1, 'Trying to pop an empty buffer' )
     endif
enddefine;

define buffer_pop_n( n, buffer );

     ;;; "Cunning" trick used again.  We do not want to do much if n == 0.
     ;;; However, we do not want to add a n == 0 test since that would
     ;;; increase, even if only minutely, the overall cost of the common
     ;;; case.  Instead we trap it indirectly via a test we are obliged
     ;;; to perform anyway.
     unless isinteger( n ) and n fi_> 0 do
         returnif( n == 0 );
         mishap( n, 1, non_neg_msg )
     endunless;

     ;;; We now know that n is at least 1.  This is vital because later
     ;;; on we wish to compute used - n + 1.  We can only do this in
     ;;; fast arithmetic because of this guarantee!  The cunning trick
     ;;; pays double dividends.

     ;;; BIZARRE INFLAMMATORY INSERT ------------------------------------

     ;;; I am sorry it is so ugly.  Blame them - the secret evil
     ;;; conspiracy that makes computers that cannot do general arithmetic.
     ;;; The ugliness stems directly from the horrid restrictions on
     ;;; arithmetic.  It is not my fault.  It is not Poplog's fault.

     ;;; And to think that people programming in C, C++ and Java neglect
     ;;; this nearly all the time!  Hey - if you do not believe me try
     ;;; doing weakest preconditions on the safety of all your arithmetic.
     ;;; You have to prove that EVERY + and EVERY - and EVERY * (etc)
     ;;; is safe i.e. will not overflow.  Go on - do it.  I can promise
     ;;; a form of ghastly enlightenment.  Perhaps one should call it
     ;;; endarkening, the kind of revelation that shows matters are
     ;;; worse, far worse, than you had ever imagined?

     ;;; And what are the consequences if there is an overflow?  The
     ;;; damage is potentially unlimited.  In point of fact, indexing
     ;;; from an unproven safe integer in C can potentially lead to total
     ;;; system corruption and, worse, can act as a backdoor for hostile
     ;;; penetration.  The tradeoff in Java is rather more subtle, though.
     ;;; You may come to believe, as I do, that the use of 32-bit
     ;;; arithmetic reduces the overall performance of the JVM.

     ;;; Come, the darkness is beckoning ....

     ;;; END OF BIZARRE INFLAMMATORY INSERT  ----------------------------

     lvars store = buffer.buffer_store;
     lvars used = store.store_used;

     if used fi_>= n then
         lvars vec = store.store_vector;
         lvars i, subscr = class_fast_subscr( datakey( vec ) );
         fast_for i from used fi_- n fi_+ 1 to used do
             fast_apply( i, vec, subscr );               ;;; push item 
onto stack.
         endfor;
         used fi_- n -> store.store_used;
     else
         mishap( n, buffer, 2, 'Not enough elements in buffer (only ' 
sys_>< used sys_>< ')' )
     endif
enddefine;

;;; We utilise the guarantee that capcity is an integer.  Thus if used
;;; is an integer less than capacity is is safe to add 1.
;;;
define buffer_push( item, buffer );
     lvars store = buffer.buffer_store;
     lvars capacity = store.store_capacity;
     lvars used = store.store_used;
     lvars vec = store.store_vector;
     if used fi_< capacity then
         item -> vec( used fi_+ 1 ->> store.store_used )
     else
         ensure( used + 1, store );
         chain( item, buffer, buffer_push )
     endif
enddefine;

;;; You have to be VERY CAREFUL with the fast integer arithmetic in
;;; this routine.  You cannot assume n is an integer or that even if it
;;; is that (used + n) is an integer.  However, since used >= 0 and
;;; isinteger( used ), if (used + n) is an integer, everything is fine.
;;;
define buffer_push_n( n, buffer );
     lvars store = buffer.buffer_store;
     lvars capacity = store.store_capacity;
     lvars used = store.store_used;
     lvars vec = store.store_vector;
     lvars newused = fi_check( used + n, 0, false );
     if newused fi_<= capacity then
         lvars usubscr = updater( class_fast_subscr( datakey( vec ) ) );
         lvars i;
         fast_for i from 0 to n fi_- 1 do
             fast_apply( (), newused fi_- i, vec, usubscr )
         endfor;
         newused -> store.store_used;
     else
         ;;; [ newused ^newused used ^used ^n ] =>
         ensure( newused, store );
         chain( n, buffer, buffer_push_n )
     endif
enddefine;

define buffer_remove( num, buffer ) -> item;

     lvars store = buffer.buffer_store;
     lvars used = store.store_used;
     lvars vec = store.store_vector;

     ;;; Incidentally proves isinteger( num ) and 1 <= num <= capacity
     vec( num ) -> item;

     if used fi_> 0 and used fi_>= num then
         move_subvector(
             num fi_+ 1, vec,      ;;; safe because num <= capacity 
and capacity is a datalength
             num, vec,
             used fi_- num
         );
         used fi_- 1 -> store.store_used;    ;;; safe by Lemma
     else
         mishap( num, buffer, 2, 'Index too big for buffer' )
     endif
enddefine;

define buffer_remove_n( n, num, buffer );

     ;;; Yet again we do the ugly trick.  However, I resist bursting
     ;;; into another rant against evil architectures.  Well, almost ...
     unless isinteger( n ) and n fi_> 0 do
         returnif( n == 0 );     ;;; ALERT!  All too clever by half ...
         mishap( n, 1, non_neg_msg )
     endunless;

     ;;; ASSERT n >= 1 and isinteger( n )

     unless isinteger( num ) and num fi_>= 1 do
         mishap( num, 1, pos_int_msg )
     endunless;

     lvars store = buffer.buffer_store;
     lvars used = store.store_used;
     lvars vec = store.store_vector;

     lvars final = num + ( n fi_- 1 );
     unless isinteger( final ) and final fi_<= used do
         mishap( n, num, buffer, 3, 'Not enough elements in buffer for 
removal' )
     endunless;

     ;;; ASSERT final <= used <= capacity  (therefore final fi_+ 1 is OK)
     ;;; ASSERT final >= num ..... so num <= used
     ;;; ASSERT final >= n   ..... so n   <= used

     lvars subscr = class_fast_subscr( datakey( vec ) );
     lvars i;
     fast_for i from num to final do
         fast_apply( i, vec, subscr )        ;;; PUSH!
     endfor;

     move_subvector(
         final fi_+ 1, vec,
         num, vec,
         used fi_- final
     )
enddefine;

define dest_buffer( buffer );
     #| app_buffer( buffer, identfn ) |#
enddefine;

define explode_buffer( buffer );
     app_buffer( buffer, identfn )
enddefine;

define is_buffer( item );
     item.isprocedure and
     item.pdprops.ispair and
     item.pdprops.back.isstore
enddefine;

define is_empty_buffer( buffer );
     buffer.buffer_store.store_used == 0
enddefine;

define map_buffer( buffer, proc ) -> new;
     lvars store = buffer.buffer_store;
     new_buffer_hint(
         store.store_vector.datakey,
         store.store_used,
         store.store_default
     ) -> new;
     buffer_push_n(
         #| app_buffer( buffer, proc ) |#,
         new
     )
enddefine;

define buffer_contents( buffer ) -> result;
     lvars store = buffer.buffer_store;
     lvars vec = store.store_vector;
     lvars used = store.store_used;
     class_init( datakey( vec ) )( used ) -> result;
     move_subvector(
         1, vec,
         1, result,
         used
     )
enddefine;

define buffer_copy( buffer );
     map_buffer( buffer, identfn )
enddefine;

define ncmap_buffer( buffer, procedure proc ) -> buffer;
     lvars store = buffer.buffer_store;
     lvars vec = store.store_vector;
     lvars subscr = class_fast_subscr( datakey( vec ) );
     lvars usubscr = updater( subscr );
     lvars i;
     fast_for i from 1 to store.store_used do
         fast_apply( proc( fast_apply( i, vec, subscr ) ), i, vec, usubscr )
     endfor;
enddefine;

define subscr_buffer( num, buffer );
     buffer.buffer_store -> _;
     fast_apply( num, buffer )
enddefine;

define updaterof subscr_buffer( value, num, buffer );
     buffer.buffer_store -> _;
     value -> fast_apply( num, buffer )
enddefine;

endsection;

===============================================================

HELP BUFFERS                                            Steve Leach, Nov 99

lib buffers

This file documents the buffers library, an implementation of extensible
vectors.

          CONTENTS - (Use <ENTER> g to access required sections)

   1   Introduction to LIB BUFFERS

   2   Constructing Buffers

   3   Queries About Buffers

   4   Accessing and Updating Elements

   4   Iterating Over Buffer Elements

-----------------------------------------------------------------------
1  Introduction to LIB BUFFERS
-----------------------------------------------------------------------

Buffers are procedures which behave like extensible vectors.  They
can be called and updated to access/update the elements of the buffer.
Extensible vectors are difficult to implement without making some
kind of tradeoff.  This implementation optimises these point-wise
operations.

You can create buffers using new_buffer.  This takes two arguments,
a key on which to base the underlying vector, and a default to be
used when the vector has to be filled with otherwise unspecified
values.  Initially, the buffer is empty.

You can add and remove elements to or from the end of the buffer using
buffer_push and buffer_pop respectively.  You can add elements
into the middle of a buffer using buffer_insert and remove them
using buffer_remove.  All four of these procedures have multi-item
analogues: buffer_push_n, buffer_pop_n, buffer_insert_n, buffer_remove_n.

Addition and removal of elements is done in-place.  Extensible vectors
are generally visualised as an imperative programming gadget and that
is how they are approached here.





-----------------------------------------------------------------------
2  Constructing Buffers
-----------------------------------------------------------------------

buffer_copy( buffer ) -> new_buffer
         Returns a new buffer that is exactly the same as the first
         but shares no store.


new_buffer( key|false, default ) -> buffer
         This procedure creates a procedure that implements an
         extensible vector-like object.  The underlying store is
         implemented in by a vectorclass which is derived from a
         key (see HELP *KEYS).  Instead of a key you can use <false>
         and -vector_key- is used instead.  The default is employed
         when extending the vector without specific values.


-----------------------------------------------------------------------
3  Queries About Buffers
-----------------------------------------------------------------------

buffer_length( buffer ) -> n
         Returns the effective length of the buffer.  (The underlying
         store may be considerably bigger.  You do not have to worry
         about this.)


is_buffer( item ) -> bool
         Returns <true> if item is a buffer, otherwise <false>.


is_empty_buffer( buffer ) -> bool
         Returns <true> if the buffer is empty, otherwise <false>.


-----------------------------------------------------------------------
4  Accessing and Updating Elements
-----------------------------------------------------------------------

buffer( num ) -> value
value -> buffer( num )
         Access or update the num'th element of the buffer.  Like
         all Poplog sequences, buffers are 1-indexed and num must
         be in the range 1 to buffer_length( buffer ).

         This is the same as subscr_buffer, see below.


buffer_contents( buffer ) -> vec
         Returns a vectorclass object vec containing all the elements
         in the buffer.  No store is shared, this is a fresh vectorclass
         object.


buffer_insert( item, num, buffer )
         Inserts an item into a buffer so that it has index num.
         The buffer elements from num upwards are slid up by one
         to make room.  If num is greater than the buffer length,
         the buffer is extended and the initial default is
         used to fill in any unspecified values.


buffer_insert_n( item_1, ..., item_n, n, num, buffer )
         Inserts n items so that they have indexes num to
         num + n - 1.  The buffer elements from num upwards
         to num are slid up by n to make room.  If num is greater
         than the current buffer length, the buffer is extended and the
         initial default used to fill in any unspecified values.


buffer_pop( buffer ) -> item
         Removes and returns the last element from the buffer.  If
         the buffer is empty a mishap is raised.


buffer_pop_n( n, buffer ) -> ( item_1, ..., item_n )
         Removes and returns the last n elements from the buffer.  If
         the buffer is empty a mishap is raised.


buffer_push( item, buffer )
         Adds item to the end of the buffer.  Buffers are optimised
         for this operation.  To add an item anywhere else use
         buffer_insert.


buffer_push_n( item_1, ..., item_n, n, buffer )
         Adds n items to the end of the buffer.  If you want to add
         several items this is the most efficient way to do it.


buffer_remove( num, buffer ) -> item
         Removes the item at index num.  All the buffer elements
         subsequent to num are slid down by one.  The item is
         returned.


buffer_remove_n( n, num, buffer ) -> ( item_1, item_2, ..., item_n )
         Removes the n items starting from index num.  All the buffer
         elements subsequent to num + n - 1 are slid down by n.  The items
         are returned.


dest_buffer( buffer ) -> ( item_1, ..., item_n, n )
         Pushes the buffer contents onto the stack along with a
         count of the number of items pushed.


explode_buffer( buffer ) -> ( item_1, ..., item_n )
         Pushes the buffer elements onto the stack.


subscr_buffer( num, buffer ) -> value
value -> subscr_buffer( num, buffer )
         Accesses or updates the num'th element of the buffer.  Much
         the same as applying the buffer except it checks that the
         buffer satisfies is_buffer.



-----------------------------------------------------------------------
4  Iterating Over Buffer Elements
-----------------------------------------------------------------------

app_buffer( buffer, proc )
         Applies procedure proc to each item in the buffer.


map_buffer( buffer, proc ) -> new_buffer
         Returns a new buffer based on all the elements of buffer
         with proc applied to them.  Copes gracefully with proc
         returning an arbitrary number of arguments.


ncmap_buffer( buffer, proc ) -> buffer
         Applies procedure proc to every element of buffer,
         replacing that element with the result of calling proc.
         Does not cope gracefully with proc returning anything
         other than a single argument.

===============================================================
;;; new_char_accumlator.p

compile_mode :pop11 +strict;

section;

lconstant size = 256;

define lconstant copy_across( idx_in, dump, result );
     if dump.ispair then
         lvars ( d, rest ) = dump.destpair;
         lvars ( idx_next, more_flag ) = copy_across( idx_in, rest, result );
         lvars n = datalength( d );
         move_bytes( 1, d, idx_next, result, n );
         ( idx_next + n, more_flag )
     else
         ( idx_in, dump )
     endif
enddefine;

define new_char_accumulator( more );

     lvars buffer = false;
     lvars index = 0;
     lvars dump = more and true;     ;;; bool terminated list of strings.
     lvars ndump = 0;                ;;; The number of characters in the dump.
                                     ;;; ... doubles up as dead flag.

     procedure( ch );
         lconstant dead_msg = 'Applying a dead accumulator';
         if ch == termin then
             unless ndump do
                 mishap( ch, 1, dead_msg )
             endunless;
             lvars result = inits( ndump + index );
             lvars ( idx, more_flag ) = copy_across( 1, dump, result );
             if buffer then move_bytes( 1, buffer, idx, result, index ) endif;
             unless more_flag do
                 false -> buffer;    ;;; free space AND avoid 
performance critical code
                 false -> ndump;
                 false -> dump;      ;;; free space
             endunless;
             return( result )
         elseif buffer then
             ;;; Performance critical - must execute fast.
             if index == size then
                 size + ndump -> ndump;
                 conspair( buffer, dump ) -> dump;
                 inits( size ) -> buffer;
                 0 -> index;
             endif;
             index fi_+ 1 -> index;
             ch -> fast_subscrs( index, buffer );
         else
             unless ndump do
                 mishap( ch, 1, dead_msg )
             endunless;
             inits( size ) -> buffer;
             1 -> index;
             ch -> fast_subscrs( 1, buffer );
         endif
     endprocedure
enddefine;

endsection;

===============================================================

HELP new_char_accumlator                            Steve Leach, Nov 99

     new_char_accumulator( reusable ) -> <procedure>

The procedure -new_char_accumulator- is used for generating accumulators,
a special type of consumer.  An accumulator secretly stores all the elements
that it is applied to.  Finally, it can be persuaded to disgorge a string
of all the accumulated characters by applying it to the special item
<termin>.

A boolean flag "reusable" is used to indicate whether or not the
accumulator can be reused after being applied to <termin> more than
once.   <true> means it is reusable and <false> means it is disposable.
This defaults to <false> which means the accumulator dies after its
first result.  When an accumulator "dies" it immediately drops all
references to its secret store.  This means that these potentially large
internal objects will be garbage collected at the next opportunity,
which is a kind thing to do.  So disposable accumulators are more
efficient than reusable ones.

A good use for a character accumulator is to assign it to cucharout.  Now
all printing goes into the accumulator's buffer.  It is retrieved by
applying to <termin>.  Here's an example :-

     define get_print_string( x );
         dlocal cucharout = new_char_accumulator( false );
         pr( x );
         cucharout( termin )     ;;; returns string!
     enddefine;

     : get_print_string( identfn ).lowertoupper =>
     ** <PROCEDURE IDENTFN>

The accumulator returned by new_char_accumulator( true ) will still
works, even after it has been applied to <termin>.  You can carry on
adding in more characters and extracting them afterwards.

     : vars acc = new_char_accumulator( true );
     : appdata( 'foo', acc );
     : acc( termin ) =>
     ** foo
     : appdata( 'foo', acc );
     : acc( termin ) =>
     ** foofoo


-- Note for expert programmers ----------------------------

The strings returned by an accumulator are guaranteed to be unshared.
This is a deliberate design decision, incidentally, to bring it into
line with -new_item_accumulator-.


-- See also -----------------------------------------------

HELP * new_item_accumulator,    an accumulator that can store any item


===============================================================
;;; new_item_accumulator.p

compile_mode :pop11 +strict;

section;

lconstant size = 64;

define lconstant boom( dump ) -> flag;
     if dump.ispair then
         lvars ( d, rest ) = dump.destpair;
         boom( rest ) -> flag;
         explode( d )
     else
         dump -> flag
     endif
enddefine;

define new_item_accumulator( reusable );
     lvars constructor = conslist;
     if reusable.isprocedure then
         reusable -> constructor;
         () -> reusable;
     endif;
     unless reusable.isboolean do
         mishap( reusable, 1, 'Boolean needed' )
     endunless;

     lvars buffer = false;
     lvars index = 0;
     lvars dump = [];        ;;; A list of filled string buffers.
     lvars ndump = 0;        ;;; The number of characters in the dump.

     procedure( ch );
         lconstant dead_msg = 'Applying a dead accumulator';
         if ch == termin then
             unless ndump do
                 mishap( ch, 1, dead_msg )
             endunless;
             lvars more_flag = boom( dump );
             lvars i;
             fast_for i from 1 to index do
                 fast_subscrv( i, buffer )
             endfor;
             unless more_flag do
                 false -> buffer;    ;;; free space AND avoid 
performance critical code
                 false -> ndump;
                 false -> dump;      ;;; free space
             endunless;
             return( constructor( ndump + index ) );
         elseif buffer then
             if index == size then
                 size + ndump -> ndump;
                 conspair( buffer, dump ) -> dump;
                 initv( size ) -> buffer;
                 0 -> index;
             endif;
             index fi_+ 1 -> index;
             ch -> fast_subscrv( index, buffer );
         else
             unless ndump do
                 mishap( ch, 1, dead_msg )
             endunless;
             initv( size ) -> buffer;
             1 -> index;
             ch -> fast_subscrv( 1, buffer );
         endif
     endprocedure
enddefine;

endsection;

===============================================================

HELP new_item_accumlator                            Steve Leach, Nov 99

     new_item_accumulator( reusable ) -> <procedure>
     new_item_accumulator( reusable, constructor ) -> <procedure>

The procedure -new_item_accumulator- is used for generating
accumulators, a special type of consumer.  An accumulator secretly
stores all the elements that it is applied to.  Finally, it can be
persuaded to disgorge a string of all the accumulated items by applying
it to the special item <termin>.  The results are collected together by
the optional constructor which should take its inputs in this form.

     constructor( x1, ..., xN, N )

The constructor defaults to -conslist-.

A boolean flag "reusable" is used to indicate whether or not the
accumulator can be reused after being applied to <termin> more than
once.   <true> means it is reusable and <false> means it is disposable.
This defaults to <false> which means the accumulator dies after its
first result.  When an accumulator "dies" it immediately drops all
references to its secret store.  This means that these potentially large
internal objects will be garbage collected at the next opportunity,
which is a kind thing to do.  So disposable accumulators are more
efficient than reusable ones.

A good use for an item accumulator is to collect values in a loop.
Here's an example :-

     : vars words = new_item_accumulator( false );
     : vars numbers = new_item_accumulator( false );
     : lvars i;
     : for i in [ [ a 1 ] [ b 2 ] [ c 3 ] [ d 4 ] ] do
     :     words( i(1) );
     :     numbers( i(2) );
     : endfor;
     : words( termin ) -> words;
     : numbers( termin ) -> numbers;
     : words =>
     ** [a b c d]
     : numbers =>
     ** [1 2 3 4]

The accumulator returned by, say,
     new_item_accumulator( true, consvector )
still works, even after it has been applied to <termin>.  You can
carry on adding in more items and extracting them afterwards.

     : vars acc = new_item_accumulator( true, consvector );
     : applist( [ alpha beta gamma ], acc );
     ** {alpha beta gamma}
     : applist( [ alpha beta gamma ], acc );
     :  acc( termin ) =>
     ** {alpha beta gamma alpha beta gamma}

-- See also -----------------------------------------------

HELP * new_char_accumulator,    an accumulator specialised for characters

===============================================================