Toke1.0: scanner.c Source File

00001 /*
00002  *                     OpenBIOS - free your system! 
00003  *                         ( FCode tokenizer )
00004  *                          
00005  *  scanner.c - simple scanner for forth files.
00006  *  
00007  *  This program is part of a free implementation of the IEEE 1275-1994 
00008  *  Standard for Boot (Initialization Configuration) Firmware.
00009  *
00010  *  Copyright (C) 2001-2005 by Stefan Reinauer <stepan@openbios.org>
00011  *
00012  *  This program is free software; you can redistribute it and/or modify
00013  *  it under the terms of the GNU General Public License as published by
00014  *  the Free Software Foundation; version 2 of the License.
00015  *
00016  *  This program is distributed in the hope that it will be useful,
00017  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  *  GNU General Public License for more details.
00020  *
00021  *  You should have received a copy of the GNU General Public License
00022  *  along with this program; if not, write to the Free Software
00023  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA, 02110-1301 USA
00024  *
00025  */
00026 
00027 /* **************************************************************************
00028  *         Modifications made in 2005 by IBM Corporation
00029  *      (C) Copyright 2005 IBM Corporation.  All Rights Reserved.
00030  *      Modifications Author:  David L. Paktor    dlpaktor@us.ibm.com
00031  **************************************************************************** */
00032 
00033 #include <stdio.h>
00034 #include <stdlib.h>
00035 #include <unistd.h>
00036 #ifdef __GLIBC__
00037 #define __USE_XOPEN_EXTENDED
00038 #endif
00039 #include <string.h>
00040 #include <time.h>
00041 #include <ctype.h>
00042 
00043 #include "macros.h"
00044 #include "stack.h"
00045 #include "stream.h"
00046 #include "emit.h"
00047 #include "toke.h"
00048 #include "dictionary.h"
00049 #include "vocabfuncts.h"
00050 #include "scanner.h"
00051 #include "errhandler.h"
00052 #include "tokzesc.h"
00053 #include "conditl.h"
00054 #include "flowcontrol.h"
00055 #include "usersymbols.h"
00056 #include "clflags.h"
00057 #include "devnode.h"
00058 #include "tracesyms.h"
00059 #include "nextfcode.h"
00060 
00061 #include "parselocals.h"
00062 
00063 /* **************************************************************************
00064  *
00065  *  Some VERY IMPORTANT global variables follow
00066  *
00067  **************************************************************************** */
00068 
00069 u8  *statbuf=NULL;      /*  The word just read from the input stream  */
00070 u8   base=0x0a;         /*  The numeric-interpretation base           */
00071 
00072 /* pci data */
00073 bool pci_is_last_image=TRUE;
00074 u16  pci_image_rev=0x0001;  /*  Vendor's Image, NOT PCI Data Structure Rev */
00075 u16  pci_vpd=0x0000;
00076 
00077 
00078 /*  Having to do with the state of the tokenization  */
00079 bool offs16       = TRUE;    /*  We are using 16-bit branch- (etc) -offsets */
00080 bool in_tokz_esc  = FALSE;   /*  TRUE if in "Tokenizer Escape" mode   */
00081 bool incolon      = FALSE;   /*  TRUE if inside a colon definition    */
00082 bool haveend      = FALSE;   /*  TRUE if the "end" code was read.     */
00083 int do_loop_depth = 0;       /*  How deep we are inside DO ... LOOP variants  */
00084 
00085 /*  Used for error-checking of IBM-style Locals  */
00086 int lastcolon;   /*  Location in output stream of latest colon-definition. */
00087 
00088 /*  Used for error reporting   */
00089 char *last_colon_defname = NULL;   /*  Name of last colon-definition        */
00090 char *last_colon_filename = NULL;  /*  File where last colon-def'n made     */
00091 unsigned int last_colon_lineno;    /*  Line number of last colon-def'n      */
00092 bool report_multiline = TRUE;      /*  False to suspend multiline warning   */
00093 unsigned int last_colon_abs_token_no;
00094 
00095 /* **************************************************************************
00096  *  Local variables
00097  **************************************************************************** */
00098 static u16  last_colon_fcode;  /*  FCode-number assigned to last colon-def'n  */
00099                                /*      Used for RECURSE  */
00100 
00101 static bool do_not_overload = TRUE ;  /*  False to suspend dup-name-test     */
00102 static bool got_until_eof = FALSE ;   /*  TRUE to signal "unterminated"      */
00103 
00104 static unsigned int last_colon_do_depth = 0;
00105 
00106 /*  State of headered-ness for name-creation  */
00107 typedef enum headeredness_t {
00108        FLAG_HEADERLESS ,
00109        FLAG_EXTERNAL ,
00110        FLAG_HEADERS }  headeredness ;
00111 static headeredness hdr_flag = FLAG_HEADERLESS ;  /*  Init'l default state  */
00112 
00113 /*  Local variables having to do with:                                      */
00114 /*       ...  the state of the tokenization                                 */
00115 static bool is_instance = FALSE;        /*  Is "instance" is in effect?     */
00116 static char *instance_filename = NULL;  /*  File where "instance" invoked   */
00117 static unsigned int instance_lineno;    /*  Line number of "instance"       */
00118 static bool fcode_started = FALSE ;     /*  Only 1 fcode_starter per block. */
00119 static bool first_fc_starter = TRUE;    /*  Only once per tokenization...   */
00120 
00121 /*       ... with the state of the input stream,                            */
00122 static bool need_to_pop_source;
00123 
00124 /*       ... with the use of the return stack,                              */
00125 static int ret_stk_depth = 0;          /*  Return-Stack-Usage-Depth counter */
00126 
00127 /*       ... and with control of error-messaging.                           */
00128            /*  Should a warning about a dangling "instance" 
00129             *      be issued at the next device-node change?
00130             */
00131 static bool dev_change_instance_warning = TRUE;
00132 
00133            /*  Has a gap developed between "instance" and its application?  */
00134 static bool instance_definer_gap = FALSE;
00135 
00136            /*  Shared phrases                                               */
00137 static char *in_tkz_esc_mode = "in Tokenizer-Escape mode.\n";
00138 
00139 
00140 /* **************************************************************************
00141  *
00142  *      Function name:  skip_ws
00143  *      Synopsis:       Advance the PC past all whitespace.
00144  *                      Protect against pointer over-runs 
00145  *
00146  *      Inputs:
00147  *         Parameters:                  NONE
00148  *         Global Variables:        
00149  *             pc                       Input-source Scanning pointer
00150  *             end                      End of input-source buffer
00151  *
00152  *      Outputs:
00153  *         Returned Value:      TRUE if PC reached END before non-blank char
00154  *         Global Variables:    
00155  *             pc            Advanced to first non-blank char, or to END  
00156  *             lineno        Incremented if encountered new-line along the way
00157  *
00158  *      Error Detection:
00159  *          Return a TRUE if End of input-source buffer reached before
00160  *              non-blank character.  Not necessarily an error; allow
00161  *              calling routine to decide...
00162  *
00163  **************************************************************************** */
00164 
00165 static bool skip_ws(void)
00166 {
00167     bool retval = TRUE;
00168     char ch_tmp;
00169 
00170     for (  ; pc < end; pc++ )
00171 {
00172         ch_tmp = *pc;
00173         if ( (ch_tmp != '\t') && (ch_tmp != ' ') && (ch_tmp != '\n' ) )
00174         {
00175             retval = FALSE;
00176             break;
00177         }
00178         if ( ch_tmp == '\n')  lineno++;
00179     }
00180     return ( retval );
00181 }
00182 
00183 /* **************************************************************************
00184  *
00185  *      Function name:  skip_until
00186  *      Synopsis:       Advance the PC to the given character.
00187  *                      Do not copy anything into statbuf.
00188  *                      Protect against pointer over-runs 
00189  *
00190  *      Inputs:
00191  *         Parameters:
00192  *             lim_ch                   Limiting Character
00193  *         Global Variables:        
00194  *             pc                       Input-source Scanning pointer
00195  *             end                      End of input-source buffer
00196  *
00197  *      Outputs:
00198  *         Returned Value:      TRUE if PC reached END before finding LIM_CH
00199  *         Global Variables:    
00200  *             pc            Advanced to first occurrence of LIM_CH, or to END  
00201  *             lineno        Incremented if encountered new-line along the way
00202  *
00203  *      Error Detection:
00204  *          Return a TRUE if End of input-source buffer reached before
00205  *              desired character.  Not necessarily an error; allow calling
00206  *              routine to decide...
00207  *
00208  **************************************************************************** */
00209 
00210 bool skip_until( char lim_ch)
00211 {
00212     bool retval = TRUE;
00213     char ch_tmp;
00214 
00215     for (  ; pc < end; pc++ )
00216     {
00217         ch_tmp = *pc;
00218         if ( ch_tmp == lim_ch )
00219         {
00220             retval = FALSE;
00221             break;
00222         }
00223         if ( ch_tmp == '\n')  lineno++;
00224         }
00225     return ( retval );
00226 }
00227 
00228 
00229 /* **************************************************************************
00230  *
00231  *      Function name:  get_until
00232  *      Synopsis:       Return, in  statbuf, the string from  PC  to the first
00233  *                      occurrence of the given delimiter-character..
00234  *
00235  *      Inputs:
00236  *         Parameters:
00237  *             needle          The given delimiter-character
00238  *         Global Variables:
00239  *             pc              Input-source Scanning Pointer
00240  *
00241  *      Outputs:
00242  *         Returned Value:     Length of the string obtained
00243  *         Global Variables:
00244  *             statbuf         The string obtained from the input stream;
00245  *                                 does not include the delimiter-character.
00246  *             pc              Bumped past the delimiter-character, unless
00247  *                                 it's a new-line, in which case leave it
00248  *                                 to be handled by  get_word()
00249  *         Local Static Variables:
00250  *             got_until_eof   Pass this as a signal that the end of the
00251  *                                 buffer was reached before the delimiter;
00252  *                                 Testing whether PC has reached END is
00253  *                                 not a sufficient indication.
00254  *
00255  *      Error Detection:
00256  *          If string overflows  statbuf  allocation, ERROR, and 
00257  *              return "no string" (i.e., length = 0).
00258  *          Otherwise, if delimiter not found before eof, keep string.
00259  *              Protection against PC pointer-over-run past END is
00260  *              provided by  skip_until() .  Reaching END will be
00261  *              handled by calling routine; pass indication along
00262  *              via Local Static Variable.
00263  *
00264  *      Process Explanation:
00265  *          Skip the delimiter-character from further input, unless it's a
00266  *              new-line which will be skipped anyway.  Let  skip_until() 
00267  *              and  get_word()  handle incrementing line-number counters.
00268  *          If skip_until()  indicated reaching end-of-file, don't bump PC
00269  *
00270  *      Revision History:
00271  *          Updated Thu, 14 Jul 2005 by David L. Paktor
00272  *              More robust testing for when PC exceeds END 
00273  *                  Involved replacing  firstchar()
00274  *
00275  **************************************************************************** */
00276         
00277 static signed long get_until(char needle)
00278 {                                                                               
00279         u8 *safe;                                                         
00280         unsigned long len = 0;
00281 
00282         safe=pc;
00283 
00284         got_until_eof = skip_until(needle);
00285 
00286         len = pc - safe;
00287         if (len >= GET_BUF_MAX )
00288         {
00289             tokenization_error( TKERROR,
00290                 "get_until buffer overflow.  Max is %d.\n", GET_BUF_MAX-1 );
00291             len = GET_BUF_MAX-1;
00292 }
00293 
00294         memcpy(statbuf, safe, len);
00295         statbuf[len]=0;
00296 
00297         if ( INVERSE(got_until_eof) )
00298 {
00299             if ( needle != '\n' )  pc++;
00300         }
00301         return len;
00302 }
00303 
00304 
00305 /* **************************************************************************
00306  *
00307  *          We are going to use a fairly sophisticated mechanism to
00308  *              make a smooth transition between processing the body
00309  *              of a Macro, a User-defined Symbol or an FLOADed file 
00310  *              and the resumption of processing the source file, so
00311  *              that the end-of-file will only be seen at the end of
00312  *              the primary input file (the one from the command-line).
00313  *         This mechanism will be tied in with the get_word() routine
00314  *
00315  *         We are going to define a private data-structure in which
00316  *              we will save the state of the current source file,
00317  *              and from which, of course, we will recover it.  Its
00318  *              fields will be:
00319  *                   A pointer to the next structure in the list.
00320  *                   The saved values of  START  END  and  PC
00321  *                   The saved values of  INAME  and  LINENO
00322  *                   A flag indicating that get-word should "pause"
00323  *                        before popping the source-stream because
00324  *                        the input file will be changing.
00325  *                   A place from which to save and recover the state of
00326  *                        whether we're testing for "Multi-line" strings;
00327  *                        to prevent undeserved "Multi-line" warnings
00328  *                        during Macro processing.
00329  *                   A pointer to a "resumption" routine, to call
00330  *                        when resuming processing the source file;
00331  *                        the routine takes a pointer parameter
00332  *                        and has no return value.  The pointer
00333  *                        may be NULL if no routine is needed.
00334  *                   The pointer to pass as the parameter to the
00335  *                        resumption routine.
00336  *
00337  **************************************************************************** */
00338 
00339 typedef struct source_state
00340     {
00341         struct source_state   *next;
00342         u8                    *old_start;
00343         u8                    *old_pc;
00344         u8                    *old_end;
00345         char                  *old_iname;
00346         unsigned int           old_lineno;
00347         bool                   pause_before_pop;
00348         bool                   sav_rep_multlin;
00349         void                 (*resump_func)();
00350         _PTR                   resump_param;
00351     } source_state_t ;
00352 
00353 static source_state_t  *saved_source = NULL;
00354 
00355 
00356 /* **************************************************************************
00357  *
00358  *      Function name:  push_source
00359  *      Synopsis:       Save the state of the current source file, in the
00360  *                          source_state data-structure LIFO linked-list.
00361  *
00362  *      Inputs:
00363  *         Parameters:
00364  *             res_func              Pointer to routine to call when resuming
00365  *                                       processing the saved source file.
00366  *             res_param             Parameter to pass to res_func.
00367  *                                   Either or both pointers may be NULL.
00368  *             file_chg              TRUE if input file is going to change.
00369  *         Global Variables:
00370  *             start                 Points to current input buffer
00371  *             end                   Points to end of current input buffer
00372  *             pc                    Input point in current buffer
00373  *             iname                 Name of current source file
00374  *             lineno                Line number in current source file
00375  *             report_multiline      Whether we're testing for "Multi-line"
00376  *         Local Static Variables:
00377  *             saved_source          Pointer to the source_state data-structure
00378  *
00379  *      Outputs:
00380  *         Returned Value:           NONE
00381  *         Local Static Variables:
00382  *             saved_source          Points to new source_state entry
00383  *         Memory Allocated
00384  *             for the new source_state entry
00385  *         When Freed?
00386  *             When resuming processing the source file, by drop_source().
00387  *
00388  *      Process Explanation:
00389  *          The calling routine will establish the new input buffer via
00390  *              a call to init_inbuf() or the like.
00391  *
00392  **************************************************************************** */
00393 
00394 void push_source( void (*res_func)(), _PTR res_parm, bool file_chg )
00395 {
00396     source_state_t  *new_sav_src;
00397 
00398     new_sav_src = safe_malloc( sizeof(source_state_t), "pushing Source state");
00399 
00400     new_sav_src->next = saved_source;
00401     new_sav_src->old_start = start;
00402     new_sav_src->old_pc = pc;
00403     new_sav_src->old_end = end;
00404     new_sav_src->old_iname = iname;
00405     new_sav_src->old_lineno = lineno;
00406     new_sav_src->pause_before_pop = file_chg;
00407     new_sav_src->sav_rep_multlin = report_multiline;
00408     new_sav_src->resump_func = res_func;
00409     new_sav_src->resump_param = res_parm;
00410 
00411     saved_source = new_sav_src;
00412 }
00413 
00414 /* **************************************************************************
00415  *
00416  *      Function name:  drop_source
00417  *      Synopsis:       Remove last saved state of source processing
00418  *                          from the source_state LIFO linked-list,
00419  *                          without (or after) restoring.
00420  *
00421  *      Inputs:
00422  *         Parameters:               NONE
00423  *         Local Static Variables:
00424  *             saved_source          Pointer to the source_state data-structure
00425  *
00426  *      Outputs:
00427  *         Returned Value:           NONE
00428  *         Local Static Variables:
00429  *             saved_source          Points to previous source_state entry
00430  *         Memory Freed
00431  *             Saved source_state entry that was just "dropped"
00432  *
00433  *      Error Detection:
00434  *          None.  Called only when linked-list is known not to be at end.  
00435  *
00436  **************************************************************************** */
00437 
00438 static void drop_source( void)
00439 {
00440     source_state_t  *former_sav_src = saved_source;
00441 
00442     saved_source = saved_source->next ;
00443     free( former_sav_src);
00444 }
00445 
00446 /* **************************************************************************
00447  *
00448  *      Function name:  pop_source
00449  *      Synopsis:       Restore the state of source processing as it was
00450  *                          last saved in the source_state linked-list.
00451  *
00452  *      Inputs:
00453  *         Parameters:               NONE
00454  *         Local Static Variables:
00455  *             saved_source          Pointer to the source_state data-structure
00456  *             need_to_pop_source    If TRUE, don't check before popping.
00457  *
00458  *      Outputs:
00459  *         Returned Value:           TRUE if reached end of linked-list
00460  *         Global Variables:
00461  *             start                 Points to restored input buffer
00462  *             end                   Points to end of restored input buffer
00463  *             pc                    Input point in restored buffer
00464  *             iname                 Name of restored source file
00465  *             lineno                Line number in restored source file
00466  *             report_multiline      Restored to saved value.
00467  *         Local Static Variables:
00468  *             saved_source          Points to previous source_state entry
00469  *             need_to_pop_source    TRUE if postponed popping till next time
00470  *         Memory Freed
00471  *             Saved source-state entry that was just "popped"
00472  *
00473  *      Process Explanation:
00474  *          First check the need_to_pop_source flag.
00475  *          If it is set, we will clear it and go ahead and pop.
00476  *          If it is not set, we will check the  pause_before_pop  field
00477  *                  of the top entry in the source_state linked-list.
00478  *              If the  pause_before_pop  field is set, we will set the
00479  *                  need_to_pop_source flag and return.
00480  *              If it is not, we will go ahead and pop.
00481  *          If we are going to go ahead and pop, we will call the
00482  *              "Resume-Processing" routine (if it's not NULL) before
00483  *              we restore the saved source state.
00484  *
00485  **************************************************************************** */
00486 
00487 static bool pop_source( void )
00488 {
00489     bool retval = TRUE;
00490 
00491     if ( saved_source != NULL )
00492     {
00493         retval = FALSE;
00494         if ( need_to_pop_source )
00495         {
00496             need_to_pop_source = FALSE;
00497         }else{
00498             if ( saved_source->pause_before_pop )
00499             {
00500                 need_to_pop_source = TRUE;
00501                 return( retval);
00502             }
00503         }
00504 
00505         if ( saved_source->resump_func != NULL )
00506         {
00507             saved_source->resump_func( saved_source->resump_param);
00508         }
00509         report_multiline = saved_source->sav_rep_multlin;
00510         lineno = saved_source->old_lineno ;
00511         iname = saved_source->old_iname ;
00512         end = saved_source->old_end ;
00513         pc = saved_source->old_pc ;
00514         start = saved_source->old_start ;
00515 
00516         drop_source();
00517     }
00518     return( retval);
00519 }
00520 
00521 
00522 /* **************************************************************************
00523  *
00524  *      Function name:  get_word
00525  *      Synopsis:       Gather the next "word" (aka Forth Token) from the
00526  *                          input stream.
00527  *                      A Forth Token is, of course, a string of characters
00528  *                          delimited by white-space (blank, tab or new-line).
00529  *                      Do not increment line-number counters here; leave
00530  *                          the delimiter after the word unconsumed.
00531  *
00532  *      Inputs:
00533  *         Parameters:                 NONE
00534  *         Global Variables:
00535  *             pc                      Input-stream Scanning Pointer
00536  *         Local Static Variables:
00537  *             need_to_pop_source      If TRUE, pop_source() as first step
00538  *
00539  *      Outputs:
00540  *         Returned Value:             Length of "word" gotten;
00541  *                                     0 if  reached end of file.
00542  *                                     -1 if reached end of primary input
00543  *                                         (I.e., end of all source)
00544  *         Global Variables:
00545  *             statbuf                 Copy of "gotten" word
00546  *             pc                      Advanced to end of "gotten" word,
00547  *                                         (i.e., the next word is "consumed")
00548  *                                         unless returning zero.
00549  *             abs_token_no            Incremented, if valid "word" (token)
00550  *                                         was gotten.
00551  *
00552  *      Process Explanation:
00553  *          Skip whitespace to the start of the token, 
00554  *             then skip printable characters to the end of the token.
00555  *          That part's easy, but what about when skipping whitespace
00556  *              brings you to the end of the input stream?
00557  *          First, look at the  need_to_pop_source  flag.  If it's set,
00558  *              we came to the end of the input stream the last time
00559  *              through.  Now we need to  pop_source()  first.
00560  *          Next, we start skipping whitespace; this detects when we've
00561  *                  reached the end of the input stream.  If we have,
00562  *                  then we need to  pop_source()  again.
00563  *              If  pop_source()  returned a TRUE, we've reached the end
00564  *                  of the primary input file.  Return -1.
00565  *              If  pop_source()  turned the  need_to_pop_source  flag
00566  *                  to TRUE again, then we need to "pause" until the
00567  *                  next time through; return zero.
00568  *          Otherwise, we proceed with collecting the token as described.
00569  *
00570  *      Revision History:
00571  *          Updated Thu, 23 Feb 2006 by David L. Paktor
00572  *              Tied this routine in with a more sophisticated mechanism that
00573  *                  makes a smooth transition between processing the body of
00574  *                  a Macro, a User-defined Symbol or an FLOADed file, and 
00575  *                  the resumption of processing the source file, so that the
00576  *                  end-of-file will only be seen at the end of the primary
00577  *                  input file (the one that came from the command-line)
00578  *          Updated Fri, 24 Feb 2006 by David L. Paktor
00579  *              This is trickier than I thought.  Added a global indicator
00580  *                  of whether a file-boundary was crossed while getting
00581  *                  the word; previously, that was indicated by a return
00582  *                  value of zero, which now means something else...
00583  *              The flag,  closed_stream , will be cleared every time this
00584  *                  routine is entered, and set whenever close_stream() is
00585  *                  entered.
00586  *         Updated Tue, 28 Feb 2006 at 10:13 PST by David L. Paktor
00587  *              Trickier still.  On crossing a file-boundary, must not
00588  *                  consume the first word in the resumed file, for one
00589  *                  call; instead, return zero.  Consume it on the next
00590  *                  call.  The  closed_stream  flag is now irrelevant and
00591  *                  has gone away.
00592  *
00593  **************************************************************************** */
00594 
00595 signed long get_word( void)
00596 {
00597         size_t len;
00598         u8 *str;
00599         bool keep_skipping;
00600         bool pop_result;
00601 
00602         if ( need_to_pop_source )
00603         {
00604             pop_result = pop_source();
00605         }
00606 
00607         do {
00608             keep_skipping = skip_ws();
00609             if ( keep_skipping )
00610             {
00611                 pop_result = pop_source();
00612                 if ( pop_result || need_to_pop_source )
00613                 {
00614                     statbuf[0] = 0;
00615                     if ( pop_result )
00616                     {
00617                         return -1;
00618                     }
00619                 return 0;
00620                 }
00621             }
00622         } while ( keep_skipping );
00623 
00624         str=pc;
00625         while ( (str < end) && *str && *str!='\n' && *str!='\t' && *str!=' ')
00626                 str++;
00627 
00628         len=(size_t)(str-pc);
00629         if (len >= GET_BUF_MAX )
00630         {
00631             tokenization_error ( FATAL,
00632                 "get_word buffer overflow.  Max is %d.", GET_BUF_MAX-1 );
00633         }
00634 
00635         memcpy(statbuf, pc, len); 
00636         statbuf[len]=0;
00637 
00638 #ifdef DEBUG_SCANNER
00639         printf("%s:%d: debug: read token '%s', length=%ld\n",
00640                         iname, lineno, statbuf, len);
00641 #endif
00642         pc+=len;
00643         abs_token_no++;
00644         return len;
00645 }
00646 
00647 
00648 /* **************************************************************************
00649  *
00650  *      Function name:  get_word_in_line
00651  *      Synopsis:       Get the next word on the same line as the current
00652  *                      line of input.  If the end of line was reached
00653  *                      before a word was found, print an error message
00654  *                      and return an indication.
00655  *
00656  *      Inputs:
00657  *         Parameters:
00658  *             func_nam        Name of the function expecting the same-line
00659  *                                 input; for use in the Error Message.
00660  *                                 If NULL, do not issue Error Message
00661  *         Global Variables:
00662  *             pc              Input character pointer.  Saved for comparison
00663  *             lineno          Current input line number.  Saved for comparison
00664  *
00665  *      Outputs:
00666  *         Returned Value:     TRUE = success.  Word was acquired on same line.
00667  *         Global Variables:
00668  *             statbuf         Advanced to the next word in the input stream.
00669  *             pc              Advanced if no error; restored otherwise.
00670  *
00671  *      Error Detection:
00672  *          If no next word is gotten (i.e., we're at end-of-file), or if
00673  *              one is gotten but not on the same line, the routine will
00674  *              return FALSE; if  func_nam  is not NULL, an ERROR Message
00675  *              will be issued.
00676  *          Also, the values of  PC  LINENO  and  ABS_TOKEN_NO  will be reset
00677  *              to the positions they had when this routine was entered.
00678  *
00679  **************************************************************************** */
00680 
00681 bool get_word_in_line( char *func_nam)
00682 {                                                                               
00683     signed long wlen;
00684     bool retval = TRUE;
00685     u8 *save_pc = pc;
00686     unsigned int save_lineno = lineno;
00687     unsigned int save_abs_token_no = abs_token_no;
00688 
00689     /*  Copy of function name, for error message  */
00690     char func_cpy[FUNC_CPY_BUF_SIZE+1];
00691 
00692     /*  Do this first, in the likely event that  func_nam  was  statbuf   */
00693     if ( func_nam != NULL )
00694     {
00695         strncpy( func_cpy, func_nam, FUNC_CPY_BUF_SIZE);
00696         func_cpy[FUNC_CPY_BUF_SIZE] = 0;  /*  Guarantee a null terminator  */
00697     }
00698 
00699     wlen = get_word();
00700     if ( ( lineno != save_lineno ) || ( wlen <= 0 ) )
00701     {
00702         abs_token_no = save_abs_token_no;
00703         lineno = save_lineno;
00704         pc = save_pc;
00705         retval = FALSE;
00706         if ( func_nam != NULL )
00707         {
00708             tokenization_error ( TKERROR,
00709                "Operator %s expects its target on the same line\n",
00710                    strupr(func_cpy));
00711         }
00712     }
00713     return ( retval );
00714 }
00715 
00716 
00717 /* **************************************************************************
00718  *
00719  *      Function name:  get_rest_of_line
00720  *      Synopsis:       Get all the remaining text on the same line as
00721  *                      the current line of input.  If there is no text
00722  *                      (not counting whitespace) before the end of line,
00723  *                      return an indication.
00724  *
00725  *      Inputs:
00726  *         Parameters:         NONE
00727  *         Global Variables:
00728  *             pc              Input character pointer.  Saved for restoration
00729  *             lineno          Current input line number.  Saved for comparison
00730  *
00731  *      Outputs:
00732  *         Returned Value:     TRUE = success.  Text was acquired on same line.
00733  *         Global Variables:
00734  *             statbuf         Contains the text found in the input stream.
00735  *             pc              Advanced to end of line or of whitespace, if
00736  *                                 no error; restored otherwise.
00737  *             lineno          Preserved if no error; otherwise, restored.
00738  *             abs_token_no    Restored if error; otherwise, advanced as normal.
00739  *
00740  *      Error Detection:
00741  *          Routine will return FALSE if no text is gotten on the same line.
00742  *
00743  **************************************************************************** */
00744 
00745 bool get_rest_of_line( void)
00746 {
00747     bool retval = FALSE;
00748     u8 *save_pc = pc;
00749     unsigned int save_lineno = lineno;
00750     unsigned int save_abs_token_no = abs_token_no;
00751 
00752     if ( INVERSE( skip_ws() ) )
00753     {
00754         if ( lineno == save_lineno )
00755         {
00756             signed long wlen = get_until('\n');
00757             if ( wlen > 0 ) retval = TRUE;
00758         }else{
00759             abs_token_no = save_abs_token_no;
00760             lineno = save_lineno;
00761             pc = save_pc;
00762         }
00763     }
00764     return( retval);
00765 }
00766 
00767 
00768 /* **************************************************************************
00769  *
00770  *      Function name:  warn_unterm
00771  *      Synopsis:       Message for "Unterminated ..." something
00772  *                      Show saved line-number, where the "something" started,
00773  *                      and the definition, if any, in which it occurred.
00774  *
00775  *      Inputs:
00776  *         Parameters:
00777  *             severity              Type of error/warning message to display
00778  *                                       usually either WARNING or TKERROR
00779  *             something             String to print after "Unterminated"
00780  *             saved_lineno          Line-Number where the "something" started
00781  *         Global Variables:
00782  *             lineno                Saved, then restored.
00783  *             last_colon_defname    Used only if unterm_is_colon is TRUE;
00784  *         Local Static Variables:
00785  *             unterm_is_colon       See 07 Mar 2006 entry under Rev'n History
00786  *
00787  *      Outputs:
00788  *         Returned Value:           NONE
00789  *         Global Variables:
00790  *             lineno                Saved, then restored.
00791  *         Local Static Variables:
00792  *             unterm_is_colon       Reset to FALSE
00793  *         Printout:
00794  *             Warning or Error message
00795  *
00796  *      Revision History:
00797  *          Updated Mon, 06 Mar 2006 by David L. Paktor
00798  *              Added call to in_last_colon()
00799  *          Updated Tue, 07 Mar 2006 by David L. Paktor
00800  *              Call to in_last_colon() works okay in most cases except for
00801  *                  when the "something" is a Colon Definition; there, it
00802  *                  results in the phrase: ... Definition in definition of ...
00803  *                  which is awkward.  To eliminate that, I am introducing 
00804  *                  a Local Static Variable flag called  unterm_is_colon
00805  *                  which will be set only in the appropriate place and
00806  *                  re-cleared here.  It's a retro-fit, of course; it could
00807  *                  have been a parameter had the need for it occurred when
00808  *                  this routine was first constructed... 
00809  *
00810  **************************************************************************** */
00811 
00812 static bool unterm_is_colon = FALSE;
00813 void warn_unterm( int severity, char *something, unsigned int saved_lineno)
00814 {
00815     unsigned int tmp = lineno;
00816     lineno = saved_lineno;
00817     if ( unterm_is_colon )
00818     {
00819         tokenization_error( severity, "Unterminated %s of %s\n",
00820             something, strupr( last_colon_defname) );
00821         unterm_is_colon = FALSE;
00822     }else{
00823         tokenization_error( severity, "Unterminated %s", something);
00824         in_last_colon();
00825     }
00826     lineno = tmp;
00827 }
00828 
00829 /* **************************************************************************
00830  *
00831  *      Function name:  warn_if_multiline
00832  *      Synopsis:       Test for "Multi-line ..." something and issue WARNING
00833  *                      Show saved line-number, where the "something" started
00834  *
00835  *      Inputs:
00836  *         Parameters:
00837  *             something          String to print after "Unterminated"
00838  *             start_lineno       Line-Number where the "something" started
00839  *         Global Variables:
00840  *             lineno             Line-Number where we are now
00841  *             iname              Input file name, to satisfy ...where_started()
00842  *                                    (Not crossing any actual file boundary.)
00843  *             report_multiline   TRUE = go ahead with the message
00844  *
00845  *      Outputs:
00846  *         Returned Value:        NONE
00847  *         Global Variables:
00848  *             report_multiline   Restored to TRUE.
00849  *
00850  *      Error Detection:
00851  *          Only issue message if the current  lineno  doesn't equal
00852  *              the start_lineno  
00853  *
00854  *      Process Explanation:
00855  *          The directive "multi-line" allows the user to specify that
00856  *              the next "Multi-line ..." something is intentional, and
00857  *              will cause its warning to be suppressed.  It remains in
00858  *              effect until it's "used"; afterwards, it's reset.
00859  *
00860  **************************************************************************** */
00861 
00862 void warn_if_multiline( char *something, unsigned int start_lineno )
00863 {
00864     if ( report_multiline && ( start_lineno != lineno ) )
00865     {
00866         tokenization_error( WARNING, "Multi-line %s, started", something);
00867         where_started( iname, start_lineno);
00868     }
00869     report_multiline = TRUE;
00870 }
00871 
00872 
00873 /* **************************************************************************
00874  *
00875  *      Function name:  string_remark
00876  *      Synopsis:       Suspend string parsing past end of line and
00877  *                      whitespace at start of the new line.
00878  *
00879  *      Inputs:
00880  *         Parameters:
00881  *             errmsg_txt            Text to be used for error-message.
00882  *         Global Variables:
00883  *             pc                    Input-source Scanning pointer
00884  *
00885  *      Outputs:
00886  *         Returned Value:           NONE
00887  *         Global Variables:
00888  *             pc                    Will point to first non-blank in new line
00889  *
00890  *      Error Detection:
00891  *          The return value of the skip_until() or skip_ws() routine
00892  *             will indicate if PC goes past END.  Issue a WARNING.
00893  *             The calling routine will handle things from there.
00894  *
00895  **************************************************************************** */
00896 
00897 static void string_remark(char *errmsg_txt)
00898 {
00899     unsigned int sav_lineno = lineno;
00900     bool eof = skip_until('\n');
00901     if ( ! eof )
00902     {
00903         eof = skip_ws();
00904     }
00905     if ( eof )
00906     {
00907         warn_unterm(WARNING, errmsg_txt, sav_lineno);
00908         }
00909         
00910 }
00911 
00912 
00913 /*  Convert the given string to a number in the supplied base   */
00914 /*  Allow -- and ignore -- embedded periods.    */
00915 /*  The  endptr  param represents a pointer that will be updated
00916  *      with the address of the first non-numeric character encountered,
00917  *      (unless it is a NULL, in which case it is ignored).
00918  */
00919 /*  There is no test for a completely invalid string;
00920  *  the calling routine is responsible for ascertaining
00921  *  the validity of the string being passed.
00922  */
00923 static long parse_number(u8 *start, u8 **endptr, int lbase) 
00924 {
00925         long val = 0;
00926         bool negative = FALSE ;
00927         int  curr;
00928         u8 *nptr=start;
00929 
00930         curr = *nptr;
00931         if (curr == '-')
00932         {
00933                 negative = TRUE ;
00934                 nptr++;
00935         }
00936         
00937         for (curr = *nptr; (curr = *nptr); nptr++) {
00938                 if ( curr == '.' )
00939                         continue;
00940                 if ( curr >= '0' && curr <= '9')
00941                         curr -= '0';
00942                 else if (curr >= 'a' && curr <= 'f')
00943                         curr += 10 - 'a';
00944                 else if (curr >= 'A' && curr <= 'F')
00945                         curr += 10 - 'A';
00946                 else
00947                         break;
00948                 
00949                 if (curr >= lbase)
00950                         break;
00951                 
00952                 val *= lbase;
00953                 val += curr;
00954         }
00955 
00956 #ifdef DEBUG_SCANNER
00957         if (curr)
00958                 printf( "%s:%d: warning: couldn't parse number '%s' (%d/%d)\n",
00959                                 iname, lineno, start,curr,lbase);
00960 #endif
00961 
00962         if (endptr)
00963                 *endptr=nptr;
00964 
00965         if (negative)
00966         {
00967                 val = -val;
00968         }
00969         return val;
00970 }
00971 
00972 /* **************************************************************************
00973  *
00974  *      Function name:  add_byte_to_string
00975  *      Synopsis:       Add the given byte (or character) to the string
00976  *                          being accumulated in statbuf, but protect
00977  *                          against a buffer overflow.
00978  *
00979  *      Inputs:
00980  *         Parameters:
00981  *             nu_byte           The given character to be added
00982  *             walk              Pointer to pointer to the position
00983  *                                   in  statbuf  where the character
00984  *                                   is to be placed
00985  *         Global Variables:
00986  *             statbuf           Buffer where the string is accumulated
00987  *         Macros:
00988  *             GET_BUF_MAX       Size of the buffer
00989  *
00990  *      Outputs:
00991  *         Returned Value:       NONE
00992  *         Supplied Pointers:
00993  *             **walk            Given character is placed here
00994  *             *walk             Incremented in any case
00995  *
00996  *      Error Detection:
00997  *          If  walk  has reached end of string buffer, do not place
00998  *              the character, but continue to increment  walk .
00999  *              Calling routine will detect overflow.
01000  *
01001  **************************************************************************** */
01002                                 
01003 static void add_byte_to_string( u8 nu_byte, u8 **walk )
01004 {
01005     if ( *walk - statbuf < GET_BUF_MAX )
01006     {
01007         **walk = nu_byte;
01008         }
01009     (*walk)++;
01010 }
01011 
01012 /* **************************************************************************
01013  *
01014  *      Function name:  c_string_escape
01015  *      Synopsis:       Process C-style escape syntax in strings
01016  *
01017  *      Inputs:
01018  *         Parameters:
01019  *             walk                    Pointer to pointer to area into
01020  *                                         which to put acquired values
01021  *         Global Variables:
01022  *             pc                      Input-source Scanning pointer
01023  *
01024  *      Outputs:
01025  *         Returned Value:             NONE
01026  *         Global Variables:
01027  *             pc                      Point to last character processed.
01028  *         Supplied Pointers:
01029  *             *walk                   Advanced by number of bytes acquired
01030  *
01031  *      Error Detection:
01032  *          WARNING conditions.  See under "Process Explanation" below.
01033  *
01034  *      Process Explanation:
01035  *          Start with  PC  pointing to the first character to process
01036  *              i.e., after the backslash.
01037  *          We recognize newline, tab and numbers
01038  *          A digit-string in the current base can be converted to a number.
01039  *          The first non-numeric character ends the numeric sequence
01040  *              and gets swallowed up.
01041  *          If the number exceeds the size of a byte, use the truncated
01042  *              value and issue a WARNING.
01043  *          If the first character in the "digit"-string was non-numeric,
01044  *              use the character literally and issue a WARNING.
01045  *          If the character that ended the numeric sequence is a quote,
01046  *              it might be the end of the string, or the start of a
01047  *              special-character or even of an "( ... ) hex-sequence,
01048  *              so don't swallow it up.
01049  *
01050  *      Still to be done:
01051  *          Better protection against PC pointer-over-run past END.
01052  *              Currently, this works, but it's held together by threads:
01053  *              Because  init_stream  forces a null-byte at the end of
01054  *              the input buffer, parse_number() exits immediately upon
01055  *              encountering it.  This situation could be covered more
01056  *              robustly...
01057  *
01058  **************************************************************************** */
01059 
01060 static void c_string_escape( u8 **walk)
01061 {
01062     char c = *pc;
01063     u8 val;
01064     /*  We will come out of this "switch" statement
01065      *      with a value for  val  and a decision
01066      *      as to whether to write it.
01067      */
01068     bool write_val = TRUE;
01069         
01070     switch (c)
01071     {
01072                         case 'n':
01073                                 /* newline */
01074             val = '\n';
01075                                 break;
01076                         case 't':
01077                                 /* tab */
01078             val = '\t';
01079                                 break;
01080                         default:
01081 
01082             /*  Digit-string?  Convert it to a number, using the current base.
01083              *  The first non-numeric character ends the numeric sequence
01084              *      and gets swallowed up.
01085              *  If the number exceeds the size of a byte, use the truncated
01086              *      value and issue a WARNING.
01087              *  If the first character in the "digit"-string was non-numeric,
01088              *      use the character literally and issue a WARNING.
01089              */
01090 
01091              /*
01092              *  If the sequence ender is a quote, it might be the end of
01093              *      the string, or the start of a special-character or even
01094              *      of an "( ... ) hex-sequence, so don't swallow it up.
01095              */
01096             {
01097                 long lval;
01098                 u8 *sav_pc = pc;
01099                 lval=parse_number(pc, &pc, base);
01100                 val = (u8)lval;
01101 #ifdef DEBUG_SCANNER
01102                                 if (verbose)
01103                                         printf( "%s:%d: debug: escape code "
01104                                                 "0x%x\n",iname, lineno, val);
01105 #endif
01106                 if ( lval > 0x0ff )
01107                 {
01108                     tokenization_error ( WARNING,
01109                         "Numeric String after \\ overflows byte.  "
01110                             "Using 0x%02x.\n", val);
01111                         }
01112 
01113                 if ( pc == sav_pc )
01114                 {
01115                     /*  NOTE:  Here, PC hasn't been advanced past its
01116                      *      saved value, so we can count on  C  remaining
01117                      *      unchanged since the start of the routine.
01118                      */ 
01119                     /*  Don't use the null-byte at the end of the buffer  */
01120                     if ( ( pc >= end ) 
01121                     /*        or a sequence-ending quote.                 */
01122                          || ( c == '"' ) )
01123                     {
01124                         write_val = FALSE;
01125                     }else{
01126                         /*  In the WARNING message, print the character
01127                          *      if it's printable or show it in hex
01128                          *      if it's not.
01129                          */
01130                         if ( (c > 0x20 ) && ( c <= 0x7e) )
01131                         {
01132                             tokenization_error ( WARNING,
01133                                 "Unrecognized character, %c, "
01134                                     "after \\ in string.  "
01135                                         "Using it literally.\n", c);
01136                         }else{
01137                             tokenization_error ( WARNING,
01138                                 "Unrecognized character, 0x%02x, "
01139                                     "after \\ in string.  "
01140                                         "Using it literally.\n", c);
01141                         }
01142                         val = c;
01143                     }
01144                 }
01145                 /*  NOTE:  Here, however, PC may have been advanced...  */
01146                 /*  Don't swallow the sequence-ender if it's a quote.   */
01147                 if ( *pc == '"' )
01148                 {
01149                     pc--;
01150                 }
01151 
01152             }   /*  End of the  "default"  clause  */
01153     }    /*  End of the  "switch"  statement  */
01154 
01155     if ( write_val ) add_byte_to_string( val, walk );
01156 
01157 }
01158 
01159 /* **************************************************************************
01160  *
01161  *      Function name:  get_sequence
01162  *      Synopsis:       Process the Hex-Number option in strings
01163  *                      Protect against PC pointer-over-run past END.
01164  *
01165  *      Inputs:
01166  *         Parameters:
01167  *             **walk           Pointer to pointer to area into which
01168  *                                  to put acquired values
01169  *         Global Variables:
01170  *             pc               Input-source Scanning pointer
01171  *             end              End of input-source buffer
01172  *
01173  *      Outputs:
01174  *         Returned Value:      TRUE = "Normal Completion" (I.e., not EOF)
01175  *         Global Variables:
01176  *             pc               Points at terminating close-paren, or END
01177  *             lineno           Input File Line-Number Counter, may be incr'd
01178  *         Supplied Pointers:
01179  *             *walk            Advanced by number of values acquired
01180  *
01181  *      Error Detection:
01182  *          End-of-file encountered before end of hex-sequence:
01183  *              Issue a Warning, conclude processing, return FALSE.
01184  *
01185  *      Process Explanation:
01186  *          SETUP and RULES:
01187  *              Start with  PC  pointing to the first character
01188  *                  after the '('  (Open-Paren)     
01189  *              Bytes are gathered from digits in pairs, except
01190  *                  when separated they are treated singly.
01191  *              Allow a backslash in the middle of the sequence
01192  *                  to skip to the end of the line and past the
01193  *                  whitespace at the start of the next line,
01194  *                  i.e., it acts as a comment-escape.
01195  *
01196  *          INITIALIZE:
01197  *              PV_indx = 0
01198  *              Set return-indicator to "Abnormal Completion"
01199  *              Ready_to_Parse = FALSE
01200  *              Stuff NULL into PVAL[2]
01201  *          WHILE PC is less than END
01202  *              Pick up character at PC into Next_Ch
01203  *              IF  Next_Ch  is close-paren :
01204  *                  Set return-indicator to "Normal Completion".
01205  *                  Done!  Break out of loop.
01206  *              ENDIF
01207  *              IF comment-escape behavior (controlled by means of a
01208  *                      command-line switch) is allowed
01209  *                  IF  Next_Ch  is backslash :
01210  *                      Skip to end-of line, skip whitespace.
01211  *                      If that makes PC reach END :  WARNING message.
01212  *                          (Don't need to break out of loop;
01213  *                               normal test will terminate.)
01214  *                      CONTINUE Loop.
01215  *                          (Don't increment PC; PC is already at right place).
01216  *                  ENDIF
01217  *              ENDIF
01218  *              IF  Next_Ch  is a valid Hex-Digit character :
01219  *                  Stuff it into  PVAL[PV_indx]
01220  *                  IF (PV_indx is 0) :
01221  *                      Increment PV_indx
01222  *                  ELSE
01223  *                      Set Ready_to_Parse to TRUE
01224  *                  ENDIF
01225  *              ELSE
01226  *                  IF  Next_Ch  is a New-Line, increment Line Number counter
01227  *                  IF (PV_indx is 1) :
01228  *                      Stuff NULL into PVAL[1]
01229  *                      Set Ready_to_Parse to TRUE
01230  *                  ENDIF
01231  *              ENDIF
01232  *              IF Ready_to_Parse
01233  *                  Parse PVAL
01234  *                  Stuff into WALK
01235  *                  Reset PV_indx to zero
01236  *                  Reset Ready_to_Parse to FALSE
01237  *              ENDIF
01238  *              Increment PC
01239  *          REPEAT
01240  *          Return with Normal/Abnormal completion indicator
01241  *
01242  **************************************************************************** */
01243 
01244 static bool get_sequence(u8 **walk)
01245 {
01246     int pv_indx = 0;
01247     bool retval = FALSE;   /*  "Abnormal Completion" indicator  */
01248     bool ready_to_parse = FALSE;
01249     char next_ch;
01250     char pval[3];
01251 
01252 #ifdef DEBUG_SCANNER
01253         printf("%s:%d: debug: hex field:", iname, lineno);
01254 #endif
01255     pval[2]=0;
01256 
01257     while ( pc < end )
01258     {
01259         next_ch = *pc;
01260         if ( next_ch == ')' )
01261         {
01262             retval = TRUE;
01263                                 break;
01264         }
01265         if ( hex_remark_escape )
01266         {
01267             if ( next_ch == '\\' )
01268             {
01269                 string_remark("string hex-sequence remark");
01270                 continue;
01271             }
01272         }
01273         if ( isxdigit(next_ch) )
01274         {
01275             pval[pv_indx] = next_ch;
01276             if ( pv_indx == 0 )
01277             {
01278                 pv_indx++;
01279             }else{
01280                 ready_to_parse = TRUE;
01281             }
01282         }else{
01283             if ( next_ch == '\n' )  lineno++ ;
01284             if ( pv_indx != 0 )
01285             {
01286                 pval[1] = 0;
01287                 ready_to_parse = TRUE;
01288             }
01289         }
01290         if ( ready_to_parse )
01291         {
01292             u8 val = parse_number(pval, NULL, 16);
01293             *((*walk)++)=val;
01294 #ifdef DEBUG_SCANNER
01295                 printf(" %02x",val);
01296 #endif
01297             pv_indx = 0;
01298             ready_to_parse = FALSE;
01299         }
01300         pc++;
01301     }
01302 #ifdef DEBUG_SCANNER
01303         printf("\n");
01304 #endif
01305     return ( retval );
01306 }
01307 
01308 /* **************************************************************************
01309  *
01310  *    Return the length of the string.
01311  *    Pack the string, without the terminating '"' (Quote), into statbuf
01312  *    Protect against PC pointer-over-run past END.
01313  *    Enable Quote-Backslash as a String-Remark Escape.
01314  *    Allowability of Quote-Backslash as a String-Remark is under control
01315  *        of a command-line switch (string_remark_escape ).
01316  *    Allowability of C-style escape characters is under control
01317  *        of a command-line switch ( c_style_string_escape ).
01318  *
01319  *    Truncate string to size of Forth Packed-String (i.e., uses leading
01320  *        count-byte, so limited to 255, number that one byte can express)
01321  *        unless the string is being gathered for a Message or is being
01322  *        consumed for purposes of ignoring it, in either of which case
01323  *        that limit need not be enforced.  Parameter "pack_str" controls
01324  *        this:  TRUE  if limit needs to be enforced.
01325  *
01326  *    Issue WARNING if string length gets truncated.
01327  *    Issue WARNING if string crosses line.
01328  *        The issuance of the Multi-line WARNING is under control of a
01329  *           one-shot directive similar to OVERLOAD , called  MULTI-LINE
01330  *
01331  *    Still to be decided:
01332  *        Do we want to bring the allowability of strings crossing
01333  *            lines under control of a command-line switch?
01334  *
01335  ************************************************************************** */
01336 
01337 static signed long get_string( bool pack_str)
01338 {
01339         u8 *walk;
01340         unsigned long len;
01341         char c;
01342         bool run = TRUE;
01343         unsigned long start_lineno = lineno;    /*  For warning message  */
01344         
01345         /*
01346          *  Bump past the single whitespace character that delimits
01347          *      the command -- e.g.,  ."  or  "  or suchlike -- that
01348          *      starts the string.  Allow new-line to be a command-
01349          *      -delimiting whitespace character.  Regard any sub-
01350          *      sequent whitespace characters as part of the string
01351          */
01352         if (*pc++=='\n') lineno++;
01353 
01354         got_until_eof = TRUE ;
01355 
01356         walk=statbuf;
01357         while (run) {
01358                 switch ((c=*pc))
01359                 {
01360                     /*  Standard use of '"' (Quote)  for special-char escape  */
01361                     case '\"':
01362                         /*  Skip the '"' (Quote) */
01363                                 pc++;
01364                         /*  End of the buffer also ends the string cleanly  */
01365                         if ( pc >= end )
01366                         {
01367                             run = FALSE;
01368                             got_until_eof = FALSE ;
01369                                 break;
01370                         }
01371                         /*  Pick up the next char after the '"' (Quote) */
01372                         c=*pc;
01373                         switch (c)
01374                         {
01375                             case '(':
01376                                 pc++; /* skip the '(' */
01377                                 run = get_sequence(&walk);
01378                                 break;
01379 
01380                         case 'n':
01381                                 add_byte_to_string( '\n', &walk);
01382                                 break;
01383                         case 'r':
01384                                 add_byte_to_string( '\r', &walk);
01385                                 break;
01386                         case 't':
01387                                 add_byte_to_string( '\t', &walk);
01388                                 break;
01389                         case 'f':
01390                                 add_byte_to_string( '\f', &walk);
01391                                 break;
01392                         case 'l':
01393                                 add_byte_to_string( '\n', &walk);
01394                                 break;
01395                         case 'b':
01396                                 add_byte_to_string( 0x08, &walk);
01397                                 break;
01398                         case '!':
01399                                 add_byte_to_string( 0x07, &walk);
01400                                 break;
01401                         case '^':
01402                                 pc++;    /*   Skip the up-arrow (Caret) */
01403                                 add_byte_to_string( *pc & 0x1f , &walk);
01404                                 break;
01405                             /*  We're done after any of the whitespace
01406                              *     characters follows a quote.
01407                              */
01408                         case ' ':
01409                         case '\t':
01410                                 /*  Advance past the terminating whitespace
01411                                  *       character, except newline.
01412                                  *  Let  get_word()  handle that.
01413                                  */
01414                                 pc++;
01415                             case '\n':
01416                                 run=FALSE;
01417                                 got_until_eof = FALSE ;
01418                                 break;
01419                         default:
01420                                 /*  Control allowability of Quote-Backslash
01421                                  *      as a String-Remark by means of a
01422                                  *      command-line switch.
01423                                  */
01424                                 if ( string_remark_escape )
01425                                 {
01426                                     if ( c == '\\' )
01427                                     {
01428                                         string_remark("string-escape remark");
01429                                         /* The first non-blank in the new line
01430                                          *     has not been processed yet.
01431                                          *     Back up to allow it to be.
01432                                          */
01433                                         pc--;
01434                                 break;
01435                         }
01436                                 }
01437                                 add_byte_to_string( c, &walk);
01438                         }
01439                         break;
01440                     case '\n':
01441                         /*  Allow strings to cross lines.  Include the
01442                          *      newline in the string.  Account for it.
01443                          */
01444                         lineno++;
01445                 default:
01446                         /*  Control allowability of C-style escape-character
01447                          *      syntax by means of a command-line switch.
01448                          */
01449                         if ( c_style_string_escape )
01450                         {
01451                             if ( c == '\\' )
01452                             {
01453                                 pc++;
01454                                 c_string_escape(&walk );
01455                                 break;
01456                             }
01457                         }
01458                         add_byte_to_string( c, &walk);
01459                 }
01460                 /*  Advance past the char processed, unless we're done.     */
01461                 if ( run ) pc++;
01462                 /*  Done if we hit end of file before string was concluded  */
01463                 if ( pc >= end )
01464                 {
01465                     run = FALSE;
01466                     if ( got_until_eof )
01467                     {
01468                         warn_unterm( WARNING, "string", start_lineno);
01469                         /*  Prevent multiple messages for one error  */
01470                         got_until_eof = FALSE;
01471                     }
01472                 }
01473         }
01474         
01475         warn_if_multiline( "string", start_lineno);
01476 
01477         len = walk - statbuf;
01478         if (len >= GET_BUF_MAX )
01479         {
01480             tokenization_error ( TKERROR,
01481                 "get_string buffer overflow.  Max is %d\n.", GET_BUF_MAX-1 );
01482             len = GET_BUF_MAX-1;
01483         }
01484 #ifdef DEBUG_SCANNER
01485         if (verbose)
01486                 printf("%s:%d: debug: scanned string: '%s'\n", 
01487                                         iname, lineno, statbuf);
01488 #endif
01489         if ( pack_str && (len > STRING_LEN_MAX) )
01490         {
01491             tokenization_error ( WARNING,
01492                 "String length being truncated to %d.\n", STRING_LEN_MAX );
01493             len = STRING_LEN_MAX;
01494         }
01495         statbuf[len] = 0;
01496 
01497         return len ;
01498 }
01499 
01500 
01501 /* **************************************************************************
01502  *
01503  *      Function name:  handle_user_message
01504  *      Synopsis:       Collect a user-generated tokenization-time message;
01505  *                          either print it or discard it.  Shared code
01506  *                          for  user_message()  and  skip_user_message()
01507  *
01508  *      Inputs:
01509  *         Parameters:
01510  *             delim                End-of-string delimiter character.
01511  *                                  If it's a double-quote ("), we will use
01512  *                                      the get-string() routine, with all
01513  *                                      its options, to collect the message.
01514  *                                  Otherwise, we'll capture plain text from
01515  *                                      the input stream.
01516  *             print_it             TRUE if we should print the message 
01517  *         Local Static Variables:
01518  *             got_until_eof        TRUE if reached end of buffer before delim.
01519  *         Global Variables:
01520  *             lineno               Save, then restore.
01521  *
01522  *      Outputs:
01523  *         Returned Value:          NONE
01524  *         Global Variables:
01525  *             statbuf              The string will be collected in here
01526  *             
01527  *         Printout (if  print_it  is TRUE):
01528  *            The string, with new-line tacked on, will be printed from
01529  *                the  tokenization_error()  routine as a MESSAGE.
01530  *            The line-number will be shown as of the origin of the message
01531  *
01532  *      Error Detection:
01533  *          Error-reports will be printed regardless of  print_it  param.
01534  *          If delimiter was not found, show "Unterminated" warning message.
01535  *          If delimiter was " (double-quote), the get_string() routine
01536  *              already checked for a multi-line construct; if delimiter is
01537  *              a new-line, then a multi-line construct is impossible.
01538  *              otherwise, we will do the multi-line check here.
01539  *
01540  **************************************************************************** */
01541 
01542 static void handle_user_message( char delim, bool print_it )
01543 {
01544     signed long wlen;
01545     unsigned int start_lineno = lineno;
01546     unsigned int multiline_start = lineno;    /*  For warning message  */
01547     bool check_multiline = FALSE;
01548     const char *ug_msg = "user-generated message";
01549 
01550     if ( delim == '"' )
01551     {
01552         wlen = get_string( FALSE);
01553     }else{
01554         /*
01555          *  When the message-delimiter is a new-line, and the
01556          *      command-delimiter was a new-line, it means the
01557          *      string length is zero; we won't bump the PC.
01558          *  Otherwise, we will honor the convention we extend
01559          *      to  .(  whereby, if the command is delimited
01560          *      by a new-line, we allow the string to begin
01561          *      on the next line.
01562          */
01563         if ( delim == '\n' )
01564         {
01565             if ( *pc != '\n') pc++;
01566         }else{
01567                 if (*pc++=='\n') lineno++;
01568             multiline_start = lineno;
01569             check_multiline = TRUE;
01570         }
01571         wlen = get_until( delim );
01572     }
01573 
01574     if ( print_it )
01575     {
01576         unsigned int tmp_lineno = lineno;
01577         lineno = start_lineno;
01578         /*  Don't add a new-line to body of the message.
01579          *  Routine already takes care of that.
01580          *  Besides, buffer might be full...
01581          */
01582         tokenization_error( MESSAGE, statbuf);
01583         lineno = tmp_lineno;
01584     }
01585 
01586     if ( got_until_eof )   /*  Crude but effective retrofit... */
01587     {
01588         warn_unterm(WARNING, (char *)ug_msg, start_lineno);
01589     }else{
01590         if ( check_multiline )
01591         {
01592             warn_if_multiline( (char *)ug_msg, multiline_start);
01593         }
01594     }
01595 }
01596 
01597 /* **************************************************************************
01598  *
01599  *      Function name:  user_message
01600  *      Synopsis:       Collect a user-generated message and
01601  *                          print it at tokenization-time.
01602  *
01603  *      Tokenizer directive (either mode):
01604  *          Synonyms                              String Delimiter
01605  *             [MESSAGE]  #MESSAGE  [#MESSAGE]        end-of-line
01606  *             #MESSAGE"                                  "  
01607  *      "Tokenizer-Escape" mode directive         String Delimiter
01608  *          .(                                            )
01609  *          ."                                            "
01610  *
01611  *      Inputs:
01612  *         Parameter is the "parameter field" of the TIC entry, which
01613  *             was initialized to the end-of-string delimiter character.
01614  *
01615  *      Outputs:
01616  *         Returned Value:          NONE
01617  *         Printout:                User-message, parsed from input.
01618  *
01619  *      Extraneous Remarks:
01620  *          We would have preferred to simply use the "character value"
01621  *              aspect of the union, but we found portability issues
01622  *              between big- and little- -endian processors, so we still
01623  *              have to recast its type here.
01624  *
01625  **************************************************************************** */
01626 
01627 void user_message( tic_param_t pfield )
01628 {
01629     char delim = (char)pfield.deflt_elem ;
01630     handle_user_message( delim, TRUE);
01631 }
01632 
01633 /* **************************************************************************
01634  *
01635  *      Function name:  skip_user_message
01636  *      Synopsis:       Collect a user-generated message and discard it.
01637  *                          Used when ignoring a Conditional section.
01638  *
01639  *      Tokenizer directive (either mode):
01640  *          Synonyms                              String Delimiter
01641  *             [MESSAGE]  #MESSAGE  [#MESSAGE]        end-of-line
01642  *             #MESSAGE"                                  "  
01643  *      "Tokenizer-Escape" mode directive         String Delimiter
01644  *          .(                                            )
01645  *          ."                                            "
01646  *
01647  *      Inputs:
01648  *         Parameters:
01649  *             pfield               "Parameter field" of the TIC entry, which
01650  *                                      was initialized to the delimiter.
01651  *
01652  *      Outputs:
01653  *         Returned Value:          NONE
01654  *         Printout:                NONE
01655  *
01656  **************************************************************************** */
01657 
01658 void skip_user_message( tic_param_t pfield )
01659 {
01660     char delim = (char)pfield.deflt_elem ;
01661     handle_user_message( delim, FALSE);
01662 }
01663 
01664 
01665 
01666 /* **************************************************************************
01667  *
01668  *      Function name:  get_number
01669  *      Synopsis:       If the word retrieved from the input stream is a
01670  *                      valid number (under the current base) convert it.
01671  *                      Return an indication if it was not.
01672  *
01673  *      Inputs:
01674  *         Parameters:
01675  *             *result             Pointer to place to return the number
01676  *         Global Variables:
01677  *             statbuf             The word just read that is to be converted.
01678  *             base                The current numeric-interpretation base.
01679  *
01680  *      Outputs:
01681  *         Returned Value:         TRUE = Input was a valid number
01682  *         Supplied Pointers:
01683  *             *result             The converted number, if valid
01684  *                                     otherwise undefined
01685  *
01686  *      Revision History:
01687  *          Updated Mon, 28 Mar 2005 by David L. Paktor
01688  *              Always use the current base.
01689  *              Reversed sense of return-flag.
01690  *
01691  **************************************************************************** */
01692 
01693 bool get_number( long *result)
01694 {
01695     u8 *until;
01696     long val;
01697     bool retval = FALSE ;
01698 
01699     val = parse_number(statbuf, &until, base);
01700         
01701 #ifdef DEBUG_SCANNER
01702     printf("%s:%d: debug: parsing number: base 0x%x, val 0x%lx, "
01703                 "processed %ld of %ld bytes\n", iname, lineno, 
01704                  base, val,(size_t)(until-statbuf), strlen((char *)statbuf));
01705 #endif
01706 
01707     /*  If number-parsing ended before the end of the input word,
01708      *      then the input word was not a valid number.
01709      */
01710     if (until==(statbuf+strlen((char *)statbuf)))
01711     {
01712         *result=val;
01713         retval = TRUE;
01714     }
01715 
01716     return ( retval );
01717 }
01718 
01719 /* **************************************************************************
01720  *
01721  *      Function name:  deliver_number
01722  *      Synopsis:       Deliver the supplied number according to the
01723  *                              state of the tokenizer:
01724  *                          In normal tokenization mode, emit it as an
01725  *                              FCode literal.
01726  *                          In  "Tokenizer-Escape" mode, push it onto
01727  *                              the Data Stack.
01728  *
01729  *      Inputs:
01730  *         Parameters:
01731  *             numval                  The number, verified to be valid.
01732  *         Global Variables:
01733  *             in_tokz_esc   TRUE if tokenizer is in "Tokenizer Escape" mode.
01734  *
01735  *      Outputs:
01736  *         Returned Value:             NONE 
01737  *         Items Pushed onto Data-Stack:
01738  *             Top:                    The number, if  in_tokz_esc  was TRUE
01739  *         FCode Output buffer:
01740  *             If  in_tokz_esc  was FALSE, a  b(lit)  token will be written,
01741  *                 followed by the number.
01742  *
01743  **************************************************************************** */
01744 
01745 static void deliver_number( long numval)
01746 {
01747     if ( in_tokz_esc )
01748     {
01749         dpush( numval );
01750     } else {
01751         emit_literal(numval);
01752     }
01753 }
01754 /* **************************************************************************
01755  *
01756  *      Function name:  handle_number
01757  *      Synopsis:       Convert the word just retrieved from the input stream
01758  *                              to a number.
01759  *                      Indicate whether the string was a valid number and
01760  *                              deliver it, as appropriate.
01761  *
01762  *      Inputs:
01763  *         Parameters:                 NONE
01764  *         Global Variables:
01765  *             statbuf       The word that was just read, and to be converted.
01766  *
01767  *      Outputs:
01768  *         Returned Value:    TRUE = Input string was a valid number
01769  *         If input string was a valid number, the converted number will
01770  *             be delivered, as appropriate, by  deliver_number(). 
01771  *
01772  **************************************************************************** */
01773 
01774 static bool handle_number( void )
01775 {
01776     bool retval ;
01777     long numval;
01778 
01779     retval = get_number( &numval );
01780     if ( retval )
01781     {
01782         deliver_number( numval );
01783     }
01784 
01785     return ( retval );
01786 }
01787 
01788 /* **************************************************************************
01789  *
01790  *      Function name:  ascii_right_number
01791  *      Synopsis:       Convert a character sequence to a number, justified
01792  *                          toward the right (i.e., the low-order bytes) and
01793  *                          deliver it, as appropriate.
01794  *
01795  *      Inputs:
01796  *         Parameters:
01797  *             in_str                  The input string
01798  *
01799  *      Outputs:
01800  *         Returned Value:             NONE
01801  *         The converted number will be delivered by  deliver_number(). 
01802  *
01803  *      Process Explanation:
01804  *          The last four characters in the sequence will become the number.
01805  *          If there are fewer than four, they will fill the low-order part
01806  *              of the number.
01807  *          Example:  PCIR            is converted to  h# 50434952
01808  *                    CPU             is converted to  h# 00435055
01809  *             and
01810  *                    LotsOfStuff     is equivalent to  a# tuff 
01811  *                                    and is converted to  h# 74756666
01812  *
01813  **************************************************************************** */
01814 
01815 static void ascii_right_number( char *in_str)
01816 {
01817     u8 nxt_ch;
01818     char *str_ptr = in_str;
01819     long numval = 0;
01820 
01821     for ( nxt_ch = (u8)*str_ptr ;
01822             ( nxt_ch = (u8)*str_ptr ) != 0 ;
01823                 str_ptr++ )
01824     {
01825         numval = ( numval << 8 ) + nxt_ch ;
01826     }
01827     deliver_number( numval );
01828 }
01829 
01830 
01831 /* **************************************************************************
01832  *
01833  *      Function name:  ascii_left_number
01834  *      Synopsis:       Similar to  ascii_right_number()  except justified
01835  *                          toward the left (i.e., the high-order bytes).
01836  *                      
01837  *
01838  *      Inputs:
01839  *         Parameters:
01840  *             in_str                  The input string
01841  *
01842  *      Outputs:
01843  *         Returned Value:            NONE
01844  *         The converted number will be delivered by  deliver_number().
01845  *
01846  *      Process Explanation:
01847  *          If there are fewer than four characters in the sequence, they
01848  *              will fill the high-order part of the number.
01849  *                    CPU             is converted to  h# 43505500
01850  *          In all other respects, similar to  ascii_right_number()
01851  *
01852  **************************************************************************** */
01853 
01854 static void ascii_left_number( char *in_str)
01855 {
01856     u8 nxt_ch;
01857     char *str_ptr = in_str;
01858     long numval = 0;
01859     int shift_amt = 24;
01860     bool shift_over = FALSE ;
01861 
01862     for ( nxt_ch = (u8)*str_ptr ;
01863             ( nxt_ch = (u8)*str_ptr ) != 0 ;
01864                 str_ptr++ )
01865     {
01866         if ( shift_over )  numval <<= 8;
01867         if ( shift_amt == 0 )  shift_over = TRUE ;
01868         numval += ( nxt_ch << shift_amt );
01869         if ( shift_amt > 0 ) shift_amt -= 8;
01870     }
01871     deliver_number( numval );
01872 
01873 }
01874 
01875 /* **************************************************************************
01876  *
01877  *      Function name:  init_scanner
01878  *      Synopsis:       Allocate memory the Scanner will need.
01879  *                          Only need to call once per program run.
01880  *
01881  **************************************************************************** */
01882 
01883 void init_scanner(void)
01884 {
01885         statbuf=safe_malloc(GET_BUF_MAX, "initting scanner");
01886 }
01887 
01888 /* **************************************************************************
01889  *
01890  *      Function name:  exit_scanner
01891  *      Synopsis:       Free up memory the Scanner used
01892  *
01893  **************************************************************************** */
01894 
01895 void exit_scanner(void)
01896 {
01897         free(statbuf);
01898 }
01899 
01900 /* **************************************************************************
01901  *
01902  *      Function name:  set_hdr_flag
01903  *      Synopsis:       Set the state of the "headered-ness" flag to the
01904  *                          value given, unless over-ridden by one or both
01905  *                          of the "always-..." Command-Line Flags
01906  *
01907  *      Inputs:
01908  *         Parameters:
01909  *             new_flag                  New setting
01910  *         Global Variables:
01911  *             always_headers            Override HEADERLESS and make HEADERS
01912  *             always_external           Override HEADERLESS and HEADERS;
01913  *                                           make EXTERNAL
01914  *
01915  *      Outputs:
01916  *         Returned Value:               None
01917  *         Local Static Variables:
01918  *             hdr_flag                  Adjusted to new setting
01919  *
01920  *      Process Explanation:
01921  *          If  always_headers  is TRUE, and  new_flag  is not FLAG_EXTERNAL
01922  *              then set to FLAG_HEADERS
01923  *          If  always_external  is TRUE, set to FLAG_EXTERNAL, regardless.
01924  *              (Note:  always_external  over-rides  always_headers).
01925  *          Otherwise, set to  new_flag
01926  *
01927  **************************************************************************** */
01928 
01929 static void set_hdr_flag( headeredness new_flag)
01930 {
01931     headeredness new_state = new_flag;
01932     switch ( new_flag)
01933     {
01934         case FLAG_HEADERLESS:
01935             {
01936                 if ( always_headers )
01937                 {   new_state = FLAG_HEADERS;
01938                 }
01939             /*  No  break.  Intentional...   */
01940             }
01941         case FLAG_HEADERS:
01942             {
01943                 if ( always_external )
01944                 {   new_state = FLAG_EXTERNAL;
01945                 }
01946             /*  No  break.  Intentional...   */
01947             }
01948         case FLAG_EXTERNAL:
01949             break;  /*  Satisfy compiler's error-checking...   */
01950         /*  No default needed here...   */
01951     }
01952 
01953     hdr_flag = new_state;
01954 
01955 }
01956 
01957 
01958 /* **************************************************************************
01959  *
01960  *      Function name:  init_scan_state
01961  *      Synopsis:       Initialize various state variables for each time
01962  *                          a new tokenization scan is started.
01963  *
01964  *      Inputs:
01965  *         Parameters:             NONE
01966  *
01967  *      Outputs:
01968  *         Returned Value:         NONE
01969  *         Global Variables:   Initialized to:
01970  *             base                            0x0a (I.e., base = "decimal")
01971  *             nextfcode                       By  reset_fcode_ranges()
01972  *             pci_is_last_image               TRUE
01973  *             incolon                         FALSE
01974  *         Local Static Variables:
01975  *             hdr_flag                  FLAG_HEADERLESS (unless over-ridden)
01976  *             is_instance                     FALSE
01977  *             last_colon_filename             NULL
01978  *             instance_filename               NULL
01979  *             dev_change_instance_warning     TRUE
01980  *             instance_definer_gap            FALSE
01981  *             need_to_pop_source              FALSE
01982  *             first_fc_starter                TRUE
01983  *             ret_stk_depth                   0
01984  *         Memory Freed
01985  *             Copies of input-file name in  last_colon_filename  and
01986  *                 instance_filename , if allocated.
01987  *
01988  **************************************************************************** */
01989 
01990 void init_scan_state( void)
01991 {
01992     base = 0x0a;
01993     pci_is_last_image = TRUE;
01994     incolon = FALSE;
01995     is_instance = FALSE;
01996     set_hdr_flag( FLAG_HEADERLESS);
01997     reset_fcode_ranges();
01998     first_fc_starter = TRUE;
01999     if ( last_colon_filename != NULL ) free( last_colon_filename);
02000     if ( instance_filename != NULL ) free( instance_filename);
02001     last_colon_filename = NULL;
02002     instance_filename = NULL;
02003     dev_change_instance_warning = TRUE;
02004     instance_definer_gap = FALSE;
02005     need_to_pop_source = FALSE;
02006     ret_stk_depth = 0;
02007 }
02008 
02009 
02010 /* **************************************************************************
02011  *
02012  *      Function name:  collect_input_filename
02013  *      Synopsis:       Save a copy of the current input file name in the
02014  *                          given variable, for error-reporting purposes
02015  *
02016  *      Inputs:
02017  *         Parameters:
02018  *             saved_nam                    Pointer to pointer for copy of name
02019  *         Global Variables:
02020  *             iname                        Current input file name
02021  *         Local Static Variables:
02022  *
02023  *      Outputs:
02024  *         Returned Value:                  NONE
02025  *         Supplied Pointers:
02026  *             *saved_nam                   Copy of name
02027  *         Memory Allocated
02028  *             For copy of input file name
02029  *         When Freed?
02030  *             Subsequent call to this routine with same pointer
02031  *             (Last copy made will be freed if starting a new tokenization,
02032  *                 otherwise, will persist until end of program.) 
02033  *         Memory Freed
02034  *             Previous copy in same pointer.
02035  *
02036  *      Process Explanation:
02037  *          If there is a previous copy, and it still matches the current
02038  *              input-file name, we don't need to free or re-allocate.
02039  *
02040  **************************************************************************** */
02041 
02042 static void collect_input_filename( char **saved_nam)
02043 {
02044     bool update_lcfn = TRUE;    /*  Need to re-allocate?  */
02045     if ( *saved_nam != NULL )
02046     {
02047         if ( strcmp( *saved_nam, iname) == 0 )
02048         {
02049             /*  Last collected filename unchanged from iname  */
02050             update_lcfn = FALSE;
02051         }else{
02052             free( *saved_nam);
02053         }
02054     }
02055     if ( update_lcfn )
02056     {
02057         *saved_nam = strdup(iname);
02058     }
02059 } 
02060 
02061 /* **************************************************************************
02062  *
02063  *      Function name:  test_in_colon
02064  *      Synopsis:       Error-check whether a word is being used in the
02065  *                      correct state, relative to being inside a colon
02066  *                      definition; issue a message if it's not.
02067  *      
02068  *      Inputs:
02069  *         Parameters:
02070  *             wname            The name of the word in question
02071  *             sb_in_colon      TRUE if the name should be used inside
02072  *                                  a colon-definition only; FALSE if
02073  *                                  it may only be used outside of a
02074  *                                  colon-definition.
02075  *             severity         Type of error/warning message to call.
02076  *                                  usually either WARNING or TKERROR
02077  *             use_instead      Word the error-message should suggest be
02078  *                                  used "instead".  This may be a NULL,
02079  *                                  in which case the "suggestion" part
02080  *                                  of the message will simply be omitted.
02081  *         Global Variables:
02082  *             incolon          State of the tokenization; TRUE if inside
02083  *                                  a colon definition
02084  *
02085  *      Outputs:
02086  *         Returned Value:     TRUE if no error.
02087  *         Printout:           Error messages as indicated.
02088  *
02089  *      Error Detection:
02090  *          If the state, relative to being inside a colon-definition,
02091  *              is not what the parameter says it should be, issue a
02092  *              message of the indicated severity, and return FALSE.
02093  *
02094  **************************************************************************** */
02095 
02096 static bool test_in_colon ( char *wname,
02097                            bool sb_in_colon,    /*  "Should Be IN colon"  */
02098                                 int severity,
02099                                      char *use_instead)
02100 {
02101     bool is_wrong;
02102     bool retval = TRUE ;
02103 
02104     is_wrong = BOOLVAL(( sb_in_colon != FALSE ) != ( incolon != FALSE )) ;
02105     if ( is_wrong )
02106     {  
02107         char *ui_pt1 = "";
02108         char *ui_pt2 = "";
02109         char *ui_pt3 = "";
02110         retval = FALSE;
02111         if ( use_instead != NULL )
02112         {
02113             ui_pt1 = "  Use  ";
02114             ui_pt2 = use_instead;
02115             ui_pt3 = "  instead.";
02116         }
02117         tokenization_error ( severity, "The word  %s  should not be used "
02118             "%sside of a colon definition.%s%s%s\n", strupr(wname),
02119                 sb_in_colon ? "out" : "in", ui_pt1, ui_pt2, ui_pt3 );
02120     }
02121     return ( retval );
02122 }
02123 
02124 /* **************************************************************************
02125  *
02126  *      Function name:  must_be_deep_in_do
02127  *      Synopsis:       Check that the statement in question is called 
02128  *                          from inside the given depth of structures
02129  *                          of the  DO ... LOOP -type (i.e., any combination
02130  *                          of DO  or ?DO  and  LOOP  or  +LOOP ).
02131  *                      Show an error if it is not.
02132  *
02133  **************************************************************************** */
02134 
02135 static void must_be_deep_in_do( int how_deep )
02136 {
02137     int functional_depth = do_loop_depth;
02138     if ( incolon )
02139     {
02140         functional_depth -= last_colon_do_depth;
02141     }
02142     if ( functional_depth < how_deep )
02143     {
02144         char deep_do[64] = "";
02145         int indx;
02146         bool prefix = FALSE;
02147 
02148         for ( indx = 0; indx < how_deep ; indx ++ )
02149         {
02150             strcat( deep_do, "DO ... ");
02151         }
02152         for ( indx = 0; indx < how_deep ; indx ++ )
02153         {
02154             if ( prefix )
02155             {
02156                 strcat( deep_do, " ... ");
02157             }
02158             strcat( deep_do, "LOOP");
02159             prefix = TRUE;
02160         }
02161 
02162         tokenization_error( TKERROR,
02163             "%s outside of  %s  structure", strupr(statbuf), deep_do);
02164         in_last_colon();
02165     }
02166 
02167 }
02168 
02169 /* **************************************************************************
02170  *
02171  *      Function name:  bump_ret_stk_depth
02172  *      Synopsis:       Increment or decrement the Return-Stack-Usage-Depth
02173  *                          counter.
02174  *
02175  *      Inputs:
02176  *         Parameters:
02177  *             bump              Amount by which to increment;
02178  *                                   negative number to decrement.
02179  *         Local Static Variables:
02180  *             ret_stk_depth     The Return-Stack-Usage-Depth counter
02181  *
02182  *      Outputs:
02183  *         Returned Value:        NONE
02184  *         Local Static Variables:
02185  *             ret_stk_depth     Incremented or decremented
02186  *
02187  *      Process Explanation:
02188  *          This simple-seeming function is actually a place-holder
02189  *             for future expansion.  Proper error-detection of
02190  *             Return-Stack usage is considerably more complex than
02191  *             what we are implementing here, and is deferred for a
02192  *             later revision.
02193  *
02194  *      Still to be done:
02195  *          Full detection of whether the Return-Stack has been cleared
02196  *              when required, including analysis of Return-Stack usage
02197  *              within Flow-Control constructs, and before Loop elements...
02198  *
02199  *      Extraneous Remarks:
02200  *          Some FORTHs use a Loop-Control stack separate from the Return-
02201  *              -Stack, but others use the Return-Stack to keep LOOP-control
02202  *              elements.  An FCode program must be portable between different
02203  *              environments, and so must adhere to the restrictions listed
02204  *              in the ANSI Spec:
02205  *
02206  *       3.2.3.3   Return stack  
02207  *        . . . . . .
02208  *       A program may use the return stack for temporary storage during the
02209  *          execution of a definition subject to the following restrictions:
02210  *              A program shall not access values on the return stack (using R@,
02211  *                  R>, 2R@ or 2R>) that it did not place there using >R or 2>R;
02212  *              A program shall not access from within a do-loop values placed
02213  *                  on the return stack before the loop was entered;
02214  *              All values placed on the return stack within a do-loop shall
02215  *                  be removed before I, J, LOOP, +LOOP, UNLOOP, or LEAVE is
02216  *                  executed;
02217  *              All values placed on the return stack within a definition
02218  *                  shall be removed before the definition is terminated
02219  *                  or before EXIT is executed.
02220  *
02221  **************************************************************************** */
02222 
02223 static void bump_ret_stk_depth( int bump)
02224 {
02225     ret_stk_depth += bump;
02226 }
02227 
02228 
02229 /* **************************************************************************
02230  *
02231  *      Function name:  ret_stk_balance_rpt
02232  *      Synopsis:       Display a Message if usage of the Return-Stack
02233  *                          appears to be out of balance.
02234  *
02235  *      Inputs:
02236  *         Parameters:
02237  *             before_what         Phrase to use in Message;
02238  *                                     if NULL, use statbuf...
02239  *             clear_it            TRUE if this call should also clear the
02240  *                                     Return-Stack-Usage-Depth counter
02241  *         Global Variables:
02242  *             statbuf             Word currently being processed
02243  *         Local Static Variables:
02244  *             ret_stk_depth       The Return-Stack-Usage-Depth counter
02245  *
02246  *      Outputs:
02247  *         Returned Value:         NONE
02248  *         Local Static Variables:
02249  *             ret_stk_depth       May be cleared
02250  *
02251  *      Error Detection:
02252  *          Based simply on whether the Return-Stack-Usage-Depth counter
02253  *              is zero.  This is a weak and uncertain implementation;
02254  *              therefore, the Message will be a WARNING phrased with
02255  *              some equivocation.
02256  *
02257  *      Process Explanation:
02258  *          Proper detection of Return-Stack usage errors is considerably
02259  *              more complex, and is deferred for a future revision.
02260  *
02261  *      Still to be done:
02262  *          Correct analysis of Return-Stack usage around Flow-Control
02263  *              constructs.  Consider, for instance, the following:
02264  * 
02265  *          blablabla >R  yadayada IF  R> gubble ELSE flubble R>  THEN
02266  * 
02267  *              It is, in fact, correct, but the present scheme would
02268  *              tag it as a possible error.  Conversely, something like:
02269  * 
02270  *          blablabla >R  yadayada IF  R> gubble THEN
02271  * 
02272  *              would not get tagged, even though it is actually an error.
02273  * 
02274  *          The current simple scheme also does not cover Return-Stack
02275  *              usage within Do-Loops or before Loop elements like I and
02276  *              J or UNLOOP or LEAVE.  Implementing something like that
02277  *              would probably need to be integrated in with Flow-Control
02278  *              constructs, and will be noted in  flowcontrol.c
02279  *
02280  **************************************************************************** */
02281 
02282 static void ret_stk_balance_rpt( char *before_what, bool clear_it)
02283 {
02284     if ( ret_stk_depth != 0 )
02285     {
02286         char *what_flow = ret_stk_depth < 0 ? "deficit" : "excess" ;
02287         char *what_phr =  before_what != NULL ? before_what : strupr(statbuf);
02288 
02289         tokenization_error( WARNING,
02290             "Possible Return-Stack %s before %s", what_flow, what_phr);
02291         in_last_colon();
02292 
02293         if ( clear_it )
02294         {
02295             ret_stk_depth = 0;
02296         }
02297     }
02298 }
02299 
02300 
02301 /* **************************************************************************
02302  *
02303  *      Function name:  ret_stk_access_rpt
02304  *      Synopsis:       Display a Message if an attempt to access a value
02305  *                          on the Return-Stack appears to occur before
02306  *                          one was placed there.
02307  *
02308  *      Inputs:
02309  *         Parameters:                NONE
02310  *         Global Variables:
02311  *             statbuf                Word currently being processed
02312  *         Local Static Variables:
02313  *             ret_stk_depth          The Return-Stack-Usage-Depth counter
02314  *
02315  *      Outputs:
02316  *         Returned Value:             NONE
02317  *
02318  *      Error Detection:
02319  *          Equivocal WARNING, based simply on whether the Return-Stack-
02320  *              -Usage-Depth counter not positive.
02321  *
02322  *      Process Explanation:
02323  *          Proper detection is deferred...
02324  *
02325  *      Still to be done:
02326  *          Correct analysis of Return-Stack usage...
02327  *
02328  **************************************************************************** */
02329 
02330 static void ret_stk_access_rpt( void)
02331 {
02332     if ( ret_stk_depth <= 0 )
02333     {
02334         tokenization_error( WARNING,
02335             "Possible Return-Stack access attempt by %s "
02336                 "without value having been placed there",
02337                 strupr(statbuf) );
02338         in_last_colon();
02339     }
02340 }
02341 
02342 
02343 
02344 /* **************************************************************************
02345  *
02346  *      Function name:  encode_file
02347  *      Synopsis:       Input a (presumably binary) file and encode it
02348  *                      as a series of strings which will be accumulated
02349  *                      and encoded in a manner appropriate for a property.
02350  *
02351  *      Associated Tokenizer directive:        encode-file        
02352  *
02353  *      Error Detection:
02354  *          Handled by support routines.
02355  *
02356  **************************************************************************** */
02357 
02358 static void encode_file( const char *filename )
02359 {
02360         FILE *f;
02361         size_t s;
02362         int num_encoded=0;
02363         
02364         tokenization_error( INFO, "ENCODing File %s\n", filename );
02365 
02366         f = open_expanded_file( filename, "rb", "encoding");
02367         if( f != NULL )
02368         {
02369             while( (s=fread(statbuf, 1, STRING_LEN_MAX, f)) )
02370             {
02371                     emit_token("b(\")");
02372                     emit_string(statbuf, s);
02373                     emit_token("encode-bytes");
02374                     if( num_encoded )
02375                             emit_token("encode+");
02376                     num_encoded += s;
02377             }
02378             fclose( f );
02379             tokenization_error ( INFO, "ENCODed %d bytes.\n", num_encoded);
02380         }
02381 }
02382 
02383 /* **************************************************************************
02384  *
02385  *      Function name:  check_name_length
02386  *      Synopsis:       If the length of a user-defined name exceeds the
02387  *                          ANSI-specified maximum of 31 characters, issue
02388  *                          a message.  This is a hard-coded limit.
02389  *                      Although our Tokenizer can handle longer names,
02390  *                          they will cause big problems when encountered
02391  *                          by an FCode interpreter.
02392  *                      If the name is going to be included in the binary
02393  *                          output, the message severity must be an ERROR.
02394  *                      Otherwise, if the name is HEADERLESS, the severity
02395  *                          can be reduced to a Warning; if the name is only
02396  *                          defined in "Tokenizer Escape" mode the message
02397  *                          severity can be further reduced to an Advisory.
02398  *
02399  *      Inputs:
02400  *         Parameters:
02401  *             wlen                 Length of the newly-created word
02402  *         Global Variables: 
02403  *             in_tokz_esc          TRUE if in "Tokenizer Escape" mode.
02404  *         Local Static Variables:
02405  *             hdr_flag             State of headered-ness for name-creation
02406  *
02407  *      Outputs:
02408  *         Returned Value:          NONE
02409  *         Global Variables:        
02410  *         Printout:                ERROR message if applicable.
02411  *
02412  *      Error Detection:
02413  *             The whole point of this routine.  
02414  *
02415  *      Revision History:
02416  *          Updated Wed, 20 Jul 2005 by David L. Paktor
02417  *               Escalated from merely an informative warning to a TKERROR 
02418  *          Updated Fri, 21 Oct 2005 by David L. Paktor
02419  *               Adjust severity if name doesn't go into the FCode anyway...
02420  *
02421  **************************************************************************** */
02422 
02423 void check_name_length( signed long wlen )
02424 {
02425     if ( wlen > 31 )
02426     {
02427         int severity = TKERROR;
02428         if ( in_tokz_esc )
02429         {   severity = INFO;
02430         }else{
02431             if (hdr_flag == FLAG_HEADERLESS)
02432             {   severity = WARNING;
02433             }
02434         }
02435         tokenization_error( severity,
02436             "ANSI Forth does not permit definition of names "
02437                 "longer than 31 characters.\n" );
02438     }
02439 
02440 }
02441 
02442 
02443 /* **************************************************************************
02444  *
02445  *      Function name:  definer_name
02446  *      Synopsis:       Given a defining-word internal token, return
02447  *                      a printable string for the definer, for use
02448  *                      in an error-message.
02449  *
02450  *      Inputs:
02451  *         Parameters:
02452  *             definer             Internal token for the defining-word
02453  *             reslt_ptr           Pointer to string-pointer that takes
02454  *                                     the result, if successful
02455  *
02456  *      Outputs:
02457  *         Returned Value:         TRUE if definer was recognized
02458  *         Supplied Pointers:
02459  *             *reslt_ptr          If successful, points to printable string;
02460  *                                     otherwise, left unchanged.
02461  *
02462  *
02463  **************************************************************************** */
02464 
02465 static bool definer_name(fwtoken definer, char **reslt_ptr)
02466 {
02467     bool retval = TRUE;
02468     switch (definer)
02469     {
02470         case VARIABLE:
02471             *reslt_ptr = "VARIABLE";
02472             break;
02473         case DEFER:
02474             *reslt_ptr = "DEFER";
02475             break;
02476         case VALUE:
02477             *reslt_ptr = "VALUE";
02478             break;
02479         case BUFFER:
02480             *reslt_ptr = "BUFFER";
02481             break;
02482         case CONST:
02483             *reslt_ptr = "CONSTANT";
02484             break;
02485         case COLON:
02486             *reslt_ptr = "COLON";
02487             break;
02488         case CREATE:
02489             *reslt_ptr = "CREATE";
02490             break;
02491         case FIELD:
02492             *reslt_ptr = "FIELD";
02493             break;
02494         case MACRO_DEF:
02495             *reslt_ptr = "MACRO";
02496             break;
02497         case ALIAS:
02498             *reslt_ptr = "ALIAS";
02499             break;
02500         case LOCAL_VAL:
02501             *reslt_ptr = "Local Value name";
02502             break;
02503         default:
02504             retval = FALSE;
02505     }
02506 
02507     return ( retval);
02508 }
02509 
02510 
02511 /* **************************************************************************
02512  *
02513  *      Function name:  as_a_what
02514  *      Synopsis:       Add the phrase "as a[n] <DEF'N_TYPE>" for the given
02515  *                          definition-type to the given string buffer.
02516  *
02517  *      Inputs:
02518  *         Parameters:
02519  *             definer                 Internal token for the defining-word
02520  *             as_what                 The string buffer to which to add.
02521  *
02522  *      Outputs:
02523  *         Returned Value:             TRUE if an assigned name was found
02524  *                                         for the given definer and text
02525  *                                         was added to the buffer.
02526  *         Supplied Pointers:
02527  *             *as_what                Text is added to this buffer.
02528  *
02529  *      Process Explanation:
02530  *          The calling routine is responsible to make sure the size of
02531  *              the buffer is adequate.  Allow 25 for this routine.
02532  *          The added text will not have spaces before or after; if any
02533  *              are needed, they, too, are the responsibility of the
02534  *              calling routine.  The return value gives a helpful clue.
02535  *
02536  **************************************************************************** */
02537 
02538 bool as_a_what( fwtoken definer, char *as_what)
02539 {
02540     char *defn_type_name;
02541     bool retval = definer_name(definer, &defn_type_name);
02542     if ( retval )
02543     {
02544         strcat( as_what, "as a");
02545         /*  Handle article preceding definer name
02546          *      that starts with a vowel.
02547          */
02548         /*  HACK:  Only one definer name -- ALIAS --
02549          *      begins with a vowel.  Take advantage
02550          *      of that...
02551          *  Otherwise, we'd need to do something involving
02552          *      strchr( "AEIOU", defn_type_name[0] )
02553          */
02554         if ( definer == ALIAS ) strcat( as_what, "n" );
02555 
02556         strcat( as_what, " ");
02557         strcat( as_what, defn_type_name);
02558     }
02559     return( retval);
02560 }
02561 
02562 
02563 /* **************************************************************************
02564  *
02565  *      Function name:  lookup_word
02566  *      Synopsis:       Find the TIC-entry for the given word in the Current
02567  *                          mode -- relative to "Tokenizer-Escape" -- and
02568  *                          Scope into which definitions are being entered.
02569  *                      Optionally, prepare text for various Message types.
02570  *
02571  *      Inputs:
02572  *         Parameters:
02573  *             stat_name               Word to look up
02574  *             where_pt1               Pointer to result-display string, part 1
02575  *                                         NULL if not preparing text
02576  *             where_pt2               Pointer to result-display string, part 2
02577  *                                         NULL if not preparing text
02578  *         Global Variables:
02579  *             in_tokz_esc             TRUE if in "Tokenizer Escape" mode.
02580  *             scope_is_global         TRUE if "global" scope is in effect
02581  *             current_device_node     Current dev-node data-struct
02582  *             ibm_locals              TRUE if IBM-style Locals are enabled
02583  *
02584  *      Outputs:
02585  *         Returned Value:             Pointer to TIC-entry; NULL if not found
02586  *         Supplied Pointers:
02587  *             *where_pt1              Result display string, part 1 of 2
02588  *             *where_pt2              Result display string, part 2 of 2
02589  *
02590  *      Process Explanation:
02591  *          We will set the two-part result-display string in this routine
02592  *              because only here do we know in which vocabulary the word
02593  *              was found.
02594  *          Pre-load the two parts of the result-display string.
02595  *          If we are in "Tokenizer Escape" mode, look up the word:  first,
02596  *              in the "Tokenizer Escape" Vocabulary, or, if not found,
02597  *              among the "Shared" words.
02598  *          Otherwise, we're in Normal" mode.  Look it up:  first, among the
02599  *              Locals, if IBM-style Locals are enabled (it can possibly be
02600  *              one if "Tokenizer Escape" mode was entered during a colon-
02601  *              -definition); then, if it was not found and if "Device"
02602  *              scope is in effect, look in the current device-node; then,
02603  *              if not found, in the "core" vocabulary.
02604  *          Load the second part of the result-display string with the
02605  *               appropriate phrase for whereever it was found.
02606  *          Then adjust the first part of the result-display string with
02607  *               the definer, if known.
02608  *
02609  *          The two strings will be formatted to be printed adjacently,
02610  *              without any additional spaces in the printf() format.
02611  *          The first part of the result-display string will not start with
02612  *              a space, but will have an intermediate space if necessary.
02613  *          The second part of the result-display string will not start
02614  *              with a space, and will contain the terminating new-line
02615  *              if appropriate.  It might or might not have been built
02616  *              with a call to  in_what_node().
02617  *
02618  *          If the calling routine displays the result-display strings,
02619  *              it should follow-up with a call to  show_node_start()
02620  *              This will be harmless if  in_what_node()  was not used
02621  *              in the construction of the result-display string.
02622  *          If the calling routine is NOT going to display the result strings,
02623  *              it should pass NULLs for the string-pointer pointers.
02624  *
02625  *          The second part of the string consists of pre-coded phrases;
02626  *              therefore, we can directly assign the pointer.
02627  *          The first part of the string, however, has developed into
02628  *              something constructed "on the fly".  Earlier, it, too,
02629  *              had been a directly-assignable pointer; all the callers
02630  *              to this routine expect that.  Rather than change all the
02631  *              callers, we will assign a local buffer for it.
02632  *
02633  *      Extraneous Remarks:
02634  *          We had to add the rule allowing where_pt1 or where_pt2 to be
02635  *              NULL after we introduced the  in_what_node()  function.
02636  *              We had cases where residue from a lookup for processing
02637  *              showed up later in an unrelated Message.  The NULL rule
02638  *              should prevent that.
02639  *
02640  **************************************************************************** */
02641 
02642 static char lookup_where_pt1_buf[32];
02643 
02644 tic_hdr_t *lookup_word( char *stat_name, char **where_pt1, char **where_pt2 )
02645 {
02646     tic_hdr_t *found = NULL;
02647     bool trail_space = TRUE;
02648     bool doing_lookup = BOOLVAL( ( where_pt1 != NULL )
02649                               && ( where_pt2 != NULL ) );
02650     char *temp_where_pt2 = "in the core vocabulary.\n";
02651 
02652     lookup_where_pt1_buf[0] = 0;             /*  Init'lz part-1 buffer  */
02653 
02654     /*  "Core vocab" refers both to shared fwords and built-in tokens.  */
02655 
02656     /*  Distinguish between "Normal" and "Tokenizer Escape" mode  */
02657     if ( in_tokz_esc )
02658     {   /*  "Tokenizer Escape" mode.  */
02659         found = lookup_tokz_esc( stat_name);
02660         if ( found != NULL )
02661         {
02662             temp_where_pt2 = in_tkz_esc_mode;
02663         }else{
02664             /*  "Core vocabulary".  */
02665             found = lookup_shared_word( stat_name);
02666         }
02667     }else{
02668         /*  "Normal" tokenization mode  */
02669         if ( ibm_locals )
02670         {
02671             found = lookup_local( stat_name);
02672             if ( doing_lookup && ( found != NULL ) )
02673             {
02674                 trail_space = FALSE;
02675                 temp_where_pt2 = ".\n";
02676             }
02677         }
02678 
02679         if ( found == NULL )
02680         {
02681             found = lookup_in_dev_node( stat_name);
02682             if ( found != NULL )
02683             {
02684                 if ( doing_lookup )
02685                 {
02686                     temp_where_pt2 = in_what_node( current_device_node);
02687                 }
02688             }else{
02689                 /*  "Core vocabulary".  */
02690                 found = lookup_core_word( stat_name);
02691             }
02692         }
02693     }
02694 
02695     if ( ( doing_lookup ) && ( found != NULL ) )
02696     {
02697         if ( as_a_what( found->fword_defr, lookup_where_pt1_buf) )
02698         {
02699             if ( trail_space )
02700             {
02701                 strcat(lookup_where_pt1_buf, " ");
02702             }
02703         }
02704         *where_pt1 = lookup_where_pt1_buf;
02705         *where_pt2 = temp_where_pt2;
02706     }
02707     return( found);
02708 }
02709 
02710 /* **************************************************************************
02711  *
02712  *      Function name:  word_exists
02713  *      Synopsis:       Check whether a given word is already defined in the
02714  *                          Current mode -- relative to "Tokenizer-Escape" --
02715  *                          and Scope into which definitions are being entered. 
02716  *                      Used for error-reporting.
02717  *
02718  *      Inputs:
02719  *         Parameters:
02720  *             stat_name                 Word to look up
02721  *             where_pt1                 Pointer to string, part 1 of 2,
02722  *                                          to display in result
02723  *             where_pt2                 Pointer to string, part 2 of 2,
02724  *                                          to display in result
02725  *
02726  *      Outputs:
02727  *         Returned Value:               TRUE if the name exists.
02728  *         Supplied Pointers:
02729  *             *where_pt1                Result display string, part 1 of 2
02730  *             *where_pt2                Result display string, part 2 of 2
02731  *
02732  *      Process Explanation:
02733  *          If the calling routine displays the result-display strings,
02734  *              it should follow-up with a call to  show_node_start()
02735  *
02736  *      Extraneous Remarks:
02737  *          This used to be a much heftier routine; now it's just
02738  *              a wrapper around  lookup_word() .
02739  *
02740  **************************************************************************** */
02741 
02742 bool word_exists( char *stat_name, char **where_pt1, char **where_pt2 )
02743 {
02744     bool retval = FALSE;
02745     tic_hdr_t *found = lookup_word( stat_name, where_pt1, where_pt2 );
02746 
02747     if ( found != NULL )
02748     {
02749         retval = TRUE;
02750     }
02751 
02752     return( retval);
02753 }
02754 
02755 /* **************************************************************************
02756  *
02757  *      Function name:  warn_if_duplicate
02758  *      Synopsis:       Check whether a given word is already defined in
02759  *                          the current mode and issue a warning if it is.
02760  *
02761  *      Inputs:
02762  *         Parameters:
02763  *             stat_name                Word to check
02764  *         Global Variables:
02765  *             verbose_dup_warning      Whether to run the check at all.
02766  *         Local Static Variables:
02767  *             do_not_overload          FALSE if  OVERLOAD  is in effect.
02768  *
02769  *      Outputs:
02770  *         Returned Value:              NONE
02771  *         Local Static Variables:
02772  *             do_not_overload          Restored to TRUE
02773  *         Printout:
02774  *             Warning message if a duplicate.
02775  *
02776  *      Error Detection:
02777  *             None.  This is merely an informative warning.
02778  *
02779  *      Process Explanation:
02780  *          "Current mode" -- meaning, whether the tokenizer is operating
02781  *              in "Tokenizer Escape" mode or in normal tokenization mode --
02782  *              will be recognized by the  word_exists()  function.
02783  *
02784  *      Extraneous Remarks:
02785  *          The  OVERLOAD  directive is our best shot at creating a more
02786  *              fine-grained way to temporarily bypass this test when
02787  *              deliberately overloading a name.  It would be nice to have
02788  *              a mechanism, comparable to the classic
02789  *                     WARNING @ WARNING OFF  .....  WARNING !
02790  *              that could be applied to a range of definitions, but:
02791  *              (1)  That would require more of a true FORTH infrastructure;
02792  *                       hence, more effort than I am willing to invest, at
02793  *                       this juncture, for such a small return,
02794  *              and
02795  *              (2)  Most intentional-overloading ranges only cover a
02796  *                       single definition anyway.
02797  *
02798  **************************************************************************** */
02799 
02800 void warn_if_duplicate( char *stat_name)
02801 {
02802     if ( verbose_dup_warning && do_not_overload )
02803     {
02804         char *where_pt1;
02805         char *where_pt2; 
02806         if ( word_exists( stat_name, &where_pt1, &where_pt2) )
02807         {
02808             tokenization_error( WARNING, 
02809                 "Duplicate definition:   %s  already exists %s%s",
02810                     stat_name, where_pt1, where_pt2 );
02811             show_node_start();
02812         }
02813     }
02814     do_not_overload = TRUE;
02815 }
02816 
02817 
02818 /* **************************************************************************
02819  *
02820  *      Function name:  glob_not_allowed
02821  *      Synopsis:       Print a Message that "XXX is not allowed."
02822  *                          because Global Scope is in effect.
02823  *                      Used from several places...
02824  *      
02825  *      Inputs:
02826  *         Parameters:
02827  *             severity              Severity of the Message
02828  *             not_ignoring          FALSE = "Ignoring", for the part of the
02829  *                                       message about "How It's being Handled"
02830  *         Global Variables:
02831  *             statbuf               Disallowed word currently being processed
02832  *
02833  *      Outputs:
02834  *         Returned Value:           NONE
02835  *         Printout:                 Message of given severity.
02836  *
02837  **************************************************************************** */
02838 
02839 static void glob_not_allowed( int severity, bool not_ignoring)
02840 {
02841     tokenization_error( severity, "Global Scope is in effect; "
02842                         "%s not allowed.  %s.\n",
02843                             strupr(statbuf), 
02844                                  not_ignoring ?
02845                                      "Attempting to compensate.." :
02846                                           "Ignoring" );
02847 }
02848 
02849 
02850 /* **************************************************************************
02851  *
02852  *      Function name:  not_in_dict
02853  *      Synopsis:       Print the message "XXX is not in dictionary."
02854  *                      Used from several places...
02855  *      
02856  *      Inputs:
02857  *         Parameters:
02858  *             stat_name                Word that could not be processed
02859  *
02860  *      Outputs:
02861  *         Returned Value:              NONE
02862  *         Printout:         Error message.
02863  *
02864  **************************************************************************** */
02865 
02866 static void not_in_dict( char *stat_name)
02867 {
02868     tokenization_error ( TKERROR,
02869         "Word  %s  is not in dictionary.\n", stat_name);
02870 }
02871 
02872 /* **************************************************************************
02873  *
02874  *      Function name:  tokenized_word_error
02875  *      Synopsis:       Report an error when a word could not be processed
02876  *                          by the tokenizer.  Messages will vary...
02877  *      
02878  *      Inputs:
02879  *         Parameters:
02880  *             stat_name                Word that could not be processed
02881  *         Global Variables:
02882  *             in_tokz_esc    TRUE if tokenizer is in "Tokenizer Escape" mode.
02883  *
02884  *      Outputs:
02885  *         Returned Value:              NONE
02886  *         Printout:          Error message.  Possible Advisory about
02887  *
02888  *      Error Detection:
02889  *          Error was detected by the calling routine...
02890  *
02891  *      Process Explanation:
02892  *          If the tokenizer is in "Tokenizer Escape" mode, the word might
02893  *              be one that can be used in normal tokenization mode;
02894  *          Conversely, if the tokenizer is in normal-tokenization mode,
02895  *              the word might be one that can be used in the "Escape" mode.
02896  *          Or, the word is completely unknown.
02897  *          Recognizing the current mode is handled by  word_exists()
02898  *          However, we need to test for the *converse* of the current mode,
02899  *              so before we call  word_exists()  we are going to save and
02900  *              invert the setting of  in_tokz_esc  (and afterwards, of
02901  *              course, restore it...)
02902  *
02903  **************************************************************************** */
02904 
02905 static void tokenized_word_error( char *stat_name)
02906 {
02907     char *where_pt1;
02908     char *where_pt2;
02909     bool found_somewhere;
02910     
02911     bool sav_in_tokz_esc = in_tokz_esc;
02912     in_tokz_esc = INVERSE(sav_in_tokz_esc);
02913 
02914     found_somewhere = word_exists( stat_name, &where_pt1, &where_pt2);
02915     if ( found_somewhere )
02916     {
02917         tokenization_error ( TKERROR, "The word %s is %s recognized "
02918             "in tokenizer-escape mode.\n",
02919                  stat_name, sav_in_tokz_esc ? "not" :  "only" );
02920     } else {
02921         not_in_dict( stat_name);
02922     }
02923 
02924     if ( INVERSE(exists_in_ancestor( stat_name)) )
02925     {
02926         if ( found_somewhere && sav_in_tokz_esc )
02927         {
02928             tokenization_error(INFO,
02929                 "%s is defined %s%s", stat_name, where_pt1, where_pt2 );
02930             show_node_start();
02931         }
02932     }
02933 
02934     in_tokz_esc = sav_in_tokz_esc;
02935 }
02936 
02937 
02938 /* **************************************************************************
02939  *
02940  *      Function name:  unresolved_instance
02941  *      Synopsis:       Print the "unresolved instance" message
02942  *
02943  *      Inputs:
02944  *         Parameters:
02945  *             severity                    Severity of the Message
02946  *         Local Static Variables:
02947  *             instance_filename           File where "instance" invoked
02948  *             instance_lineno             Line number where "instance" invoked
02949  *
02950  *      Outputs:
02951  *         Returned Value:                 NONE
02952  *         Printout:          Message.
02953  *
02954  *      Error Detection:
02955  *          Error was detected by the calling routine...
02956  *
02957  **************************************************************************** */
02958 
02959 static void unresolved_instance( int severity)
02960 {
02961     tokenization_error( severity, "Unresolved \"INSTANCE\"" );
02962     just_where_started( instance_filename, instance_lineno );
02963 }
02964 
02965 
02966 /* **************************************************************************
02967  *
02968  *      Function name:  modified_by_instance
02969  *      Synopsis:       Print the "[not] modified by instance" message
02970  *
02971  *      Inputs:
02972  *         Parameters:
02973  *             definer                     Internal token for the defining-word
02974  *             was_modded                  FALSE if "not modified..."
02975  *         Local Static Variables:
02976  *             instance_filename           File where "instance" invoked
02977  *             instance_lineno             Line number where "instance" invoked
02978  *
02979  *      Outputs:
02980  *         Returned Value:                 NONE
02981  *         Printout:          WARNING message.
02982  *
02983  *      Error Detection:
02984  *          Error was detected by the calling routine...
02985  *
02986  **************************************************************************** */
02987 
02988 static void modified_by_instance( fwtoken definer, bool was_modded)
02989 {
02990     char *was_not = was_modded ? "was" : "not" ;
02991     char *defn_type_name;
02992 
02993     /*  No need to check the return value  */
02994     definer_name(definer, &defn_type_name);
02995 
02996     tokenization_error ( WARNING,
02997         "%s definition %s modified by \"INSTANCE\"",
02998             defn_type_name, was_not );
02999     just_where_started( instance_filename, instance_lineno );
03000  }
03001 
03002 /* **************************************************************************
03003  *
03004  *      Function name:  validate_instance
03005  *      Synopsis:       If "instance" is in effect, check whether it is
03006  *                          appropriate to the defining-word being called.
03007  *
03008  *      Inputs:
03009  *         Parameters:
03010  *             definer                   Internal token for the defining-word
03011  *         Local Static Variables:
03012  *             is_instance               TRUE if "instance" is in effect.
03013  *             instance_definer_gap      TRUE if invalid definer(s) invoked
03014  *                                           since "instance" went into effect.
03015  *
03016  *      Outputs:
03017  *         Returned Value:               NONE
03018  *         Local Static Variables:
03019  *             is_instance               Reset to FALSE if definer was valid.
03020  *             instance_definer_gap      TRUE if definer was not valid;
03021  *                                           FALSE if definer was valid.
03022  *
03023  *      Error Detection:
03024  *          If "instance" is in effect, the only defining-words that are
03025  *              valid are:  value  variable  defer  or  buffer:  Attempts
03026  *              to use any other defining-word will be reported with a
03027  *              WARNING, but "instance" will remain in effect.
03028  *          If an invalid defining-word was invoked since "instance" went
03029  *              into effect, then, when it is finally applied to a valid
03030  *              definer, issue a WARNING.
03031  *
03032  *      Process Explanation:
03033  *          Implicit in the Standard is the notion that, once INSTANCE has
03034  *              been executed, it remains in effect until a valid defining-
03035  *              word is encountered.  We will do the same.
03036  *
03037  **************************************************************************** */
03038 
03039 static void validate_instance(fwtoken definer)
03040 {
03041     if ( is_instance )
03042     {
03043         bool is_error = TRUE ;
03044 
03045         switch ( definer)
03046         {
03047             case VALUE:
03048             case VARIABLE:
03049             case DEFER:
03050             case BUFFER:
03051                 is_error = FALSE;
03052             /*  No default needed, likewise, no breaks;      */
03053             /*  but some compilers get upset without 'em...  */
03054             default:
03055                 break;
03056         }
03057 
03058         if( is_error )
03059         {
03060             modified_by_instance(definer, FALSE );
03061             instance_definer_gap = TRUE;
03062         }else{
03063             if ( instance_definer_gap )
03064             {
03065                 modified_by_instance(definer, TRUE );
03066             }
03067             is_instance = FALSE;
03068             instance_definer_gap = FALSE;
03069         }
03070     }
03071 }
03072     
03073 
03074 /* **************************************************************************
03075  *
03076  *      Function name:  trace_creation
03077  *      Synopsis:       If the word being created is on the Trace List,
03078  *                          display the appropriate message
03079  *
03080  *      Inputs:
03081  *         Parameters:
03082  *             definer                 Internal token for the defining-word
03083  *             nu_name                 The word being created
03084  *         Global Variables:
03085  *             verbose                 No point in doing all this if we're
03086  *                                         not showing the message anyway...
03087  *             in_tokz_esc             TRUE if we are in Tokenizer-Escape mode
03088  *             scope_is_global         TRUE if "global" scope is in effect
03089  *             current_device_node     Current dev-node data-struct
03090  *
03091  *      Outputs:
03092  *         Returned Value:             NONE
03093  *         Printout:
03094  *             Advisory Message, if the word is on the Trace List.
03095  *
03096  *      Process Explanation:
03097  *          The order of scope-checking is important:
03098  *              A Local has no scope beyond the definition in which it occurs.
03099  *              Tokenizer-Escape mode supercedes "Normal" mode, and renders
03100  *                  moot the differences between Global and Device scope.
03101  *              Global scope is mutually exclusive with Device scope.
03102  *              Device scope needs to identify where the Current device-node
03103  *                  began.
03104  *
03105  **************************************************************************** */
03106 
03107 void trace_creation( fwtoken definer, char *nu_name)
03108 {
03109     if ( verbose )
03110     {
03111         if ( is_on_trace_list( nu_name) )
03112         {
03113             char  as_what[96] = "";
03114             bool show_last_colon = BOOLVAL( definer == LOCAL_VAL);
03115 
03116             as_a_what( definer, as_what);  /*  No need to check return value. */
03117 
03118             /*  Scope-checking starts here, unless  show_last_colon  is TRUE.
03119              *  Come out of this with  as_what[]  filled up and
03120              *      terminated with a new-line, if appropriate,
03121              */
03122             while ( ! show_last_colon )
03123             {
03124                 strcat( as_what, " ");
03125 
03126                 if ( in_tokz_esc )
03127                 {
03128                     strcat( as_what, in_tkz_esc_mode);
03129                     break;
03130                 }
03131 
03132                 if ( scope_is_global )
03133                 {
03134                     strcat( as_what, "with Global scope.\n");
03135                 }else{
03136                     /*  In Device scope.  Show the Current node.   */
03137                     strcat( as_what, in_what_node( current_device_node));
03138                 }
03139                 break;
03140 
03141             }   /*  Destination of BREAKs ...   */
03142 
03143             tokenization_error(INFO, "Creating %s %s", nu_name, as_what);
03144 
03145             if ( show_last_colon )
03146             {
03147                 in_last_colon();
03148             }else{
03149                 show_node_start();
03150             }
03151 
03152         }
03153     }
03154 }
03155 
03156 /* **************************************************************************
03157  *
03158  *      Function name:  create_word
03159  *      Synopsis:       
03160  *
03161  *      Inputs:
03162  *         Parameters:
03163  *             definer             Internal token for the defining-word
03164  *         Global Variables:
03165  *             control_stack_depth Number of "Control Stack" entries in effect
03166  *             nextfcode           FCode-number to be assigned to the new name
03167  *             statbuf             Symbol last read from the input stream
03168  *             pc                  Input-source Scanning pointer
03169  *             hdr_flag            State of headered-ness for name-creation
03170  *             force_tokens_case   If TRUE, force token-names' case in FCode
03171  *             force_lower_case_tokens
03172  *                                 If  force_tokens_case  is TRUE, this
03173  *                                     determines which case to force
03174  *             iname               Input-source file name; for error-reporting
03175  *             lineno              Input-source Line number; also for err-rep't
03176  *
03177  *      Outputs:
03178  *         Returned Value:         TRUE if successful
03179  *         Global Variables:  
03180  *             nextfcode           Incremented  (by bump_fcode() )
03181  *             statbuf             Advanced to next symbol; must be re-read
03182  *             pc                  Advanced, then restored to previous value
03183  *         Memory Allocated
03184  *             Copy of the name being defined, by support routine.
03185  *             Copy of input-source file name, for error-reporting
03186  *         When Freed?
03187  *             Copy of name being defined is freed when Current Device Vocab
03188  *                 is "finished", or at end of tokenization.
03189  *             Copy of input-source file name is freed at end of this routine.
03190  *
03191  *      Error Detection:
03192  *          ERROR if already inside a colon-definition.  Discontinue
03193  *              processing and return FALSE.
03194  *          ERROR if inside a control-structure.  Continue processing,
03195  *              though, to catch other errors, and even return TRUE;
03196  *              except:  leave the new token undefined. 
03197  *          Warning on duplicate name (subject to command-line control)
03198  *          Message if name is excessively long; Warning if headerless.
03199  *          FATAL if the value of  nextfcode  is larger than the legal
03200  *              maximum for an FCode, (0x0fff).
03201  *
03202  *      Revision History:
03203  *      Updated Thu, 24 Mar 2005 by David L. Paktor
03204  *          Optional warning when name about to be created is a
03205  *              duplicate of an existing name.
03206  *      Updated Wed, 30 Mar 2005 by David L. Paktor
03207  *          Warning when name length exceeds ANSI-specified max (31 chars).
03208  *      Updated Tue, 05 Apr 2005 by David L. Paktor
03209  *          Add "definer" parameter and call to  add_definer() .  Part
03210  *              of the mechanism to forbid attempts to use the  TO 
03211  *              directive to change values of CONSTANTs in particular
03212  *              and of inappropriate targets in general.
03213  *      Updated Fri, 06 May 2005 by David L. Paktor
03214  *          Error-detection of   DO ...  LOOP  and  BEGIN ...  imbalance
03215  *          Error-detection of  nextfcode  exceeding legal maximum (0x0fff).
03216  *      Updated Wed, 20 Jul 2005 by David L. Paktor
03217  *          Put Duplicate-Name-Test under command-line control...
03218  *      Updated Wed, 24 Aug 2005 by David L. Paktor
03219  *          Error-detection via  clear_control_structs()  routine.
03220  *      Updated Tue, 10 Jan 2006 by David L. Paktor
03221  *          Convert to  tic_hdr_t  type vocabulary.
03222  *      Updated Thu, 20 Apr 2006 by David L. Paktor
03223  *          Allow creation of new definition within body of a flow-control
03224  *              structure.  (Remove error-detection via  clear_control_structs)
03225  *      Updated Tue, 13 Jun 2006 by David L. Paktor
03226  *          Move detection of out-of-bounds  nextfcode  to  assigning_fcode()
03227  *              routine, which also detects Overlapping Ranges error.
03228  *      Updated Thu, 27 Jun 2006 by David L. Paktor
03229  *          Report Error for attempt to create def'n inside control structure.
03230  *
03231  *      Extraneous Remarks:
03232  *          We must not set  incolon  to TRUE (if we are creating a colon
03233  *              definition) until *AFTER* this routine has been called, due
03234  *              to the initial error-checking.  If we need to detect whether
03235  *              we are creating a colon definition, we can do so by testing
03236  *              whether the parameter, DEFINER, equals COLON .
03237  *
03238  **************************************************************************** */
03239 
03240 static bool create_word(fwtoken definer)
03241 {
03242     signed long wlen;
03243     bool retval = FALSE;
03244     char *defn_type_name;
03245 
03246     /*  If already inside a colon, ERROR and discontinueprocessing    */
03247     /*  If an alias to a definer is used, show the name of the alias  */
03248     if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) ) 
03249     {
03250         char defn_type_buffr[32] = "";
03251         unsigned int old_lineno = lineno;    /*  For error message  */
03252         bool define_token = TRUE;
03253 
03254         {   /*  Set up definition-type text for error-message */
03255 
03256             /*  No need to check the return value  */
03257             definer_name(definer, &defn_type_name);
03258 
03259             strcat( defn_type_buffr, defn_type_name);
03260             strcat( defn_type_buffr, " definition");
03261         }
03262         /*  If in a control-structure, ERROR but continue processing  */
03263         if ( control_stack_depth != 0 )
03264         {
03265             announce_control_structs( TKERROR, defn_type_buffr, 0);
03266             /*  Leave the new token undefined.  */
03267             define_token = FALSE;
03268         }
03269 
03270         /*  Get the name of the new token  */
03271         wlen = get_word();
03272 
03273 #ifdef DEBUG_SCANNER
03274         printf("%s:%d: debug: defined new word %s, fcode no 0x%x\n",
03275                         iname, lineno, name, nextfcode);
03276 #endif
03277         if ( wlen <= 0 )
03278         {
03279             warn_unterm( TKERROR, defn_type_buffr, old_lineno);
03280         }else{
03281             bool emit_token_name = TRUE;
03282 
03283             /*  Handle Tracing of new definitions  */
03284             trace_creation( definer, statbuf);
03285 
03286             /*  Other Error or Warnings as applicable  */
03287             validate_instance( definer);
03288             warn_if_duplicate( statbuf);
03289             check_name_length( wlen);
03290 
03291             /*  Bump FCode; error-check as applicable  */
03292             assigning_fcode();
03293 
03294             /*  Define the new token, unless disallowed  */
03295             add_to_current( statbuf, nextfcode, definer, define_token);
03296 
03297             /*  Emit appropriate FCodes:  Type of def'n,   */
03298             switch ( hdr_flag )
03299             {
03300                 case FLAG_HEADERS:
03301                     emit_token("named-token");
03302                     break;
03303 
03304                 case FLAG_EXTERNAL:
03305                     emit_token("external-token");
03306                     break;
03307 
03308                 default:  /*   FLAG_HEADERLESS   */
03309                     emit_token("new-token");
03310                     emit_token_name = FALSE;
03311             }
03312 
03313             /*  Emit name of token, if applicable  */
03314             if ( emit_token_name )
03315             {
03316                 if ( force_tokens_case )
03317                 {
03318                     if ( force_lower_case_tokens )
03319                     {
03320                         strlwr( statbuf);
03321                     }else{
03322                         strupr( statbuf);
03323                     }
03324                 }
03325                 emit_string((u8 *)statbuf, wlen);       
03326             }
03327 
03328             /*  Emit the new token's FCode   */
03329             emit_fcode(nextfcode);
03330 
03331             /*  Prepare FCode Assignment Counter for next definition   */
03332             bump_fcode();
03333 
03334             /*  Declare victory   */
03335             retval = TRUE;
03336         }
03337     }
03338     return( retval);
03339 }
03340 
03341 
03342 /* **************************************************************************
03343  *
03344  *      Function name:  cannot_apply
03345  *      Synopsis:       Print error message of the form:
03346  *                     "Cannot apply <func> to <targ>, which is a <def'n>"
03347  *
03348  *      Inputs:
03349  *         Parameters:
03350  *             func_nam                    The name of the function
03351  *             targ_nam                    The name of the target
03352  *             defr                        The numeric-code of the definer-type
03353  *
03354  *      Outputs:
03355  *         Returned Value:                 NONE
03356  *         Printout:
03357  *             The error message is the entire printout of this routine
03358  *
03359  *      Error Detection:
03360  *          Error was detected by calling routine
03361  *
03362  *      Process Explanation:
03363  *          The calling routine already looked up the definer for its
03364  *              own purposes, so we don't need to do that again here.
03365  *
03366  *      Still to be done:
03367  *          If the definer-name is not found, we might still look up
03368  *              the target name in the various vocabularies and use
03369  *              a phrase for those.  E.g., if it is a valid token,
03370  *              we could say it's defined as a "primitive".  (I'm
03371  *              not sure what we'd say about an FWord...)
03372  *
03373  **************************************************************************** */
03374 
03375 static void cannot_apply( char *func_nam, char *targ_nam, fwtoken defr)
03376 {
03377     char *defr_name = "" ;
03378     const char *defr_phrase = ", which is defined as a " ;
03379 
03380     if ( ! definer_name(defr, &defr_name) )
03381     {
03382         defr_phrase = "";
03383     }
03384 
03385     tokenization_error ( TKERROR , 
03386         "Cannot apply  %s  to  %s %s%s.\n",
03387              func_nam, targ_nam, defr_phrase, defr_name );
03388 
03389 }
03390 
03391 
03392 /* **************************************************************************
03393  *
03394  *      Function name:  lookup_with_definer
03395  *      Synopsis:       Return pointer to data-structure of named word,
03396  *                      if it's valid in Current context, and supply its
03397  *                      definer.  If it's not valid in Current context,
03398  *                      see if it might be a Local, and supply that definer.
03399  *
03400  *      Inputs:
03401  *         Parameters:
03402  *             stat_name                  Name to look up
03403  *             *definr                    Pointer to place to put the definer.
03404  *
03405  *      Outputs:
03406  *         Returned Value:                Pointer to data-structure, or
03407  *                                            NULL if not in Current context.
03408  *         Supplied Pointers:
03409  *             *definr                    Definer; possibly LOCAL_VAL
03410  *
03411  *      Process Explanation:
03412  *          If the name is not found in the Current context, and does not
03413  *              exist as a Local, *definr will remain unchanged.
03414  *
03415  *      Extraneous Remarks:
03416  *          This is an odd duck^H^H^H^H^H^H^H^H^H^H^H^H a highly-specialized 
03417  *              routine created to meet some corner-case needs engendered by
03418  *              the conversion to tic_hdr_t vocabularies all around, combined
03419  *              with an obsessive urge to preserve a high level of detail in
03420  *              our error-messages.
03421  *
03422  **************************************************************************** */
03423 
03424 static tic_hdr_t *lookup_with_definer( char *stat_name, fwtoken *definr )
03425 {
03426     tic_hdr_t *retval = lookup_current( stat_name);
03427     if ( retval != NULL )
03428     {
03429          *definr = retval->fword_defr;
03430     }else{
03431         if ( exists_as_local( stat_name) ) *definr = LOCAL_VAL;
03432     }
03433     return ( retval );
03434 }
03435 
03436 /* **************************************************************************
03437  *
03438  *      Function name:  validate_to_target
03439  *      Synopsis:       Print a message if the intended target
03440  *                          of the  TO  directive is not valid
03441  *      
03442  *      Inputs:
03443  *         Parameters:                NONE
03444  *         Global Variables:
03445  *             statbuf             Next symbol to be read from the input stream
03446  *             pc                  Input-source Scanning pointer
03447  *
03448  *      Outputs:
03449  *         Returned Value:         TRUE = Allow  b(to)  token to be output.
03450  *         Global Variables:
03451  *             statbuf             Advanced to next symbol; must be re-read
03452  *             pc                  Advanced, then restored to previous value
03453  *
03454  *      Error Detection:
03455  *          If next symbol is not a valid target of  TO , issue ERROR    
03456  *              message.  Restored  pc  will cause the next symbol to
03457  *              be processed by ordinary means.
03458  *          Allow  b(to)  token to be output in selected cases.  Even if
03459  *              user has set the "Ignore Errors" flag, certain targets are
03460  *              still too risky to be allowed to follow a  b(to)  token;
03461  *              if "Ignore Errors" is not set, output won't get created
03462  *              anyway.
03463  *          Issue ERROR in the extremely unlikely case that "to" is the
03464  *              last word in the Source.
03465  *
03466  *      Process Explanation:
03467  *          Valid targets for a TO directive are words defined by:
03468  *              DEFER, VALUE and arguably VARIABLE.  We will also allow
03469  *              CONSTANT, but will still issue an Error message.
03470  *          After the check, restore  pc ; this was only a look-ahead.
03471  *              Also restore  lineno  and  abs_token_no 
03472  *
03473  *      Extraneous Remarks:
03474  *          Main part of the mechanism to detect attempts to use the  TO 
03475  *              directive to change the values of CONSTANTs in particular
03476  *              and of inappropriate targets in general.
03477  *
03478  **************************************************************************** */
03479 
03480 static bool validate_to_target( void )
03481 {
03482     signed long wlen;
03483     tic_hdr_t *test_entry;
03484     u8 *saved_pc = pc;
03485     char *cmd_cpy = strupr( strdup( statbuf));    /*  For error message  */
03486     unsigned int saved_lineno = lineno;
03487     unsigned int saved_abs_token_no = abs_token_no;
03488     fwtoken defr = UNSPECIFIED ;
03489     bool targ_err = TRUE ;
03490     bool retval = FALSE ;
03491 
03492     wlen = get_word();
03493     if ( wlen <= 0 )
03494     {
03495         warn_unterm( TKERROR, cmd_cpy, saved_lineno);
03496     }else{
03497 
03498         test_entry = lookup_with_definer( statbuf, &defr);
03499         if ( test_entry != NULL )
03500         {
03501             switch (defr)
03502             {
03503                 case VARIABLE:
03504                     tokenization_error( WARNING,
03505                         "Applying %s to a VARIABLE (%s) is "
03506                         "not recommended; use  !  instead.\n",
03507                         cmd_cpy, statbuf);
03508                 case DEFER:
03509                 case VALUE:
03510                     targ_err = FALSE ;
03511                 case CONST:
03512                     retval = TRUE ;
03513                 /*  No default needed, likewise, no breaks;      */
03514                 /*  but some compilers get upset without 'em...  */
03515                 default:
03516                     break;
03517             }
03518         }
03519 
03520         if ( targ_err )
03521         {
03522             cannot_apply(cmd_cpy, strupr(statbuf), defr );
03523         }
03524 
03525         pc = saved_pc;
03526         lineno = saved_lineno;
03527         abs_token_no = saved_abs_token_no;
03528     }
03529     free( cmd_cpy);
03530     return( retval);
03531 }
03532 
03533 
03534 /* **************************************************************************
03535  *
03536  *      Function name:  you_are_here
03537  *      Synopsis:       Display a generic Advisory of the Source command
03538  *                          or directive encountered and being processed
03539  *
03540  *      Inputs:
03541  *         Parameters:                NONE
03542  *         Global Variables:
03543  *             statbuf                The command being processed 
03544  *
03545  *      Outputs:
03546  *         Returned Value:            NONE
03547  *         Printout:
03548  *             Advisory message
03549  *
03550  **************************************************************************** */
03551 
03552 static void you_are_here( void)
03553 {
03554     tokenization_error( INFO,
03555         "%s encountered; processing...\n",
03556             strupr(statbuf) );
03557 }
03558 
03559 
03560 /* **************************************************************************
03561  *
03562  *      Function name:  fcode_starter
03563  *      Synopsis:       Respond to one of the "FCode Starter" words
03564  *      
03565  *      Inputs:
03566  *         Parameters:
03567  *             token_name         The FCode-token for this "Starter" word
03568  *             spread             The separation between tokens.
03569  *             is_offs16          Whether we are using a 16-bit number
03570  *                                    for branch- (and suchlike) -offsets,
03571  *                                    or the older-style 8-bit offset numbers.
03572  *         Global Variables:
03573  *            iname               Input-File name, used to set ifile_name 
03574  *                                    field of  current_device_node
03575  *            lineno              Current Input line number, used to set
03576  *                                    line_no field of  current_device_node
03577  *         Local Static Variables:
03578  *            fcode_started       If this is TRUE, we have an Error.
03579  *            first_fc_starter    Control calling  reset_fcode_ranges() ;
03580  *                                    only on the first fcode_starter of
03581  *                                    a tokenization.
03582  *
03583  *      Outputs:
03584  *         Returned Value:        NONE
03585  *         Global Variables:
03586  *            offs16              Global "16-bit-offsets" flag
03587  *            current_device_node   The ifile_name and line_no fields will be
03588  *                                    loaded with the current input file name
03589  *                                    and line number.  This node will be the
03590  *                                    top-level device-node.
03591  *            FCode Ranges will be reset the first time per tokenization
03592  *                that this routine is entered.
03593  *            A new FCode Range will be started every time after that.
03594  *         Local Static Variables:
03595  *            fcode_started       Set to TRUE.  We invoke the starter only
03596  *                                    once per image-block.
03597  *            first_fc_starter    Reset to FALSE if not already
03598  *         Memory Allocated
03599  *             Duplicate of Input-File name
03600  *         When Freed?
03601  *             In  fcode_ender()
03602  *
03603  *      Error Detection:
03604  *          Spread of other than 1 -- Warning message.
03605  *          "FCode Starter" previously encountered -- Warning and ignore.
03606  *
03607  *      Question under consideration:
03608  *          Do we want directives -- such as definitions of constants --
03609  *              supplied before the "FCode Starter", to be considered as
03610  *              taking place in "Tokenizer Escape" mode?  That would mean
03611  *              the "Starter" functions must be recognized in "Tokenizer
03612  *              Escape" mode.  Many ramifications to be thought through...
03613  *          I think I'm coming down strongly on the side of "No".  The user
03614  *              who wants to do that can very well invoke "Tokenizer Escape"
03615  *              mode explicitly.
03616  *
03617  **************************************************************************** */
03618 
03619 static void fcode_starter( const char *token_name, int spread, bool is_offs16)
03620 {
03621     you_are_here();
03622     if ( spread != 1 )
03623     {
03624         tokenization_error( WARNING, "spread of %d not supported.\n", spread);
03625     }
03626     if ( fcode_started )
03627     {
03628         tokenization_error( WARNING,
03629             "Only one \"FCode Starter\" permitted per tokenization.  "
03630                 "Ignoring...\n");
03631     } else {
03632 
03633         emit_fcodehdr(token_name);
03634         offs16 = is_offs16;
03635         fcode_started = TRUE;
03636 
03637         current_device_node->ifile_name = strdup(iname);
03638         current_device_node->line_no = lineno;
03639 
03640         if ( first_fc_starter )
03641         {
03642             reset_fcode_ranges();
03643             first_fc_starter = FALSE;
03644         }else{
03645             set_next_fcode( nextfcode);
03646         }
03647     }
03648 }
03649 
03650 /* **************************************************************************
03651  *
03652  *      Function name:  fcode_end_err_check
03653  *      Synopsis:       Do error-checking at end of tokenization,
03654  *                          whether due to FCODE-END or end-of-file,
03655  *                          and reset the indicators we check.
03656  *
03657  *      Inputs:
03658  *         Parameters:                    NONE
03659  *         Global Variables:
03660  *             Data-Stack depth     Is anything left on the stack?
03661  *
03662  *      Outputs:
03663  *         Returned Value:                NONE
03664  *         Global Variables:
03665  *             Data-Stack           Reset to empty
03666  *
03667  *      Error Detection:
03668  *          Unresolved control structures detected by clear_control_structs()
03669  *          If anything is left on the stack, it indicates some incomplete
03670  *              condition; we will treat it as a Warning.
03671  *
03672  **************************************************************************** */
03673 
03674 static void fcode_end_err_check( void)
03675 {
03676     bool stack_imbal = BOOLVAL( stackdepth() != 0 );
03677 
03678         if ( stack_imbal )
03679         {
03680             tokenization_error( WARNING,
03681                 "Stack imbalance before end of tokenization.\n");
03682         }
03683     clear_stack();
03684     clear_control_structs("End of tokenization");
03685 }
03686 
03687 /* **************************************************************************
03688  *
03689  *      Function name:  fcode_ender
03690  *      Synopsis:       Respond to one of the "FCode Ender" words:
03691  *                          The FCode-token for "End0" or "End1"
03692  *                              has already been written to the
03693  *                              FCode Output buffer.
03694  *                          Finish the FCode header:  fill in its
03695  *                              checksum and length.
03696  *                          Reset the token names defined in "normal" mode
03697  *                          (Does not reset the FCode-token number)
03698  *
03699  *      Associated FORTH words:                 END0, END1
03700  *      Associated Tokenizer directive:         FCODE-END
03701  *
03702  *      Inputs:
03703  *         Parameters:            NONE
03704  *         Global Variables:
03705  *             incolon            If TRUE, a colon def'n has not been completed
03706  *             last_colon_filename         For error message.
03707  *             last_colon_lineno           For error message.
03708  *             scope_is_global             For error detection
03709  *             is_instance                 For error detection
03710  *
03711  *      Outputs:
03712  *         Returned Value:        NONE
03713  *         Global Variables:
03714  *             haveend            Set to TRUE
03715  *             fcode_started      Reset to FALSE.  Be ready to start anew.
03716  *             FCode-defined tokens, aliases and macros -- i.e., those
03717  *                 *NOT* defined in tokenizer-escape mode -- are reset.
03718  *                 (Also, command-line-defined symbols are preserved).
03719  *             Vocabularies will be reset
03720  *             Device-node data structures will be deleted
03721  *             Top-level device-node ifile_name and line_no fields
03722  *                 will be reset.
03723  *         Memory Freed
03724  *             Duplicate of Input-File name, in top-level device-node.
03725  *         Printout:
03726  *             Advisory message giving current value of nextfcode
03727  *                 (the "FCode-token Assignment Counter")
03728  *
03729  *      Error Detection:
03730  *          ERROR if a Colon definition has not been completed.
03731  *          ERROR if "instance" is still in effect
03732  *          WARNING if Global-Scope has not been terminated; compensate.
03733  *
03734  *      Extraneous Remarks:
03735  *          In order to accommodate odd cases, such as multiple FCode blocks
03736  *          within a single PCI header, this routine does not automatically
03737  *          reset nextfcode  to h# 0800
03738  *
03739  **************************************************************************** */
03740 
03741 void fcode_ender(void)
03742 {
03743     if ( incolon )
03744     {
03745         char *tmp_iname = iname;
03746         iname = last_colon_filename;
03747         unterm_is_colon = TRUE;
03748         warn_unterm( TKERROR, "Colon Definition", last_colon_lineno);
03749         iname = tmp_iname;    
03750     }
03751     
03752     haveend = TRUE;
03753 
03754     if ( is_instance )
03755     {
03756         unresolved_instance( TKERROR);
03757     }
03758 
03759     if ( scope_is_global )
03760     {
03761         tokenization_error( WARNING ,
03762             "No DEVICE-DEFINITIONS directive encountered before end.  "
03763                 "Compensating...\n");
03764         resume_device_scope();
03765     }
03766     fcode_end_err_check();
03767     reset_normal_vocabs();
03768     finish_fcodehdr();
03769     fcode_started = FALSE;
03770 
03771     if ( current_device_node->ifile_name != default_top_dev_ifile_name )
03772     {
03773         free( current_device_node->ifile_name );
03774         current_device_node->ifile_name = default_top_dev_ifile_name;
03775         current_device_node->line_no = 0;
03776     }
03777 }
03778 
03779 /* **************************************************************************
03780  *
03781  *      Function name:  get_token
03782  *      Synopsis:       Read the next word in the input stream and retrieve
03783  *                          its FCode-token number.  If it's not a symbol to
03784  *                          which a single token is assigned (e.g., if it's
03785  *                          a macro), report an error.
03786  *
03787  *      Associated FORTH words:                   [']  '
03788  *      Associated Tokenizer directive:          F[']
03789  *
03790  *      Inputs:
03791  *         Parameters:
03792  *             *tok_entry             Place to put the pointer to token entry
03793  *         Global Variables:
03794  *             statbuf                The command being processed 
03795  *             pc                     Input stream character pointer
03796  *
03797  *      Outputs:
03798  *         Returned Value:            TRUE if successful (i.e., no error)
03799  *         Supplied Pointers:
03800  *             *tok_entry             The token entry, if no error
03801  *         Global Variables:
03802  *             statbuf                The next word in the input stream
03803  *             pc                     Restored to previous value if error
03804  *
03805  *      Error Detection:
03806  *          The next word in the input stream is expected to be on the
03807  *              same line as the directive.  The  get_word_in_line()
03808  *              routine will check for that.
03809  *          If the next word in the input stream is not a symbol
03810  *              for which a single-token FCode number is assigned,
03811  *              report an ERROR and restore PC to its previous value.
03812  *
03813  **************************************************************************** */
03814 
03815 static bool get_token(tic_hdr_t **tok_entry)
03816 {
03817     bool retval = FALSE;
03818     u8 *save_pc;
03819 
03820     /*  Copy of command being processed, for error message  */
03821     char cmnd_cpy[FUNC_CPY_BUF_SIZE+1];
03822     strncpy( cmnd_cpy, statbuf, FUNC_CPY_BUF_SIZE);
03823     cmnd_cpy[FUNC_CPY_BUF_SIZE] = 0;   /*  Guarantee null terminator. */
03824 
03825     save_pc = pc;
03826 
03827     if ( get_word_in_line( statbuf) )
03828     {
03829         fwtoken defr = UNSPECIFIED;
03830 
03831         /*  We need to scan the newest definitions first; they
03832          *      might supercede standard ones.  We need, though,
03833          *      to bypass built-in FWords that need to trigger
03834          *      some tokenizer internals before emitting their
03835          *      synonymous FCode Tokens, (e.g., version1 , end0 ,
03836          *      and start{0-4}); if we find one of those, we will
03837          *      need to search again, specifically within the list
03838          *      of FCode Tokens.
03839          */
03840         *tok_entry = lookup_with_definer( statbuf, &defr);
03841         if ( *tok_entry != NULL )
03842         {
03843             /*  Built-in FWords can be uniquely identified by their
03844              *      definer,  BI_FWRD_DEFN .  The definer for "shared"
03845              *      FWords is  COMMON_FWORD  but there are none of
03846              *      those that might be synonymous with legitimate
03847              *      FCode Tokens, nor are any likely ever to be...
03848              */
03849             if ( defr == BI_FWRD_DEFN )
03850             {
03851                 *tok_entry = lookup_token( statbuf);
03852                 retval = BOOLVAL( *tok_entry != NULL );
03853             }else{
03854                 retval = entry_is_token( *tok_entry);
03855             }
03856         }
03857 
03858         if ( INVERSE( retval) )
03859         {
03860             cannot_apply( cmnd_cpy, strupr(statbuf), defr );
03861             pc = save_pc;
03862         }
03863     }
03864 
03865     return ( retval );
03866 }
03867 
03868 
03869 static void base_change ( int new_base )
03870 {
03871     if ( incolon && ( INVERSE( in_tokz_esc) ) )
03872     {
03873         emit_literal(new_base );
03874         emit_token("base");
03875         emit_token("!");
03876     } else {
03877         base = new_base;
03878     }
03879 }
03880 
03881 static void base_val (int new_base)
03882 {
03883     u8  *old_pc;
03884 
03885     char base_cmnd[FUNC_CPY_BUF_SIZE+1];
03886     strncpy( base_cmnd, statbuf, FUNC_CPY_BUF_SIZE);
03887     base_cmnd[FUNC_CPY_BUF_SIZE] = 0;  /* Guarantee NULL terminator */
03888 
03889     old_pc=pc;
03890     if ( get_word_in_line( statbuf) )
03891     {
03892         u8 basecpy=base;
03893 
03894         base = new_base;
03895         if ( ! handle_number() )
03896         {
03897             /*  We did get a word on the line, but it's not a valid number */
03898             tokenization_error( WARNING ,
03899                  "Applying %s to non-numeric value.  Ignoring.\n",
03900                       strupr(base_cmnd) );
03901             pc = old_pc;
03902         }
03903         base=basecpy;
03904     }
03905 }
03906 
03907 
03908 /* **************************************************************************
03909  *
03910  *      Function name:  eval_string
03911  *      Synopsis:       Prepare to tokenize a string, artificially generated
03912  *                          by this program or created as a user-defined
03913  *                          Macro.   When done, resume at existing source.
03914  *                      Keep the file-name and line-number unchanged.
03915  *      
03916  *      Inputs:
03917  *         Parameters:
03918  *             inp_bufr          String (or buffer) to evaluate
03919  *
03920  *      Outputs:
03921  *         Returned Value:       NONE
03922  *         Global Variables, changed by call to init_inbuf():
03923  *             start             Points to given string
03924  *             pc                         ditto
03925  *             end               Points to end of given string
03926  *
03927  *      Revision History:
03928  *          Updated Thu, 23 Feb 2006 by David L. Paktor
03929  *              This routine no longer calls its own instance of  tokenize()
03930  *              It has become the gateway to the mechanism that makes a
03931  *                  smooth transition between the body of the Macro, User-
03932  *                  defined Symbol or internally-generated string and the
03933  *                  resumption of processing the source file. 
03934  *              A similar (but more complicated) transition when processing
03935  *                  an FLOADed file will be handled elsewhere.
03936  *          Updated Fri, 24 Feb 2006 by David L. Paktor
03937  *              In order to support Macro-recursion protection, this routine
03938  *                  is no longer the gateway for Macros; they will have to
03939  *                  call push_source() directly.
03940  *
03941  **************************************************************************** */
03942 
03943 void eval_string( char *inp_bufr)
03944 {
03945     push_source( NULL, NULL, FALSE);
03946     init_inbuf( inp_bufr, strlen(inp_bufr));
03947 }
03948 
03949 
03950 /* **************************************************************************
03951  *
03952  *      Function name:  finish_or_new_device
03953  *      Synopsis:       Handle the shared logic for the NEW-DEVICE and
03954  *                          FINISH-DEVICE commands.
03955  *
03956  *      Inputs:
03957  *         Parameters:
03958  *             finishing_device            TRUE for FINISH-DEVICE,
03959  *                                             FALSE for NEW-DEVICE
03960  *         Global Variables:
03961  *             incolon                       TRUE if inside a colon definition
03962  *             noerrors                      TRUE if ignoring errors
03963  *             scope_is_global               TRUE if "global scope" in effect
03964  *         Local Static Variables:
03965  *             is_instance                   TRUE if "instance" is in effect
03966  *             dev_change_instance_warning   TRUE if warning hasn't been issued
03967  *
03968  *      Outputs:
03969  *         Returned Value:                   NONE
03970  *         Local Static Variables:
03971  *             dev_change_instance_warning   FALSE if warning is issued
03972  *             instance_definer_gap          TRUE if "instance" is in effect
03973  *
03974  *      Error Detection:
03975  *          NEW-DEVICE and FINISH-DEVICE should not be used outside of
03976  *              a colon-definition if global-scope is in effect.  Error
03977  *              message; no further action unless we are ignoring errors.
03978  *          Issue a WARNING if INSTANCE wasn't resolved before the current
03979  *              device-node is changed.  Try not to be too repetitive...
03980  *
03981  *      Process Explanation:
03982  *          The words NEW-DEVICE and FINISH-DEVICE may be incorporated into
03983  *              a colon-definition, whether the word is defined in global-
03984  *              or device- -scope.  Such an incorporation does not effect
03985  *              a change in the device-node vocabulary; simply emit the token.
03986  *          If we are in interpretation mode, though, we need to check for
03987  *              errors before changing the device-node vocabulary:
03988  *          If global-scope is in effect, we need to check whether we are
03989  *              ignoring errors; if so, we will compensate by switching to  
03990  *              device-scope.
03991  *          If "instance" is in effect, it's "dangling".  It will remain
03992  *              in effect through a device-node change, but this is very
03993  *              bad style and deserves a WARNING, but only one for each
03994  *              occurrence.  It would be unaesthetic, to say the least,
03995  *              to have multiple messages for the same dangling "instance"
03996  *              in a "finish-device   new-device" sequence.
03997  *           We must be careful about the order we do things, because of
03998  *              the messages printed as a side-effect of the node change...
03999  *
04000  *      Extraneous Remarks:
04001  *          I will violate strict structure here.
04002  *
04003  **************************************************************************** */
04004 
04005 static void finish_or_new_device( bool finishing_device )
04006 {
04007     if ( INVERSE( incolon ) )
04008     {
04009         if ( INVERSE( is_instance) )
04010         {
04011             /*  Arm warning for next time:         */
04012             dev_change_instance_warning = TRUE;
04013         }else{
04014             /*  Dangling "instance"                */
04015             instance_definer_gap = TRUE;
04016             /*   Warn only once.                   */
04017             if ( dev_change_instance_warning )
04018             {
04019                 unresolved_instance( WARNING);
04020                 dev_change_instance_warning = FALSE;
04021             }
04022         }
04023 
04024         /*  Note:  "Instance" cannot be in effect during "global" scope  */ 
04025         if ( scope_is_global )
04026         {
04027             glob_not_allowed( TKERROR, noerrors );
04028             if ( noerrors )
04029             {
04030                  resume_device_scope();
04031             }else{
04032                  return;
04033             }
04034         }
04035 
04036         if ( finishing_device )
04037         {
04038              finish_device_vocab();
04039         }else{
04040              new_device_vocab();
04041         }
04042     }
04043     emit_token( finishing_device ? "finish-device" : "new-device" );
04044         }
04045         
04046         
04047 /* **************************************************************************
04048  *
04049  *      Function name:  abort_quote
04050  *      Synopsis:       Optionally implement the   ABORT"  function as
04051  *                      though it were a macro.  Control whether to allow
04052  *                      it, and which style to support, via switches set
04053  *                      on the command-line at run-time.
04054  *
04055  *      Inputs:
04056  *         Parameters:
04057  *             tok                       Numeric-code associated with the
04058  *                                           FORTH word that was just read.
04059  *         Global Variables:
04060  *             enable_abort_quote        Whether to allow ABORT"
04061  *             sun_style_abort_quote     SUN-style versus Apple-style
04062  *             abort_quote_throw         Whether to use -2 THROW vs ABORT
04063  *
04064  *      Outputs:
04065  *         Returned Value:     TRUE if it was handled
04066  *         Global Variables:
04067  *             report_multiline              Reset to FALSE.
04068  *         Printout:
04069  *             ADVISORY:   ABORT" in fcode is not defined by IEEE 1275-1994
04070  *
04071  *      Error Detection:
04072  *          Performed by other routines.  If user selected not to
04073  *              allow  ABORT" , it will simply be treated as an
04074  *              unknown word.
04075  *          The string following it, however, will still be consumed.
04076  *
04077  *      Process Explanation:
04078  *          If the supplied  tok  was not  ABORTTXT , then return FALSE.
04079  *          If the  enable_abort_quote  flag is FALSE, consume the
04080  *              string following the Abort" token, but be careful to
04081  *              leave the  Abort" token in statbuf, as it will be used
04082  *              for the error message.
04083  *          Otherwise, create and prepare for processing the appropriate Macro:
04084  *              For Apple Style, we push the specified string onto the stack
04085  *                  and do -2 THROW (and hope the stack unwinds correctly).
04086  *              For Sun Style, we test the condition on top of the stack,
04087  *                  and if it's true, print the specified string before we
04088  *                  do the -2 THROW.
04089  *          We perform the underlying operations directly:  placing an "IF"
04090  *              (if Sun Style), then placing the string.  This bypasses
04091  *              any issues of double-parsing, as well as of doubly checking
04092  *              for a multi-line string.
04093  *          Finally, we perform the operational equivalents of the remainder
04094  *              of the command sequence.
04095  *
04096  *      Extraneous Remarks:
04097  *          I would have preferred not to have to directly perform the under-
04098  *              lying operations, and instead simply prepare the entire command
04099  *              sequence in a buffer, but I needed to handle the case where
04100  *              quote-escaped quotes are included in the string:  If the string
04101  *              were simply to be reproduced into the buffer, the quote-escaped
04102  *              quotes would appear as plain quote-marks and terminate the
04103  *              string parsing prematurely, leaving the rest of the string
04104  *              to be treated as code instead of text...
04105  *          Also, the introduction of the variability of whether to do the
04106  *               -2 THROW  or to compile-in the token for  ABORT  makes the
04107  *              buffer-interpretation scheme somewhat too messy for my tastes.
04108  *
04109  **************************************************************************** */
04110         
04111 static bool abort_quote( fwtoken tok)
04112 {
04113     bool retval = FALSE;
04114     if ( tok == ABORTTXT )
04115     {
04116         if ( ! enable_abort_quote )
04117         {
04118             /* ABORT" is not enabled; we'd better consume the string  */
04119             char *save_statbuf;
04120             signed long wlen;
04121             save_statbuf = strdup( (char *)statbuf);
04122             wlen = get_string( FALSE);
04123             strcpy( statbuf, save_statbuf);
04124             free( save_statbuf);
04125         }else{
04126             /* ABORT" is not to be used in FCODE drivers
04127              * but Apple drivers do use it. Therefore we
04128              * allow it. We push the specified string to
04129              * the stack, do -2 THROW and hope that THROW
04130              * will correctly unwind the stack.
04131              * Presumably, Apple Source supplies its own
04132              *  IF ... THEN
04133              */
04134             char *abort_string;
04135             signed long wlen;
04136 
04137             retval = TRUE;
04138             tokenization_error (INFO, "ABORT\" in fcode not "
04139                             "defined by IEEE 1275-1994\n");
04140             test_in_colon("ABORT\"", TRUE, TKERROR, NULL);
04141             wlen=get_string( TRUE);
04142 
04143             if ( sun_style_abort_quote )  emit_if();
04144 
04145             emit_token("b(\")");
04146             emit_string(statbuf, wlen);
04147         
04148             if ( sun_style_abort_quote )  emit_token("type");
04149 
04150             if ( abort_quote_throw )
04151             {
04152                 emit_literal( -2);
04153                 emit_token("throw");
04154             }else{
04155                 emit_token("abort");
04156         }
04157                 
04158             if ( sun_style_abort_quote )  emit_then();
04159                 /*  Sun Style  */
04160                 abort_string = " type -2 THROW THEN:" ;
04161 }
04162         }
04163     return( retval );
04164 }
04165 
04166 
04167 /* **************************************************************************
04168  *
04169  *      Function name:  create_alias
04170  *      Synopsis:       Create an alias, as specified by the user
04171  *
04172  *      Associated FORTH word:                 ALIAS
04173  *
04174  *      Inputs:
04175  *         Parameters:                NONE
04176  *         Global Variables:
04177  *             incolon                Colon-def'n-in-progress indicator
04178  *             in_tokz_esc            "Tokenizer Escape" mode indicator
04179  *         Input Stream
04180  *             Two words will be read.
04181  *
04182  *      Outputs:
04183  *         Returned Value:            TRUE if succeeded.
04184  *         Global Variables:    
04185  *             statbuf                New name will be copied back into here.
04186  *         Memory Allocated
04187  *             The two words will be copied into freshly-allocated memory 
04188  *                 that will be passed to the create_..._alias()  routine.
04189  *         When Freed?
04190  *             When Current Device Vocabulary is "finished", or at end
04191  *                 of tokenization, or upon termination of program.
04192  *             If not able to create alias, the copies will be freed here.
04193  *
04194  *      Error Detection:
04195  *          If the ALIAS command was given during colon-definition, that
04196  *              can be handled by this tokenizer, but it is not supported
04197  *              by IEEE 1275-1994.  Issue a WARNING.
04198  *          If the new name is a copy of an existing word-name, issue a warning.
04199  *          If the word to which an alias is to be created does not exist
04200  *              in the appropriate mode -- relative to "Tokenizer-Escape" --
04201  *              that is an ERROR.
04202  *          If "instance" is in effect, the ALIAS command is an ERROR.
04203  *
04204  *      Process Explanation:
04205  *          Get two words -- the new name and the "old" word -- from the
04206  *              same line of input as the ALIAS command.
04207  *          Copy the new name back into statbuf for use in trace_creation.
04208  *          Determine whether or not we are in "Tokenizer-Escape" mode.
04209  *              Subsequent searches will take place in that same mode.
04210  *          If the "new" name already exists, issue a warning.
04211  *          In each vocabulary applicable to the current mode -- i.e., 
04212  *                  "Tokenizer-Escape" or "Normal" -- (except:  cannot
04213  *                  make aliases to "Locals"):
04214  *              Try using the  create_..._alias()  routine.
04215  *              If it succeeds, we are done.
04216  *          IMPORTANT:  The order in which we try the vocabularies MUST
04217  *              match the order in which  tokenize_one_word()  searches them. 
04218  *          If all the attempts failed, the "old" word does not exist;
04219  *              declare an ERROR and free up the memory that was allocated.
04220  *
04221  *      Extraneous Remarks:
04222  *          With the separation of the  tokenizer[  state, this
04223  *              function has become too complicated to keep as a
04224  *              simple  CASE  in the big  SWITCH  statement anymore...
04225  *
04226  *          I had earlier thought that it was sufficient to create a
04227  *              macro linking the "new" name to the "old" word.  There
04228  *              were too many cases, though, where that didn't work.
04229  *              This is cleaner.
04230  *
04231  *          I will not be adhering to the strict rules of structure in
04232  *              this routine, as it would get me nested too deeply...
04233  *
04234  *      Revision History:
04235  *          Updated Tue, 10 Jan 2006 by David L. Paktor
04236  *              Convert to  tic_hdr_t  type vocabularies.
04237  *
04238  **************************************************************************** */
04239 
04240 static bool create_alias( void )
04241 {
04242     char *new_alias ;
04243 
04244     validate_instance(ALIAS);
04245     if ( incolon )
04246     {
04247          tokenization_error ( WARNING,
04248             "ALIAS during colon-definition "
04249                 "is not supported by IEEE 1275-1994\n");
04250 }
04251     if ( get_word_in_line( "ALIAS") )
04252     {
04253 
04254         new_alias = strdup((char *)statbuf);
04255 
04256         if (get_word_in_line( "ALIAS") )
04257 {
04258             char *old_name = strdup((char *)statbuf) ;
04259 
04260             /*  Copy the "new" alias name back into statbuf.
04261              *      This is a HACK ^H^H^H^H awkward way to retrofit
04262              *      support for the  trace_creation()  function.
04263              */
04264             strcpy( statbuf, new_alias);
04265 
04266             /*  We don't call  trace_creation()  here because we don't
04267              *      know if the creation succeeded.  However, we want
04268              *      to issue a "Duplicate" warning based on the attempt,
04269              *      even if it doesn't succeed.
04270              *  We would prefer to have the "Trace" message precede the 
04271              *      "Duplicate" warning, but we don't think it's worth
04272              *      the effort.  When it becomes worthwhile, the way to
04273              *      do it would be to factor out the block that handles
04274              *      normal-tokenization versus "Tokenizer-Escape" mode;
04275              *      condition the "Trace" message on its success-return, 
04276              *      show the "Duplicate" warning in any case, then show
04277              *      the error-message and do the cleanup conditioned on
04278              *      a failure-return.
04279              *  That will also obviate the need for a return value from
04280              *      this routine and for the copy-back into statbuf.
04281              */
04282             warn_if_duplicate(new_alias);
04283 
04284             /*
04285              *  Here is where we begin trying the  create_..._alias() 
04286              *      routines for the vocabularies.
04287              */
04288 
04289             /*
04290              *  Distinguish between "Normal" tokenization mode
04291              *  and "Tokenizer Escape" mode
04292              */
04293             if ( in_tokz_esc )
04294             {
04295                 if ( create_tokz_esc_alias( new_alias, old_name) )
04296                     return(TRUE);
04297         
04298                 /*
04299                  *  Handle the classes of operatives that are common between
04300                  *      "Tokenizer Escape" mode and "Normal" tokenization mode.
04301                  *  Those classes include selected non-fcode forth constructs
04302                  *     and Conditional-Compilation Operators.
04303                  */
04304                 {
04305                     tic_hdr_t *found = lookup_shared_word( old_name);
04306                     if ( found != NULL )
04307                     {
04308                         if ( create_core_alias( new_alias, old_name) )
04309                             return(TRUE);
04310                     }
04311         }
04312             }else{
04313                 /*  "Normal" tokenization mode  */
04314         
04315                 /*  Can create aliases for "Locals", why not?  */
04316                 if ( create_local_alias( new_alias, old_name) )
04317                     return(TRUE);
04318 
04319                 /*
04320                  *  All other classes of operatives -- non-fcode forth
04321                  *      constructs, Standard and user-defined fcode
04322                  *      tokens, Macros, and Conditional-Compilation
04323                  *      Operators, -- are included in the "currently
04324                  *      active" vocabulary.
04325                  */
04326 
04327                 if ( create_current_alias( new_alias, old_name) )
04328                     return(TRUE);
04329         
04330             }    /*  End of separate handling for normal-tokenization mode
04331                   *      versus  "Tokenizer-Escape" mode
04332                   */
04333 
04334             /*  It's not a word, a macro or any of that other stuff.  */
04335             tokenized_word_error(old_name);
04336             free(old_name);
04337         }
04338         free (new_alias);
04339     }
04340     return(FALSE);
04341 }
04342 
04343         
04344 /* **************************************************************************
04345  *
04346  *      Function name:  string_err_check
04347  *      Synopsis:       Error-check after processing or Ignoring
04348  *                          simple strings
04349  *
04350  *      Inputs:
04351  *         Parameters:
04352  *             is_paren           TRUE if string is Dot-Paren  .( 
04353  *                                    FALSE if Ess-Quote  ( s"  )
04354  *             sav_lineno         Saved Line Number, for Unterminated Error
04355  *             strt_lineno        Start Line Number, for Multiline Warning
04356  *         Global Variables:
04357  *             noerrors           TRUE if ignoring errors
04358  *         Local Static Variables:
04359  *             got_until_eof      TRUE if reached end of buffer before delim.
04360  *
04361  *      Outputs:
04362  *         Returned Value:        TRUE if did not reach end of buffer, or,
04363  *                                    if ignoring errors, TRUE anyway.
04364  *
04365  *      Error Detection:
04366  *          Multi-line warning, "Unterminated" Error messages, as apppropriate
04367  *
04368  **************************************************************************** */
04369 
04370 static  bool string_err_check( bool is_paren,
04371                                   unsigned int sav_lineno,
04372                                       unsigned int strt_lineno )
04373 {
04374     bool retval = noerrors ;
04375     char *item_typ = is_paren ?
04376         "Dot-Paren" : "Ess-Quote" ;
04377     if ( got_until_eof )   /*  Crude retrofit... */
04378     {
04379         warn_unterm( TKERROR, item_typ, sav_lineno );
04380     }else{
04381         retval = TRUE;
04382         warn_if_multiline( item_typ, strt_lineno );
04383         }
04384     return( retval);
04385 }
04386 
04387 
04388 /* **************************************************************************
04389  *
04390  *      Function name:  handle_internal
04391  *      Synopsis:       Perform the functions associated with FORTH words
04392  *                      that do not map directly to a single token.  This
04393  *                      is the functions that will go into the FUNCT field
04394  *                      of entries in the "FWords" and "Shared Words" lists.
04395  *      
04396  *      Inputs:
04397  *         Parameters:
04398  *             pfield               Param-field of the  tic_hdr_t  -type entry
04399  *                                      associated with the FORTH-Word (FWord)
04400  *                                      just read that is being "handled".
04401  *         Global Variables:
04402  *             statbuf              The word that was just read.
04403  *
04404  *      Outputs:
04405  *         Returned Value:          NONE
04406  *         Global Variables:
04407  *             statbuf              More words may be read.
04408  *
04409  *      Error Detection:
04410  *          Too numerous to list here...
04411  *
04412  *      Process Explanation:
04413  *          Recast the type of the param-field of a  tic_hdr_t -type
04414  *              entry and rename it "tok".
04415  *          The "tok" will be used as the control-expression for a
04416  *              SWITCH statement with a large number of CASE labels.
04417  *              Both "FWords" and "shared_words" list entries will
04418  *              be processed by this routine.
04419  *      
04420  *      Revision History:
04421  *      Updated Wed, 20 Jul 2005 by David L. Paktor
04422  *          Put handling of  ABORT"  under control of a run-time
04423  *              command-line switch.
04424  *          Put decision to support IBM-style Locals under control
04425  *              of a run-time command-line switch.
04426  *      Updated Tue, 17 Jan 2006 by David L. Paktor
04427  *          Convert to handler for  tic_hdr_t  type vocab entries.
04428  *
04429  *      Extraneous Remarks:
04430  *          We would prefer to keep this function private, so we will
04431  *              declare its prototype here and in the one other file
04432  *              where we need it, namely, dictionary.c, rather than
04433  *              exporting it widely in a  .h  file.
04434  *
04435  **************************************************************************** */
04436 
04437 void handle_internal( tic_param_t pfield);
04438 void handle_internal( tic_param_t pfield)
04439 {
04440         fwtoken tok = pfield.fw_token;
04441 
04442         signed long wlen;
04443         unsigned int sav_lineno = lineno;    /*  For error message  */
04444 
04445         bool handy_toggle = TRUE ;   /*  Various uses...   */
04446         
04447 #ifdef DEBUG_SCANNER
04448         printf("%s:%d: debug: tokenizing control word '%s'\n",
04449                                                 iname, lineno, statbuf);
04450 #endif
04451         switch (tok) {
04452         case BEGIN:
04453                 emit_begin();
04454                 break;
04455 
04456         case BUFFER:
04457                 if ( create_word(tok) )
04458                 {
04459                 emit_token("b(buffer:)");
04460                 }
04461                 break;
04462 
04463         case CONST:
04464                 if ( create_word(tok) )
04465                 {
04466                 emit_token("b(constant)");
04467                 }
04468                 break;
04469 
04470         case COLON:
04471                 {
04472                     /*  Collect error- -detection or -reporting items,
04473                      *      but don't commit until we're sure the
04474                      *      creation was a success.
04475                      */
04476                     u16 maybe_last_colon_fcode = nextfcode ;
04477                     unsigned int maybe_last_colon_lineno = lineno;
04478                     unsigned int maybe_last_colon_abs_token_no = abs_token_no;
04479                     unsigned int maybe_last_colon_do_depth = do_loop_depth;
04480                     /*  last_colon_defname
04481                      *     has to wait until after call to  create_word()
04482                      */
04483 
04484                     if ( create_word(tok) )
04485                     {
04486                         last_colon_fcode = maybe_last_colon_fcode;
04487                         last_colon_lineno = maybe_last_colon_lineno;
04488                         last_colon_abs_token_no = maybe_last_colon_abs_token_no;
04489                         last_colon_do_depth = maybe_last_colon_do_depth;
04490                         collect_input_filename( &last_colon_filename);
04491                         /*  Now we can get  last_colon_defname  */
04492                         if ( last_colon_defname != NULL )
04493                         {
04494                             free( last_colon_defname);
04495                         }
04496                         last_colon_defname = strdup(statbuf);
04497 
04498                 emit_token("b(:)");
04499                 incolon=TRUE;
04500                         hide_last_colon();
04501                         lastcolon = opc;
04502                     }
04503                 }
04504                 break;
04505         
04506         case SEMICOLON:
04507                 if ( test_in_colon("SEMICOLON", TRUE, TKERROR, NULL) )
04508                 {
04509                     ret_stk_balance_rpt( "termination,", TRUE);
04510                     /*  Clear Control Structures just back to where
04511                      *      the current Colon-definition began.
04512                      */
04513                     clear_control_structs_to_limit(
04514                         "End of colon-definition", last_colon_abs_token_no);
04515 
04516                     if ( ibm_locals )
04517                     {
04518                         finish_locals();
04519                         forget_locals();
04520                     }
04521 
04522                 emit_token("b(;)");
04523                 incolon=FALSE;
04524                     reveal_last_colon();
04525                 }
04526                 break;
04527 
04528         case CREATE:
04529                 if ( create_word(tok) )
04530                 {
04531                 emit_token("b(create)");
04532                 }
04533                 break;
04534 
04535         case DEFER:
04536                 if ( create_word(tok) )
04537                 {
04538                 emit_token("b(defer)");
04539                 }
04540                 break;
04541 
04542         case ALLOW_MULTI_LINE:
04543                 report_multiline = FALSE;
04544                 break;
04545 
04546         case OVERLOAD:
04547                 if ( test_in_colon(statbuf, FALSE, WARNING, NULL) )
04548                 {
04549                     do_not_overload = FALSE;
04550                 }
04551                 break;
04552 
04553         case DEFINED:
04554                 if (get_word_in_line( statbuf) )
04555                 {
04556                     eval_user_symbol(statbuf);
04557                 }
04558                 break;
04559 
04560         case CL_FLAG:
04561                 if (get_word_in_line( statbuf) )
04562                 {
04563                      set_cl_flag( statbuf, TRUE);
04564                 }
04565                 break;
04566 
04567         case SHOW_CL_FLAGS:
04568                 show_all_cl_flag_settings( TRUE);
04569                 break;
04570 
04571         case FIELD:
04572                 if ( create_word(tok) )
04573                 {
04574                 emit_token("b(field)");
04575                 }
04576                 break;
04577 
04578         case VALUE:
04579                 if ( create_word(tok) )
04580                 {
04581                 emit_token("b(value)");
04582                 }
04583                 break;
04584                 
04585         case VARIABLE:
04586                 if ( create_word(tok) )
04587                 {
04588                 emit_token("b(variable)");
04589                 }
04590                 break;
04591 
04592         case AGAIN:
04593                 emit_again();
04594                 break;
04595 
04596         case ALIAS:
04597                 if ( create_alias() )
04598                 {
04599                     trace_creation( ALIAS, statbuf);
04600                 }
04601                 break;
04602 
04603         case CONTROL:
04604                 if ( get_word_in_line( statbuf) )
04605                 {
04606                     emit_literal(statbuf[0]&0x1f);
04607                 }
04608                 break;
04609 
04610         case DO:
04611                 emit_token("b(do)");
04612                 mark_do();
04613                 break;
04614 
04615         case CDO:
04616                 emit_token("b(?do)");
04617                 mark_do();
04618                 break;
04619 
04620         case ELSE:
04621                 emit_else();
04622                 break;
04623 
04624         case CASE:
04625                 emit_case();
04626                 break;
04627 
04628         case ENDCASE:
04629                 emit_endcase();
04630                 break;
04631 
04632         case NEW_DEVICE:
04633                 handy_toggle = FALSE;
04634         case FINISH_DEVICE:
04635                 finish_or_new_device( handy_toggle );
04636                 break;
04637 
04638         case FLITERAL:
04639                 {
04640                     u32 val;
04641                     val = dpop();
04642                     emit_literal(val);
04643                 }
04644                 break;
04645 
04646         case OF:
04647                 emit_of();
04648                 break;
04649 
04650         case ENDOF:
04651                 emit_endof();
04652                 break;
04653                 
04654         case EXTERNAL:
04655                 set_hdr_flag( FLAG_EXTERNAL );
04656                 break;
04657                 
04658         case HEADERLESS:
04659                 set_hdr_flag( FLAG_HEADERLESS );
04660                 break;
04661         
04662         case HEADERS:
04663                 set_hdr_flag( FLAG_HEADERS );
04664                 break;
04665 
04666         case DECIMAL:
04667                 /* in a definition this is expanded as macro "10 base !" */
04668                 base_change ( 0x0a );
04669                 break;
04670                 
04671         case HEX:
04672                 base_change ( 0x10 );
04673                 break;
04674 
04675         case OCTAL:
04676                 base_change ( 0x08 );
04677                 break;
04678 
04679         case OFFSET16:
04680                 if (!offs16)
04681                 {
04682                     tokenization_error(INFO, "Switching to 16-bit offsets.\n");
04683                 }else{
04684                     tokenization_error(WARNING,
04685                         "Call of OFFSET16 is redundant.\n");
04686                 }
04687                 emit_token("offset16");
04688                 offs16=TRUE;
04689                 break;
04690 
04691         case IF:
04692                 emit_if();
04693                 break;
04694 
04695 /* **************************************************************************
04696  *
04697  *      Still to be done:
04698  *          Correct analysis of Return-Stack usage within Do-Loops
04699  *              or before Loop Elements like I and J or UNLOOP or LEAVE.
04700  *
04701  **************************************************************************** */
04702         case UNLOOP:
04703                 emit_token("unloop");
04704                 must_be_deep_in_do(1);
04705                 break;
04706 
04707         case LEAVE:
04708                 emit_token("b(leave)");
04709                 must_be_deep_in_do(1);
04710                 break;
04711 
04712         case LOOP_I:
04713                 emit_token("i");
04714                 must_be_deep_in_do(1);
04715                 break;
04716 
04717         case LOOP_J:
04718                 emit_token("j");
04719                 must_be_deep_in_do(2);
04720                 break;
04721                 
04722         case LOOP:
04723                 emit_token("b(loop)");
04724                 resolve_loop();
04725                 break;
04726                 
04727         case PLUS_LOOP:
04728                 emit_token("b(+loop)");
04729                 resolve_loop();
04730                 break;
04731 
04732 
04733         case INSTANCE:
04734                 {
04735                     bool set_instance_state = FALSE;
04736                     bool emit_instance = TRUE;
04737                     /*  We will treat "instance" in a colon-definition as
04738                      *      an error, but allow it to be emitted if we're
04739                      *      ignoring errors; if we're not ignoring errors,
04740                      *      there's no output anyway...
04741                      */
04742                     if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) )
04743                     {   /*   We are in interpretation (not colon) state.  */
04744                         /*  "Instance" not allowed during "global" scope  */ 
04745                         if ( scope_is_global )
04746                         {
04747                             glob_not_allowed( WARNING, FALSE );
04748                             emit_instance = FALSE;
04749                         }else{
04750                             set_instance_state = TRUE;
04751                         }
04752                     }
04753                     if ( emit_instance )
04754                     {
04755                         if ( set_instance_state )
04756                         {
04757                             /*  "Instance" isn't cumulative....  */
04758                             if ( is_instance )
04759                             {
04760                                 unresolved_instance( WARNING);
04761                             }
04762                             collect_input_filename( &instance_filename);
04763                             instance_lineno = lineno;
04764                             is_instance = TRUE;
04765                             dev_change_instance_warning = TRUE;
04766                         }
04767                         emit_token("instance");
04768                     }
04769                 }
04770                 break;
04771                 
04772         case GLOB_SCOPE:
04773                 if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) )
04774                 {
04775                     if ( INVERSE( is_instance) )
04776                     {
04777                         enter_global_scope();
04778                     }else{
04779                         tokenization_error( TKERROR,
04780                             "Global Scope not allowed.  "
04781                             "\"Instance\" is in effect; issued" );
04782                         just_where_started( instance_filename,
04783                                                 instance_lineno );
04784                     }
04785                 }
04786                 break;
04787 
04788         case DEV_SCOPE:
04789                 if ( test_in_colon(statbuf, FALSE, TKERROR, NULL) )
04790                 {
04791                     resume_device_scope();
04792                 }
04793                 break;
04794 
04795         case TICK:             /*    '    */
04796                 test_in_colon(statbuf, FALSE, WARNING, "[']");
04797         case BRACK_TICK:       /*   [']   */
04798                 {
04799                     tic_hdr_t *token_entry;
04800                     if ( get_token( &token_entry) )
04801                     {
04802                         emit_token("b(')");
04803                         /* Emit the token; warning or whatever comes gratis */
04804                         token_entry->funct( token_entry->pfield);
04805                     }
04806                 }
04807                 break;
04808 
04809         case F_BRACK_TICK:     /*  F['] <name>
04810                                 *     emits the token-number for <name>
04811                                 *  Mainly useful to compute the argument
04812                                 *     to   get-token   or  set-token
04813                                 */
04814                 {
04815                     tic_hdr_t *token_entry;
04816                     if ( get_token( &token_entry) )
04817                     {
04818                         /*  "Obsolete" warning doesn't come gratis here...  */
04819                         token_entry_warning( token_entry);
04820                         /*  In Tokenizer-Escape mode, push the token  */
04821                         if ( in_tokz_esc )
04822                         {
04823                             dpush( token_entry->pfield.deflt_elem);
04824                         }else{
04825                             emit_literal( token_entry->pfield.deflt_elem);
04826                         }
04827                     }
04828                 }
04829                 break;
04830 
04831         case CHAR:
04832                 handy_toggle = FALSE;
04833         case CCHAR:
04834                 test_in_colon(statbuf, handy_toggle, WARNING,
04835                     handy_toggle ? "CHAR" : "[CHAR]" );
04836         case ASCII:
04837                 if ( get_word_in_line( statbuf) )
04838                 {
04839                     emit_literal(statbuf[0]);
04840                 }
04841                 break;
04842                 
04843         case UNTIL:
04844                 emit_until();
04845                 break;
04846 
04847         case WHILE:
04848                 emit_while();
04849                 break;
04850                 
04851         case REPEAT:
04852                 emit_repeat();
04853                 break;
04854                 
04855         case THEN:
04856                 emit_then();
04857                 break;
04858 
04859         case IS:
04860                 tokenization_error ( INFO,
04861                      "Substituting  TO  for deprecated  IS\n");
04862         case TO:
04863                 if ( validate_to_target() )
04864                 {
04865                 emit_token("b(to)");
04866                 }
04867                 break;
04868 
04869         case FLOAD:
04870                 if ( get_word_in_line( statbuf) )
04871                 {
04872                     bool stream_ok ;
04873                         
04874                     push_source( close_stream, NULL, TRUE) ;
04875                         
04876                     tokenization_error( INFO, "FLOADing %s\n", statbuf );
04877                         
04878                     stream_ok = init_stream( statbuf );
04879                     if ( INVERSE( stream_ok) )
04880                     {
04881                         drop_source();
04882                     }
04883                 }
04884                 break;
04885 
04886         case STRING:         /*  Double-Quote ( " ) string  */
04887                 handy_toggle = FALSE;
04888         case PSTRING:        /*  Dot-Quote  ( ." ) string   */
04889                 wlen=get_string( TRUE);
04890                 emit_token("b(\")");
04891                 emit_string(statbuf, wlen);
04892                 if ( handy_toggle )
04893                 {
04894                     emit_token("type");
04895                 }
04896                 break;
04897 
04898         case SSTRING:        /*  Ess-Quote  ( s"  ) string  */
04899                 handy_toggle = FALSE;
04900         case PBSTRING:       /*  Dot-Paren  .(   string  */
04901                 if (*pc++=='\n') lineno++;
04902                 {
04903                     unsigned int strt_lineno = lineno;
04904                     wlen = get_until( handy_toggle ? ')' : '"' );
04905                     if ( string_err_check( handy_toggle,
04906                              sav_lineno, strt_lineno) )
04907                     {
04908                 emit_token("b(\")");
04909                         emit_string(statbuf, wlen);
04910                         if ( handy_toggle )
04911                         {
04912                 emit_token("type");
04913                         }
04914                     }
04915                 }
04916                 break;
04917 
04918         case FUNC_NAME:
04919                 if ( test_in_colon( statbuf, TRUE, TKERROR, NULL) )
04920                 {
04921                     if ( in_tokz_esc )
04922                     {
04923                         tokenization_error( P_MESSAGE, "Currently" );
04924                         in_last_colon();
04925                     }else{
04926                 emit_token("b(\")");
04927                         emit_string( last_colon_defname,
04928                             strlen( last_colon_defname) );
04929                         /*  if ( hdr_flag == FLAG_HEADERLESS ) { WARNING } */
04930                     }
04931                 }
04932                 break;
04933 
04934         case IFILE_NAME:
04935                 emit_token("b(\")");
04936                 emit_string( iname, strlen( iname) );
04937                 break;
04938 
04939         case ILINE_NUM:
04940                 emit_literal( lineno);
04941                 break;
04942                         
04943         case HEXVAL:
04944                 base_val (0x10);
04945                 break;
04946                 
04947         case DECVAL:
04948                 base_val (0x0a);
04949                 break;
04950                 
04951         case OCTVAL:
04952                 base_val (8);
04953                 break;
04954 
04955         case ASC_LEFT_NUM:
04956                 handy_toggle = FALSE;
04957         case ASC_NUM:
04958                 if (get_word_in_line( statbuf) )
04959                 {
04960                     if ( handy_toggle )
04961                     {
04962                         ascii_right_number( statbuf);
04963                         } else {
04964                         ascii_left_number( statbuf);
04965                         }
04966                 }
04967                 break;
04968 
04969         case CONDL_ENDER:   /*  Conditional directives out of context  */
04970         case CONDL_ELSE:
04971                 tokenization_error ( TKERROR,
04972                     "No conditional preceding %s directive\n",
04973                         strupr(statbuf) );
04974                 break;
04975 
04976         case PUSH_FCODE:
04977                 tokenization_error( INFO,
04978                     "FCode-token Assignment Counter of 0x%x "
04979                     "has been saved on stack.\n", nextfcode );
04980                 dpush( (long)nextfcode );
04981                 break;
04982 
04983         case POP_FCODE:
04984                 pop_next_fcode();
04985                 break;
04986 
04987         case RESET_FCODE:
04988                 tokenization_error( INFO,
04989                     "Encountered %s.  Resetting FCode-token "
04990                         "Assignment Counter.  ", strupr(statbuf) );
04991                 list_fcode_ranges( FALSE);
04992                 reset_fcode_ranges();
04993                 break;
04994                 
04995         case EXIT:
04996                 if ( test_in_colon( statbuf, TRUE, TKERROR, NULL)
04997                      || noerrors )
04998                 {
04999                     ret_stk_balance_rpt( NULL, FALSE);
05000                     if ( ibm_locals )
05001                     {
05002                         finish_locals ();
05003                     }
05004                     emit_token("exit");
05005                 }
05006                 break;
05007 
05008         case ESCAPETOK:
05009                 enter_tokz_esc();
05010                 break;
05011         
05012         case VERSION1:
05013         case FCODE_V1:
05014                 tokenization_error( INFO, "Using version1 header "
05015                     "(8-bit offsets).\n");
05016                 fcode_starter( "version1", 1, FALSE) ;
05017                 break;
05018         
05019         case START1:
05020         case FCODE_V2:
05021         case FCODE_V3: /* Full IEEE 1275 */
05022                 fcode_starter( "start1", 1, TRUE);
05023                 break;
05024                 
05025         case START0:
05026                 fcode_starter( "start0", 0, TRUE);
05027                 break;
05028                 
05029         case START2:
05030                 fcode_starter( "start2", 2, TRUE);
05031                 break;
05032                 
05033         case START4:
05034                 fcode_starter( "start4", 4, TRUE);
05035                 break;
05036                 
05037         case END1:
05038                 tokenization_error( WARNING, 
05039                     "Appearance of END1 in FCode source code "
05040                         "is not intended by IEEE 1275-1994\n");
05041                 handy_toggle = FALSE;
05042         case END0:
05043         case FCODE_END:
05044                 if ( handy_toggle )
05045                 {
05046                     you_are_here();
05047                 }
05048                 emit_token( handy_toggle ? "end0" : "end1" );
05049                 fcode_ender();
05050                 FFLUSH_STDOUT
05051                 break;
05052 
05053         case RECURSE:
05054                 if ( test_in_colon(statbuf, TRUE, TKERROR, NULL ) )
05055                 {
05056                     emit_fcode(last_colon_fcode);
05057                 }
05058                 break;
05059                 
05060 
05061         case RECURSIVE:
05062                 if ( test_in_colon(statbuf, TRUE, TKERROR, NULL ) )
05063                 {
05064                     reveal_last_colon();
05065                 }
05066                 break;
05067 
05068         case RET_STK_FETCH:
05069                 ret_stk_access_rpt();
05070                 emit_token( "r@");
05071                 break;
05072 
05073         case RET_STK_FROM:
05074                 ret_stk_access_rpt();
05075                 bump_ret_stk_depth( -1);
05076                 emit_token( "r>");
05077                 break;
05078 
05079         case RET_STK_TO:
05080                 bump_ret_stk_depth( 1);
05081                 emit_token( ">r");
05082                 break;
05083 
05084         case PCIHDR:
05085                 emit_pcihdr();
05086                 break;
05087         
05088         case PCIEND:
05089                 finish_pcihdr();
05090                 reset_fcode_ranges();
05091                 FFLUSH_STDOUT
05092                 break;
05093 
05094         case PCIREV:
05095                 pci_image_rev = dpop();
05096                 tokenization_error( INFO,
05097                     "PCI header revision=0x%04x%s\n", pci_image_rev,
05098                         big_end_pci_image_rev ?
05099                             ".  Will be saved in Big-Endian format."
05100                             : ""  );
05101                 break;
05102 
05103         case NOTLAST:
05104                 handy_toggle = FALSE;
05105         case ISLAST:
05106                 dpush(handy_toggle);
05107         case SETLAST:
05108                 {
05109                     u32 val = dpop();
05110                     bool new_pili = BOOLVAL( (val != 0) );
05111                     if ( pci_is_last_image != new_pili )
05112                     {
05113                         tokenization_error( INFO,
05114                             new_pili ?
05115                                 "Last image for PCI header.\n" :
05116                                 "PCI header not last image.\n" );
05117                         pci_is_last_image = new_pili;
05118                     }
05119                 }
05120                 break;
05121                 
05122         case SAVEIMG:
05123                 if (get_word_in_line( statbuf) )
05124                 {
05125                     free(oname);
05126                     oname = strdup( statbuf );
05127                     tokenization_error( INFO,
05128                         "Output is redirected to file:  %s\n", oname);
05129                 }
05130                 break;
05131 
05132         case RESETSYMBS:
05133                 tokenization_error( INFO,
05134                     "Resetting symbols defined in %s mode.\n",
05135                         in_tokz_esc ? "tokenizer-escape" : "\"normal\"");
05136                 if ( in_tokz_esc )
05137                 {
05138                     reset_tokz_esc();
05139                 }else{
05140                     reset_normal_vocabs();
05141                 }       
05142                 break;
05143 
05144         case FCODE_DATE:
05145                 handy_toggle = FALSE;
05146         case FCODE_TIME:
05147                 {
05148                         time_t tt;
05149                     char temp_buffr[32];
05150                         
05151                         tt=time(NULL);
05152                     if ( handy_toggle )
05153                     {
05154                         strftime(temp_buffr, 32, "%T %Z", localtime(&tt));
05155                     }else{
05156                         strftime(temp_buffr, 32, "%m/%d/%Y", localtime(&tt));
05157                     }
05158                     if ( in_tokz_esc )
05159                     {
05160                         tokenization_error( MESSAGE, temp_buffr);
05161                     }else{
05162                         emit_token("b(\")");
05163                         emit_string((u8 *)temp_buffr, strlen(temp_buffr) );
05164                     }
05165                 }
05166                 break;
05167 
05168         case ENCODEFILE:
05169                 if (get_word_in_line( statbuf) )
05170                 {
05171                 encode_file( (char*)statbuf );
05172                 }
05173                 break;
05174 
05175         default:
05176             /*  IBM-style Locals, under control of a switch  */
05177             if ( ibm_locals )
05178             {
05179                 bool found_it = TRUE;
05180                 switch (tok) {
05181                     case CURLY_BRACE:
05182                         declare_locals( FALSE);
05183                         break;
05184                     case DASH_ARROW:
05185                         assign_local();
05186                         break;
05187                     default:
05188                         found_it = FALSE;
05189         }
05190                 if ( found_it ) break;
05191 }
05192 
05193             /*  Down here, we have our last chance to recognize a token.
05194              *      If  abort_quote  is disallowed, we will still consume
05195              *      the string.  In case the string spans more than one
05196              *      line, we want to make sure the line number displayed
05197              *      in the error-message is the one on which the disallowed
05198              *       abort_quote  token appeared, not the one where the
05199              *      string ended; therefore, we might need to be able to
05200              *      "fake-out" the line number...
05201              */
05202 {
05203                 bool fake_out_lineno = FALSE;
05204                 unsigned int save_lineno = lineno;
05205                 unsigned int true_lineno;
05206                 if ( abort_quote( tok) )
05207                 {   break;
05208                 }else{
05209                     if ( tok == ABORTTXT )  fake_out_lineno = TRUE;
05210                 }
05211                 true_lineno = lineno;
05212 
05213                 if ( fake_out_lineno )  lineno = save_lineno;
05214                 tokenization_error ( TKERROR,
05215                     "Unimplemented control word '%s'\n", strupr(statbuf) );
05216                 if ( fake_out_lineno )  lineno = true_lineno;
05217             }
05218         }
05219 }
05220         
05221 /* **************************************************************************
05222  *
05223  *      Function name:  skip_string
05224  *      Synopsis:       When Ignoring, skip various kinds of strings.  Maps
05225  *                          to string-handlers in handle_internal()...
05226  *
05227  *      Associated FORTH words:                 Double-Quote ( " ) string
05228  *                                              Dot-Quote  ( ." ) string
05229  *                                              Ess-Quote  ( s"  ) string
05230  *                                              Dot-Paren  .(   string
05231  *                                              ABORT" (even if not enabled)
05232  *             { (Local-Values declaration) and -> (Local-Values assignment)
05233  *                  are also handled if  ibm_locals  is enabled.
05234  *
05235  *      Inputs:
05236  *         Parameters:
05237  *             pfield               Param-field of the entry associated with
05238  *                                      the FWord that is being Ignored.
05239  *         Global Variables:
05240  *             statbuf              The word that was just read.
05241  *             pc                   Input-stream pointer
05242  *             lineno               Line-number, used for errors and warnings
05243  *             ibm_locals           TRUE if IBM-style Locals are enabled
05244  *
05245  *      Outputs:
05246  *         Returned Value:          NONE
05247  *
05248  *      Error Detection:
05249  *          Multi-line warnings, "Unterminated" Errors
05250  *              handled by called routines
05251  *
05252  *      Extraneous Remarks:
05253  *          We would prefer to keep this function private, too, so we
05254  *              will declare its prototype here and in the one other
05255  *              file where we need it, namely, dictionary.c, rather
05256  *              than exporting it widely in a  .h  file.
05257  *
05258  **************************************************************************** */
05259 
05260 void skip_string( tic_param_t pfield);
05261 void skip_string( tic_param_t pfield)
05262 {
05263     fwtoken tok = pfield.fw_token;
05264     unsigned int sav_lineno = lineno;
05265     bool handy_toggle = TRUE ;   /*  Various uses...   */
05266                         
05267     switch (tok) {
05268     case STRING:         /*  Double-Quote ( " ) string    */
05269     case PSTRING:        /*  Dot-Quote  ( ." ) string     */
05270     case ABORTTXT:       /*  ABORT", even if not enabled  */
05271         get_string( FALSE);   /*  Don't truncate; ignoring anyway  */
05272         /*  Will handle multi-line warnings, etc.   */
05273                                 break;
05274                         
05275     case SSTRING:        /*  Ess-Quote  ( s"  ) string  */
05276         handy_toggle = FALSE;
05277     case PBSTRING:       /*  Dot-Paren  .(   string  */
05278                         if (*pc++=='\n') lineno++;
05279         {
05280             unsigned int strt_lineno = lineno;
05281             get_until( handy_toggle ? ')' : '"' );
05282             string_err_check( handy_toggle, sav_lineno, strt_lineno );
05283         }
05284         break;
05285 
05286     default:
05287         /*  IBM-style Locals, under control of a switch  */
05288         if ( ibm_locals )
05289         {
05290             bool found_it = TRUE;
05291             switch (tok) {
05292                 case CURLY_BRACE:
05293                     declare_locals( TRUE);
05294                     break;
05295                 case DASH_ARROW:
05296                     get_word();
05297                     break;
05298                 default:
05299                     found_it = FALSE;
05300             }
05301             if ( found_it ) break;
05302         }
05303 
05304         tokenization_error ( FATAL,  "Program Error.  "
05305             "Unimplemented skip-string word '%s'\n", strupr(statbuf) );
05306     }
05307 }
05308 
05309 /* **************************************************************************
05310  *
05311  *      Function name:  process_remark
05312  *      Synopsis:       The active function for remarks (backslash-space)
05313  *                          and comments (enclosed within parens)
05314  *
05315  *      Associated FORTH word(s):        \   (         
05316  *
05317  *      Inputs:
05318  *         Parameters:
05319  *             TIC entry "parameter field", init'd to delimiter character.
05320  *
05321  *      Outputs:
05322  *         Returned Value:          NONE
05323  *
05324  *      Error Detection:
05325  *          Warning if end-of-file encountered before delimiter.
05326  *          Warning if multi-line parentheses-delimited comment.
05327  *
05328  *      Process Explanation:
05329  *          Skip until the delimiter.
05330  *          If end-of-file was encountered, issue Warning.
05331  *          Otherwise, and if delimiter was not new-line,
05332  *              check for multi-line with Warning.
05333  *
05334  **************************************************************************** */
05335 
05336 void process_remark( tic_param_t pfield )
05337 {
05338     char until_char = (char)pfield.deflt_elem ;
05339     unsigned int start_lineno = lineno;
05340 
05341 #ifdef DEBUG_SCANNER
05342 
05343     get_until(until_char);
05344                         printf ("%s:%d: debug: stack diagram: %s)\n",
05345                                                 iname, lineno, statbuf);
05346 #else
05347 
05348     if ( skip_until( until_char) )
05349     {
05350         if ( until_char == '\n' )
05351         {
05352             /*  Don't need any saved line number here ...  */
05353             tokenization_error ( WARNING,
05354                 "Unterminated remark.\n");
05355         }else{
05356             warn_unterm( WARNING, "comment", start_lineno);
05357         }
05358     }else{
05359         if ( until_char != '\n' )
05360         {
05361             pc++;
05362             warn_if_multiline( "comment", start_lineno);
05363         }
05364     }
05365 #endif  /*  DEBUG_SCANNER  */
05366 }
05367                 
05368                         
05369 /* **************************************************************************
05370  *
05371  *      Function name:  filter_comments
05372  *      Synopsis:       Process remarks and comments in special conditions
05373  *      
05374  *      Inputs:
05375  *         Parameters:
05376  *             inword             Current word just parsed
05377  *
05378  *      Outputs:
05379  *         Returned Value:        TRUE if Current word is a Comment-starter.
05380  *                                    Comment will be processed
05381  *
05382  *      Process Explanation:
05383  *          We want to be able to recognize any alias the user may have
05384  *              defined to a comment-delimiter, in whatever applicable
05385  *              vocabulary it might be.
05386  *          The active-function of any such alias will, of necessity, be
05387  *              the  process_remark()  routine, defined just above.
05388  *          We will search for the TIC-entry of the given word; if we don't    
05389  *              find it, it's not a comment-delimiter.  If we do find it, 
05390  *              and it is one, we invoke its active-function and return TRUE.
05391  *          We also want to permit the "allow-multiline-comments" directive   
05392  *              to be processed in the context that calls this routine, so
05393  *              we will check for that condition, too.
05394  *
05395  **************************************************************************** */
05396 
05397 bool filter_comments( u8 *inword)
05398 {
05399     bool retval = FALSE;
05400     tic_hdr_t *found = lookup_word( inword, NULL, NULL );
05401                         
05402     if ( found != NULL )
05403     {
05404         if ( found->funct == process_remark )
05405         {
05406             found->funct( found->pfield);
05407             retval = TRUE;
05408         }else{
05409             /*  Permit the "allow-multiline-comments" directive  */
05410             if ( found->funct == handle_internal )
05411             {
05412                 if ( found->pfield.fw_token == ALLOW_MULTI_LINE )
05413                 {
05414                     /*   Make sure any intended side-effects occur...  */
05415                     found->funct( found->pfield);
05416                     retval = TRUE;
05417                 }
05418             }
05419         }
05420     }
05421     return ( retval );
05422                 }
05423 
05424                 
05425 /* **************************************************************************
05426  *
05427  *      Function name:  tokenize_one_word
05428  *      Synopsis:       Tokenize the currently-obtained word
05429  *                          along with whatever it consumes.
05430  *
05431  *      Inputs:
05432  *         Parameters:
05433  *             wlen       Length of symbol just retrieved from the input stream
05434  *                              This is not really used here any more; it's
05435  *                              left over from an earlier implementation.
05436  *         Global Variables:        
05437  *             statbuf      The symbol (word) just retrieved from input stream.
05438  *             in_tokz_esc  TRUE if "Tokenizer-Escape" mode is in effect; a
05439  *                              different set of vocabularies from "Normal"
05440  *                              mode will be checked (along with those that
05441  *                              are common to both modes).  
05442  *             ibm_locals   Controls whether to check for IBM-style Locals;
05443  *                              set by means of a command-line switch.
05444  *
05445  *      Outputs:
05446  *         Returned Value:      NONE
05447  *         Global Variables:         
05448  *             statbuf          May be incremented    
05449  *             in_tokz_esc      May be set if the word just retrieved is
05450  *                                  the  tokenizer[   directive. 
05451  *             tic_found        
05452  *
05453  *      Error Detection:
05454  *           If the word could neither be identified nor processed as a number,
05455  *               that is an ERROR; pass it to  tokenized_word_error  for a
05456  *               message.
05457  *
05458  *      Process Explanation:
05459  *          Look for the word in each of the various lists and vocabularies
05460  *              in which it might be found, as appropriate to the current
05461  *              state of activity.
05462  *          If found, process it accordingly.
05463  *          If not found, try to process it as a number.
05464  *          If cannot process it as a number, declare an error.
05465  *
05466  *      Revision History:
05467  *          Updated Tue, 10 Jan 2006 by David L. Paktor
05468  *              Convert to  tic_hdr_t  type vocabularies.
05469  *          Updated Mon, 03 Apr 2006 by David L. Paktor
05470  *             Replaced bulky "Normal"-vs-"Escape" block with a call
05471  *                 to  lookup_word .  Attend to a small but important
05472  *                 side-effect of the "handle_<vocab>" routines that
05473  *                 feeds directly into the protection against self-
05474  *                 -recursion in a user-defined Macro:  Set the global
05475  *                 variable  tic_found  to the entry, just before we
05476  *                 execute it, and we're good to go... 
05477  *
05478  *      Extraneous Remarks:
05479  *          We trade off the strict rules of structure for simplicity
05480  *              of coding.
05481  *
05482  **************************************************************************** */
05483                 
05484 void tokenize_one_word( signed long wlen )
05485 {
05486                 
05487     /*  The shared lookup routine now handles everything.   */
05488     tic_hdr_t *found = lookup_word( statbuf, NULL, NULL );
05489                 
05490     if ( found != NULL )
05491     {
05492         tic_found = found;
05493         found->funct( found->pfield);
05494         return ;
05495     }
05496                 
05497     /*  It's not a word in any of our current contexts.
05498      *      Is it a number?
05499      */
05500     if ( handle_number() )
05501     {
05502         return ;
05503                         }
05504 
05505     /*  Could not identify - give a shout. */
05506     tokenized_word_error( statbuf );
05507                 }
05508 
05509 /* **************************************************************************
05510  *
05511  *      Function name:  tokenize
05512  *      Synopsis:       Tokenize the current input stream.
05513  *                          May be called recursively for macros and such.
05514  *
05515  *      Revision History:
05516  *      Updated Thu, 24 Mar 2005 by David L. Paktor
05517  *          Factor-out comment-filtration; apply to  gather_locals
05518  *          Factor-out tokenizing a single word (for conditionals)
05519  *          Separate actions of "Tokenizer-Escape" mode.
05520  *
05521  **************************************************************************** */
05522 
05523 void tokenize(void)
05524 {
05525     signed long wlen = 0;
05526                 
05527     while ( wlen >= 0 )
05528     {
05529         wlen = get_word();
05530         if ( wlen > 0 )
05531         {
05532             tokenize_one_word( wlen );
05533         }
05534         }
05535 }
05536